From 4d06258a8dc2e9cf6f3815832dd53a498141f500 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 28 Feb 2020 22:47:31 +0200 Subject: [PATCH 1/6] Handle __constant__ robustly. --- clang/lib/CodeGen/CGDeclCXX.cpp | 10 +- clang/lib/CodeGen/CodeGenModule.cpp | 33 ++++-- clang/lib/CodeGen/TargetInfo.cpp | 24 +++- clang/lib/Sema/SemaDecl.cpp | 9 +- clang/lib/Sema/SemaOverload.cpp | 3 +- llvm/lib/Transforms/HC/CMakeLists.txt | 1 + .../HC/PromoteConstant/CMakeLists.txt | 3 + .../HC/PromoteConstant/PromoteConstant.cpp | 111 ++++++++++++++++++ 8 files changed, 170 insertions(+), 24 deletions(-) create mode 100644 llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt create mode 100644 llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 720a5f1912a5a..470b3b5012686 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -444,9 +444,13 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, D->hasAttr())) return; - if ( (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath && - D->hasAttr()) || - (getLangOpts().OpenMP && getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))) + if (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath) + if (D->hasAttr() || + (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__")) + return; + if (getLangOpts().OpenMP && + getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)) return; // Check if we've already initialized this decl. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 1490fea42c174..99506fd3abe66 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -448,11 +448,7 @@ void CodeGenModule::Release() { } EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); - // skip global annotation for HCC kernel path - if (Context.getLangOpts().CPlusPlusAMP && getCodeGenOpts().AMPIsDevice) { - } else { - EmitGlobalAnnotations(); - } + EmitGlobalAnnotations(); EmitStaticExternCAliases(); EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) @@ -2980,7 +2976,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { } } - PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), + PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), Context.getSourceManager(), "Generating code for declaration"); @@ -3936,15 +3932,23 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::cuda_device; } - if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath && - D && D->hasAttr()) - return LangAS::hcc_tilestatic; + if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath) { + if (D && D->hasAttr()) + return LangAS::hcc_tilestatic; + if (D && D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__") + return LangAS::opencl_constant; + if (D && D->getType().isConstQualified()) + return LangAS::opencl_constant; + return LangAS::hcc_global; + } if (LangOpts.OpenMP) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; } + return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } @@ -4226,6 +4230,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, } } + if (GV && LangOpts.CPlusPlusAMP) { + // This mimics the CUDA-specific processing done above. + bool IsHIPConstant = D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__"; + + if (LangOpts.DevicePath) + if (Linkage != llvm::GlobalValue::InternalLinkage && + (D->hasAttr() || IsHIPConstant)) + GV->setExternallyInitialized(true); + } + if (!IsHIPPinnedShadowVar) GV->setInitializer(Init); if (emitter) emitter->finalize(GV); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index c10797c603e08..0609f4f4f4485 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8046,12 +8046,26 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, llvm::GlobalValue *GV) { if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) return false; + if (D->hasAttr()) + return true; + if (isa(D)) { + if (D->hasAttr()) + return true; + if (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_global_function__") + return true; + } + if (isa(D)) { + if (D->hasAttr() || D->hasAttr()) + return true; + if (D->hasAttr()) + return true; + if (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__") + return true; + } - return D->hasAttr() || - (isa(D) && D->hasAttr()) || - (isa(D) && - (D->hasAttr() || D->hasAttr() || - D->hasAttr())); + return false; } static bool requiresAMDGPUDefaultVisibility(const Decl *D, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6fe01d07fd149..206a6f2f54b6b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7788,12 +7788,11 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } - if (getLangOpts().CPlusPlusAMP) { - if (SC == SC_None && S->getFnParent() != nullptr && - (NewVD->hasAttr())) { + if (getLangOpts().CPlusPlusAMP && SC == SC_None && S->getFnParent()) + if (NewVD->hasAttr() || + (NewVD->hasAttr() && + NewVD->getAttr()->getAnnotation() == "__HIP_contant__")) NewVD->setStorageClass(SC_Static); - } - } // Ensure that dllimport globals without explicit storage class are treated as // extern. The storage class is set above using parsed attributes. Now we can diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3109117aff35b..ea95104026478 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13140,8 +13140,7 @@ static void maybeCastArgsForHIPGlobalFunction(Sema &S, if (FormalT == ActualT) return Actual; if (FormalT->isReferenceType()) return Actual; - CastKind CK; - if (FormalT->isPointerType()) CK = CK_NoOp; + CastKind CK = CK_NoOp; if (FormalT->isIntegerType()) { if (ActualT->isIntegerType()) CK = CK_IntegralCast; if (ActualT->isFloatingType()) CK = CK_FloatingToIntegral; diff --git a/llvm/lib/Transforms/HC/CMakeLists.txt b/llvm/lib/Transforms/HC/CMakeLists.txt index 87ce927ab3290..0cdef266f160d 100644 --- a/llvm/lib/Transforms/HC/CMakeLists.txt +++ b/llvm/lib/Transforms/HC/CMakeLists.txt @@ -1,5 +1,6 @@ if (UNIX) set(LLVM_ENABLE_PLUGINS ON) + add_subdirectory(PromoteConstant) add_subdirectory(PromotePointerKernArgsToGlobal) add_subdirectory(SelectAcceleratorCode) endif() diff --git a/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt b/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt new file mode 100644 index 0000000000000..58883e8ab89ef --- /dev/null +++ b/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMPromoteConstant MODULE PromoteConstant.cpp) + +add_dependencies(LLVMPromoteConstant intrinsics_gen) \ No newline at end of file diff --git a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp new file mode 100644 index 0000000000000..72a9eaa6f3990 --- /dev/null +++ b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp @@ -0,0 +1,111 @@ +//===-- PromotePointerKernargsToGlobal.cpp - Promote Pointers To Global --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares and defines a pass which uses the double-cast trick ( +// flat-to-global and global-to-flat) for pointers that reside in the +// __constant__ address space. For example, given __constant__ int** foo, all +// single dereferences of foo will be promoted to yield a global int*, as +// opposed to a flat int*. It is preferable to execute SelectAcceleratorCode +// before, as this reduces the workload by pruning functions that are not +// reachable by an accelerator. It is mandatory to run InferAddressSpaces after, +// otherwise no benefit shall be obtained (the spurious casts do get removed). +//===----------------------------------------------------------------------===// +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { +class PromoteConstant : public ModulePass { + // TODO: query the address spaces robustly. + static constexpr unsigned int FlatAddrSpace{0u}; + static constexpr unsigned int GlobalAddrSpace{1u}; + static constexpr unsigned int ConstantAddrSpace{4u}; + + // TODO: this should be hoisted to a common header with HC utility functions + // once the related work on PromotePointerKernArgsToGlobal gets merged + void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + From->replaceAllUsesWith(To); + + Value *FToG = Builder.CreateAddrSpaceCast( + From, + cast( + From->getType())->getElementType()->getPointerTo(GlobalAddrSpace)); + Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); + + To->replaceAllUsesWith(GToF); + } + + // TODO: this should be hoisted to a common header with HC utility functions + // once the related work on PromotePointerKernArgsToGlobal gets merged + bool maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { + if (!UI) + return false; + + Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); + + Value *Tmp = Builder.CreateBitCast(UndefValue::get(UI->getType()), + UI->getType()); + createPromotableCast(Builder, UI, Tmp); + + return true; + } +public: + static char ID; + PromoteConstant() : ModulePass{ID} {} + + bool runOnModule(Module &M) override { + bool Modified = false; + + for (auto &&F : M.functions()) { + for (auto &&BB : F) { + for (auto &&I : BB) { + if (isa(I)) + continue; + if (isa(I)) + continue; + if (isa(I)) + continue; + if (!I.getType()->isPointerTy()) + continue; + if (I.getType()->getPointerAddressSpace() != FlatAddrSpace) + continue; + + if (auto GEPOp = dyn_cast(&I)) + if (GEPOp->getPointerAddressSpace() != ConstantAddrSpace) + continue; // TODO: not handled. + if (auto GEP = dyn_cast(&I)) + if (GEP->getPointerAddressSpace() != ConstantAddrSpace) + continue; + if (auto LI = dyn_cast(&I)) + if (LI->getPointerAddressSpace() != ConstantAddrSpace) + continue; + + IRBuilder<> Builder(I.getContext()); + if (maybePromoteUse(Builder, &I)) + Modified = true; + } + } + } + + return Modified; + } +}; +char PromoteConstant::ID = 0; + +static RegisterPass X{ + "promote-constant", + "Promotes uses of variables annotated with __constant__ to refer to the " + "global address space iff the use yields a flat pointer since, by " + "definition a pointer which is placed in __constant__ storage can only point " + "to the global address space.", + false, + false}; +} \ No newline at end of file From 561ee2bf26580babbce1e512537315bc8d4446e0 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 29 Feb 2020 01:24:56 +0200 Subject: [PATCH 2/6] Overhaul pointer promotion pass to catch more cases. (#3) * Overhaul pointer promotion pass to catch more cases. * Simplify temporary insertion. * Do not regress the handling of naked pointers. * Really correct insertion point. * UndefValue is much cleaner, so use it. * Fix messy formatting. * Use trivially removable temporaries. --- .../PromotePointerKernArgsToGlobal.cpp | 81 +++++++++++++------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp index b2b5513665193..5980b0831e62b 100644 --- a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp +++ b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp @@ -23,52 +23,81 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" -#include - using namespace llvm; -using namespace std; namespace { class PromotePointerKernArgsToGlobal : public FunctionPass { // TODO: query the address space robustly. static constexpr unsigned int GenericAddrSpace{0u}; static constexpr unsigned int GlobalAddrSpace{1u}; + + void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + From->replaceAllUsesWith(To); + + Value *FToG = Builder.CreateAddrSpaceCast( + From, + cast(From->getType()) + ->getElementType()->getPointerTo(GlobalAddrSpace)); + Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); + + To->replaceAllUsesWith(GToF); + } + + void maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { + if (!UI) + return; + + Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); + + Value *Tmp = Builder.CreateBitCast(UndefValue::get(UI->getType()), + UI->getType()); + createPromotableCast(Builder, UI, Tmp); + } + + void promoteArgument(IRBuilder<>& Builder, Argument *Arg) { + Value *Tmp = Builder.CreateBitCast(UndefValue::get(Arg->getType()), + Arg->getType()); + createPromotableCast(Builder, Arg, Tmp); + } public: static char ID; PromotePointerKernArgsToGlobal() : FunctionPass{ID} {} bool runOnFunction(Function &F) override { - if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) return false; + if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return false; + + SmallVector PromotableArgs; + SmallVector PromotableUses; + for (auto &&Arg : F.args()) { + for (auto &&U : Arg.users()) { + if (!U->getType()->isPointerTy()) + continue; + if (U->getType()->getPointerAddressSpace() != GenericAddrSpace) + continue; - SmallVector PtrArgs; - for_each(F.arg_begin(), F.arg_end(), [&](Argument &Arg) { - if (!Arg.getType()->isPointerTy()) return; - if (Arg.getType()->getPointerAddressSpace() != GenericAddrSpace) { - return; + PromotableUses.push_back(U); } - PtrArgs.push_back(&Arg); - }); + if (!Arg.getType()->isPointerTy()) + continue; + if (Arg.getType()->getPointerAddressSpace() != GenericAddrSpace) + continue; - if (PtrArgs.empty()) return false; + PromotableArgs.push_back(&Arg); + } - static IRBuilder<> Builder{F.getContext()}; - Builder.SetInsertPoint(&F.getEntryBlock().front()); + if (PromotableArgs.empty() && PromotableUses.empty()) + return false; - for_each(PtrArgs.begin(), PtrArgs.end(), [](Argument *PArg) { - Argument Tmp{PArg->getType(), PArg->getName()}; - PArg->replaceAllUsesWith(&Tmp); - - Value *FToG = Builder.CreateAddrSpaceCast( - PArg, - cast(PArg->getType()) - ->getElementType()->getPointerTo(GlobalAddrSpace)); - Value *GToF = Builder.CreateAddrSpaceCast(FToG, PArg->getType()); - - Tmp.replaceAllUsesWith(GToF); - }); + static IRBuilder<> Builder{F.getContext()}; + for (auto &&PU : PromotableUses) + maybePromoteUse(Builder, dyn_cast(PU)); + Builder.SetInsertPoint(&F.getEntryBlock().front()); + for (auto &&Arg : PromotableArgs) + promoteArgument(Builder, Arg); return true; } }; From 1bd774d2bfa609ffaf083813fc5530883c7fa936 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 6 Mar 2020 18:11:09 +0200 Subject: [PATCH 3/6] Fix and ptimise pass. No annotations on device --- clang/lib/CodeGen/CodeGenModule.cpp | 3 +- .../HC/PromoteConstant/PromoteConstant.cpp | 124 +++++++++++++----- 2 files changed, 91 insertions(+), 36 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 99506fd3abe66..eeff77619c32b 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -448,7 +448,8 @@ void CodeGenModule::Release() { } EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); - EmitGlobalAnnotations(); + if (!LangOpts.CPlusPlusAMP || !LangOpts.DevicePath) + EmitGlobalAnnotations(); EmitStaticExternCAliases(); EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) diff --git a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp index 72a9eaa6f3990..c459d79b644b4 100644 --- a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp +++ b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp @@ -32,6 +32,7 @@ class PromoteConstant : public ModulePass { // TODO: this should be hoisted to a common header with HC utility functions // once the related work on PromotePointerKernArgsToGlobal gets merged void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + To->dump(); From->replaceAllUsesWith(To); Value *FToG = Builder.CreateAddrSpaceCast( @@ -51,47 +52,102 @@ class PromoteConstant : public ModulePass { Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); - Value *Tmp = Builder.CreateBitCast(UndefValue::get(UI->getType()), - UI->getType()); + // We cannot use IRBuilder since it might do the obvious folding, which + // would yield an undef value of a possibly primitive type, which cannot be + // disambiguated from other undefs of the same primitive type and would + // cause havoc when replaced with the promotable cast created later. + Value *UD = UndefValue::get(Builder.getInt8Ty()); + Value *Tmp = CastInst::CreateBitOrPointerCast(UD, UI->getType(), "Tmp", + &*Builder.GetInsertPoint()); + createPromotableCast(Builder, UI, Tmp); return true; } + // TODO: Whilst ConstantExpr and Operator handling could obviously be folded + // into a single function, we leave them separate for now to allow + // possible additional development. + bool maybeHandleInstruction(IRBuilder<>& Builder, Instruction *I) { + if (!I) + return false; + + if (!I->getType()->isPointerTy()) + return false; + if (I->getType()->getPointerAddressSpace() != FlatAddrSpace) + return false; + + if (auto LI = dyn_cast(I)) + return maybePromoteUse(Builder, LI); + if (auto PHI = dyn_cast(I)) { + return false; + } + if (auto SEL = dyn_cast(I)) + return false; + + return false; + } + + bool maybeHandleOperator(IRBuilder<>& Builder, Operator *Op) { + if (!Op) + return false; + if (!Op->getType()->isPointerTy()) + return false; + + bool Modified = false; + for (auto &&U : Op->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + } + + return Modified; + } + + bool maybeHandleConstantExpr(IRBuilder<>& Builder, ConstantExpr *CE) { + if (!CE) + return false; + if (!CE->getType()->isPointerTy()) + return false; + + bool Modified = false; + for (auto &&U : CE->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; + } + + return Modified; + } public: static char ID; PromoteConstant() : ModulePass{ID} {} bool runOnModule(Module &M) override { - bool Modified = false; + SmallVector PromotableGlobals; + for (auto &&GV : M.globals()) + if (GV.getAddressSpace() == ConstantAddrSpace) + PromotableGlobals.push_back(&GV); - for (auto &&F : M.functions()) { - for (auto &&BB : F) { - for (auto &&I : BB) { - if (isa(I)) - continue; - if (isa(I)) - continue; - if (isa(I)) - continue; - if (!I.getType()->isPointerTy()) - continue; - if (I.getType()->getPointerAddressSpace() != FlatAddrSpace) - continue; - - if (auto GEPOp = dyn_cast(&I)) - if (GEPOp->getPointerAddressSpace() != ConstantAddrSpace) - continue; // TODO: not handled. - if (auto GEP = dyn_cast(&I)) - if (GEP->getPointerAddressSpace() != ConstantAddrSpace) - continue; - if (auto LI = dyn_cast(&I)) - if (LI->getPointerAddressSpace() != ConstantAddrSpace) - continue; - - IRBuilder<> Builder(I.getContext()); - if (maybePromoteUse(Builder, &I)) - Modified = true; - } + if (PromotableGlobals.empty()) + return false; + + IRBuilder<> Builder(M.getContext()); + + bool Modified = false; + for (auto &&GV : PromotableGlobals) { + for (auto &&U : GV->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; } } @@ -102,10 +158,8 @@ char PromoteConstant::ID = 0; static RegisterPass X{ "promote-constant", - "Promotes uses of variables annotated with __constant__ to refer to the " - "global address space iff the use yields a flat pointer since, by " - "definition a pointer which is placed in __constant__ storage can only point " - "to the global address space.", + "Promotes users of variables annotated with __constant__ to refer to the " + "global address space iff the user produces a flat pointer.", false, false}; } \ No newline at end of file From 60b47187ec363a15fd83bac834a011e65771365e Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 10 Mar 2020 05:44:08 +0200 Subject: [PATCH 4/6] Update PromoteConstant.cpp --- llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp index c459d79b644b4..ff125ad2fc46b 100644 --- a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp +++ b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp @@ -32,7 +32,6 @@ class PromoteConstant : public ModulePass { // TODO: this should be hoisted to a common header with HC utility functions // once the related work on PromotePointerKernArgsToGlobal gets merged void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { - To->dump(); From->replaceAllUsesWith(To); Value *FToG = Builder.CreateAddrSpaceCast( @@ -162,4 +161,4 @@ static RegisterPass X{ "global address space iff the user produces a flat pointer.", false, false}; -} \ No newline at end of file +} From efef5be7880163cad30be24b71d408739ff2a05d Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 10 Mar 2020 07:36:31 +0200 Subject: [PATCH 5/6] Function calls like `norecurse` and hidden --- .../PromotePointerKernArgsToGlobal.cpp | 157 ++++++----- .../SelectAcceleratorCode.cpp | 262 +++++++++--------- 2 files changed, 220 insertions(+), 199 deletions(-) diff --git a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp index 5980b0831e62b..09ce8531b5677 100644 --- a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp +++ b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp @@ -27,86 +27,95 @@ using namespace llvm; namespace { class PromotePointerKernArgsToGlobal : public FunctionPass { - // TODO: query the address space robustly. - static constexpr unsigned int GenericAddrSpace{0u}; - static constexpr unsigned int GlobalAddrSpace{1u}; - - void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { - From->replaceAllUsesWith(To); - - Value *FToG = Builder.CreateAddrSpaceCast( - From, - cast(From->getType()) - ->getElementType()->getPointerTo(GlobalAddrSpace)); - Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); - - To->replaceAllUsesWith(GToF); + // TODO: query the address space robustly. + static constexpr unsigned int FlatAddrSpace = 0u; + static constexpr unsigned int GlobalAddrSpace = 1u; + + void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + From->replaceAllUsesWith(To); + + Value *FToG = Builder.CreateAddrSpaceCast( + From, + cast( + From->getType())->getElementType()->getPointerTo(GlobalAddrSpace)); + Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); + + To->replaceAllUsesWith(GToF); + } + + Value *createTemporary(IRBuilder<>& Builder, Type *Ty) { + // We cannot use IRBuilder since it might do the obvious folding, which + // would yield an undef value of a possibly primitive type, which cannot be + // disambiguated from other undefs of the same primitive type and would + // cause havoc when replaced with the promotable cast created later. + Value *UD = UndefValue::get(Builder.getInt8Ty()); + + return CastInst::CreateBitOrPointerCast(UD, Ty, "Tmp", + &*Builder.GetInsertPoint()); + } + + void maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { + if (!UI) + return; + + Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); + + createPromotableCast(Builder, UI, createTemporary(Builder, UI->getType())); + } + + void promoteArgument(IRBuilder<>& Builder, Argument *Arg) { + createPromotableCast(Builder, Arg, + createTemporary(Builder, Arg->getType())); + } +public: + static char ID; + PromotePointerKernArgsToGlobal() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override + { + if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return false; + + SmallVector PromotableArgs; + SmallVector PromotableUses; + for (auto &&Arg : F.args()) { + for (auto &&U : Arg.users()) { + if (!U->getType()->isPointerTy()) + continue; + if (U->getType()->getPointerAddressSpace() != FlatAddrSpace) + continue; + + PromotableUses.push_back(U); + } + + if (!Arg.getType()->isPointerTy()) + continue; + if (Arg.getType()->getPointerAddressSpace() != FlatAddrSpace) + continue; + + PromotableArgs.push_back(&Arg); } - void maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { - if (!UI) - return; + if (PromotableArgs.empty() && PromotableUses.empty()) + return false; - Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); + IRBuilder<> Builder(F.getContext()); + for (auto &&PU : PromotableUses) + maybePromoteUse(Builder, dyn_cast(PU)); - Value *Tmp = Builder.CreateBitCast(UndefValue::get(UI->getType()), - UI->getType()); - createPromotableCast(Builder, UI, Tmp); - } + Builder.SetInsertPoint(&F.getEntryBlock().front()); + for (auto &&Arg : PromotableArgs) + promoteArgument(Builder, Arg); - void promoteArgument(IRBuilder<>& Builder, Argument *Arg) { - Value *Tmp = Builder.CreateBitCast(UndefValue::get(Arg->getType()), - Arg->getType()); - createPromotableCast(Builder, Arg, Tmp); - } -public: - static char ID; - PromotePointerKernArgsToGlobal() : FunctionPass{ID} {} - - bool runOnFunction(Function &F) override - { - if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) - return false; - - SmallVector PromotableArgs; - SmallVector PromotableUses; - for (auto &&Arg : F.args()) { - for (auto &&U : Arg.users()) { - if (!U->getType()->isPointerTy()) - continue; - if (U->getType()->getPointerAddressSpace() != GenericAddrSpace) - continue; - - PromotableUses.push_back(U); - } - - if (!Arg.getType()->isPointerTy()) - continue; - if (Arg.getType()->getPointerAddressSpace() != GenericAddrSpace) - continue; - - PromotableArgs.push_back(&Arg); - } - - if (PromotableArgs.empty() && PromotableUses.empty()) - return false; - - static IRBuilder<> Builder{F.getContext()}; - for (auto &&PU : PromotableUses) - maybePromoteUse(Builder, dyn_cast(PU)); - - Builder.SetInsertPoint(&F.getEntryBlock().front()); - for (auto &&Arg : PromotableArgs) - promoteArgument(Builder, Arg); - return true; - } + return true; + } }; char PromotePointerKernArgsToGlobal::ID = 0; -static RegisterPass X{ - "promote-pointer-kernargs-to-global", - "Promotes kernel formals of pointer type to point to the global address " - "space, since the actuals can only represent a global address.", - false, - false}; +static RegisterPass X( + "promote-pointer-kernargs-to-global", + "Promotes kernel formals of pointer type to point to the global address " + "space, since the actuals can only represent a global address.", + false, + false); } \ No newline at end of file diff --git a/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp b/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp index 1c269e83145b6..7f98af6a02481 100644 --- a/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp +++ b/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp @@ -32,161 +32,173 @@ using namespace llvm; static cl::opt EnableFunctionCalls( - "sac-enable-function-calls", cl::init(false), cl::Hidden, - cl::desc("Enable function calls")); + "sac-enable-function-calls", cl::init(false), cl::Hidden, + cl::desc("Enable function calls")); namespace { class SelectAcceleratorCode : public ModulePass { - SmallPtrSet HCCallees_; - - void findAllHCCallees_(const Function &F, Module &M) - { - for (auto&& BB : F) { - for (auto&& I : BB) { - if (auto CI = dyn_cast(&I)) { - auto V = CI->getCalledValue()->stripPointerCasts(); - if (auto Callee = dyn_cast(V)) { - auto Tmp = HCCallees_.insert(Callee); - if (Tmp.second) findAllHCCallees_(*Callee, M); - } - } - } + SmallPtrSet HCCallees_; + + void findAllHCCallees_(const Function &F, Module &M) { + for (auto &&BB : F) { + for (auto &&I : BB) { + if (auto CI = dyn_cast(&I)) { + auto V = CI->getCalledValue()->stripPointerCasts(); + if (auto Callee = dyn_cast(V)) { + auto Tmp = HCCallees_.insert(Callee); + if (Tmp.second) + findAllHCCallees_(*Callee, M); + } } + } } - - template - static - void erase_(T &X) - { - X.dropAllReferences(); - X.replaceAllUsesWith(UndefValue::get(X.getType())); - X.eraseFromParent(); + } + + template + static + void erase_(T &X) { + X.dropAllReferences(); + X.replaceAllUsesWith(UndefValue::get(X.getType())); + X.eraseFromParent(); + } + + template + bool eraseIf_(F First, G Last, P Predicate) const { + bool erasedSome = false; + + auto It = First(); + while (It != Last()) { + It->removeDeadConstantUsers(); + if (Predicate(*It)) { + erase_(*It); + erasedSome = true; + It = First(); + } + else ++It; } - template - bool eraseIf_(F First, G Last, P Predicate) const - { - bool erasedSome = false; - - auto It = First(); - while (It != Last()) { - It->removeDeadConstantUsers(); - if (Predicate(*It)) { - erase_(*It); - erasedSome = true; - It = First(); - } - else ++It; - } + return erasedSome; + } - return erasedSome; + bool eraseNonHCFunctionsBody_(Module &M) const { + bool Modified = false; + for (auto &&F : M.functions()) { + if (HCCallees_.count(M.getFunction(F.getName())) == 0) { + F.deleteBody(); + Modified = true; + } } - bool eraseNonHCFunctionsBody_(Module &M) const - { - bool Modified = false; - for (auto&& F : M.functions()) { - if (HCCallees_.count(M.getFunction(F.getName())) == 0) { - F.deleteBody(); - Modified = true; - } - }; - return Modified; + return Modified; + } + + bool eraseDeadGlobals_(Module &M) const { + return eraseIf_([&]() { return M.global_begin(); }, + [&]() { return M.global_end(); }, + [](const Constant &K) { return !K.isConstantUsed(); }); + } + + bool eraseDeadAliases_(Module &M) { + return eraseIf_([&]() { return M.alias_begin(); }, + [&]() { return M.alias_end(); }, + [](const Constant &K) { return !K.isConstantUsed(); }); + } + + static bool forceAlwaysInline_(Function &F) { + if (F.hasFnAttribute(Attribute::AlwaysInline)) { + // already marked as always inline, + // nothing needs to be changed + assert(!F.hasFnAttribute(Attribute::OptimizeNone)); + assert(!F.hasFnAttribute(Attribute::NoInline)); + + return false; } - bool eraseDeadGlobals_(Module &M) const - { - return eraseIf_( - [&]() { return M.global_begin(); }, - [&]() { return M.global_end(); }, - [](const Constant& K) { return !K.isConstantUsed(); }); - } + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // don't inline functions with optnone + // Function with optnone is required to have + // noinline + assert(F.hasFnAttribute(Attribute::NoInline)); - bool eraseDeadAliases_(Module &M) - { - return eraseIf_( - [&]() { return M.alias_begin(); }, - [&]() { return M.alias_end(); }, - [](const Constant& K) { return !K.isConstantUsed(); }); + return false; } - static - bool forceAlwaysInline_(Function &F) - { - if (F.hasFnAttribute(Attribute::AlwaysInline)) { - // already marked as always inline, - // nothing needs to be changed - assert(!F.hasFnAttribute(Attribute::OptimizeNone)); - assert(!F.hasFnAttribute(Attribute::NoInline)); - return false; - } + if (F.hasFnAttribute(Attribute::NoInline)) + F.removeFnAttr(Attribute::NoInline); - if (F.hasFnAttribute(Attribute::OptimizeNone)) { - // don't inline functions with optnone - // Function with optnone is required to have - // noinline - assert(F.hasFnAttribute(Attribute::NoInline)); - return false; - } + F.addFnAttr(Attribute::AlwaysInline); - if (F.hasFnAttribute(Attribute::NoInline)) { - F.removeFnAttr(Attribute::NoInline); - } - F.addFnAttr(Attribute::AlwaysInline); - return true; + return true; + } + + static bool setFunctionAttributes(Function &F) { + if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) + return false; + if (!EnableFunctionCalls) + return forceAlwaysInline_(F); + + bool Modified = false; + if (!F.hasFnAttribute(Attribute::NoRecurse)) { + F.addFnAttr(Attribute::NoRecurse); + Modified = true; + } + if (F.getVisibility() != Function::VisibilityTypes::HiddenVisibility) { + F.setVisibility(Function::VisibilityTypes::HiddenVisibility); + Modified = true; } + + return Modified; + } public: - static char ID; - SelectAcceleratorCode() : ModulePass{ID} {} - - bool doInitialization(Module &M) override { return false; } - - bool runOnModule(Module &M) override { - // This may be a candidate for an analysis pass that is - // invalidated appropriately by other passes. - for (auto&& F : M.functions()) { - if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { - auto Tmp = HCCallees_.insert(M.getFunction(F.getName())); - if (Tmp.second) findAllHCCallees_(F, M); - } - } + static char ID; + SelectAcceleratorCode() : ModulePass(ID) {} + + bool doInitialization(Module &M) override { return false; } + + bool runOnModule(Module &M) override { + // This may be a candidate for an analysis pass that is + // invalidated appropriately by other passes. + for (auto &&F : M.functions()) { + if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { + auto Tmp = HCCallees_.insert(M.getFunction(F.getName())); + if (Tmp.second) findAllHCCallees_(F, M); + } + } - bool Modified = eraseNonHCFunctionsBody_(M); + bool Modified = eraseNonHCFunctionsBody_(M); - Modified = eraseDeadGlobals_(M) || Modified; + Modified = eraseDeadGlobals_(M) || Modified; - M.dropTriviallyDeadConstantArrays(); + M.dropTriviallyDeadConstantArrays(); - Modified = eraseDeadAliases_(M) || Modified; + Modified = eraseDeadAliases_(M) || Modified; - if (!EnableFunctionCalls) - for (auto&& F : M.functions()) Modified = forceAlwaysInline_(F) || Modified; + for (auto &&F : M.functions()) + Modified = setFunctionAttributes(F); - return Modified; - } + return Modified; + } - bool doFinalization(Module& M) override - { - if(EnableFunctionCalls) return false; + bool doFinalization(Module& M) override { + if (EnableFunctionCalls) return false; - const auto It = std::find_if(M.begin(), M.end(), [](Function& F) { - return !isInlineViable(F).isSuccess() && !F.isIntrinsic(); - }); + const auto It = std::find_if(M.begin(), M.end(), [](Function& F) { + return !isInlineViable(F).isSuccess() && !F.isIntrinsic(); + }); - if (It != M.end()) { - M.getContext().diagnose(DiagnosticInfoUnsupported{ - *It, "The function cannot be inlined."}); - } + if (It != M.end()) + M.getContext().diagnose( + DiagnosticInfoUnsupported(*It, "The function cannot be inlined.")); - return false; - } + return false; + } }; char SelectAcceleratorCode::ID = 0; -static RegisterPass X{ - "select-accelerator-code", - "Selects only the code that is expected to run on an accelerator, " - "ensuring that it can be lowered by AMDGPU.", - false, - false}; +static RegisterPass X( + "select-accelerator-code", + "Selects only the code that is expected to run on an accelerator, " + "ensuring that it can be lowered by AMDGPU.", + false, + false); } From 5a1434132ce44c61e53bcca1196195ccf34c51b2 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 11 Mar 2020 04:17:31 +0000 Subject: [PATCH 6/6] We should emit (trivial?) definitions for globals. --- clang/lib/CodeGen/CodeGenModule.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2bb5fc9703ef2..b210c647bf41d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2821,13 +2821,8 @@ namespace bool r = true; - if (!x->hasAttr() && - (x->isStaticLocal() || - x->hasExternalStorage() || - x->hasGlobalStorage() || - x->isExceptionVariable())) { - r = false; - } + if (!x->hasAttr() && x->isExceptionVariable()) + r = false; d_[x] = r; @@ -2961,7 +2956,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { if (LangOpts.CPlusPlusAMP && !CodeGenOpts.AMPCPU) { if (CodeGenOpts.AMPIsDevice) { // If -famp-is-device switch is on, we are in GPU build path. - if (!isWhiteListForHCC(*this, GD)) return; + if (!isWhiteListForHCC(*this, GD)) + return; } else if (!isa(D) && D->hasAttr() &&