diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 720a5f1912a5a..470b3b5012686 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -444,9 +444,13 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, D->hasAttr())) return; - if ( (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath && - D->hasAttr()) || - (getLangOpts().OpenMP && getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))) + if (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath) + if (D->hasAttr() || + (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__")) + return; + if (getLangOpts().OpenMP && + getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)) return; // Check if we've already initialized this decl. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d2dbfb182b937..fcff658c75982 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -448,11 +448,8 @@ void CodeGenModule::Release() { } EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); - // skip global annotation for HCC kernel path - if (Context.getLangOpts().CPlusPlusAMP && getCodeGenOpts().AMPIsDevice) { - } else { + if (!LangOpts.CPlusPlusAMP || !LangOpts.DevicePath) EmitGlobalAnnotations(); - } EmitStaticExternCAliases(); EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) @@ -2824,13 +2821,8 @@ namespace bool r = true; - if (!x->hasAttr() && - (x->isStaticLocal() || - x->hasExternalStorage() || - x->hasGlobalStorage() || - x->isExceptionVariable())) { - r = false; - } + if (!x->hasAttr() && x->isExceptionVariable()) + r = false; d_[x] = r; @@ -2964,7 +2956,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { if (LangOpts.CPlusPlusAMP && !CodeGenOpts.AMPCPU) { if (CodeGenOpts.AMPIsDevice) { // If -famp-is-device switch is on, we are in GPU build path. - if (!isWhiteListForHCC(*this, GD)) return; + if (!isWhiteListForHCC(*this, GD)) + return; } else if (!isa(D) && D->hasAttr() && @@ -2973,7 +2966,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { } } - PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), + PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), Context.getSourceManager(), "Generating code for declaration"); @@ -3928,15 +3921,23 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::cuda_device; } - if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath && - D && D->hasAttr()) - return LangAS::hcc_tilestatic; + if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath) { + if (D && D->hasAttr()) + return LangAS::hcc_tilestatic; + if (D && D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__") + return LangAS::opencl_constant; + if (D && D->getType().isConstQualified()) + return LangAS::opencl_constant; + return LangAS::hcc_global; + } if (LangOpts.OpenMP) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; } + return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } @@ -4218,6 +4219,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, } } + if (GV && LangOpts.CPlusPlusAMP) { + // This mimics the CUDA-specific processing done above. + bool IsHIPConstant = D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__"; + + if (LangOpts.DevicePath) + if (Linkage != llvm::GlobalValue::InternalLinkage && + (D->hasAttr() || IsHIPConstant)) + GV->setExternallyInitialized(true); + } + // HIPPinnedShadowVar should remain in the final code object irrespective of // whether it is used or not within the code. Add it to used list, so that // it will not get eliminated when it is unused. Also, it is an extern var diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index c10797c603e08..0609f4f4f4485 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8046,12 +8046,26 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, llvm::GlobalValue *GV) { if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) return false; + if (D->hasAttr()) + return true; + if (isa(D)) { + if (D->hasAttr()) + return true; + if (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_global_function__") + return true; + } + if (isa(D)) { + if (D->hasAttr() || D->hasAttr()) + return true; + if (D->hasAttr()) + return true; + if (D->hasAttr() && + D->getAttr()->getAnnotation() == "__HIP_constant__") + return true; + } - return D->hasAttr() || - (isa(D) && D->hasAttr()) || - (isa(D) && - (D->hasAttr() || D->hasAttr() || - D->hasAttr())); + return false; } static bool requiresAMDGPUDefaultVisibility(const Decl *D, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 25da828095483..a5253a2736422 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7788,12 +7788,11 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } - if (getLangOpts().CPlusPlusAMP) { - if (SC == SC_None && S->getFnParent() != nullptr && - (NewVD->hasAttr())) { + if (getLangOpts().CPlusPlusAMP && SC == SC_None && S->getFnParent()) + if (NewVD->hasAttr() || + (NewVD->hasAttr() && + NewVD->getAttr()->getAnnotation() == "__HIP_contant__")) NewVD->setStorageClass(SC_Static); - } - } // Ensure that dllimport globals without explicit storage class are treated as // extern. The storage class is set above using parsed attributes. Now we can diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 013334a2e6336..fa6e840f660cf 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13141,8 +13141,7 @@ static void maybeCastArgsForHIPGlobalFunction(Sema &S, if (FormalT == ActualT) return Actual; if (FormalT->isReferenceType()) return Actual; - CastKind CK; - if (FormalT->isPointerType()) CK = CK_NoOp; + CastKind CK = CK_NoOp; if (FormalT->isIntegerType()) { if (ActualT->isIntegerType()) CK = CK_IntegralCast; if (ActualT->isFloatingType()) CK = CK_FloatingToIntegral; diff --git a/llvm/lib/Transforms/HC/CMakeLists.txt b/llvm/lib/Transforms/HC/CMakeLists.txt index 87ce927ab3290..0cdef266f160d 100644 --- a/llvm/lib/Transforms/HC/CMakeLists.txt +++ b/llvm/lib/Transforms/HC/CMakeLists.txt @@ -1,5 +1,6 @@ if (UNIX) set(LLVM_ENABLE_PLUGINS ON) + add_subdirectory(PromoteConstant) add_subdirectory(PromotePointerKernArgsToGlobal) add_subdirectory(SelectAcceleratorCode) endif() diff --git a/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt b/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt new file mode 100644 index 0000000000000..58883e8ab89ef --- /dev/null +++ b/llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMPromoteConstant MODULE PromoteConstant.cpp) + +add_dependencies(LLVMPromoteConstant intrinsics_gen) \ No newline at end of file diff --git a/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp new file mode 100644 index 0000000000000..ff125ad2fc46b --- /dev/null +++ b/llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp @@ -0,0 +1,164 @@ +//===-- PromotePointerKernargsToGlobal.cpp - Promote Pointers To Global --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares and defines a pass which uses the double-cast trick ( +// flat-to-global and global-to-flat) for pointers that reside in the +// __constant__ address space. For example, given __constant__ int** foo, all +// single dereferences of foo will be promoted to yield a global int*, as +// opposed to a flat int*. It is preferable to execute SelectAcceleratorCode +// before, as this reduces the workload by pruning functions that are not +// reachable by an accelerator. It is mandatory to run InferAddressSpaces after, +// otherwise no benefit shall be obtained (the spurious casts do get removed). +//===----------------------------------------------------------------------===// +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { +class PromoteConstant : public ModulePass { + // TODO: query the address spaces robustly. + static constexpr unsigned int FlatAddrSpace{0u}; + static constexpr unsigned int GlobalAddrSpace{1u}; + static constexpr unsigned int ConstantAddrSpace{4u}; + + // TODO: this should be hoisted to a common header with HC utility functions + // once the related work on PromotePointerKernArgsToGlobal gets merged + void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + From->replaceAllUsesWith(To); + + Value *FToG = Builder.CreateAddrSpaceCast( + From, + cast( + From->getType())->getElementType()->getPointerTo(GlobalAddrSpace)); + Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); + + To->replaceAllUsesWith(GToF); + } + + // TODO: this should be hoisted to a common header with HC utility functions + // once the related work on PromotePointerKernArgsToGlobal gets merged + bool maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { + if (!UI) + return false; + + Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); + + // We cannot use IRBuilder since it might do the obvious folding, which + // would yield an undef value of a possibly primitive type, which cannot be + // disambiguated from other undefs of the same primitive type and would + // cause havoc when replaced with the promotable cast created later. + Value *UD = UndefValue::get(Builder.getInt8Ty()); + Value *Tmp = CastInst::CreateBitOrPointerCast(UD, UI->getType(), "Tmp", + &*Builder.GetInsertPoint()); + + createPromotableCast(Builder, UI, Tmp); + + return true; + } + // TODO: Whilst ConstantExpr and Operator handling could obviously be folded + // into a single function, we leave them separate for now to allow + // possible additional development. + bool maybeHandleInstruction(IRBuilder<>& Builder, Instruction *I) { + if (!I) + return false; + + if (!I->getType()->isPointerTy()) + return false; + if (I->getType()->getPointerAddressSpace() != FlatAddrSpace) + return false; + + if (auto LI = dyn_cast(I)) + return maybePromoteUse(Builder, LI); + if (auto PHI = dyn_cast(I)) { + return false; + } + if (auto SEL = dyn_cast(I)) + return false; + + return false; + } + + bool maybeHandleOperator(IRBuilder<>& Builder, Operator *Op) { + if (!Op) + return false; + if (!Op->getType()->isPointerTy()) + return false; + + bool Modified = false; + for (auto &&U : Op->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + } + + return Modified; + } + + bool maybeHandleConstantExpr(IRBuilder<>& Builder, ConstantExpr *CE) { + if (!CE) + return false; + if (!CE->getType()->isPointerTy()) + return false; + + bool Modified = false; + for (auto &&U : CE->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; + } + + return Modified; + } +public: + static char ID; + PromoteConstant() : ModulePass{ID} {} + + bool runOnModule(Module &M) override { + SmallVector PromotableGlobals; + for (auto &&GV : M.globals()) + if (GV.getAddressSpace() == ConstantAddrSpace) + PromotableGlobals.push_back(&GV); + + if (PromotableGlobals.empty()) + return false; + + IRBuilder<> Builder(M.getContext()); + + bool Modified = false; + for (auto &&GV : PromotableGlobals) { + for (auto &&U : GV->users()) { + if (maybeHandleConstantExpr(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleInstruction(Builder, dyn_cast(U))) + Modified = true; + else if (maybeHandleOperator(Builder, dyn_cast(U))) + Modified = true; + } + } + + return Modified; + } +}; +char PromoteConstant::ID = 0; + +static RegisterPass X{ + "promote-constant", + "Promotes users of variables annotated with __constant__ to refer to the " + "global address space iff the user produces a flat pointer.", + false, + false}; +} diff --git a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp index b2b5513665193..09ce8531b5677 100644 --- a/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp +++ b/llvm/lib/Transforms/HC/PromotePointerKernArgsToGlobal/PromotePointerKernArgsToGlobal.cpp @@ -23,61 +23,99 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" -#include - using namespace llvm; -using namespace std; namespace { class PromotePointerKernArgsToGlobal : public FunctionPass { - // TODO: query the address space robustly. - static constexpr unsigned int GenericAddrSpace{0u}; - static constexpr unsigned int GlobalAddrSpace{1u}; -public: - static char ID; - PromotePointerKernArgsToGlobal() : FunctionPass{ID} {} + // TODO: query the address space robustly. + static constexpr unsigned int FlatAddrSpace = 0u; + static constexpr unsigned int GlobalAddrSpace = 1u; + + void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) { + From->replaceAllUsesWith(To); + + Value *FToG = Builder.CreateAddrSpaceCast( + From, + cast( + From->getType())->getElementType()->getPointerTo(GlobalAddrSpace)); + Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType()); + + To->replaceAllUsesWith(GToF); + } + + Value *createTemporary(IRBuilder<>& Builder, Type *Ty) { + // We cannot use IRBuilder since it might do the obvious folding, which + // would yield an undef value of a possibly primitive type, which cannot be + // disambiguated from other undefs of the same primitive type and would + // cause havoc when replaced with the promotable cast created later. + Value *UD = UndefValue::get(Builder.getInt8Ty()); - bool runOnFunction(Function &F) override - { - if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) return false; + return CastInst::CreateBitOrPointerCast(UD, Ty, "Tmp", + &*Builder.GetInsertPoint()); + } - SmallVector PtrArgs; - for_each(F.arg_begin(), F.arg_end(), [&](Argument &Arg) { - if (!Arg.getType()->isPointerTy()) return; - if (Arg.getType()->getPointerAddressSpace() != GenericAddrSpace) { - return; - } + void maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) { + if (!UI) + return; - PtrArgs.push_back(&Arg); - }); + Builder.SetInsertPoint(UI->getNextNonDebugInstruction()); - if (PtrArgs.empty()) return false; + createPromotableCast(Builder, UI, createTemporary(Builder, UI->getType())); + } - static IRBuilder<> Builder{F.getContext()}; - Builder.SetInsertPoint(&F.getEntryBlock().front()); + void promoteArgument(IRBuilder<>& Builder, Argument *Arg) { + createPromotableCast(Builder, Arg, + createTemporary(Builder, Arg->getType())); + } +public: + static char ID; + PromotePointerKernArgsToGlobal() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override + { + if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return false; - for_each(PtrArgs.begin(), PtrArgs.end(), [](Argument *PArg) { - Argument Tmp{PArg->getType(), PArg->getName()}; - PArg->replaceAllUsesWith(&Tmp); + SmallVector PromotableArgs; + SmallVector PromotableUses; + for (auto &&Arg : F.args()) { + for (auto &&U : Arg.users()) { + if (!U->getType()->isPointerTy()) + continue; + if (U->getType()->getPointerAddressSpace() != FlatAddrSpace) + continue; - Value *FToG = Builder.CreateAddrSpaceCast( - PArg, - cast(PArg->getType()) - ->getElementType()->getPointerTo(GlobalAddrSpace)); - Value *GToF = Builder.CreateAddrSpaceCast(FToG, PArg->getType()); + PromotableUses.push_back(U); + } - Tmp.replaceAllUsesWith(GToF); - }); + if (!Arg.getType()->isPointerTy()) + continue; + if (Arg.getType()->getPointerAddressSpace() != FlatAddrSpace) + continue; - return true; + PromotableArgs.push_back(&Arg); } + + if (PromotableArgs.empty() && PromotableUses.empty()) + return false; + + IRBuilder<> Builder(F.getContext()); + for (auto &&PU : PromotableUses) + maybePromoteUse(Builder, dyn_cast(PU)); + + Builder.SetInsertPoint(&F.getEntryBlock().front()); + for (auto &&Arg : PromotableArgs) + promoteArgument(Builder, Arg); + + return true; + } }; char PromotePointerKernArgsToGlobal::ID = 0; -static RegisterPass X{ - "promote-pointer-kernargs-to-global", - "Promotes kernel formals of pointer type to point to the global address " - "space, since the actuals can only represent a global address.", - false, - false}; +static RegisterPass X( + "promote-pointer-kernargs-to-global", + "Promotes kernel formals of pointer type to point to the global address " + "space, since the actuals can only represent a global address.", + false, + false); } \ No newline at end of file diff --git a/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp b/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp index 1c269e83145b6..7f98af6a02481 100644 --- a/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp +++ b/llvm/lib/Transforms/HC/SelectAcceleratorCode/SelectAcceleratorCode.cpp @@ -32,161 +32,173 @@ using namespace llvm; static cl::opt EnableFunctionCalls( - "sac-enable-function-calls", cl::init(false), cl::Hidden, - cl::desc("Enable function calls")); + "sac-enable-function-calls", cl::init(false), cl::Hidden, + cl::desc("Enable function calls")); namespace { class SelectAcceleratorCode : public ModulePass { - SmallPtrSet HCCallees_; - - void findAllHCCallees_(const Function &F, Module &M) - { - for (auto&& BB : F) { - for (auto&& I : BB) { - if (auto CI = dyn_cast(&I)) { - auto V = CI->getCalledValue()->stripPointerCasts(); - if (auto Callee = dyn_cast(V)) { - auto Tmp = HCCallees_.insert(Callee); - if (Tmp.second) findAllHCCallees_(*Callee, M); - } - } - } + SmallPtrSet HCCallees_; + + void findAllHCCallees_(const Function &F, Module &M) { + for (auto &&BB : F) { + for (auto &&I : BB) { + if (auto CI = dyn_cast(&I)) { + auto V = CI->getCalledValue()->stripPointerCasts(); + if (auto Callee = dyn_cast(V)) { + auto Tmp = HCCallees_.insert(Callee); + if (Tmp.second) + findAllHCCallees_(*Callee, M); + } } + } } - - template - static - void erase_(T &X) - { - X.dropAllReferences(); - X.replaceAllUsesWith(UndefValue::get(X.getType())); - X.eraseFromParent(); + } + + template + static + void erase_(T &X) { + X.dropAllReferences(); + X.replaceAllUsesWith(UndefValue::get(X.getType())); + X.eraseFromParent(); + } + + template + bool eraseIf_(F First, G Last, P Predicate) const { + bool erasedSome = false; + + auto It = First(); + while (It != Last()) { + It->removeDeadConstantUsers(); + if (Predicate(*It)) { + erase_(*It); + erasedSome = true; + It = First(); + } + else ++It; } - template - bool eraseIf_(F First, G Last, P Predicate) const - { - bool erasedSome = false; - - auto It = First(); - while (It != Last()) { - It->removeDeadConstantUsers(); - if (Predicate(*It)) { - erase_(*It); - erasedSome = true; - It = First(); - } - else ++It; - } + return erasedSome; + } - return erasedSome; + bool eraseNonHCFunctionsBody_(Module &M) const { + bool Modified = false; + for (auto &&F : M.functions()) { + if (HCCallees_.count(M.getFunction(F.getName())) == 0) { + F.deleteBody(); + Modified = true; + } } - bool eraseNonHCFunctionsBody_(Module &M) const - { - bool Modified = false; - for (auto&& F : M.functions()) { - if (HCCallees_.count(M.getFunction(F.getName())) == 0) { - F.deleteBody(); - Modified = true; - } - }; - return Modified; + return Modified; + } + + bool eraseDeadGlobals_(Module &M) const { + return eraseIf_([&]() { return M.global_begin(); }, + [&]() { return M.global_end(); }, + [](const Constant &K) { return !K.isConstantUsed(); }); + } + + bool eraseDeadAliases_(Module &M) { + return eraseIf_([&]() { return M.alias_begin(); }, + [&]() { return M.alias_end(); }, + [](const Constant &K) { return !K.isConstantUsed(); }); + } + + static bool forceAlwaysInline_(Function &F) { + if (F.hasFnAttribute(Attribute::AlwaysInline)) { + // already marked as always inline, + // nothing needs to be changed + assert(!F.hasFnAttribute(Attribute::OptimizeNone)); + assert(!F.hasFnAttribute(Attribute::NoInline)); + + return false; } - bool eraseDeadGlobals_(Module &M) const - { - return eraseIf_( - [&]() { return M.global_begin(); }, - [&]() { return M.global_end(); }, - [](const Constant& K) { return !K.isConstantUsed(); }); - } + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // don't inline functions with optnone + // Function with optnone is required to have + // noinline + assert(F.hasFnAttribute(Attribute::NoInline)); - bool eraseDeadAliases_(Module &M) - { - return eraseIf_( - [&]() { return M.alias_begin(); }, - [&]() { return M.alias_end(); }, - [](const Constant& K) { return !K.isConstantUsed(); }); + return false; } - static - bool forceAlwaysInline_(Function &F) - { - if (F.hasFnAttribute(Attribute::AlwaysInline)) { - // already marked as always inline, - // nothing needs to be changed - assert(!F.hasFnAttribute(Attribute::OptimizeNone)); - assert(!F.hasFnAttribute(Attribute::NoInline)); - return false; - } + if (F.hasFnAttribute(Attribute::NoInline)) + F.removeFnAttr(Attribute::NoInline); - if (F.hasFnAttribute(Attribute::OptimizeNone)) { - // don't inline functions with optnone - // Function with optnone is required to have - // noinline - assert(F.hasFnAttribute(Attribute::NoInline)); - return false; - } + F.addFnAttr(Attribute::AlwaysInline); - if (F.hasFnAttribute(Attribute::NoInline)) { - F.removeFnAttr(Attribute::NoInline); - } - F.addFnAttr(Attribute::AlwaysInline); - return true; + return true; + } + + static bool setFunctionAttributes(Function &F) { + if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) + return false; + if (!EnableFunctionCalls) + return forceAlwaysInline_(F); + + bool Modified = false; + if (!F.hasFnAttribute(Attribute::NoRecurse)) { + F.addFnAttr(Attribute::NoRecurse); + Modified = true; + } + if (F.getVisibility() != Function::VisibilityTypes::HiddenVisibility) { + F.setVisibility(Function::VisibilityTypes::HiddenVisibility); + Modified = true; } + + return Modified; + } public: - static char ID; - SelectAcceleratorCode() : ModulePass{ID} {} - - bool doInitialization(Module &M) override { return false; } - - bool runOnModule(Module &M) override { - // This may be a candidate for an analysis pass that is - // invalidated appropriately by other passes. - for (auto&& F : M.functions()) { - if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { - auto Tmp = HCCallees_.insert(M.getFunction(F.getName())); - if (Tmp.second) findAllHCCallees_(F, M); - } - } + static char ID; + SelectAcceleratorCode() : ModulePass(ID) {} + + bool doInitialization(Module &M) override { return false; } + + bool runOnModule(Module &M) override { + // This may be a candidate for an analysis pass that is + // invalidated appropriately by other passes. + for (auto &&F : M.functions()) { + if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) { + auto Tmp = HCCallees_.insert(M.getFunction(F.getName())); + if (Tmp.second) findAllHCCallees_(F, M); + } + } - bool Modified = eraseNonHCFunctionsBody_(M); + bool Modified = eraseNonHCFunctionsBody_(M); - Modified = eraseDeadGlobals_(M) || Modified; + Modified = eraseDeadGlobals_(M) || Modified; - M.dropTriviallyDeadConstantArrays(); + M.dropTriviallyDeadConstantArrays(); - Modified = eraseDeadAliases_(M) || Modified; + Modified = eraseDeadAliases_(M) || Modified; - if (!EnableFunctionCalls) - for (auto&& F : M.functions()) Modified = forceAlwaysInline_(F) || Modified; + for (auto &&F : M.functions()) + Modified = setFunctionAttributes(F); - return Modified; - } + return Modified; + } - bool doFinalization(Module& M) override - { - if(EnableFunctionCalls) return false; + bool doFinalization(Module& M) override { + if (EnableFunctionCalls) return false; - const auto It = std::find_if(M.begin(), M.end(), [](Function& F) { - return !isInlineViable(F).isSuccess() && !F.isIntrinsic(); - }); + const auto It = std::find_if(M.begin(), M.end(), [](Function& F) { + return !isInlineViable(F).isSuccess() && !F.isIntrinsic(); + }); - if (It != M.end()) { - M.getContext().diagnose(DiagnosticInfoUnsupported{ - *It, "The function cannot be inlined."}); - } + if (It != M.end()) + M.getContext().diagnose( + DiagnosticInfoUnsupported(*It, "The function cannot be inlined.")); - return false; - } + return false; + } }; char SelectAcceleratorCode::ID = 0; -static RegisterPass X{ - "select-accelerator-code", - "Selects only the code that is expected to run on an accelerator, " - "ensuring that it can be lowered by AMDGPU.", - false, - false}; +static RegisterPass X( + "select-accelerator-code", + "Selects only the code that is expected to run on an accelerator, " + "ensuring that it can be lowered by AMDGPU.", + false, + false); }