Skip to content
10 changes: 7 additions & 3 deletions clang/lib/CodeGen/CGDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,13 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
D->hasAttr<CUDASharedAttr>()))
return;

if ( (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath &&
D->hasAttr<HCCTileStaticAttr>()) ||
(getLangOpts().OpenMP && getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)))
if (getLangOpts().CPlusPlusAMP && getLangOpts().DevicePath)
if (D->hasAttr<HCCTileStaticAttr>() ||
(D->hasAttr<AnnotateAttr>() &&
D->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_constant__"))
return;
if (getLangOpts().OpenMP &&
getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
return;

// Check if we've already initialized this decl.
Expand Down
44 changes: 28 additions & 16 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,11 +448,8 @@ void CodeGenModule::Release() {
}
EmitCtorList(GlobalCtors, "llvm.global_ctors");
EmitCtorList(GlobalDtors, "llvm.global_dtors");
// skip global annotation for HCC kernel path
if (Context.getLangOpts().CPlusPlusAMP && getCodeGenOpts().AMPIsDevice) {
} else {
if (!LangOpts.CPlusPlusAMP || !LangOpts.DevicePath)
EmitGlobalAnnotations();
}
EmitStaticExternCAliases();
EmitDeferredUnusedCoverageMappings();
if (CoverageMapping)
Expand Down Expand Up @@ -2824,13 +2821,8 @@ namespace

bool r = true;

if (!x->hasAttr<HCCTileStaticAttr>() &&
(x->isStaticLocal() ||
x->hasExternalStorage() ||
x->hasGlobalStorage() ||
x->isExceptionVariable())) {
r = false;
}
if (!x->hasAttr<HCCTileStaticAttr>() && x->isExceptionVariable())
r = false;

d_[x] = r;

Expand Down Expand Up @@ -2964,7 +2956,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
if (LangOpts.CPlusPlusAMP && !CodeGenOpts.AMPCPU) {
if (CodeGenOpts.AMPIsDevice) {
// If -famp-is-device switch is on, we are in GPU build path.
if (!isWhiteListForHCC(*this, GD)) return;
if (!isWhiteListForHCC(*this, GD))
return;
}
else if (!isa<VarDecl>(D) &&
D->hasAttr<CXXAMPRestrictAMPAttr>() &&
Expand All @@ -2973,7 +2966,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
}
}

PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
Context.getSourceManager(),
"Generating code for declaration");

Expand Down Expand Up @@ -3928,15 +3921,23 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
return LangAS::cuda_device;
}

if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath &&
D && D->hasAttr<HCCTileStaticAttr>())
return LangAS::hcc_tilestatic;
if (LangOpts.CPlusPlusAMP && LangOpts.DevicePath) {
if (D && D->hasAttr<HCCTileStaticAttr>())
return LangAS::hcc_tilestatic;
if (D && D->hasAttr<AnnotateAttr>() &&
D->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_constant__")
return LangAS::opencl_constant;
if (D && D->getType().isConstQualified())
return LangAS::opencl_constant;
return LangAS::hcc_global;
}

if (LangOpts.OpenMP) {
LangAS AS;
if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS))
return AS;
}

return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
}

Expand Down Expand Up @@ -4218,6 +4219,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
}
}

if (GV && LangOpts.CPlusPlusAMP) {
// This mimics the CUDA-specific processing done above.
bool IsHIPConstant = D->hasAttr<AnnotateAttr>() &&
D->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_constant__";

if (LangOpts.DevicePath)
if (Linkage != llvm::GlobalValue::InternalLinkage &&
(D->hasAttr<CXXAMPRestrictAMPAttr>() || IsHIPConstant))
GV->setExternallyInitialized(true);
}

// HIPPinnedShadowVar should remain in the final code object irrespective of
// whether it is used or not within the code. Add it to used list, so that
// it will not get eliminated when it is unused. Also, it is an extern var
Expand Down
24 changes: 19 additions & 5 deletions clang/lib/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8046,12 +8046,26 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
llvm::GlobalValue *GV) {
if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
return false;
if (D->hasAttr<OpenCLKernelAttr>())
return true;
if (isa<FunctionDecl>(D)) {
if (D->hasAttr<CUDAGlobalAttr>())
return true;
if (D->hasAttr<AnnotateAttr>() &&
D->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_global_function__")
return true;
}
if (isa<VarDecl>(D)) {
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
return true;
if (D->hasAttr<HIPPinnedShadowAttr>())
return true;
if (D->hasAttr<AnnotateAttr>() &&
D->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_constant__")
return true;
}

return D->hasAttr<OpenCLKernelAttr>() ||
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
(isa<VarDecl>(D) &&
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
D->hasAttr<HIPPinnedShadowAttr>()));
return false;
}

static bool requiresAMDGPUDefaultVisibility(const Decl *D,
Expand Down
9 changes: 4 additions & 5 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7788,12 +7788,11 @@ NamedDecl *Sema::ActOnVariableDeclarator(
}
}

if (getLangOpts().CPlusPlusAMP) {
if (SC == SC_None && S->getFnParent() != nullptr &&
(NewVD->hasAttr<HCCTileStaticAttr>())) {
// Function-local variables declared without a storage class that carry the
// HCC tile_static attribute, or the "__HIP_constant__" annotation, are given
// static storage. The annotation string must match the spelling tested by the
// CodeGen checks for the same annotation.
if (getLangOpts().CPlusPlusAMP && SC == SC_None && S->getFnParent())
if (NewVD->hasAttr<HCCTileStaticAttr>() ||
(NewVD->hasAttr<AnnotateAttr>() &&
NewVD->getAttr<AnnotateAttr>()->getAnnotation() == "__HIP_constant__"))
NewVD->setStorageClass(SC_Static);
}
}

// Ensure that dllimport globals without explicit storage class are treated as
// extern. The storage class is set above using parsed attributes. Now we can
Expand Down
3 changes: 1 addition & 2 deletions clang/lib/Sema/SemaOverload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13141,8 +13141,7 @@ static void maybeCastArgsForHIPGlobalFunction(Sema &S,
if (FormalT == ActualT) return Actual;
if (FormalT->isReferenceType()) return Actual;

CastKind CK;
if (FormalT->isPointerType()) CK = CK_NoOp;
CastKind CK = CK_NoOp;
if (FormalT->isIntegerType()) {
if (ActualT->isIntegerType()) CK = CK_IntegralCast;
if (ActualT->isFloatingType()) CK = CK_FloatingToIntegral;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/HC/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# The HC transform passes are only configured on UNIX hosts: turn on LLVM
# plugin support and descend into each pass directory.
if (UNIX)
set(LLVM_ENABLE_PLUGINS ON)
add_subdirectory(PromoteConstant)
add_subdirectory(PromotePointerKernArgsToGlobal)
add_subdirectory(SelectAcceleratorCode)
endif()
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/HC/PromoteConstant/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build the PromoteConstant pass as a MODULE library from its single source.
add_llvm_library(LLVMPromoteConstant MODULE PromoteConstant.cpp)

# Order the build after the intrinsics_gen target (presumably so that the
# generated intrinsics headers exist before this library compiles -- confirm).
add_dependencies(LLVMPromoteConstant intrinsics_gen)
164 changes: 164 additions & 0 deletions llvm/lib/Transforms/HC/PromoteConstant/PromoteConstant.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//===-- PromoteConstant.cpp - Promote Loads From __constant__ To Global --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares and defines a pass which uses the double-cast trick (
// flat-to-global and global-to-flat) for pointers that reside in the
// __constant__ address space. For example, given __constant__ int** foo, all
// single dereferences of foo will be promoted to yield a global int*, as
// opposed to a flat int*. It is preferable to execute SelectAcceleratorCode
// before, as this reduces the workload by pruning functions that are not
// reachable by an accelerator. It is mandatory to run InferAddressSpaces after,
// otherwise no benefit shall be obtained (the spurious casts do get removed).
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
class PromoteConstant : public ModulePass {
// TODO: query the address spaces robustly.
static constexpr unsigned int FlatAddrSpace{0u};
static constexpr unsigned int GlobalAddrSpace{1u};
static constexpr unsigned int ConstantAddrSpace{4u};

// TODO: this should be hoisted to a common header with HC utility functions
// once the related work on PromotePointerKernArgsToGlobal gets merged
void createPromotableCast(IRBuilder<>& Builder, Value *From, Value *To) {
From->replaceAllUsesWith(To);

Value *FToG = Builder.CreateAddrSpaceCast(
From,
cast<PointerType>(
From->getType())->getElementType()->getPointerTo(GlobalAddrSpace));
Value *GToF = Builder.CreateAddrSpaceCast(FToG, From->getType());

To->replaceAllUsesWith(GToF);
}

// TODO: this should be hoisted to a common header with HC utility functions
// once the related work on PromotePointerKernArgsToGlobal gets merged
bool maybePromoteUse(IRBuilder<>& Builder, Instruction *UI) {
if (!UI)
return false;

Builder.SetInsertPoint(UI->getNextNonDebugInstruction());

// We cannot use IRBuilder since it might do the obvious folding, which
// would yield an undef value of a possibly primitive type, which cannot be
// disambiguated from other undefs of the same primitive type and would
// cause havoc when replaced with the promotable cast created later.
Value *UD = UndefValue::get(Builder.getInt8Ty());
Value *Tmp = CastInst::CreateBitOrPointerCast(UD, UI->getType(), "Tmp",
&*Builder.GetInsertPoint());

createPromotableCast(Builder, UI, Tmp);

return true;
}
// TODO: Whilst ConstantExpr and Operator handling could obviously be folded
// into a single function, we leave them separate for now to allow
// possible additional development.
bool maybeHandleInstruction(IRBuilder<>& Builder, Instruction *I) {
if (!I)
return false;

if (!I->getType()->isPointerTy())
return false;
if (I->getType()->getPointerAddressSpace() != FlatAddrSpace)
return false;

if (auto LI = dyn_cast<LoadInst>(I))
return maybePromoteUse(Builder, LI);
if (auto PHI = dyn_cast<PHINode>(I)) {
return false;
}
if (auto SEL = dyn_cast<SelectInst>(I))
return false;

return false;
}

bool maybeHandleOperator(IRBuilder<>& Builder, Operator *Op) {
if (!Op)
return false;
if (!Op->getType()->isPointerTy())
return false;

bool Modified = false;
for (auto &&U : Op->users()) {
if (maybeHandleConstantExpr(Builder, dyn_cast<ConstantExpr>(U)))
Modified = true;
else if (maybeHandleOperator(Builder, dyn_cast<Operator>(U)))
Modified = true;
else if (maybeHandleInstruction(Builder, dyn_cast<Instruction>(U)))
Modified = true;
}

return Modified;
}

bool maybeHandleConstantExpr(IRBuilder<>& Builder, ConstantExpr *CE) {
if (!CE)
return false;
if (!CE->getType()->isPointerTy())
return false;

bool Modified = false;
for (auto &&U : CE->users()) {
if (maybeHandleConstantExpr(Builder, dyn_cast<ConstantExpr>(U)))
Modified = true;
else if (maybeHandleInstruction(Builder, dyn_cast<Instruction>(U)))
Modified = true;
else if (maybeHandleOperator(Builder, dyn_cast<Operator>(U)))
Modified = true;
}

return Modified;
}
public:
static char ID;
PromoteConstant() : ModulePass{ID} {}

bool runOnModule(Module &M) override {
SmallVector<GlobalVariable *, 8u> PromotableGlobals;
for (auto &&GV : M.globals())
if (GV.getAddressSpace() == ConstantAddrSpace)
PromotableGlobals.push_back(&GV);

if (PromotableGlobals.empty())
return false;

IRBuilder<> Builder(M.getContext());

bool Modified = false;
for (auto &&GV : PromotableGlobals) {
for (auto &&U : GV->users()) {
if (maybeHandleConstantExpr(Builder, dyn_cast<ConstantExpr>(U)))
Modified = true;
else if (maybeHandleInstruction(Builder, dyn_cast<Instruction>(U)))
Modified = true;
else if (maybeHandleOperator(Builder, dyn_cast<Operator>(U)))
Modified = true;
}
}

return Modified;
}
};
// Storage for the pass identifier handed to the ModulePass constructor.
char PromoteConstant::ID = 0;

// Registers the pass under the "promote-constant" argument; both trailing
// flags (CFG-only, analysis) are off.
static RegisterPass<PromoteConstant> X(
    "promote-constant",
    "Promotes users of variables annotated with __constant__ to refer to the "
    "global address space iff the user produces a flat pointer.",
    /*CFGOnly=*/false,
    /*is_analysis=*/false);
}
Loading