1 //===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass creates a unified init and fini kernel with the required metadata
11 //===----------------------------------------------------------------------===//
13 #include "NVPTXCtorDtorLowering.h"
14 #include "MCTargetDesc/NVPTXBaseInfo.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/GlobalVariable.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Transforms/Utils/ModuleUtils.h"
29 #define DEBUG_TYPE "nvptx-lower-ctor-dtor"
31 static cl::opt
<std::string
>
32 GlobalStr("nvptx-lower-global-ctor-dtor-id",
33 cl::desc("Override unique ID of ctor/dtor globals."),
34 cl::init(""), cl::Hidden
);
37 CreateKernels("nvptx-emit-init-fini-kernel",
38 cl::desc("Emit kernels to call ctor/dtor globals."),
39 cl::init(true), cl::Hidden
);
43 static std::string
getHash(StringRef Str
) {
45 llvm::MD5::MD5Result Hash
;
48 return llvm::utohexstr(Hash
.low(), /*LowerCase=*/true);
51 static void addKernelMetadata(Module
&M
, GlobalValue
*GV
) {
52 llvm::LLVMContext
&Ctx
= M
.getContext();
54 // Get "nvvm.annotations" metadata node.
55 llvm::NamedMDNode
*MD
= M
.getOrInsertNamedMetadata("nvvm.annotations");
57 llvm::Metadata
*KernelMDVals
[] = {
58 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "kernel"),
59 llvm::ConstantAsMetadata::get(
60 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
62 // This kernel is only to be called single-threaded.
63 llvm::Metadata
*ThreadXMDVals
[] = {
64 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidx"),
65 llvm::ConstantAsMetadata::get(
66 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
67 llvm::Metadata
*ThreadYMDVals
[] = {
68 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidy"),
69 llvm::ConstantAsMetadata::get(
70 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
71 llvm::Metadata
*ThreadZMDVals
[] = {
72 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidz"),
73 llvm::ConstantAsMetadata::get(
74 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
76 llvm::Metadata
*BlockMDVals
[] = {
77 llvm::ConstantAsMetadata::get(GV
),
78 llvm::MDString::get(Ctx
, "maxclusterrank"),
79 llvm::ConstantAsMetadata::get(
80 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
82 // Append metadata to nvvm.annotations.
83 MD
->addOperand(llvm::MDNode::get(Ctx
, KernelMDVals
));
84 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadXMDVals
));
85 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadYMDVals
));
86 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadZMDVals
));
87 MD
->addOperand(llvm::MDNode::get(Ctx
, BlockMDVals
));
90 static Function
*createInitOrFiniKernelFunction(Module
&M
, bool IsCtor
) {
91 StringRef InitOrFiniKernelName
=
92 IsCtor
? "nvptx$device$init" : "nvptx$device$fini";
93 if (M
.getFunction(InitOrFiniKernelName
))
96 Function
*InitOrFiniKernel
= Function::createWithDefaultAttr(
97 FunctionType::get(Type::getVoidTy(M
.getContext()), false),
98 GlobalValue::WeakODRLinkage
, 0, InitOrFiniKernelName
, &M
);
99 addKernelMetadata(M
, InitOrFiniKernel
);
101 return InitOrFiniKernel
;
// We create the IR required to call each callback in this section. This is
// equivalent to the following code. Normally, the linker would provide us with
// the definitions of the init and fini array sections. The 'nvlink' linker does
// not do this so initializing these values is done by the runtime.
//
// extern "C" void **__init_array_start = nullptr;
// extern "C" void **__init_array_end = nullptr;
// extern "C" void **__fini_array_start = nullptr;
// extern "C" void **__fini_array_end = nullptr;
//
// using InitCallback = void();
// using FiniCallback = void();
//
// void call_init_array_callbacks() {
//   for (auto start = __init_array_start; start != __init_array_end; ++start)
//     reinterpret_cast<InitCallback *>(*start)();
// }
//
// void call_fini_array_callbacks() {
//   size_t fini_array_size = __fini_array_end - __fini_array_start;
//   for (size_t i = fini_array_size; i > 0; --i)
//     reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
// }
127 static void createInitOrFiniCalls(Function
&F
, bool IsCtor
) {
128 Module
&M
= *F
.getParent();
129 LLVMContext
&C
= M
.getContext();
131 IRBuilder
<> IRB(BasicBlock::Create(C
, "entry", &F
));
132 auto *LoopBB
= BasicBlock::Create(C
, "while.entry", &F
);
133 auto *ExitBB
= BasicBlock::Create(C
, "while.end", &F
);
134 Type
*PtrTy
= IRB
.getPtrTy(llvm::ADDRESS_SPACE_GLOBAL
);
136 auto *Begin
= M
.getOrInsertGlobal(
137 IsCtor
? "__init_array_start" : "__fini_array_start",
138 PointerType::get(C
, 0), [&]() {
139 auto *GV
= new GlobalVariable(
140 M
, PointerType::get(C
, 0),
141 /*isConstant=*/false, GlobalValue::WeakAnyLinkage
,
142 Constant::getNullValue(PointerType::get(C
, 0)),
143 IsCtor
? "__init_array_start" : "__fini_array_start",
144 /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal
,
145 /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL
);
146 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
149 auto *End
= M
.getOrInsertGlobal(
150 IsCtor
? "__init_array_end" : "__fini_array_end", PointerType::get(C
, 0),
152 auto *GV
= new GlobalVariable(
153 M
, PointerType::get(C
, 0),
154 /*isConstant=*/false, GlobalValue::WeakAnyLinkage
,
155 Constant::getNullValue(PointerType::get(C
, 0)),
156 IsCtor
? "__init_array_end" : "__fini_array_end",
157 /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal
,
158 /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL
);
159 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
163 // The constructor type is suppoed to allow using the argument vectors, but
164 // for now we just call them with no arguments.
165 auto *CallBackTy
= FunctionType::get(IRB
.getVoidTy(), {});
167 // The destructor array must be called in reverse order. Get an expression to
168 // the end of the array and iterate backwards in that case.
169 Value
*BeginVal
= IRB
.CreateLoad(Begin
->getType(), Begin
, "begin");
170 Value
*EndVal
= IRB
.CreateLoad(Begin
->getType(), End
, "stop");
172 auto *BeginInt
= IRB
.CreatePtrToInt(BeginVal
, IntegerType::getInt64Ty(C
));
173 auto *EndInt
= IRB
.CreatePtrToInt(EndVal
, IntegerType::getInt64Ty(C
));
174 auto *SubInst
= IRB
.CreateSub(EndInt
, BeginInt
);
175 auto *Offset
= IRB
.CreateAShr(
176 SubInst
, ConstantInt::get(IntegerType::getInt64Ty(C
), 3), "offset",
178 auto *ValuePtr
= IRB
.CreateGEP(PointerType::get(C
, 0), BeginVal
,
179 ArrayRef
<Value
*>({Offset
}));
181 BeginVal
= IRB
.CreateInBoundsGEP(
182 PointerType::get(C
, 0), ValuePtr
,
183 ArrayRef
<Value
*>(ConstantInt::get(IntegerType::getInt64Ty(C
), -1)),
187 IRB
.CreateCmp(IsCtor
? ICmpInst::ICMP_NE
: ICmpInst::ICMP_UGT
, BeginVal
,
190 IRB
.SetInsertPoint(LoopBB
);
191 auto *CallBackPHI
= IRB
.CreatePHI(PtrTy
, 2, "ptr");
192 auto *CallBack
= IRB
.CreateLoad(IRB
.getPtrTy(F
.getAddressSpace()),
193 CallBackPHI
, "callback");
194 IRB
.CreateCall(CallBackTy
, CallBack
);
196 IRB
.CreateConstGEP1_64(PtrTy
, CallBackPHI
, IsCtor
? 1 : -1, "next");
197 auto *EndCmp
= IRB
.CreateCmp(IsCtor
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_ULT
,
198 NewCallBack
, EndVal
, "end");
199 CallBackPHI
->addIncoming(BeginVal
, &F
.getEntryBlock());
200 CallBackPHI
->addIncoming(NewCallBack
, LoopBB
);
201 IRB
.CreateCondBr(EndCmp
, ExitBB
, LoopBB
);
202 IRB
.SetInsertPoint(ExitBB
);
206 static bool createInitOrFiniGlobals(Module
&M
, GlobalVariable
*GV
,
208 ConstantArray
*GA
= dyn_cast
<ConstantArray
>(GV
->getInitializer());
209 if (!GA
|| GA
->getNumOperands() == 0)
212 // NVPTX has no way to emit variables at specific sections or support for
213 // the traditional constructor sections. Instead, we emit mangled global
214 // names so the runtime can build the list manually.
215 for (Value
*V
: GA
->operands()) {
216 auto *CS
= cast
<ConstantStruct
>(V
);
217 auto *F
= cast
<Constant
>(CS
->getOperand(1));
218 uint64_t Priority
= cast
<ConstantInt
>(CS
->getOperand(0))->getSExtValue();
219 std::string PriorityStr
= "." + std::to_string(Priority
);
220 // We append a semi-unique hash and the priority to the global name.
221 std::string GlobalID
=
222 !GlobalStr
.empty() ? GlobalStr
: getHash(M
.getSourceFileName());
223 std::string NameStr
=
224 ((IsCtor
? "__init_array_object_" : "__fini_array_object_") +
225 F
->getName() + "_" + GlobalID
+ "_" + std::to_string(Priority
))
227 // PTX does not support exported names with '.' in them.
228 llvm::transform(NameStr
, NameStr
.begin(),
229 [](char c
) { return c
== '.' ? '_' : c
; });
231 auto *GV
= new GlobalVariable(M
, F
->getType(), /*IsConstant=*/true,
232 GlobalValue::ExternalLinkage
, F
, NameStr
,
233 nullptr, GlobalValue::NotThreadLocal
,
235 // This isn't respected by Nvidia, simply put here for clarity.
236 GV
->setSection(IsCtor
? ".init_array" + PriorityStr
237 : ".fini_array" + PriorityStr
);
238 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
239 appendToUsed(M
, {GV
});
245 static bool createInitOrFiniKernel(Module
&M
, StringRef GlobalName
,
247 GlobalVariable
*GV
= M
.getGlobalVariable(GlobalName
);
248 if (!GV
|| !GV
->hasInitializer())
251 if (!createInitOrFiniGlobals(M
, GV
, IsCtor
))
257 Function
*InitOrFiniKernel
= createInitOrFiniKernelFunction(M
, IsCtor
);
258 if (!InitOrFiniKernel
)
261 createInitOrFiniCalls(*InitOrFiniKernel
, IsCtor
);
263 GV
->eraseFromParent();
267 static bool lowerCtorsAndDtors(Module
&M
) {
268 bool Modified
= false;
269 Modified
|= createInitOrFiniKernel(M
, "llvm.global_ctors", /*IsCtor =*/true);
270 Modified
|= createInitOrFiniKernel(M
, "llvm.global_dtors", /*IsCtor =*/false);
274 class NVPTXCtorDtorLoweringLegacy final
: public ModulePass
{
277 NVPTXCtorDtorLoweringLegacy() : ModulePass(ID
) {}
278 bool runOnModule(Module
&M
) override
{ return lowerCtorsAndDtors(M
); }
281 } // End anonymous namespace
283 PreservedAnalyses
NVPTXCtorDtorLoweringPass::run(Module
&M
,
284 ModuleAnalysisManager
&AM
) {
285 return lowerCtorsAndDtors(M
) ? PreservedAnalyses::none()
286 : PreservedAnalyses::all();
289 char NVPTXCtorDtorLoweringLegacy::ID
= 0;
290 char &llvm::NVPTXCtorDtorLoweringLegacyPassID
= NVPTXCtorDtorLoweringLegacy::ID
;
291 INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy
, DEBUG_TYPE
,
292 "Lower ctors and dtors for NVPTX", false, false)
294 ModulePass
*llvm::createNVPTXCtorDtorLoweringLegacyPass() {
295 return new NVPTXCtorDtorLoweringLegacy();