1 //===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass creates a unified init and fini kernel with the required metadata
11 //===----------------------------------------------------------------------===//
13 #include "NVPTXCtorDtorLowering.h"
14 #include "MCTargetDesc/NVPTXBaseInfo.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/GlobalVariable.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/MD5.h"
26 #include "llvm/Transforms/Utils/ModuleUtils.h"
30 #define DEBUG_TYPE "nvptx-lower-ctor-dtor"
// Hidden string option, empty by default. When it is non-empty it is used as
// the ID embedded in the names of the emitted ctor/dtor globals; otherwise a
// hash of the module's source file name is used (see createInitOrFiniGlobals).
32 static cl::opt
<std::string
>
33 GlobalStr("nvptx-lower-global-ctor-dtor-id",
34 cl::desc("Override unique ID of ctor/dtor globals."),
35 cl::init(""), cl::Hidden
);
// Hidden option, true by default, described as controlling emission of the
// kernels that call the ctor/dtor globals.
// NOTE(review): the head of this declaration (storage class and option type)
// is not visible in this listing -- confirm against the original file.
38 CreateKernels("nvptx-emit-init-fini-kernel",
39 cl::desc("Emit kernels to call ctor/dtor globals."),
40 cl::init(true), cl::Hidden
);
// Returns the value of Hash.low() formatted as a lowercase hexadecimal string.
// NOTE(review): the statements that feed Str into the hasher and fill in Hash
// are not visible in this listing; presumably Hash is the MD5 digest of Str --
// confirm against the original file.
44 static std::string
getHash(StringRef Str
) {
46 llvm::MD5::MD5Result Hash
;
// Only the low() part of the digest is used for the returned identifier.
49 return llvm::utohexstr(Hash
.low(), /*LowerCase=*/true);
52 static void addKernelMetadata(Module
&M
, GlobalValue
*GV
) {
53 llvm::LLVMContext
&Ctx
= M
.getContext();
55 // Get "nvvm.annotations" metadata node.
56 llvm::NamedMDNode
*MD
= M
.getOrInsertNamedMetadata("nvvm.annotations");
58 llvm::Metadata
*KernelMDVals
[] = {
59 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "kernel"),
60 llvm::ConstantAsMetadata::get(
61 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
63 // This kernel is only to be called single-threaded.
64 llvm::Metadata
*ThreadXMDVals
[] = {
65 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidx"),
66 llvm::ConstantAsMetadata::get(
67 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
68 llvm::Metadata
*ThreadYMDVals
[] = {
69 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidy"),
70 llvm::ConstantAsMetadata::get(
71 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
72 llvm::Metadata
*ThreadZMDVals
[] = {
73 llvm::ConstantAsMetadata::get(GV
), llvm::MDString::get(Ctx
, "maxntidz"),
74 llvm::ConstantAsMetadata::get(
75 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
77 llvm::Metadata
*BlockMDVals
[] = {
78 llvm::ConstantAsMetadata::get(GV
),
79 llvm::MDString::get(Ctx
, "maxclusterrank"),
80 llvm::ConstantAsMetadata::get(
81 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx
), 1))};
83 // Append metadata to nvvm.annotations.
84 MD
->addOperand(llvm::MDNode::get(Ctx
, KernelMDVals
));
85 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadXMDVals
));
86 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadYMDVals
));
87 MD
->addOperand(llvm::MDNode::get(Ctx
, ThreadZMDVals
));
88 MD
->addOperand(llvm::MDNode::get(Ctx
, BlockMDVals
));
// Creates the kernel function "nvptx$device$init" (IsCtor) or
// "nvptx$device$fini" (!IsCtor) in M: a void function taking no arguments with
// WeakODR linkage, annotated through addKernelMetadata(), and returns it.
// NOTE(review): the statement controlled by the `if (M.getFunction(...))`
// check below is not visible in this listing. The caller tests the result
// against null, so presumably nullptr is returned when a function with that
// name already exists -- confirm against the original file.
91 static Function
*createInitOrFiniKernelFunction(Module
&M
, bool IsCtor
) {
92 StringRef InitOrFiniKernelName
=
93 IsCtor
? "nvptx$device$init" : "nvptx$device$fini";
94 if (M
.getFunction(InitOrFiniKernelName
))
97 Function
*InitOrFiniKernel
= Function::createWithDefaultAttr(
98 FunctionType::get(Type::getVoidTy(M
.getContext()), false),
99 GlobalValue::WeakODRLinkage
, 0, InitOrFiniKernelName
, &M
);
// Mark the new function as a kernel in "nvvm.annotations".
100 addKernelMetadata(M
, InitOrFiniKernel
);
102 return InitOrFiniKernel
;
105 // We create the IR required to call each callback in this section. This is
106 // equivalent to the following code. Normally, the linker would provide us with
107 // the definitions of the init and fini array sections. The 'nvlink' linker does
108 // not do this so initializing these values is done by the runtime.
110 // extern "C" void **__init_array_start = nullptr;
111 // extern "C" void **__init_array_end = nullptr;
112 // extern "C" void **__fini_array_start = nullptr;
113 // extern "C" void **__fini_array_end = nullptr;
115 // using InitCallback = void();
116 // using FiniCallback = void();
118 // void call_init_array_callbacks() {
119 // for (auto start = __init_array_start; start != __init_array_end; ++start)
120 // reinterpret_cast<InitCallback *>(*start)();
123 // void call_fini_array_callbacks() {
124 // size_t fini_array_size = __fini_array_end - __fini_array_start;
125 // for (size_t i = fini_array_size; i > 0; --i)
126 // reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
// Fills F with the IR that walks a callback array and calls every entry with
// no arguments. The array bounds are read from the globals
// __init_array_start/__init_array_end (IsCtor) or
// __fini_array_start/__fini_array_end (!IsCtor). Three basic blocks are
// created: "entry", "while.entry" (the loop) and "while.end" (the exit).
// NOTE(review): several statements of this function are not visible in this
// listing (for example the tails of the two global-creation callbacks, the
// condition guarding the reverse-iteration setup, the entry-block branch
// operands, the declaration receiving the "next" GEP and whatever terminates
// the exit block). The comments below describe only what is shown.
128 static void createInitOrFiniCalls(Function
&F
, bool IsCtor
) {
129 Module
&M
= *F
.getParent();
130 LLVMContext
&C
= M
.getContext();
// The builder starts out positioned in the freshly created entry block.
132 IRBuilder
<> IRB(BasicBlock::Create(C
, "entry", &F
));
133 auto *LoopBB
= BasicBlock::Create(C
, "while.entry", &F
);
134 auto *ExitBB
= BasicBlock::Create(C
, "while.end", &F
);
// Pointer type in the global address space; used for the loop PHI and GEPs.
135 Type
*PtrTy
= IRB
.getPtrTy(llvm::ADDRESS_SPACE_GLOBAL
);
// Look up the "start" global, creating it when absent. The creation callback
// builds a non-constant, weak, null-initialized pointer variable in the global
// address space and gives it protected visibility.
137 auto *Begin
= M
.getOrInsertGlobal(
138 IsCtor
? "__init_array_start" : "__fini_array_start",
139 PointerType::get(C
, 0), [&]() {
140 auto *GV
= new GlobalVariable(
141 M
, PointerType::get(C
, 0),
142 /*isConstant=*/false, GlobalValue::WeakAnyLinkage
,
143 Constant::getNullValue(PointerType::get(C
, 0)),
144 IsCtor
? "__init_array_start" : "__fini_array_start",
145 /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal
,
146 /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL
);
147 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
// Same as above for the matching "end" global.
150 auto *End
= M
.getOrInsertGlobal(
151 IsCtor
? "__init_array_end" : "__fini_array_end", PointerType::get(C
, 0),
153 auto *GV
= new GlobalVariable(
154 M
, PointerType::get(C
, 0),
155 /*isConstant=*/false, GlobalValue::WeakAnyLinkage
,
156 Constant::getNullValue(PointerType::get(C
, 0)),
157 IsCtor
? "__init_array_end" : "__fini_array_end",
158 /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal
,
159 /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL
);
160 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
164 // The constructor type is supposed to allow using the argument vectors, but
165 // for now we just call them with no arguments.
166 auto *CallBackTy
= FunctionType::get(IRB
.getVoidTy(), {});
168 // The destructor array must be called in reverse order. Get an expression to
169 // the end of the array and iterate backwards in that case.
// Both bounds are loaded using the type of Begin.
170 Value
*BeginVal
= IRB
.CreateLoad(Begin
->getType(), Begin
, "begin");
171 Value
*EndVal
= IRB
.CreateLoad(Begin
->getType(), End
, "stop");
// Element count: (end - begin) as 64-bit integers, arithmetically shifted
// right by 3, i.e. the byte distance divided by 8.
// NOTE(review): the condition under which this reverse setup runs is not
// visible here; per the comment above it is presumably the destructor case.
173 auto *BeginInt
= IRB
.CreatePtrToInt(BeginVal
, IntegerType::getInt64Ty(C
));
174 auto *EndInt
= IRB
.CreatePtrToInt(EndVal
, IntegerType::getInt64Ty(C
));
175 auto *SubInst
= IRB
.CreateSub(EndInt
, BeginInt
);
176 auto *Offset
= IRB
.CreateAShr(
177 SubInst
, ConstantInt::get(IntegerType::getInt64Ty(C
), 3), "offset",
// Address `Offset` elements past the loaded begin pointer.
179 auto *ValuePtr
= IRB
.CreateGEP(PointerType::get(C
, 0), BeginVal
,
180 ArrayRef
<Value
*>({Offset
}));
// Step back by one element so BeginVal addresses the last array entry.
182 BeginVal
= IRB
.CreateInBoundsGEP(
183 PointerType::get(C
, 0), ValuePtr
,
184 ArrayRef
<Value
*>(ConstantInt::get(IntegerType::getInt64Ty(C
), -1)),
// Entry comparison: "not equal" for constructors, unsigned "greater than" for
// destructors, with BeginVal as the left-hand operand.
188 IRB
.CreateCmp(IsCtor
? ICmpInst::ICMP_NE
: ICmpInst::ICMP_UGT
, BeginVal
,
// Loop body: the PHI carries the address of the current slot; the callback
// pointer stored there is loaded and called.
191 IRB
.SetInsertPoint(LoopBB
);
192 auto *CallBackPHI
= IRB
.CreatePHI(PtrTy
, 2, "ptr");
193 auto *CallBack
= IRB
.CreateLoad(IRB
.getPtrTy(F
.getAddressSpace()),
194 CallBackPHI
, "callback");
195 IRB
.CreateCall(CallBackTy
, CallBack
);
// Advance the slot pointer by +1 (constructors) or -1 (destructors).
197 IRB
.CreateConstGEP1_64(PtrTy
, CallBackPHI
, IsCtor
? 1 : -1, "next");
// Leave the loop when the next slot equals EndVal (constructors) or is
// unsigned-less-than EndVal (destructors).
198 auto *EndCmp
= IRB
.CreateCmp(IsCtor
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_ULT
,
199 NewCallBack
, EndVal
, "end");
// The PHI receives the initial slot from the entry block and the advanced
// slot from the loop's own back edge.
200 CallBackPHI
->addIncoming(BeginVal
, &F
.getEntryBlock());
201 CallBackPHI
->addIncoming(NewCallBack
, LoopBB
);
202 IRB
.CreateCondBr(EndCmp
, ExitBB
, LoopBB
);
203 IRB
.SetInsertPoint(ExitBB
);
// For every entry of GV's ConstantArray initializer, emits a constant global
// with external linkage that is initialized with the entry's function and is
// named
//   __init_array_object_<function>_<id>_<priority>   (IsCtor)
//   __fini_array_object_<function>_<id>_<priority>   (!IsCtor)
// where <id> is the GlobalStr option when non-empty, otherwise a hash of the
// module's source file name. Each new global gets a ".init_array.<priority>" or
// ".fini_array.<priority>" section, protected visibility, and is appended to
// the module's used list.
// NOTE(review): the end of the parameter list, the statement controlled by the
// `!GA || ...` check, the tail of the NameStr expression, the last argument of
// the GlobalVariable constructor and the final return are not visible in this
// listing -- confirm against the original file.
207 static bool createInitOrFiniGlobals(Module
&M
, GlobalVariable
*GV
,
209 ConstantArray
*GA
= dyn_cast
<ConstantArray
>(GV
->getInitializer());
// Anything other than a non-empty ConstantArray initializer is special-cased.
210 if (!GA
|| GA
->getNumOperands() == 0)
213 // NVPTX has no way to emit variables at specific sections or support for
214 // the traditional constructor sections. Instead, we emit mangled global
215 // names so the runtime can build the list manually.
216 for (Value
*V
: GA
->operands()) {
// Operand 0 of each entry is read as the priority, operand 1 as the function.
217 auto *CS
= cast
<ConstantStruct
>(V
);
218 auto *F
= cast
<Constant
>(CS
->getOperand(1));
// NOTE(review): a sign-extended value is stored into an unsigned variable
// here -- confirm this is intended for the expected range of priorities.
219 uint64_t Priority
= cast
<ConstantInt
>(CS
->getOperand(0))->getSExtValue();
220 std::string PriorityStr
= "." + std::to_string(Priority
);
221 // We append a semi-unique hash and the priority to the global name.
222 std::string GlobalID
=
223 !GlobalStr
.empty() ? GlobalStr
: getHash(M
.getSourceFileName());
224 std::string NameStr
=
225 ((IsCtor
? "__init_array_object_" : "__fini_array_object_") +
226 F
->getName() + "_" + GlobalID
+ "_" + std::to_string(Priority
))
228 // PTX does not support exported names with '.' in them.
229 llvm::transform(NameStr
, NameStr
.begin(),
230 [](char c
) { return c
== '.' ? '_' : c
; });
// This GV is a new local that shadows the function parameter of the same name.
232 auto *GV
= new GlobalVariable(M
, F
->getType(), /*IsConstant=*/true,
233 GlobalValue::ExternalLinkage
, F
, NameStr
,
234 nullptr, GlobalValue::NotThreadLocal
,
236 // This isn't respected by Nvidia, simply put here for clarity.
237 GV
->setSection(IsCtor
? ".init_array" + PriorityStr
238 : ".fini_array" + PriorityStr
);
239 GV
->setVisibility(GlobalVariable::ProtectedVisibility
);
// Add the global to the module's used list.
240 appendToUsed(M
, {GV
});
// Lowers the global variable named GlobalName in M. Visible steps, in order:
// look the variable up, emit the per-entry globals through
// createInitOrFiniGlobals(), create the init/fini kernel function, fill it
// with the calling loop through createInitOrFiniCalls(), and finally erase
// the original variable from the module.
// NOTE(review): the end of the parameter list and the statements controlled by
// each `if` below (plus anything between them and the final return) are not
// visible in this listing -- confirm against the original file.
246 static bool createInitOrFiniKernel(Module
&M
, StringRef GlobalName
,
248 GlobalVariable
*GV
= M
.getGlobalVariable(GlobalName
);
// A missing variable or one without an initializer is special-cased.
249 if (!GV
|| !GV
->hasInitializer())
252 if (!createInitOrFiniGlobals(M
, GV
, IsCtor
))
258 Function
*InitOrFiniKernel
= createInitOrFiniKernelFunction(M
, IsCtor
);
// A null result from the helper is special-cased before any IR is emitted.
259 if (!InitOrFiniKernel
)
262 createInitOrFiniCalls(*InitOrFiniKernel
, IsCtor
);
// Remove the original ctor/dtor list from the module.
264 GV
->eraseFromParent();
// Runs the lowering on M for "llvm.global_ctors" and then for
// "llvm.global_dtors", OR-ing both results into Modified.
// NOTE(review): the return statement is not visible in this listing;
// presumably Modified is returned -- confirm against the original file.
268 static bool lowerCtorsAndDtors(Module
&M
) {
269 bool Modified
= false;
270 Modified
|= createInitOrFiniKernel(M
, "llvm.global_ctors", /*IsCtor =*/true);
271 Modified
|= createInitOrFiniKernel(M
, "llvm.global_dtors", /*IsCtor =*/false);
// Legacy pass manager wrapper: a ModulePass whose runOnModule() forwards to
// lowerCtorsAndDtors() and returns its result.
// NOTE(review): the access specifier and the declaration of the ID member
// passed to ModulePass are not visible in this listing.
275 class NVPTXCtorDtorLoweringLegacy final
: public ModulePass
{
278 NVPTXCtorDtorLoweringLegacy() : ModulePass(ID
) {}
279 bool runOnModule(Module
&M
) override
{ return lowerCtorsAndDtors(M
); }
282 } // End anonymous namespace
284 PreservedAnalyses
NVPTXCtorDtorLoweringPass::run(Module
&M
,
285 ModuleAnalysisManager
&AM
) {
286 return lowerCtorsAndDtors(M
) ? PreservedAnalyses::none()
287 : PreservedAnalyses::all();
// Definition of the legacy pass's ID member, and a reference in the llvm
// namespace, NVPTXCtorDtorLoweringLegacyPassID, bound to that same object.
290 char NVPTXCtorDtorLoweringLegacy::ID
= 0;
291 char &llvm::NVPTXCtorDtorLoweringLegacyPassID
= NVPTXCtorDtorLoweringLegacy::ID
;
// Registers the legacy pass using DEBUG_TYPE ("nvptx-lower-ctor-dtor") and the
// description "Lower ctors and dtors for NVPTX".
292 INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy
, DEBUG_TYPE
,
293 "Lower ctors and dtors for NVPTX", false, false)
// Factory for the legacy pass: returns a newly allocated
// NVPTXCtorDtorLoweringLegacy instance.
295 ModulePass
*llvm::createNVPTXCtorDtorLoweringLegacyPass() {
296 return new NVPTXCtorDtorLoweringLegacy();