1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // This pass binds printf calls to a kernel arg pointer that will be bound to a
11 // buffer later by the runtime.
13 // This pass traverses the functions in the module and converts
14 // each call to printf to a sequence of operations that
15 // store the following into the printf buffer:
16 // - format string (passed as a module's metadata unique ID)
17 // - bitwise copies of printf arguments
18 // The backend passes will need to store metadata in the kernel
19 //===----------------------------------------------------------------------===//
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/Triple.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/TargetLibraryInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/GlobalVariable.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #define DEBUG_TYPE "printfToRuntime"
46 class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final
52 explicit AMDGPUPrintfRuntimeBinding();
55 bool runOnModule(Module
&M
) override
;
56 void getConversionSpecifiers(SmallVectorImpl
<char> &OpConvSpecifiers
,
57 StringRef fmt
, size_t num_ops
) const;
59 bool shouldPrintAsStr(char Specifier
, Type
*OpType
) const;
61 lowerPrintfForGpu(Module
&M
,
62 function_ref
<const TargetLibraryInfo
&(Function
&)> GetTLI
);
64 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
65 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
66 AU
.addRequired
<DominatorTreeWrapperPass
>();
69 Value
*simplify(Instruction
*I
, const TargetLibraryInfo
*TLI
) {
70 return SimplifyInstruction(I
, {*TD
, TLI
, DT
});
74 const DominatorTree
*DT
;
75 SmallVector
<CallInst
*, 32> Printfs
;
79 char AMDGPUPrintfRuntimeBinding::ID
= 0;
81 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding
,
82 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
84 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
85 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
86 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding
, "amdgpu-printf-runtime-binding",
87 "AMDGPU Printf lowering", false, false)
89 char &llvm::AMDGPUPrintfRuntimeBindingID
= AMDGPUPrintfRuntimeBinding::ID
;
92 ModulePass
*createAMDGPUPrintfRuntimeBinding() {
93 return new AMDGPUPrintfRuntimeBinding();
97 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding()
98 : ModulePass(ID
), TD(nullptr), DT(nullptr) {
99 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry());
102 void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers(
103 SmallVectorImpl
<char> &OpConvSpecifiers
, StringRef Fmt
,
104 size_t NumOps
) const {
105 // Not all format characters are collected.
106 // At this time the format characters of interest
107 // are %p and %s, which are used to determine
108 // whether we are storing a literal string or a
109 // pointer value in the printf buffer.
110 static const char ConvSpecifiers
[] = "cdieEfgGaosuxXp";
111 size_t CurFmtSpecifierIdx
= 0;
112 size_t PrevFmtSpecifierIdx
= 0;
114 while ((CurFmtSpecifierIdx
= Fmt
.find_first_of(
115 ConvSpecifiers
, CurFmtSpecifierIdx
)) != StringRef::npos
) {
116 bool ArgDump
= false;
117 StringRef CurFmt
= Fmt
.substr(PrevFmtSpecifierIdx
,
118 CurFmtSpecifierIdx
- PrevFmtSpecifierIdx
);
119 size_t pTag
= CurFmt
.find_last_of("%");
120 if (pTag
!= StringRef::npos
) {
122 while (pTag
&& CurFmt
[--pTag
] == '%') {
128 OpConvSpecifiers
.push_back(Fmt
[CurFmtSpecifierIdx
]);
130 PrevFmtSpecifierIdx
= ++CurFmtSpecifierIdx
;
134 bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier
,
135 Type
*OpType
) const {
136 if (Specifier
!= 's')
138 const PointerType
*PT
= dyn_cast
<PointerType
>(OpType
);
139 if (!PT
|| PT
->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS
)
141 Type
*ElemType
= PT
->getContainedType(0);
142 if (ElemType
->getTypeID() != Type::IntegerTyID
)
144 IntegerType
*ElemIType
= cast
<IntegerType
>(ElemType
);
145 return ElemIType
->getBitWidth() == 8;
148 bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
149 Module
&M
, function_ref
<const TargetLibraryInfo
&(Function
&)> GetTLI
) {
150 LLVMContext
&Ctx
= M
.getContext();
151 IRBuilder
<> Builder(Ctx
);
152 Type
*I32Ty
= Type::getInt32Ty(Ctx
);
154 // NB: It is important for the size of this string to be divisible by 4
155 const char NonLiteralStr
[4] = "???";
157 for (auto CI
: Printfs
) {
158 unsigned NumOps
= CI
->getNumArgOperands();
160 SmallString
<16> OpConvSpecifiers
;
161 Value
*Op
= CI
->getArgOperand(0);
163 if (auto LI
= dyn_cast
<LoadInst
>(Op
)) {
164 Op
= LI
->getPointerOperand();
165 for (auto Use
: Op
->users()) {
166 if (auto SI
= dyn_cast
<StoreInst
>(Use
)) {
167 Op
= SI
->getValueOperand();
173 if (auto I
= dyn_cast
<Instruction
>(Op
)) {
174 Value
*Op_simplified
= simplify(I
, &GetTLI(*I
->getFunction()));
179 ConstantExpr
*ConstExpr
= dyn_cast
<ConstantExpr
>(Op
);
182 GlobalVariable
*GVar
= dyn_cast
<GlobalVariable
>(ConstExpr
->getOperand(0));
184 StringRef
Str("unknown");
185 if (GVar
&& GVar
->hasInitializer()) {
186 auto Init
= GVar
->getInitializer();
187 if (auto CA
= dyn_cast
<ConstantDataArray
>(Init
)) {
189 Str
= CA
->getAsCString();
190 } else if (isa
<ConstantAggregateZero
>(Init
)) {
194 // We need this call to determine
195 // whether we are printing a string
196 // or a pointer. It extracts the
197 // specifiers and fills up the first
199 getConversionSpecifiers(OpConvSpecifiers
, Str
, NumOps
- 1);
201 // Add metadata for the string
202 std::string AStreamHolder
;
203 raw_string_ostream
Sizes(AStreamHolder
);
204 int Sum
= DWORD_ALIGN
;
205 Sizes
<< CI
->getNumArgOperands() - 1;
207 for (unsigned ArgCount
= 1; ArgCount
< CI
->getNumArgOperands() &&
208 ArgCount
<= OpConvSpecifiers
.size();
210 Value
*Arg
= CI
->getArgOperand(ArgCount
);
211 Type
*ArgType
= Arg
->getType();
212 unsigned ArgSize
= TD
->getTypeAllocSizeInBits(ArgType
);
213 ArgSize
= ArgSize
/ 8;
215 // ArgSize by design should be a multiple of DWORD_ALIGN;
216 // expand the arguments that do not follow this rule.
218 if (ArgSize
% DWORD_ALIGN
!= 0) {
219 llvm::Type
*ResType
= llvm::Type::getInt32Ty(Ctx
);
220 VectorType
*LLVMVecType
= llvm::dyn_cast
<llvm::VectorType
>(ArgType
);
221 int NumElem
= LLVMVecType
? LLVMVecType
->getNumElements() : 1;
222 if (LLVMVecType
&& NumElem
> 1)
223 ResType
= llvm::VectorType::get(ResType
, NumElem
);
224 Builder
.SetInsertPoint(CI
);
225 Builder
.SetCurrentDebugLocation(CI
->getDebugLoc());
226 if (OpConvSpecifiers
[ArgCount
- 1] == 'x' ||
227 OpConvSpecifiers
[ArgCount
- 1] == 'X' ||
228 OpConvSpecifiers
[ArgCount
- 1] == 'u' ||
229 OpConvSpecifiers
[ArgCount
- 1] == 'o')
230 Arg
= Builder
.CreateZExt(Arg
, ResType
);
232 Arg
= Builder
.CreateSExt(Arg
, ResType
);
233 ArgType
= Arg
->getType();
234 ArgSize
= TD
->getTypeAllocSizeInBits(ArgType
);
235 ArgSize
= ArgSize
/ 8;
236 CI
->setOperand(ArgCount
, Arg
);
238 if (OpConvSpecifiers
[ArgCount
- 1] == 'f') {
239 ConstantFP
*FpCons
= dyn_cast
<ConstantFP
>(Arg
);
243 FPExtInst
*FpExt
= dyn_cast
<FPExtInst
>(Arg
);
244 if (FpExt
&& FpExt
->getType()->isDoubleTy() &&
245 FpExt
->getOperand(0)->getType()->isFloatTy())
249 if (shouldPrintAsStr(OpConvSpecifiers
[ArgCount
- 1], ArgType
)) {
250 if (ConstantExpr
*ConstExpr
= dyn_cast
<ConstantExpr
>(Arg
)) {
252 dyn_cast
<GlobalVariable
>(ConstExpr
->getOperand(0));
253 if (GV
&& GV
->hasInitializer()) {
254 Constant
*Init
= GV
->getInitializer();
255 ConstantDataArray
*CA
= dyn_cast
<ConstantDataArray
>(Init
);
256 if (Init
->isZeroValue() || CA
->isString()) {
257 size_t SizeStr
= Init
->isZeroValue()
259 : (strlen(CA
->getAsCString().data()) + 1);
260 size_t Rem
= SizeStr
% DWORD_ALIGN
;
262 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
265 NSizeStr
= SizeStr
+ (DWORD_ALIGN
- Rem
);
272 ArgSize
= sizeof(NonLiteralStr
);
275 ArgSize
= sizeof(NonLiteralStr
);
278 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
279 << " for type: " << *ArgType
<< '\n');
280 Sizes
<< ArgSize
<< ':';
283 LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str
.str()
285 for (size_t I
= 0; I
< Str
.size(); ++I
) {
286 // Rest of the C escape sequences (e.g. \') are handled correctly
308 // ':' cannot be scanned by Flex, as it is defined as a delimiter.
309 // Replace it with its octal representation \72
318 // Insert the printf_alloc call
319 Builder
.SetInsertPoint(CI
);
320 Builder
.SetCurrentDebugLocation(CI
->getDebugLoc());
322 AttributeList Attr
= AttributeList::get(Ctx
, AttributeList::FunctionIndex
,
323 Attribute::NoUnwind
);
325 Type
*SizetTy
= Type::getInt32Ty(Ctx
);
327 Type
*Tys_alloc
[1] = {SizetTy
};
328 Type
*I8Ptr
= PointerType::get(Type::getInt8Ty(Ctx
), 1);
329 FunctionType
*FTy_alloc
= FunctionType::get(I8Ptr
, Tys_alloc
, false);
330 FunctionCallee PrintfAllocFn
=
331 M
.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc
, Attr
);
333 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes
.str() << '\n');
334 std::string fmtstr
= itostr(++UniqID
) + ":" + Sizes
.str().c_str();
335 MDString
*fmtStrArray
= MDString::get(Ctx
, fmtstr
);
337 // Instead of creating global variables, the
338 // printf format strings are extracted
339 // and passed as metadata. This avoids
340 // polluting llvm's symbol tables in this module.
341 // Metadata is going to be extracted
342 // by the backend passes and inserted
343 // into the OpenCL binary as appropriate.
344 StringRef
amd("llvm.printf.fmts");
345 NamedMDNode
*metaD
= M
.getOrInsertNamedMetadata(amd
);
346 MDNode
*myMD
= MDNode::get(Ctx
, fmtStrArray
);
347 metaD
->addOperand(myMD
);
348 Value
*sumC
= ConstantInt::get(SizetTy
, Sum
, false);
349 SmallVector
<Value
*, 1> alloc_args
;
350 alloc_args
.push_back(sumC
);
352 CallInst::Create(PrintfAllocFn
, alloc_args
, "printf_alloc_fn", CI
);
355 // Insert code to split the basic block with a
356 // piece of hammock code.
357 // The basic block is split after the buffer overflow check.
359 ConstantPointerNull
*zeroIntPtr
=
360 ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx
), 1));
362 dyn_cast
<ICmpInst
>(Builder
.CreateICmpNE(pcall
, zeroIntPtr
, ""));
363 if (!CI
->use_empty()) {
365 Builder
.CreateSExt(Builder
.CreateNot(cmp
), I32Ty
, "printf_res");
366 CI
->replaceAllUsesWith(result
);
368 SplitBlock(CI
->getParent(), cmp
);
370 SplitBlockAndInsertIfThen(cmp
, cmp
->getNextNode(), false);
372 Builder
.SetInsertPoint(Brnch
);
374 // store unique printf id in the buffer
376 SmallVector
<Value
*, 1> ZeroIdxList
;
377 ConstantInt
*zeroInt
=
378 ConstantInt::get(Ctx
, APInt(32, StringRef("0"), 10));
379 ZeroIdxList
.push_back(zeroInt
);
381 GetElementPtrInst
*BufferIdx
=
382 dyn_cast
<GetElementPtrInst
>(GetElementPtrInst::Create(
383 nullptr, pcall
, ZeroIdxList
, "PrintBuffID", Brnch
));
385 Type
*idPointer
= PointerType::get(I32Ty
, AMDGPUAS::GLOBAL_ADDRESS
);
387 new BitCastInst(BufferIdx
, idPointer
, "PrintBuffIdCast", Brnch
);
390 new StoreInst(ConstantInt::get(I32Ty
, UniqID
), id_gep_cast
);
391 stbuff
->insertBefore(Brnch
); // to Remove unused variable warning
393 SmallVector
<Value
*, 2> FourthIdxList
;
394 ConstantInt
*fourInt
=
395 ConstantInt::get(Ctx
, APInt(32, StringRef("4"), 10));
397 FourthIdxList
.push_back(fourInt
); // 1st 4 bytes hold the printf_id
398 // the following GEP is the buffer pointer
399 BufferIdx
= cast
<GetElementPtrInst
>(GetElementPtrInst::Create(
400 nullptr, pcall
, FourthIdxList
, "PrintBuffGep", Brnch
));
402 Type
*Int32Ty
= Type::getInt32Ty(Ctx
);
403 Type
*Int64Ty
= Type::getInt64Ty(Ctx
);
404 for (unsigned ArgCount
= 1; ArgCount
< CI
->getNumArgOperands() &&
405 ArgCount
<= OpConvSpecifiers
.size();
407 Value
*Arg
= CI
->getArgOperand(ArgCount
);
408 Type
*ArgType
= Arg
->getType();
409 SmallVector
<Value
*, 32> WhatToStore
;
410 if (ArgType
->isFPOrFPVectorTy() &&
411 (ArgType
->getTypeID() != Type::VectorTyID
)) {
412 Type
*IType
= (ArgType
->isFloatTy()) ? Int32Ty
: Int64Ty
;
413 if (OpConvSpecifiers
[ArgCount
- 1] == 'f') {
414 ConstantFP
*fpCons
= dyn_cast
<ConstantFP
>(Arg
);
416 APFloat
Val(fpCons
->getValueAPF());
418 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
420 Arg
= ConstantFP::get(Ctx
, Val
);
423 FPExtInst
*FpExt
= dyn_cast
<FPExtInst
>(Arg
);
424 if (FpExt
&& FpExt
->getType()->isDoubleTy() &&
425 FpExt
->getOperand(0)->getType()->isFloatTy()) {
426 Arg
= FpExt
->getOperand(0);
431 Arg
= new BitCastInst(Arg
, IType
, "PrintArgFP", Brnch
);
432 WhatToStore
.push_back(Arg
);
433 } else if (ArgType
->getTypeID() == Type::PointerTyID
) {
434 if (shouldPrintAsStr(OpConvSpecifiers
[ArgCount
- 1], ArgType
)) {
435 const char *S
= NonLiteralStr
;
436 if (ConstantExpr
*ConstExpr
= dyn_cast
<ConstantExpr
>(Arg
)) {
438 dyn_cast
<GlobalVariable
>(ConstExpr
->getOperand(0));
439 if (GV
&& GV
->hasInitializer()) {
440 Constant
*Init
= GV
->getInitializer();
441 ConstantDataArray
*CA
= dyn_cast
<ConstantDataArray
>(Init
);
442 if (Init
->isZeroValue() || CA
->isString()) {
443 S
= Init
->isZeroValue() ? "" : CA
->getAsCString().data();
447 size_t SizeStr
= strlen(S
) + 1;
448 size_t Rem
= SizeStr
% DWORD_ALIGN
;
451 NSizeStr
= SizeStr
+ (DWORD_ALIGN
- Rem
);
456 char *MyNewStr
= new char[NSizeStr
]();
458 int NumInts
= NSizeStr
/ 4;
461 int ANum
= *(int *)(MyNewStr
+ CharC
);
464 Value
*ANumV
= ConstantInt::get(Int32Ty
, ANum
, false);
465 WhatToStore
.push_back(ANumV
);
469 // Empty string; give a hint to the RT that it is not NULL
470 Value
*ANumV
= ConstantInt::get(Int32Ty
, 0xFFFFFF00, false);
471 WhatToStore
.push_back(ANumV
);
474 uint64_t Size
= TD
->getTypeAllocSizeInBits(ArgType
);
475 assert((Size
== 32 || Size
== 64) && "unsupported size");
476 Type
*DstType
= (Size
== 32) ? Int32Ty
: Int64Ty
;
477 Arg
= new PtrToIntInst(Arg
, DstType
, "PrintArgPtr", Brnch
);
478 WhatToStore
.push_back(Arg
);
480 } else if (ArgType
->getTypeID() == Type::VectorTyID
) {
482 uint32_t EleCount
= cast
<VectorType
>(ArgType
)->getNumElements();
483 uint32_t EleSize
= ArgType
->getScalarSizeInBits();
484 uint32_t TotalSize
= EleCount
* EleSize
;
486 IntegerType
*Int32Ty
= Type::getInt32Ty(ArgType
->getContext());
487 Constant
*Indices
[4] = {
488 ConstantInt::get(Int32Ty
, 0), ConstantInt::get(Int32Ty
, 1),
489 ConstantInt::get(Int32Ty
, 2), ConstantInt::get(Int32Ty
, 2)};
490 Constant
*Mask
= ConstantVector::get(Indices
);
491 ShuffleVectorInst
*Shuffle
= new ShuffleVectorInst(Arg
, Arg
, Mask
);
492 Shuffle
->insertBefore(Brnch
);
494 ArgType
= Arg
->getType();
495 TotalSize
+= EleSize
;
499 EleCount
= TotalSize
/ 64;
500 IType
= dyn_cast
<Type
>(Type::getInt64Ty(ArgType
->getContext()));
504 EleCount
= TotalSize
/ 64;
505 IType
= dyn_cast
<Type
>(Type::getInt64Ty(ArgType
->getContext()));
506 } else if (EleCount
>= 3) {
508 IType
= dyn_cast
<Type
>(Type::getInt32Ty(ArgType
->getContext()));
511 IType
= dyn_cast
<Type
>(Type::getInt16Ty(ArgType
->getContext()));
516 EleCount
= TotalSize
/ 64;
517 IType
= dyn_cast
<Type
>(Type::getInt64Ty(ArgType
->getContext()));
520 IType
= dyn_cast
<Type
>(Type::getInt32Ty(ArgType
->getContext()));
525 IType
= dyn_cast
<Type
>(VectorType::get(IType
, EleCount
));
527 Arg
= new BitCastInst(Arg
, IType
, "PrintArgVect", Brnch
);
528 WhatToStore
.push_back(Arg
);
530 WhatToStore
.push_back(Arg
);
532 for (unsigned I
= 0, E
= WhatToStore
.size(); I
!= E
; ++I
) {
533 Value
*TheBtCast
= WhatToStore
[I
];
535 TD
->getTypeAllocSizeInBits(TheBtCast
->getType()) / 8;
536 SmallVector
<Value
*, 1> BuffOffset
;
537 BuffOffset
.push_back(ConstantInt::get(I32Ty
, ArgSize
));
539 Type
*ArgPointer
= PointerType::get(TheBtCast
->getType(), 1);
541 new BitCastInst(BufferIdx
, ArgPointer
, "PrintBuffPtrCast", Brnch
);
542 StoreInst
*StBuff
= new StoreInst(TheBtCast
, CastedGEP
, Brnch
);
543 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
546 if (I
+ 1 == E
&& ArgCount
+ 1 == CI
->getNumArgOperands())
548 BufferIdx
= dyn_cast
<GetElementPtrInst
>(GetElementPtrInst::Create(
549 nullptr, BufferIdx
, BuffOffset
, "PrintBuffNextPtr", Brnch
));
550 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
551 << *BufferIdx
<< '\n');
557 // erase the printf calls
558 for (auto CI
: Printfs
)
559 CI
->eraseFromParent();
565 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module
&M
) {
566 Triple
TT(M
.getTargetTriple());
567 if (TT
.getArch() == Triple::r600
)
570 auto PrintfFunction
= M
.getFunction("printf");
574 for (auto &U
: PrintfFunction
->uses()) {
575 if (auto *CI
= dyn_cast
<CallInst
>(U
.getUser())) {
576 if (CI
->isCallee(&U
))
577 Printfs
.push_back(CI
);
584 TD
= &M
.getDataLayout();
585 auto DTWP
= getAnalysisIfAvailable
<DominatorTreeWrapperPass
>();
586 DT
= DTWP
? &DTWP
->getDomTree() : nullptr;
587 auto GetTLI
= [this](Function
&F
) -> TargetLibraryInfo
& {
588 return this->getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
591 return lowerPrintfForGpu(M
, GetTLI
);