//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Generates code for built-in GPU calls which are not runtime-specific.
// (Runtime-specific codegen lives in programming model specific files.)
//
//===----------------------------------------------------------------------===//
14 #include "CodeGenFunction.h"
15 #include "clang/Basic/Builtins.h"
16 #include "llvm/IR/DataLayout.h"
17 #include "llvm/IR/Instruction.h"
18 #include "llvm/Support/MathExtras.h"
19 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
21 using namespace clang
;
22 using namespace CodeGen
;
25 llvm::Function
*GetVprintfDeclaration(llvm::Module
&M
) {
26 llvm::Type
*ArgTypes
[] = {llvm::PointerType::getUnqual(M
.getContext()),
27 llvm::PointerType::getUnqual(M
.getContext())};
28 llvm::FunctionType
*VprintfFuncType
= llvm::FunctionType::get(
29 llvm::Type::getInt32Ty(M
.getContext()), ArgTypes
, false);
31 if (auto *F
= M
.getFunction("vprintf")) {
32 // Our CUDA system header declares vprintf with the right signature, so
33 // nobody else should have been able to declare vprintf with a bogus
35 assert(F
->getFunctionType() == VprintfFuncType
);
39 // vprintf doesn't already exist; create a declaration and insert it into the
41 return llvm::Function::Create(
42 VprintfFuncType
, llvm::GlobalVariable::ExternalLinkage
, "vprintf", &M
);
45 llvm::Function
*GetOpenMPVprintfDeclaration(CodeGenModule
&CGM
) {
46 const char *Name
= "__llvm_omp_vprintf";
47 llvm::Module
&M
= CGM
.getModule();
48 llvm::Type
*ArgTypes
[] = {llvm::PointerType::getUnqual(M
.getContext()),
49 llvm::PointerType::getUnqual(M
.getContext()),
50 llvm::Type::getInt32Ty(M
.getContext())};
51 llvm::FunctionType
*VprintfFuncType
= llvm::FunctionType::get(
52 llvm::Type::getInt32Ty(M
.getContext()), ArgTypes
, false);
54 if (auto *F
= M
.getFunction(Name
)) {
55 if (F
->getFunctionType() != VprintfFuncType
) {
56 CGM
.Error(SourceLocation(),
57 "Invalid type declaration for __llvm_omp_vprintf");
63 return llvm::Function::Create(
64 VprintfFuncType
, llvm::GlobalVariable::ExternalLinkage
, Name
, &M
);
// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
// isn't particularly special; it's invoked just like a regular function).
// vprintf takes two args: A format string, and a pointer to a buffer containing
// the varargs.
//
// For example, the call
//
//   printf("format string", arg1, arg2, arg3);
//
// is converted into something resembling
//
//   struct Tmp {
//     Arg1 a1;
//     Arg2 a2;
//     Arg3 a3;
//   };
//   char* buf = alloca(sizeof(Tmp));
//   *(Tmp*)buf = {a1, a2, a3};
//   vprintf("format string", buf);
//
// buf is aligned to the max of {alignof(Arg1), ...}.  Furthermore, each of the
// args is itself aligned to its preferred alignment.
//
// Note that by the time this function runs, E's args have already undergone the
// standard C vararg promotion (short -> int, float -> double, etc.).
93 std::pair
<llvm::Value
*, llvm::TypeSize
>
94 packArgsIntoNVPTXFormatBuffer(CodeGenFunction
*CGF
, const CallArgList
&Args
) {
95 const llvm::DataLayout
&DL
= CGF
->CGM
.getDataLayout();
96 llvm::LLVMContext
&Ctx
= CGF
->CGM
.getLLVMContext();
97 CGBuilderTy
&Builder
= CGF
->Builder
;
99 // Construct and fill the args buffer that we'll pass to vprintf.
100 if (Args
.size() <= 1) {
101 // If there are no args, pass a null pointer and size 0
102 llvm::Value
*BufferPtr
=
103 llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx
));
104 return {BufferPtr
, llvm::TypeSize::getFixed(0)};
106 llvm::SmallVector
<llvm::Type
*, 8> ArgTypes
;
107 for (unsigned I
= 1, NumArgs
= Args
.size(); I
< NumArgs
; ++I
)
108 ArgTypes
.push_back(Args
[I
].getRValue(*CGF
).getScalarVal()->getType());
110 // Using llvm::StructType is correct only because printf doesn't accept
111 // aggregates. If we had to handle aggregates here, we'd have to manually
112 // compute the offsets within the alloca -- we wouldn't be able to assume
113 // that the alignment of the llvm type was the same as the alignment of the
115 llvm::Type
*AllocaTy
= llvm::StructType::create(ArgTypes
, "printf_args");
116 llvm::Value
*Alloca
= CGF
->CreateTempAlloca(AllocaTy
);
118 for (unsigned I
= 1, NumArgs
= Args
.size(); I
< NumArgs
; ++I
) {
119 llvm::Value
*P
= Builder
.CreateStructGEP(AllocaTy
, Alloca
, I
- 1);
120 llvm::Value
*Arg
= Args
[I
].getRValue(*CGF
).getScalarVal();
121 Builder
.CreateAlignedStore(Arg
, P
, DL
.getPrefTypeAlign(Arg
->getType()));
123 llvm::Value
*BufferPtr
=
124 Builder
.CreatePointerCast(Alloca
, llvm::PointerType::getUnqual(Ctx
));
125 return {BufferPtr
, DL
.getTypeAllocSize(AllocaTy
)};
129 bool containsNonScalarVarargs(CodeGenFunction
*CGF
, const CallArgList
&Args
) {
130 return llvm::any_of(llvm::drop_begin(Args
), [&](const CallArg
&A
) {
131 return !A
.getRValue(*CGF
).isScalar();
135 RValue
EmitDevicePrintfCallExpr(const CallExpr
*E
, CodeGenFunction
*CGF
,
136 llvm::Function
*Decl
, bool WithSizeArg
) {
137 CodeGenModule
&CGM
= CGF
->CGM
;
138 CGBuilderTy
&Builder
= CGF
->Builder
;
139 assert(E
->getBuiltinCallee() == Builtin::BIprintf
||
140 E
->getBuiltinCallee() == Builtin::BI__builtin_printf
);
141 assert(E
->getNumArgs() >= 1); // printf always has at least one arg.
143 // Uses the same format as nvptx for the argument packing, but also passes
144 // an i32 for the total size of the passed pointer
146 CGF
->EmitCallArgs(Args
,
147 E
->getDirectCallee()->getType()->getAs
<FunctionProtoType
>(),
148 E
->arguments(), E
->getDirectCallee(),
149 /* ParamsToSkip = */ 0);
151 // We don't know how to emit non-scalar varargs.
152 if (containsNonScalarVarargs(CGF
, Args
)) {
153 CGM
.ErrorUnsupported(E
, "non-scalar arg to printf");
154 return RValue::get(llvm::ConstantInt::get(CGF
->IntTy
, 0));
157 auto r
= packArgsIntoNVPTXFormatBuffer(CGF
, Args
);
158 llvm::Value
*BufferPtr
= r
.first
;
160 llvm::SmallVector
<llvm::Value
*, 3> Vec
= {
161 Args
[0].getRValue(*CGF
).getScalarVal(), BufferPtr
};
163 // Passing > 32bit of data as a local alloca doesn't work for nvptx or
165 llvm::Constant
*Size
=
166 llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM
.getLLVMContext()),
167 static_cast<uint32_t>(r
.second
.getFixedValue()));
171 return RValue::get(Builder
.CreateCall(Decl
, Vec
));
175 RValue
CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr
*E
) {
176 assert(getTarget().getTriple().isNVPTX());
177 return EmitDevicePrintfCallExpr(
178 E
, this, GetVprintfDeclaration(CGM
.getModule()), false);
181 RValue
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr
*E
) {
182 assert(getTarget().getTriple().isAMDGCN() ||
183 (getTarget().getTriple().isSPIRV() &&
184 getTarget().getTriple().getVendor() == llvm::Triple::AMD
));
185 assert(E
->getBuiltinCallee() == Builtin::BIprintf
||
186 E
->getBuiltinCallee() == Builtin::BI__builtin_printf
);
187 assert(E
->getNumArgs() >= 1); // printf always has at least one arg.
189 CallArgList CallArgs
;
190 EmitCallArgs(CallArgs
,
191 E
->getDirectCallee()->getType()->getAs
<FunctionProtoType
>(),
192 E
->arguments(), E
->getDirectCallee(),
193 /* ParamsToSkip = */ 0);
195 SmallVector
<llvm::Value
*, 8> Args
;
196 for (const auto &A
: CallArgs
) {
197 // We don't know how to emit non-scalar varargs.
198 if (!A
.getRValue(*this).isScalar()) {
199 CGM
.ErrorUnsupported(E
, "non-scalar arg to printf");
200 return RValue::get(llvm::ConstantInt::get(IntTy
, -1));
203 llvm::Value
*Arg
= A
.getRValue(*this).getScalarVal();
207 llvm::IRBuilder
<> IRB(Builder
.GetInsertBlock(), Builder
.GetInsertPoint());
208 IRB
.SetCurrentDebugLocation(Builder
.getCurrentDebugLocation());
210 bool isBuffered
= (CGM
.getTarget().getTargetOpts().AMDGPUPrintfKindVal
==
211 clang::TargetOptions::AMDGPUPrintfKind::Buffered
);
212 auto Printf
= llvm::emitAMDGPUPrintfCall(IRB
, Args
, isBuffered
);
213 Builder
.SetInsertPoint(IRB
.GetInsertBlock(), IRB
.GetInsertPoint());
214 return RValue::get(Printf
);
217 RValue
CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr
*E
) {
218 assert(getTarget().getTriple().isNVPTX() ||
219 getTarget().getTriple().isAMDGCN());
220 return EmitDevicePrintfCallExpr(E
, this, GetOpenMPVprintfDeclaration(CGM
),