1 //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains functions to create parallel loops as LLVM-IR.
11 //===----------------------------------------------------------------------===//
13 #include "polly/CodeGen/LoopGeneratorsKMP.h"
14 #include "llvm/IR/Dominators.h"
15 #include "llvm/IR/Module.h"
18 using namespace polly
;
// Emits a call to the OpenMP runtime function "__kmpc_fork_call".
//
// The visible code looks the function up in module M and creates an external,
// variadic declaration returning void whose leading parameters are
// (ident_t*, i32, kmpc_micro*). SubFn is cast to a pointer to the kmpc_micro
// function type, and the argument list starts with SourceLocationInfo followed
// by the constant 4.
//
// NOTE(review): the rest of the parameter list, the guards around the type and
// function declarations, and the tail of the argument list are not visible in
// this excerpt -- confirm against the complete source.
20 void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value
*SubFn
,
24 const std::string Name
= "__kmpc_fork_call";
25 Function
*F
= M
->getFunction(Name
);
26 Type
*KMPCMicroTy
= StructType::getTypeByName(M
->getContext(), "kmpc_micro");
29 // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
30 Type
*MicroParams
[] = {Builder
.getInt32Ty()->getPointerTo(),
31 Builder
.getInt32Ty()->getPointerTo()};
// Variadic (last argument 'true') function type: void (i32*, i32*, ...).
33 KMPCMicroTy
= FunctionType::get(Builder
.getVoidTy(), MicroParams
, true);
36 // If F is not available, declare it.
39 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
41 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
42 Type
*Params
[] = {IdentTy
->getPointerTo(), Builder
.getInt32Ty(),
43 KMPCMicroTy
->getPointerTo()};
// The runtime entry point itself is declared variadic as well.
45 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, true);
46 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Cast the outlined function to the micro-task pointer type.
49 Value
*Task
= Builder
.CreatePointerBitCastOrAddrSpaceCast(
50 SubFn
, KMPCMicroTy
->getPointerTo());
52 Value
*Args
[] = {SourceLocationInfo
,
53 Builder
.getInt32(4) /* Number of arguments (w/o Task) */,
60 Builder
.CreateCall(F
, Args
);
// Emits the runtime calls that launch SubFn in parallel.
//
// When PollyNumThreads is greater than zero, the global thread id is obtained
// via createCallGlobalThreadNum() and passed, together with the requested
// thread count as an i32 constant, to createCallPushNumThreads(). Afterwards
// createCallSpawnThreads() is invoked with SubFn, SubFnParam, LB, UB and
// Stride.
//
// NOTE(review): the rest of the parameter list and the closing braces are not
// visible in this excerpt.
63 void ParallelLoopGeneratorKMP::deployParallelExecution(Function
*SubFn
,
67 // Inform OpenMP runtime about the number of threads if greater than zero
68 if (PollyNumThreads
> 0) {
69 Value
*GlobalThreadID
= createCallGlobalThreadNum();
70 createCallPushNumThreads(GlobalThreadID
, Builder
.getInt32(PollyNumThreads
));
73 // Tell the runtime we start a parallel loop
74 createCallSpawnThreads(SubFn
, SubFnParam
, LB
, UB
, Stride
);
// Creates the definition of the outlined sub-function for F.
//
// The new function returns void, is not variadic, has internal linkage, lives
// in module M and is named F->getName() + "_polly_subfn". Its arguments are
// given "polly.kmpc.*" names (global_tid, bound_tid, lb, ub, inc, shared).
//
// NOTE(review): only the two leading i32* argument types and the trailing i8*
// type are visible here; the types in between, the iterator increments between
// the setName() calls and the return statement are not visible in this
// excerpt.
77 Function
*ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function
*F
) const {
78 std::vector
<Type
*> Arguments
= {Builder
.getInt32Ty()->getPointerTo(),
79 Builder
.getInt32Ty()->getPointerTo(),
83 Builder
.getInt8PtrTy()};
85 FunctionType
*FT
= FunctionType::get(Builder
.getVoidTy(), Arguments
, false);
86 Function
*SubFn
= Function::Create(FT
, Function::InternalLinkage
,
87 F
->getName() + "_polly_subfn", M
);
88 // Name the function's arguments
89 Function::arg_iterator AI
= SubFn
->arg_begin();
90 AI
->setName("polly.kmpc.global_tid");
92 AI
->setName("polly.kmpc.bound_tid");
94 AI
->setName("polly.kmpc.lb");
96 AI
->setName("polly.kmpc.ub");
98 AI
->setName("polly.kmpc.inc");
100 AI
->setName("polly.kmpc.shared");
105 // Create a subfunction of the following (preliminary) structure:
122 // HeaderBB will hold allocations, loading of variables and kmp-init calls.
123 // CheckNextBB will check for more work (dynamic / static chunked) or will be
124 // empty (static non chunked).
125 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
126 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
127 // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
128 // chunked scheduling case. ExitBB marks the end of the parallel execution.
129 // The possibly empty BasicBlocks will automatically be removed.
// Builds the skeleton of the outlined parallel sub-function.
//
// Creates four basic blocks in SubFn ("polly.par.setup", "polly.par.exit",
// "polly.par.checkNext", "polly.par.loadIVBounds"), registers them with DT,
// emits the runtime calls for the scheduling type selected by
// getSchedType(PollyChunkSize, PollyScheduling), creates the sequential loop
// via createLoop() and finally moves the builder back to the insert point
// that was saved right after createLoop().
//
// Returns a tuple of the value produced by createLoop() and the created
// sub-function.
//
// NOTE(review): several statements (e.g. the definitions of ID, LB, UB,
// StridePtr, ChunkSize, HasWork, AfterBB and the case terminators) are not
// visible in this excerpt -- confirm details against the complete source.
130 std::tuple
<Value
*, Function
*>
131 ParallelLoopGeneratorKMP::createSubFn(Value
*SequentialLoopStride
,
132 AllocaInst
*StructData
,
133 SetVector
<Value
*> Data
, ValueMapT
&Map
) {
134 Function
*SubFn
= createSubFnDefinition();
135 LLVMContext
&Context
= SubFn
->getContext();
137 // Store the previous basic block.
138 BasicBlock
*PrevBB
= Builder
.GetInsertBlock();
140 // Create basic blocks.
141 BasicBlock
*HeaderBB
= BasicBlock::Create(Context
, "polly.par.setup", SubFn
);
142 BasicBlock
*ExitBB
= BasicBlock::Create(Context
, "polly.par.exit", SubFn
);
143 BasicBlock
*CheckNextBB
=
144 BasicBlock::Create(Context
, "polly.par.checkNext", SubFn
);
145 BasicBlock
*PreHeaderBB
=
146 BasicBlock::Create(Context
, "polly.par.loadIVBounds", SubFn
);
// HeaderBB is registered below PrevBB; the other three blocks are registered
// below HeaderBB.
148 DT
.addNewBlock(HeaderBB
, PrevBB
);
149 DT
.addNewBlock(ExitBB
, HeaderBB
);
150 DT
.addNewBlock(CheckNextBB
, HeaderBB
);
151 DT
.addNewBlock(PreHeaderBB
, HeaderBB
);
153 // Fill up basic block HeaderBB.
154 Builder
.SetInsertPoint(HeaderBB
);
155 Value
*LBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.LBPtr");
156 Value
*UBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.UBPtr");
157 Value
*IsLastPtr
= Builder
.CreateAlloca(Builder
.getInt32Ty(), nullptr,
158 "polly.par.lastIterPtr");
160 Builder
.CreateAlloca(LongType
, nullptr, "polly.par.StridePtr");
162 // Get iterator for retrieving the previously defined parameters.
163 Function::arg_iterator AI
= SubFn
->arg_begin();
164 // First argument holds "global thread ID".
166 // Skip "bound thread ID" since it is not used (but had to be defined).
168 // Move iterator to: LB, UB, Stride, Shared variable struct.
173 Value
*Stride
= &*AI
;
175 Value
*Shared
= &*AI
;
// Reinterpret the shared-variable argument as a pointer of StructData's type.
177 Value
*UserContext
= Builder
.CreateBitCast(Shared
, StructData
->getType(),
178 "polly.par.userContext");
180 extractValuesFromStruct(Data
, StructData
->getAllocatedType(), UserContext
,
// 8-byte alignment when is64BitArch() holds, 4-byte alignment otherwise.
183 const auto Alignment
= llvm::Align(is64BitArch() ? 8 : 4);
185 Builder
.CreateAlignedLoad(IDPtr
, Alignment
, "polly.par.global_tid");
187 Builder
.CreateAlignedStore(LB
, LBPtr
, Alignment
);
188 Builder
.CreateAlignedStore(UB
, UBPtr
, Alignment
);
189 Builder
.CreateAlignedStore(Builder
.getInt32(0), IsLastPtr
, Alignment
);
190 Builder
.CreateAlignedStore(Stride
, StridePtr
, Alignment
);
192 // Subtract one as the upper bound provided by openmp is a < comparison
193 // whereas the codegenForSequential function creates a <= comparison.
194 Value
*AdjustedUB
= Builder
.CreateAdd(UB
, ConstantInt::get(LongType
, -1),
195 "polly.indvar.UBAdjusted");
// The chunk size constant is clamped to be at least 1.
198 ConstantInt::get(LongType
, std::max
<int>(PollyChunkSize
, 1));
200 OMPGeneralSchedulingType Scheduling
=
201 getSchedType(PollyChunkSize
, PollyScheduling
);
203 switch (Scheduling
) {
204 case OMPGeneralSchedulingType::Dynamic
:
205 case OMPGeneralSchedulingType::Guided
:
206 case OMPGeneralSchedulingType::Runtime
:
207 // "DYNAMIC" scheduling types are handled below (including 'runtime')
210 createCallDispatchInit(ID
, LB
, UB
, Stride
, ChunkSize
);
212 createCallDispatchNext(ID
, IsLastPtr
, LBPtr
, UBPtr
, StridePtr
);
// A dispatch-next result equal to 1 is treated as "there is work to do".
213 Value
*HasIteration
=
214 Builder
.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ
, HasWork
,
215 Builder
.getInt32(1), "polly.hasIteration");
216 Builder
.CreateCondBr(HasIteration
, PreHeaderBB
, ExitBB
);
218 Builder
.SetInsertPoint(CheckNextBB
);
219 HasWork
= createCallDispatchNext(ID
, IsLastPtr
, LBPtr
, UBPtr
, StridePtr
);
221 Builder
.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ
, HasWork
,
222 Builder
.getInt32(1), "polly.hasWork");
223 Builder
.CreateCondBr(HasIteration
, PreHeaderBB
, ExitBB
);
// Reload the bounds that the dispatch-next call was given pointers to.
225 Builder
.SetInsertPoint(PreHeaderBB
);
226 LB
= Builder
.CreateAlignedLoad(LBPtr
, Alignment
, "polly.indvar.LB");
227 UB
= Builder
.CreateAlignedLoad(UBPtr
, Alignment
, "polly.indvar.UB");
230 case OMPGeneralSchedulingType::StaticChunked
:
231 case OMPGeneralSchedulingType::StaticNonChunked
:
232 // "STATIC" scheduling types are handled below
234 Builder
.CreateAlignedStore(AdjustedUB
, UBPtr
, Alignment
);
235 createCallStaticInit(ID
, IsLastPtr
, LBPtr
, UBPtr
, StridePtr
, ChunkSize
);
237 Value
*ChunkedStride
=
238 Builder
.CreateAlignedLoad(StridePtr
, Alignment
, "polly.kmpc.stride");
240 LB
= Builder
.CreateAlignedLoad(LBPtr
, Alignment
, "polly.indvar.LB");
241 UB
= Builder
.CreateAlignedLoad(UBPtr
, Alignment
, "polly.indvar.UB.temp");
// Clamp the loaded upper bound so that it never exceeds AdjustedUB.
244 Builder
.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE
, UB
, AdjustedUB
,
245 "polly.indvar.UB.inRange");
246 UB
= Builder
.CreateSelect(UBInRange
, UB
, AdjustedUB
, "polly.indvar.UB");
247 Builder
.CreateAlignedStore(UB
, UBPtr
, Alignment
);
249 Value
*HasIteration
= Builder
.CreateICmp(
250 llvm::CmpInst::Predicate::ICMP_SLE
, LB
, UB
, "polly.hasIteration");
251 Builder
.CreateCondBr(HasIteration
, PreHeaderBB
, ExitBB
);
253 if (Scheduling
== OMPGeneralSchedulingType::StaticChunked
) {
254 Builder
.SetInsertPoint(PreHeaderBB
);
255 LB
= Builder
.CreateAlignedLoad(LBPtr
, Alignment
,
256 "polly.indvar.LB.entry");
257 UB
= Builder
.CreateAlignedLoad(UBPtr
, Alignment
,
258 "polly.indvar.UB.entry");
261 Builder
.SetInsertPoint(CheckNextBB
);
263 if (Scheduling
== OMPGeneralSchedulingType::StaticChunked
) {
// Advance both bounds by the stride loaded from StridePtr and clamp the new
// upper bound to AdjustedUB.
265 Builder
.CreateAdd(LB
, ChunkedStride
, "polly.indvar.nextLB");
266 Value
*NextUB
= Builder
.CreateAdd(UB
, ChunkedStride
);
268 Value
*NextUBOutOfBounds
=
269 Builder
.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT
, NextUB
,
270 AdjustedUB
, "polly.indvar.nextUB.outOfBounds");
271 NextUB
= Builder
.CreateSelect(NextUBOutOfBounds
, AdjustedUB
, NextUB
,
272 "polly.indvar.nextUB");
274 Builder
.CreateAlignedStore(NextLB
, LBPtr
, Alignment
);
275 Builder
.CreateAlignedStore(NextUB
, UBPtr
, Alignment
);
278 Builder
.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE
, NextLB
,
279 AdjustedUB
, "polly.hasWork");
280 Builder
.CreateCondBr(HasWork
, PreHeaderBB
, ExitBB
);
282 Builder
.CreateBr(ExitBB
);
285 Builder
.SetInsertPoint(PreHeaderBB
);
// Emit the branch to CheckNextBB, then step the insert point back onto the
// instruction just before the current position so that what follows is
// inserted ahead of it.
290 Builder
.CreateBr(CheckNextBB
);
291 Builder
.SetInsertPoint(&*--Builder
.GetInsertPoint());
293 Value
*IV
= createLoop(LB
, UB
, SequentialLoopStride
, Builder
, LI
, DT
, AfterBB
,
294 ICmpInst::ICMP_SLE
, nullptr, true,
295 /* UseGuard */ false);
// Remember the insert point left by createLoop(); it is restored after ExitBB
// has been completed.
297 BasicBlock::iterator LoopBody
= Builder
.GetInsertPoint();
299 // Add code to terminate this subfunction.
300 Builder
.SetInsertPoint(ExitBB
);
301 // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
302 if (Scheduling
== OMPGeneralSchedulingType::StaticChunked
||
303 Scheduling
== OMPGeneralSchedulingType::StaticNonChunked
) {
304 createCallStaticFini(ID
);
306 Builder
.CreateRetVoid();
307 Builder
.SetInsertPoint(&*LoopBody
);
309 return std::make_tuple(IV
, SubFn
);
// Emits a call to the OpenMP runtime function "__kmpc_global_thread_num" with
// SourceLocationInfo as its only argument and returns the call's result.
//
// The declaration created here has external linkage, returns i32, is not
// variadic and takes a pointer to "struct.ident_t".
//
// NOTE(review): the guard around the declaration is not visible in this
// excerpt.
312 Value
*ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
313 const std::string Name
= "__kmpc_global_thread_num";
314 Function
*F
= M
->getFunction(Name
);
316 // If F is not available, declare it.
318 StructType
*IdentTy
=
319 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
321 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
322 Type
*Params
[] = {IdentTy
->getPointerTo()};
324 FunctionType
*Ty
= FunctionType::get(Builder
.getInt32Ty(), Params
, false);
325 F
= Function::Create(Ty
, Linkage
, Name
, M
);
328 return Builder
.CreateCall(F
, {SourceLocationInfo
});
// Emits a call to the OpenMP runtime function "__kmpc_push_num_threads" with
// the arguments (SourceLocationInfo, GlobalThreadID, NumThreads).
//
// The declaration created here has external linkage, returns void, is not
// variadic and takes (ident_t*, i32, i32).
//
// NOTE(review): the NumThreads parameter declaration and the guard around the
// function declaration are not visible in this excerpt.
331 void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value
*GlobalThreadID
,
333 const std::string Name
= "__kmpc_push_num_threads";
334 Function
*F
= M
->getFunction(Name
);
336 // If F is not available, declare it.
338 StructType
*IdentTy
=
339 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
341 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
342 Type
*Params
[] = {IdentTy
->getPointerTo(), Builder
.getInt32Ty(),
343 Builder
.getInt32Ty()};
345 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
346 F
= Function::Create(Ty
, Linkage
, Name
, M
);
349 Value
*Args
[] = {SourceLocationInfo
, GlobalThreadID
, NumThreads
};
351 Builder
.CreateCall(F
, Args
);
// Emits a call to the OpenMP runtime's static-init function:
// "__kmpc_for_static_init_8" when is64BitArch() holds, otherwise
// "__kmpc_for_static_init_4".
//
// The declaration created here has external linkage, returns void and is not
// variadic. Its visible leading parameter types are
// (ident_t*, i32, i32, i32*, LongType*, LongType*, LongType*). Among the
// visible call arguments are the scheduling type from
// getSchedType(PollyChunkSize, PollyScheduling), passed as an i32 constant,
// and a LongType constant 1.
//
// NOTE(review): the remaining parameters, the trailing declaration parameter
// types, the guard around the declaration and most of the argument list are
// not visible in this excerpt.
354 void ParallelLoopGeneratorKMP::createCallStaticInit(Value
*GlobalThreadID
,
356 Value
*LBPtr
, Value
*UBPtr
,
359 const std::string Name
=
360 is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
361 Function
*F
= M
->getFunction(Name
);
362 StructType
*IdentTy
=
363 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
365 // If F is not available, declare it.
367 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
369 Type
*Params
[] = {IdentTy
->getPointerTo(),
370 Builder
.getInt32Ty(),
371 Builder
.getInt32Ty(),
372 Builder
.getInt32Ty()->getPointerTo(),
373 LongType
->getPointerTo(),
374 LongType
->getPointerTo(),
375 LongType
->getPointerTo(),
379 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
380 F
= Function::Create(Ty
, Linkage
, Name
, M
);
383 // The parameter 'ChunkSize' will hold strictly positive integer values,
384 // regardless of PollyChunkSize's value
388 Builder
.getInt32(int(getSchedType(PollyChunkSize
, PollyScheduling
))),
393 ConstantInt::get(LongType
, 1),
396 Builder
.CreateCall(F
, Args
);
// Emits a call to the OpenMP runtime function "__kmpc_for_static_fini" with
// the arguments (SourceLocationInfo, GlobalThreadID).
//
// The declaration created here has external linkage, returns void, is not
// variadic and takes (ident_t*, i32).
//
// NOTE(review): the guard around the declaration is not visible in this
// excerpt.
399 void ParallelLoopGeneratorKMP::createCallStaticFini(Value
*GlobalThreadID
) {
400 const std::string Name
= "__kmpc_for_static_fini";
401 Function
*F
= M
->getFunction(Name
);
402 StructType
*IdentTy
=
403 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
405 // If F is not available, declare it.
407 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
408 Type
*Params
[] = {IdentTy
->getPointerTo(), Builder
.getInt32Ty()};
409 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
410 F
= Function::Create(Ty
, Linkage
, Name
, M
);
413 Value
*Args
[] = {SourceLocationInfo
, GlobalThreadID
};
415 Builder
.CreateCall(F
, Args
);
// Emits a call to the OpenMP runtime's dispatch-init function:
// "__kmpc_dispatch_init_8" when is64BitArch() holds, otherwise
// "__kmpc_dispatch_init_4".
//
// The declaration created here has external linkage, returns void and is not
// variadic; its visible leading parameter types are (ident_t*, i32, i32).
// Among the visible call arguments is the scheduling type from
// getSchedType(PollyChunkSize, PollyScheduling), passed as an i32 constant.
//
// NOTE(review): the remaining parameters, the trailing declaration parameter
// types, the guard around the declaration and most of the argument list are
// not visible in this excerpt.
418 void ParallelLoopGeneratorKMP::createCallDispatchInit(Value
*GlobalThreadID
,
419 Value
*LB
, Value
*UB
,
422 const std::string Name
=
423 is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
424 Function
*F
= M
->getFunction(Name
);
425 StructType
*IdentTy
=
426 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
428 // If F is not available, declare it.
430 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
432 Type
*Params
[] = {IdentTy
->getPointerTo(),
433 Builder
.getInt32Ty(),
434 Builder
.getInt32Ty(),
440 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
441 F
= Function::Create(Ty
, Linkage
, Name
, M
);
444 // The parameter 'ChunkSize' will hold strictly positive integer values,
445 // regardless of PollyChunkSize's value
449 Builder
.getInt32(int(getSchedType(PollyChunkSize
, PollyScheduling
))),
455 Builder
.CreateCall(F
, Args
);
// Emits a call to the OpenMP runtime's dispatch-next function
// ("__kmpc_dispatch_next_8" when is64BitArch() holds, otherwise
// "__kmpc_dispatch_next_4") and returns the call's result.
//
// The declaration created here has external linkage, returns i32, is not
// variadic and takes (ident_t*, i32, i32*, LongType*, LongType*, LongType*).
// The visible call arguments begin with SourceLocationInfo, GlobalThreadID,
// IsLastPtr, LBPtr and UBPtr.
//
// NOTE(review): the remaining parameters, the guard around the declaration and
// the tail of the argument list are not visible in this excerpt.
458 Value
*ParallelLoopGeneratorKMP::createCallDispatchNext(Value
*GlobalThreadID
,
463 const std::string Name
=
464 is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
465 Function
*F
= M
->getFunction(Name
);
466 StructType
*IdentTy
=
467 StructType::getTypeByName(M
->getContext(), "struct.ident_t");
469 // If F is not available, declare it.
471 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
473 Type
*Params
[] = {IdentTy
->getPointerTo(),
474 Builder
.getInt32Ty(),
475 Builder
.getInt32Ty()->getPointerTo(),
476 LongType
->getPointerTo(),
477 LongType
->getPointerTo(),
478 LongType
->getPointerTo()};
480 FunctionType
*Ty
= FunctionType::get(Builder
.getInt32Ty(), Params
, false);
481 F
= Function::Create(Ty
, Linkage
, Name
, M
);
484 Value
*Args
[] = {SourceLocationInfo
, GlobalThreadID
, IsLastPtr
, LBPtr
, UBPtr
,
487 return Builder
.CreateCall(F
, Args
);
490 // TODO: This function currently creates a source location dummy. It might be
491 // necessary to (actually) provide information, in the future.
// Returns the module-level global ".loc.dummy", creating it when the module
// does not contain it yet.
//
// On creation, a constant private global ".str.ident" holding the text
// "Source location dummy." is emitted, and ".loc.dummy" becomes a constant
// private global of type "struct.ident_t" whose four i32 fields are zero and
// whose last field points at the first character of that string.
//
// NOTE(review): the guard around the creation of the "struct.ident_t" type is
// not visible in this excerpt.
492 GlobalVariable
*ParallelLoopGeneratorKMP::createSourceLocation() {
493 const std::string LocName
= ".loc.dummy";
494 GlobalVariable
*SourceLocDummy
= M
->getGlobalVariable(LocName
);
496 if (SourceLocDummy
== nullptr) {
497 const std::string StructName
= "struct.ident_t";
498 StructType
*IdentTy
=
499 StructType::getTypeByName(M
->getContext(), StructName
);
501 // If the ident_t StructType is not available, declare it.
502 // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
504 Type
*LocMembers
[] = {Builder
.getInt32Ty(), Builder
.getInt32Ty(),
505 Builder
.getInt32Ty(), Builder
.getInt32Ty(),
506 Builder
.getInt8PtrTy()};
509 StructType::create(M
->getContext(), LocMembers
, StructName
, false);
// 23 bytes: the 22 characters of "Source location dummy." plus the
// terminating null that getString() is asked to append below.
512 const auto ArrayType
=
513 llvm::ArrayType::get(Builder
.getInt8Ty(), /* Length */ 23);
515 // Global Variable Definitions
516 GlobalVariable
*StrVar
= new GlobalVariable(
517 *M
, ArrayType
, true, GlobalValue::PrivateLinkage
, 0, ".str.ident");
518 StrVar
->setAlignment(llvm::Align(1));
520 SourceLocDummy
= new GlobalVariable(
521 *M
, IdentTy
, true, GlobalValue::PrivateLinkage
, nullptr, LocName
);
522 SourceLocDummy
->setAlignment(llvm::Align(8));
524 // Constant Definitions
525 Constant
*InitStr
= ConstantDataArray::getString(
526 M
->getContext(), "Source location dummy.", true);
// The GEP result is used as a Constant; this presumes the builder folds a GEP
// over a global with constant indices -- TODO confirm.
528 Constant
*StrPtr
= static_cast<Constant
*>(Builder
.CreateInBoundsGEP(
529 ArrayType
, StrVar
, {Builder
.getInt32(0), Builder
.getInt32(0)}));
531 Constant
*LocInitStruct
= ConstantStruct::get(
532 IdentTy
, {Builder
.getInt32(0), Builder
.getInt32(0), Builder
.getInt32(0),
533 Builder
.getInt32(0), StrPtr
});
535 // Initialize variables
536 StrVar
->setInitializer(InitStr
);
537 SourceLocDummy
->setInitializer(LocInitStruct
);
540 return SourceLocDummy
;
// Returns true when LongType has an integer bit width of exactly 64.
543 bool ParallelLoopGeneratorKMP::is64BitArch() {
544 return (LongType
->getIntegerBitWidth() == 64);
547 OMPGeneralSchedulingType
ParallelLoopGeneratorKMP::getSchedType(
548 int ChunkSize
, OMPGeneralSchedulingType Scheduling
) const {
549 if (ChunkSize
== 0 && Scheduling
== OMPGeneralSchedulingType::StaticChunked
)
550 return OMPGeneralSchedulingType::StaticNonChunked
;