1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains functions to create parallel loops as LLVM-IR.
11 //===----------------------------------------------------------------------===//
13 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
14 #include "llvm/IR/Dominators.h"
15 #include "llvm/IR/Module.h"
18 using namespace polly
;
// Emits, at the current Builder position, a call to the GNU OpenMP runtime
// function "GOMP_parallel_loop_runtime_start".
//
// The callee is looked up by name in M. The code below builds a declaration
// with external linkage and a void return type; per the existing comment this
// is meant for the case where no such function exists yet.
// The call receives SubFn, SubFnParam and PollyNumThreads (as an i32 constant)
// as its leading arguments and is tagged with the DLGenerated debug location.
//
// NOTE(review): this listing does not show the full parameter list, the guard
// around the declaration, or the remaining entries of Params/Args -- confirm
// against the complete file before relying on this description.
20 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value
*SubFn
,
24 const std::string Name
= "GOMP_parallel_loop_runtime_start";
// Pick up a function of that name from M, if any.
26 Function
*F
= M
->getFunction(Name
);
28 // If F is not available, declare it.
30 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
// Leading parameter types: a pointer to a function of type void(i8*),
// followed by an i8*. These line up with SubFn and SubFnParam in Args below.
32 Type
*Params
[] = {PointerType::getUnqual(FunctionType::get(
33 Builder
.getVoidTy(), Builder
.getInt8PtrTy(), false)),
34 Builder
.getInt8PtrTy(),
// The runtime entry point returns void and is not variadic.
40 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
41 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Actual arguments, in the same order as Params.
44 Value
*Args
[] = {SubFn
, SubFnParam
, Builder
.getInt32(PollyNumThreads
),
47 CallInst
*Call
= Builder
.CreateCall(F
, Args
);
// Attach the DLGenerated debug location to the emitted call.
48 Call
->setDebugLoc(DLGenerated
);
// Emits the three-step sequence that executes SubFn under the GNU OpenMP
// runtime, at the current Builder position:
//   1. createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride)
//   2. a direct call of SubFn with SubFnParam, tagged with DLGenerated
//   3. createCallJoinThreads()
//
// NOTE(review): the direct call in step 2 presumably lets the calling thread
// take part in the work as well -- confirm against the libgomp contract.
// NOTE(review): the tail of the parameter list is not visible in this listing.
51 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function
*SubFn
,
55 // Tell the runtime we start a parallel loop
56 createCallSpawnThreads(SubFn
, SubFnParam
, LB
, UB
, Stride
);
// Call the subfunction directly from the code being generated here.
57 CallInst
*Call
= Builder
.CreateCall(SubFn
, SubFnParam
);
58 Call
->setDebugLoc(DLGenerated
);
// Emit the matching end-of-parallel-region runtime call.
59 createCallJoinThreads();
// Creates the subfunction for F in module M.
//
// The new function has type void(i8*), internal linkage, and is named after F
// with the suffix "_polly_subfn". Its first argument is named
// "polly.par.userContext".
//
// NOTE(review): the declaration of FT and the return statement are not
// visible in this listing; presumably FT is the function type built below and
// SubFn is returned -- confirm.
62 Function
*ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function
*F
) const {
// Function type of the subfunction: void return, one i8* parameter,
// not variadic.
64 FunctionType::get(Builder
.getVoidTy(), {Builder
.getInt8PtrTy()}, false);
// Internal linkage: the subfunction is only referenced from within M.
65 Function
*SubFn
= Function::Create(FT
, Function::InternalLinkage
,
66 F
->getName() + "_polly_subfn", M
);
67 // Name the function's arguments
68 SubFn
->arg_begin()->setName("polly.par.userContext");
72 // Create a subfunction of the following (preliminary) structure:
80 // CheckNextBB PreHeaderBB
87 // HeaderBB will hold allocations and loading of variables.
88 // CheckNextBB will check for more work.
89 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
90 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
91 // ExitBB marks the end of the parallel execution.
//
// Summary of the steps visible below:
//   - Write a warning to errs() when PollyScheduling is not
//     OMPGeneralSchedulingType::Runtime or when PollyChunkSize is non-zero.
//   - Create the subfunction and its four basic blocks, and register the
//     blocks in the dominator tree DT.
//   - HeaderBB: allocate LBPtr/UBPtr, unpack the values from the struct
//     reached through the subfunction's first argument, branch to CheckNextBB.
//   - CheckNextBB: ask for a work item through createCallGetWorkItem and
//     branch to PreHeaderBB (more work) or ExitBB (done).
//   - PreHeaderBB: load LB and UB, subtract one from UB, build the loop with
//     createLoop, and branch back to CheckNextBB.
//   - ExitBB: emit the cleanup call and return void.
// On exit the Builder is positioned where createLoop left it.
//
// Returns a tuple of IV and the created subfunction.
// NOTE(review): the definitions of IV and AfterBB and the tail of the
// parameter list are not visible in this listing; IV is presumably the
// induction variable produced by createLoop -- confirm.
92 std::tuple
<Value
*, Function
*>
93 ParallelLoopGeneratorGOMP::createSubFn(Value
*Stride
, AllocaInst
*StructData
,
94 SetVector
<Value
*> Data
,
96 if (PollyScheduling
!= OMPGeneralSchedulingType::Runtime
) {
97 // User tried to influence the scheduling type (currently not supported)
98 errs() << "warning: Polly's GNU OpenMP backend solely "
99 "supports the scheduling type 'runtime'.\n";
102 if (PollyChunkSize
!= 0) {
103 // User tried to influence the chunk size (currently not supported)
104 errs() << "warning: Polly's GNU OpenMP backend solely "
105 "supports the default chunk size.\n";
108 Function
*SubFn
= createSubFnDefinition();
109 LLVMContext
&Context
= SubFn
->getContext();
111 // Store the previous basic block.
112 BasicBlock
*PrevBB
= Builder
.GetInsertBlock();
114 // Create basic blocks.
115 BasicBlock
*HeaderBB
= BasicBlock::Create(Context
, "polly.par.setup", SubFn
);
116 BasicBlock
*ExitBB
= BasicBlock::Create(Context
, "polly.par.exit", SubFn
);
117 BasicBlock
*CheckNextBB
=
118 BasicBlock::Create(Context
, "polly.par.checkNext", SubFn
);
119 BasicBlock
*PreHeaderBB
=
120 BasicBlock::Create(Context
, "polly.par.loadIVBounds", SubFn
);
// Register the new blocks in the dominator tree: HeaderBB is added with
// PrevBB as its dominator, the other three with HeaderBB as theirs.
122 DT
.addNewBlock(HeaderBB
, PrevBB
);
123 DT
.addNewBlock(ExitBB
, HeaderBB
);
124 DT
.addNewBlock(CheckNextBB
, HeaderBB
);
125 DT
.addNewBlock(PreHeaderBB
, HeaderBB
);
127 // Fill up basic block HeaderBB.
128 Builder
.SetInsertPoint(HeaderBB
);
// Stack slots of LongType that createCallGetWorkItem receives below and from
// which PreHeaderBB loads the bounds of the current work item.
129 Value
*LBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.LBPtr");
130 Value
*UBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.UBPtr");
// The subfunction's first argument is the user context from which the
// values in Data are extracted.
131 Value
*UserContext
= &*SubFn
->arg_begin();
133 extractValuesFromStruct(Data
, StructData
->getAllocatedType(), UserContext
,
135 Builder
.CreateBr(CheckNextBB
);
137 // Add code to check if another set of iterations will be executed.
138 Builder
.SetInsertPoint(CheckNextBB
);
139 Value
*Next
= createCallGetWorkItem(LBPtr
, UBPtr
);
// Narrow the result to i1 so it can serve as the branch condition.
140 Value
*HasNextSchedule
= Builder
.CreateTrunc(
141 Next
, Builder
.getInt1Ty(), "polly.par.hasNextScheduleBlock");
142 Builder
.CreateCondBr(HasNextSchedule
, PreHeaderBB
, ExitBB
);
144 // Add code to load the iv bounds for this set of iterations.
145 Builder
.SetInsertPoint(PreHeaderBB
);
146 Value
*LB
= Builder
.CreateLoad(LongType
, LBPtr
, "polly.par.LB");
147 Value
*UB
= Builder
.CreateLoad(LongType
, UBPtr
, "polly.par.UB");
149 // Subtract one as the upper bound provided by OpenMP is a < comparison
150 // whereas the codegenForSequential function creates a <= comparison.
151 UB
= Builder
.CreateSub(UB
, ConstantInt::get(LongType
, 1),
152 "polly.par.UBAdjusted");
// Close the cycle back to CheckNextBB, then step the insert point back onto
// that branch so that the loop created next is emitted in front of it.
154 Builder
.CreateBr(CheckNextBB
);
155 Builder
.SetInsertPoint(&*--Builder
.GetInsertPoint());
158 createLoop(LB
, UB
, Stride
, Builder
, LI
, DT
, AfterBB
, ICmpInst::ICMP_SLE
,
159 nullptr, true, /* UseGuard */ false);
// Remember where createLoop left the builder; this position is restored
// after ExitBB has been filled in.
161 BasicBlock::iterator LoopBody
= Builder
.GetInsertPoint();
163 // Add code to terminate this subfunction.
164 Builder
.SetInsertPoint(ExitBB
);
165 createCallCleanupThread();
166 Builder
.CreateRetVoid();
// Restore the position saved in LoopBody for code emitted after this call.
168 Builder
.SetInsertPoint(&*LoopBody
);
170 return std::make_tuple(IV
, SubFn
);
// Emits, at the current Builder position, a call to the GNU OpenMP runtime
// function "GOMP_loop_runtime_next" with LBPtr and UBPtr as arguments.
//
// The callee is looked up by name in M. The code below builds a declaration
// with external linkage that takes two pointers to LongType and returns an
// i8; per the existing comment this is meant for the case where no such
// function exists yet. The call is tagged with the DLGenerated debug location.
//
// The i8 result is compared (not-equal) against a zero of the same type,
// giving an i1 that is true when the runtime returned a non-zero value.
// NOTE(review): the return statement and the guard around the declaration
// are not visible in this listing; presumably the comparison result is
// returned -- confirm.
173 Value
*ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value
*LBPtr
,
175 const std::string Name
= "GOMP_loop_runtime_next";
// Pick up a function of that name from M, if any.
177 Function
*F
= M
->getFunction(Name
);
179 // If F is not available, declare it.
181 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
// Two pointer-to-LongType parameters, i8 return, not variadic.
182 Type
*Params
[] = {LongType
->getPointerTo(), LongType
->getPointerTo()};
183 FunctionType
*Ty
= FunctionType::get(Builder
.getInt8Ty(), Params
, false);
184 F
= Function::Create(Ty
, Linkage
, Name
, M
);
187 Value
*Args
[] = {LBPtr
, UBPtr
};
188 CallInst
*Call
= Builder
.CreateCall(F
, Args
);
189 Call
->setDebugLoc(DLGenerated
);
// Compare the call result against false zero-extended to the call's type,
// i.e. test for "result != 0".
190 Value
*Return
= Builder
.CreateICmpNE(
191 Call
, Builder
.CreateZExt(Builder
.getFalse(), Call
->getType()));
// Emits, at the current Builder position, an argument-less call to the GNU
// OpenMP runtime function "GOMP_parallel_end".
//
// The callee is looked up by name in M. The code below builds a declaration
// with external linkage and type void(); per the existing comment this is
// meant for the case where no such function exists yet. The call is tagged
// with the DLGenerated debug location.
// NOTE(review): the guard around the declaration and the closing braces are
// not visible in this listing.
195 void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
196 const std::string Name
= "GOMP_parallel_end";
// Pick up a function of that name from M, if any.
198 Function
*F
= M
->getFunction(Name
);
200 // If F is not available, declare it.
202 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
// void return, no parameters, not variadic.
204 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
205 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Call with an empty argument list.
208 CallInst
*Call
= Builder
.CreateCall(F
, {});
209 Call
->setDebugLoc(DLGenerated
);
// Emits, at the current Builder position, an argument-less call to the GNU
// OpenMP runtime function "GOMP_loop_end_nowait".
//
// The callee is looked up by name in M. The code below builds a declaration
// with external linkage and type void(); per the existing comment this is
// meant for the case where no such function exists yet. The call is tagged
// with the DLGenerated debug location.
// NOTE(review): the guard around the declaration and the closing braces are
// not visible in this listing.
212 void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
213 const std::string Name
= "GOMP_loop_end_nowait";
// Pick up a function of that name from M, if any.
215 Function
*F
= M
->getFunction(Name
);
217 // If F is not available, declare it.
219 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
// void return, no parameters, not variadic.
221 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
222 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Call with an empty argument list.
225 CallInst
*Call
= Builder
.CreateCall(F
, {});
226 Call
->setDebugLoc(DLGenerated
);