1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains functions to create parallel loops as LLVM-IR.
11 //===----------------------------------------------------------------------===//
13 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
14 #include "llvm/Analysis/LoopInfo.h"
15 #include "llvm/IR/Dominators.h"
16 #include "llvm/IR/Module.h"
19 using namespace polly
;
// Emit a call to the GNU OpenMP runtime function
// "GOMP_parallel_loop_runtime_start".
//
// The function is looked up by name in the module M. The code below builds an
// external-linkage declaration returning void whose first parameter is a
// pointer to a `void (ptr)` function. The call receives SubFn, SubFnParam and
// PollyNumThreads (as an i32 constant) as its leading arguments and is tagged
// with the DLGenerated debug location.
//
// NOTE(review): this listing does not show the whole definition (the
// remaining parameters, the guard around the declaration, and the tails of
// the Params and Args lists are missing) -- confirm against the full source.
21 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value
*SubFn
,
25 const std::string Name
= "GOMP_parallel_loop_runtime_start";
27 Function
*F
= M
->getFunction(Name
);
29 // If F is not available, declare it.
31 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
33 Type
*Params
[] = {PointerType::getUnqual(FunctionType::get(
34 Builder
.getVoidTy(), Builder
.getPtrTy(), false)),
41 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
42 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Arguments of the runtime call; the thread count is taken from
// PollyNumThreads.
45 Value
*Args
[] = {SubFn
, SubFnParam
, Builder
.getInt32(PollyNumThreads
),
48 CallInst
*Call
= Builder
.CreateCall(F
, Args
);
49 Call
->setDebugLoc(DLGenerated
);
// Emit the code that runs SubFn as a parallel loop.
//
// Three calls are emitted in this order: createCallSpawnThreads() with SubFn,
// SubFnParam, LB, UB and Stride; a direct call of SubFn with SubFnParam
// (tagged with the DLGenerated debug location); and createCallJoinThreads().
//
// NOTE(review): the direct call presumably lets the calling thread take part
// in the work next to the threads started by the runtime -- confirm against
// the libgomp ABI. The tail of the parameter list is not visible in this
// listing.
52 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function
*SubFn
,
56 // Tell the runtime we start a parallel loop
57 createCallSpawnThreads(SubFn
, SubFnParam
, LB
, UB
, Stride
);
58 CallInst
*Call
= Builder
.CreateCall(SubFn
, SubFnParam
);
59 Call
->setDebugLoc(DLGenerated
);
60 createCallJoinThreads();
// Create the function object for the outlined subfunction of F.
//
// The function is created in module M with internal linkage, using a
// `void (ptr)` function type, and is named after F with the suffix
// "_polly_subfn". Its first argument is named "polly.par.userContext".
//
// NOTE(review): the declaration of FT and the return statement are not
// visible in this listing; presumably SubFn is returned -- confirm.
63 Function
*ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function
*F
) const {
65 FunctionType::get(Builder
.getVoidTy(), {Builder
.getPtrTy()}, false);
66 Function
*SubFn
= Function::Create(FT
, Function::InternalLinkage
,
67 F
->getName() + "_polly_subfn", M
);
68 // Name the function's arguments
69 SubFn
->arg_begin()->setName("polly.par.userContext");
73 // Create a subfunction of the following (preliminary) structure:
81 // CheckNextBB PreHeaderBB
88 // HeaderBB will hold allocations and loading of variables.
89 // CheckNextBB will check for more work.
90 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
91 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
92 // ExitBB marks the end of the parallel execution.
// Build the body of the parallel subfunction and return a tuple of IV and the
// created function.
//
// A warning is printed to errs() when PollyScheduling is not
// OMPGeneralSchedulingType::Runtime or when PollyChunkSize is not 0. The
// subfunction obtained from createSubFnDefinition() is filled with four
// blocks: "polly.par.setup" (HeaderBB), "polly.par.checkNext" (CheckNextBB),
// "polly.par.loadIVBounds" (PreHeaderBB) and "polly.par.exit" (ExitBB).
// Before returning, the Builder's insert point is set to the position that
// createLoop() left behind (saved in LoopBody).
//
// NOTE(review): this listing does not show the last parameter(s), the
// declarations of AfterBB and IV, or the closing braces of the two warning
// branches -- confirm those against the full source.
93 std::tuple
<Value
*, Function
*>
94 ParallelLoopGeneratorGOMP::createSubFn(Value
*Stride
, AllocaInst
*StructData
,
95 SetVector
<Value
*> Data
,
97 if (PollyScheduling
!= OMPGeneralSchedulingType::Runtime
) {
98 // User tried to influence the scheduling type (currently not supported)
99 errs() << "warning: Polly's GNU OpenMP backend solely "
100 "supports the scheduling type 'runtime'.\n";
103 if (PollyChunkSize
!= 0) {
104 // User tried to influence the chunk size (currently not supported)
105 errs() << "warning: Polly's GNU OpenMP backend solely "
106 "supports the default chunk size.\n";
109 Function
*SubFn
= createSubFnDefinition();
110 LLVMContext
&Context
= SubFn
->getContext();
112 // Create basic blocks.
113 BasicBlock
*HeaderBB
= BasicBlock::Create(Context
, "polly.par.setup", SubFn
);
// The dominator tree and loop info are built while the subfunction contains
// only HeaderBB; the blocks created afterwards are added to the tree by hand.
114 SubFnDT
= std::make_unique
<DominatorTree
>(*SubFn
);
115 SubFnLI
= std::make_unique
<LoopInfo
>(*SubFnDT
);
117 BasicBlock
*ExitBB
= BasicBlock::Create(Context
, "polly.par.exit", SubFn
);
118 BasicBlock
*CheckNextBB
=
119 BasicBlock::Create(Context
, "polly.par.checkNext", SubFn
);
120 BasicBlock
*PreHeaderBB
=
121 BasicBlock::Create(Context
, "polly.par.loadIVBounds", SubFn
);
// Register each new block in the dominator tree with HeaderBB as its
// dominator.
123 SubFnDT
->addNewBlock(ExitBB
, HeaderBB
);
124 SubFnDT
->addNewBlock(CheckNextBB
, HeaderBB
);
125 SubFnDT
->addNewBlock(PreHeaderBB
, HeaderBB
);
127 // Fill up basic block HeaderBB.
128 Builder
.SetInsertPoint(HeaderBB
);
129 Value
*LBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.LBPtr");
130 Value
*UBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.UBPtr");
131 Value
*UserContext
= &*SubFn
->arg_begin();
// Pass Data, the allocated type of StructData and the user-context argument
// to extractValuesFromStruct() (remaining arguments not visible here).
133 extractValuesFromStruct(Data
, StructData
->getAllocatedType(), UserContext
,
135 Builder
.CreateBr(CheckNextBB
);
137 // Add code to check if another set of iterations will be executed.
138 Builder
.SetInsertPoint(CheckNextBB
);
139 Value
*Next
= createCallGetWorkItem(LBPtr
, UBPtr
);
140 Value
*HasNextSchedule
= Builder
.CreateTrunc(
141 Next
, Builder
.getInt1Ty(), "polly.par.hasNextScheduleBlock");
142 Builder
.CreateCondBr(HasNextSchedule
, PreHeaderBB
, ExitBB
);
144 // Add code to load the iv bounds for this set of iterations.
145 Builder
.SetInsertPoint(PreHeaderBB
);
146 Value
*LB
= Builder
.CreateLoad(LongType
, LBPtr
, "polly.par.LB");
147 Value
*UB
= Builder
.CreateLoad(LongType
, UBPtr
, "polly.par.UB");
149 // Subtract one as the upper bound provided by OpenMP is a < comparison
150 // whereas the codegenForSequential function creates a <= comparison.
151 UB
= Builder
.CreateSub(UB
, ConstantInt::get(LongType
, 1),
152 "polly.par.UBAdjusted");
// Branch back to CheckNextBB to ask for more work, then step the insert
// point back onto that branch so that what is emitted next is inserted in
// front of it.
154 Builder
.CreateBr(CheckNextBB
);
155 Builder
.SetInsertPoint(&*--Builder
.GetInsertPoint());
158 createLoop(LB
, UB
, Stride
, Builder
, *SubFnLI
, *SubFnDT
, AfterBB
,
159 ICmpInst::ICMP_SLE
, nullptr, true, /* UseGuard */ false);
// Remember the insert point left by createLoop(); it is restored below once
// ExitBB has been filled in.
161 BasicBlock::iterator LoopBody
= Builder
.GetInsertPoint();
163 // Add code to terminate this subfunction.
164 Builder
.SetInsertPoint(ExitBB
);
165 createCallCleanupThread();
166 Builder
.CreateRetVoid();
168 Builder
.SetInsertPoint(&*LoopBody
);
170 // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
171 // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
172 // from scratch since it is not needed here directly.
174 return std::make_tuple(IV
, SubFn
);
// Emit a call to the GNU OpenMP runtime function "GOMP_loop_runtime_next",
// passing LBPtr and UBPtr.
//
// The function is looked up by name in module M. The declaration built below
// has external linkage and the type `i8 (ptr, ptr)`. The call is tagged with
// the DLGenerated debug location, and its result is compared for inequality
// against a zero of the same type, which yields the value Return.
//
// NOTE(review): the second parameter, the guard around the declaration and
// the return statement are not visible in this listing; presumably Return is
// the value handed back to the caller -- confirm.
177 Value
*ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value
*LBPtr
,
179 const std::string Name
= "GOMP_loop_runtime_next";
181 Function
*F
= M
->getFunction(Name
);
183 // If F is not available, declare it.
185 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
186 Type
*Params
[] = {Builder
.getPtrTy(0), Builder
.getPtrTy(0)};
187 FunctionType
*Ty
= FunctionType::get(Builder
.getInt8Ty(), Params
, false);
188 F
= Function::Create(Ty
, Linkage
, Name
, M
);
191 Value
*Args
[] = {LBPtr
, UBPtr
};
192 CallInst
*Call
= Builder
.CreateCall(F
, Args
);
193 Call
->setDebugLoc(DLGenerated
);
// Turn the integer result of the runtime call into a boolean: result != 0.
194 Value
*Return
= Builder
.CreateICmpNE(
195 Call
, Builder
.CreateZExt(Builder
.getFalse(), Call
->getType()));
// Emit a call to the GNU OpenMP runtime function "GOMP_parallel_end".
//
// The function is looked up by name in module M. The declaration built below
// has external linkage, returns void and takes no parameters. The call passes
// no arguments and is tagged with the DLGenerated debug location.
//
// NOTE(review): the guard around the declaration (see the existing "If F is
// not available" comment) is not visible in this listing -- confirm.
199 void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
200 const std::string Name
= "GOMP_parallel_end";
202 Function
*F
= M
->getFunction(Name
);
204 // If F is not available, declare it.
206 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
208 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
209 F
= Function::Create(Ty
, Linkage
, Name
, M
);
212 CallInst
*Call
= Builder
.CreateCall(F
, {});
213 Call
->setDebugLoc(DLGenerated
);
// Emit a call to the GNU OpenMP runtime function "GOMP_loop_end_nowait".
//
// The function is looked up by name in module M. The declaration built below
// has external linkage, returns void and takes no parameters. The call passes
// no arguments and is tagged with the DLGenerated debug location.
//
// NOTE(review): the guard around the declaration (see the existing "If F is
// not available" comment) is not visible in this listing -- confirm.
216 void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
217 const std::string Name
= "GOMP_loop_end_nowait";
219 Function
*F
= M
->getFunction(Name
);
221 // If F is not available, declare it.
223 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
225 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
226 F
= Function::Create(Ty
, Linkage
, Name
, M
);
229 CallInst
*Call
= Builder
.CreateCall(F
, {});
230 Call
->setDebugLoc(DLGenerated
);