1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains functions to create scalar loops and orchestrate the
10 // creation of parallel loops as LLVM-IR.
12 //===----------------------------------------------------------------------===//
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/Options.h"
16 #include "polly/ScopDetection.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/DataLayout.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
26 using namespace polly
;
28 int polly::PollyNumThreads
;
29 OMPGeneralSchedulingType
polly::PollyScheduling
;
30 int polly::PollyChunkSize
;
32 static cl::opt
<int, true>
33 XPollyNumThreads("polly-num-threads",
34 cl::desc("Number of threads to use (0 = auto)"),
35 cl::Hidden
, cl::location(polly::PollyNumThreads
),
36 cl::init(0), cl::cat(PollyCategory
));
38 static cl::opt
<OMPGeneralSchedulingType
, true> XPollyScheduling(
40 cl::desc("Scheduling type of parallel OpenMP for loops"),
41 cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked
, "static",
43 clEnumValN(OMPGeneralSchedulingType::Dynamic
, "dynamic",
44 "Dynamic scheduling"),
45 clEnumValN(OMPGeneralSchedulingType::Guided
, "guided",
47 clEnumValN(OMPGeneralSchedulingType::Runtime
, "runtime",
48 "Runtime determined (OMP_SCHEDULE)")),
49 cl::Hidden
, cl::location(polly::PollyScheduling
),
50 cl::init(OMPGeneralSchedulingType::Runtime
), cl::Optional
,
51 cl::cat(PollyCategory
));
53 static cl::opt
<int, true>
54 XPollyChunkSize("polly-scheduling-chunksize",
55 cl::desc("Chunksize to use by the OpenMP runtime calls"),
56 cl::Hidden
, cl::location(polly::PollyChunkSize
),
57 cl::init(0), cl::Optional
, cl::cat(PollyCategory
));
59 // We generate a loop of either of the following structures:
64 // GuardBB PreHeaderBB
66 // __ PreHeaderBB | v \/ |
67 // / \ / | HeaderBB latch
68 // latch HeaderBB | |\ |
74 // depending on whether or not we know that it is executed at least once. If
75 // not, GuardBB checks if the loop is executed at least once. If this is the
76 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
77 // contains the loop iv 'polly.indvar', the incremented loop iv
78 // 'polly.indvar_next' as well as the condition to check if we execute another
79 // iteration of the loop. After the loop has finished, we branch to ExitBB.
80 // We expect the type of UB, LB, UB+Stride to be large enough for values that
81 // UB may take throughout the execution of the loop, including the computation
82 // of indvar + Stride before the final abort.
83 Value
*polly::createLoop(Value
*LB
, Value
*UB
, Value
*Stride
,
84 PollyIRBuilder
&Builder
, LoopInfo
&LI
,
85 DominatorTree
&DT
, BasicBlock
*&ExitBB
,
86 ICmpInst::Predicate Predicate
,
87 ScopAnnotator
*Annotator
, bool Parallel
, bool UseGuard
,
88 bool LoopVectDisabled
) {
89 Function
*F
= Builder
.GetInsertBlock()->getParent();
90 LLVMContext
&Context
= F
->getContext();
92 assert(LB
->getType() == UB
->getType() && "Types of loop bounds do not match");
93 IntegerType
*LoopIVType
= dyn_cast
<IntegerType
>(UB
->getType());
94 assert(LoopIVType
&& "UB is not integer?");
96 BasicBlock
*BeforeBB
= Builder
.GetInsertBlock();
98 UseGuard
? BasicBlock::Create(Context
, "polly.loop_if", F
) : nullptr;
99 BasicBlock
*HeaderBB
= BasicBlock::Create(Context
, "polly.loop_header", F
);
100 BasicBlock
*PreHeaderBB
=
101 BasicBlock::Create(Context
, "polly.loop_preheader", F
);
104 Loop
*OuterLoop
= LI
.getLoopFor(BeforeBB
);
105 Loop
*NewLoop
= LI
.AllocateLoop();
108 OuterLoop
->addChildLoop(NewLoop
);
110 LI
.addTopLevelLoop(NewLoop
);
114 OuterLoop
->addBasicBlockToLoop(GuardBB
, LI
);
115 OuterLoop
->addBasicBlockToLoop(PreHeaderBB
, LI
);
118 NewLoop
->addBasicBlockToLoop(HeaderBB
, LI
);
120 // Notify the annotator (if present) that we have a new loop, but only
121 // after the header block is set.
123 Annotator
->pushLoop(NewLoop
, Parallel
);
126 ExitBB
= SplitBlock(BeforeBB
, &*Builder
.GetInsertPoint(), &DT
, &LI
);
127 ExitBB
->setName("polly.loop_exit");
131 BeforeBB
->getTerminator()->setSuccessor(0, GuardBB
);
132 DT
.addNewBlock(GuardBB
, BeforeBB
);
135 Builder
.SetInsertPoint(GuardBB
);
137 LoopGuard
= Builder
.CreateICmp(Predicate
, LB
, UB
);
138 LoopGuard
->setName("polly.loop_guard");
139 Builder
.CreateCondBr(LoopGuard
, PreHeaderBB
, ExitBB
);
140 DT
.addNewBlock(PreHeaderBB
, GuardBB
);
142 BeforeBB
->getTerminator()->setSuccessor(0, PreHeaderBB
);
143 DT
.addNewBlock(PreHeaderBB
, BeforeBB
);
147 Builder
.SetInsertPoint(PreHeaderBB
);
148 Builder
.CreateBr(HeaderBB
);
151 DT
.addNewBlock(HeaderBB
, PreHeaderBB
);
152 Builder
.SetInsertPoint(HeaderBB
);
153 PHINode
*IV
= Builder
.CreatePHI(LoopIVType
, 2, "polly.indvar");
154 IV
->addIncoming(LB
, PreHeaderBB
);
155 Stride
= Builder
.CreateZExtOrBitCast(Stride
, LoopIVType
);
156 Value
*IncrementedIV
= Builder
.CreateNSWAdd(IV
, Stride
, "polly.indvar_next");
157 Value
*LoopCondition
=
158 Builder
.CreateICmp(Predicate
, IncrementedIV
, UB
, "polly.loop_cond");
160 // Create the loop latch and annotate it as such.
161 BranchInst
*B
= Builder
.CreateCondBr(LoopCondition
, HeaderBB
, ExitBB
);
163 Annotator
->annotateLoopLatch(B
, NewLoop
, Parallel
, LoopVectDisabled
);
165 IV
->addIncoming(IncrementedIV
, HeaderBB
);
167 DT
.changeImmediateDominator(ExitBB
, GuardBB
);
169 DT
.changeImmediateDominator(ExitBB
, HeaderBB
);
171 // The loop body should be added here.
172 Builder
.SetInsertPoint(HeaderBB
->getFirstNonPHI());
176 Value
*ParallelLoopGenerator::createParallelLoop(
177 Value
*LB
, Value
*UB
, Value
*Stride
, SetVector
<Value
*> &UsedValues
,
178 ValueMapT
&Map
, BasicBlock::iterator
*LoopBody
) {
180 AllocaInst
*Struct
= storeValuesIntoStruct(UsedValues
);
181 BasicBlock::iterator BeforeLoop
= Builder
.GetInsertPoint();
185 std::tie(IV
, SubFn
) = createSubFn(Stride
, Struct
, UsedValues
, Map
);
186 *LoopBody
= Builder
.GetInsertPoint();
187 Builder
.SetInsertPoint(&*BeforeLoop
);
189 // Add one as the upper bound provided by OpenMP is a < comparison
190 // whereas the codegenForSequential function creates a <= comparison.
191 UB
= Builder
.CreateAdd(UB
, ConstantInt::get(LongType
, 1));
193 // Execute the prepared subfunction in parallel.
194 deployParallelExecution(SubFn
, Struct
, LB
, UB
, Stride
);
199 Function
*ParallelLoopGenerator::createSubFnDefinition() {
200 Function
*F
= Builder
.GetInsertBlock()->getParent();
201 Function
*SubFn
= prepareSubFnDefinition(F
);
203 // Certain backends (e.g., NVPTX) do not support '.'s in function names.
204 // Hence, we ensure that all '.'s are replaced by '_'s.
205 std::string FunctionName
= SubFn
->getName().str();
206 std::replace(FunctionName
.begin(), FunctionName
.end(), '.', '_');
207 SubFn
->setName(FunctionName
);
209 // Do not run any polly pass on the new function.
210 SubFn
->addFnAttr(PollySkipFnAttr
);
216 ParallelLoopGenerator::storeValuesIntoStruct(SetVector
<Value
*> &Values
) {
217 SmallVector
<Type
*, 8> Members
;
219 for (Value
*V
: Values
)
220 Members
.push_back(V
->getType());
222 const DataLayout
&DL
= Builder
.GetInsertBlock()->getModule()->getDataLayout();
224 // We do not want to allocate the alloca inside any loop, thus we allocate it
225 // in the entry block of the function and use annotations to denote the actual
226 // live span (similar to clang).
227 BasicBlock
&EntryBB
= Builder
.GetInsertBlock()->getParent()->getEntryBlock();
228 BasicBlock::iterator IP
= EntryBB
.getFirstInsertionPt();
229 StructType
*Ty
= StructType::get(Builder
.getContext(), Members
);
230 AllocaInst
*Struct
= new AllocaInst(Ty
, DL
.getAllocaAddrSpace(), nullptr,
231 "polly.par.userContext", IP
);
233 for (unsigned i
= 0; i
< Values
.size(); i
++) {
234 Value
*Address
= Builder
.CreateStructGEP(Ty
, Struct
, i
);
235 Address
->setName("polly.subfn.storeaddr." + Values
[i
]->getName());
236 Builder
.CreateStore(Values
[i
], Address
);
242 void ParallelLoopGenerator::extractValuesFromStruct(
243 SetVector
<Value
*> OldValues
, Type
*Ty
, Value
*Struct
, ValueMapT
&Map
) {
244 for (unsigned i
= 0; i
< OldValues
.size(); i
++) {
245 Value
*Address
= Builder
.CreateStructGEP(Ty
, Struct
, i
);
246 Type
*ElemTy
= cast
<GetElementPtrInst
>(Address
)->getResultElementType();
247 Value
*NewValue
= Builder
.CreateLoad(ElemTy
, Address
);
248 NewValue
->setName("polly.subfunc.arg." + OldValues
[i
]->getName());
249 Map
[OldValues
[i
]] = NewValue
;
253 DebugLoc
polly::createDebugLocForGeneratedCode(Function
*F
) {
257 LLVMContext
&Ctx
= F
->getContext();
258 DISubprogram
*DILScope
=
259 dyn_cast_or_null
<DISubprogram
>(F
->getMetadata(LLVMContext::MD_dbg
));
262 return DILocation::get(Ctx
, 0, 0, DILScope
);