1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the IslNodeBuilder, a class to translate an isl AST into
12 //===----------------------------------------------------------------------===//
14 #include "polly/CodeGen/IslNodeBuilder.h"
15 #include "polly/CodeGen/BlockGenerators.h"
16 #include "polly/CodeGen/CodeGeneration.h"
17 #include "polly/CodeGen/IslAst.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
20 #include "polly/CodeGen/LoopGeneratorsKMP.h"
21 #include "polly/CodeGen/RuntimeDebugBuilder.h"
22 #include "polly/Options.h"
23 #include "polly/ScopInfo.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/Support/SCEVValidator.h"
26 #include "polly/Support/ScopHelper.h"
27 #include "polly/Support/VirtualInstruction.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/AssumptionCache.h"
34 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/ScalarEvolution.h"
37 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
38 #include "llvm/Analysis/TargetLibraryInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/TargetParser/Triple.h"
56 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
58 #include "isl/aff_type.h"
60 #include "isl/ast_build.h"
61 #include "isl/isl-noexceptions.h"
64 #include "isl/union_map.h"
65 #include "isl/union_set.h"
76 using namespace polly
;
78 #define DEBUG_TYPE "polly-codegen"
80 STATISTIC(VersionedScops
, "Number of SCoPs that required versioning.");
82 STATISTIC(SequentialLoops
, "Number of generated sequential for-loops");
83 STATISTIC(ParallelLoops
, "Number of generated parallel for-loops");
84 STATISTIC(IfConditions
, "Number of generated if-conditions");
86 /// OpenMP backend options
87 enum class OpenMPBackend
{ GNU
, LLVM
};
89 static cl::opt
<bool> PollyGenerateRTCPrint(
90 "polly-codegen-emit-rtc-print",
91 cl::desc("Emit code that prints the runtime check result dynamically."),
92 cl::Hidden
, cl::cat(PollyCategory
));
94 // If this option is set we always use the isl AST generator to regenerate
95 // memory accesses. Without this option set we regenerate expressions using the
96 // original SCEV expressions and only generate new expressions in case the
97 // access relation has been changed and consequently must be regenerated.
98 static cl::opt
<bool> PollyGenerateExpressions(
99 "polly-codegen-generate-expressions",
100 cl::desc("Generate AST expressions for unmodified and modified accesses"),
101 cl::Hidden
, cl::cat(PollyCategory
));
103 static cl::opt
<int> PollyTargetFirstLevelCacheLineSize(
104 "polly-target-first-level-cache-line-size",
105 cl::desc("The size of the first level cache line size specified in bytes."),
106 cl::Hidden
, cl::init(64), cl::cat(PollyCategory
));
108 static cl::opt
<OpenMPBackend
> PollyOmpBackend(
109 "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"),
110 cl::values(clEnumValN(OpenMPBackend::GNU
, "GNU", "GNU OpenMP"),
111 clEnumValN(OpenMPBackend::LLVM
, "LLVM", "LLVM OpenMP")),
112 cl::Hidden
, cl::init(OpenMPBackend::GNU
), cl::cat(PollyCategory
));
114 isl::ast_expr
IslNodeBuilder::getUpperBound(isl::ast_node_for For
,
115 ICmpInst::Predicate
&Predicate
) {
116 isl::ast_expr Cond
= For
.cond();
117 isl::ast_expr Iterator
= For
.iterator();
118 assert(isl_ast_expr_get_type(Cond
.get()) == isl_ast_expr_op
&&
119 "conditional expression is not an atomic upper bound");
121 isl_ast_op_type OpType
= isl_ast_expr_get_op_type(Cond
.get());
125 Predicate
= ICmpInst::ICMP_SLE
;
128 Predicate
= ICmpInst::ICMP_SLT
;
131 llvm_unreachable("Unexpected comparison type in loop condition");
134 isl::ast_expr Arg0
= Cond
.get_op_arg(0);
136 assert(isl_ast_expr_get_type(Arg0
.get()) == isl_ast_expr_id
&&
137 "conditional expression is not an atomic upper bound");
139 isl::id UBID
= Arg0
.get_id();
141 assert(isl_ast_expr_get_type(Iterator
.get()) == isl_ast_expr_id
&&
142 "Could not get the iterator");
144 isl::id IteratorID
= Iterator
.get_id();
146 assert(UBID
.get() == IteratorID
.get() &&
147 "conditional expression is not an atomic upper bound");
149 return Cond
.get_op_arg(1);
152 int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For
) {
153 assert(isl_ast_node_get_type(For
.get()) == isl_ast_node_for
);
154 isl::ast_node Body
= For
.body();
156 // First, check if we can actually handle this code.
157 switch (isl_ast_node_get_type(Body
.get())) {
158 case isl_ast_node_user
:
160 case isl_ast_node_block
: {
161 isl::ast_node_block BodyBlock
= Body
.as
<isl::ast_node_block
>();
162 isl::ast_node_list List
= BodyBlock
.children();
163 for (isl::ast_node Node
: List
) {
164 isl_ast_node_type NodeType
= isl_ast_node_get_type(Node
.get());
165 if (NodeType
!= isl_ast_node_user
)
174 isl::ast_expr Init
= For
.init();
175 if (!Init
.isa
<isl::ast_expr_int
>() || !Init
.val().is_zero())
177 isl::ast_expr Inc
= For
.inc();
178 if (!Inc
.isa
<isl::ast_expr_int
>() || !Inc
.val().is_one())
180 CmpInst::Predicate Predicate
;
181 isl::ast_expr UB
= getUpperBound(For
, Predicate
);
182 if (!UB
.isa
<isl::ast_expr_int
>())
184 isl::val UpVal
= UB
.get_val();
185 int NumberIterations
= UpVal
.get_num_si();
186 if (NumberIterations
< 0)
188 if (Predicate
== CmpInst::ICMP_SLT
)
189 return NumberIterations
;
191 return NumberIterations
+ 1;
194 static void findReferencesByUse(Value
*SrcVal
, ScopStmt
*UserStmt
,
195 Loop
*UserScope
, const ValueMapT
&GlobalMap
,
196 SetVector
<Value
*> &Values
,
197 SetVector
<const SCEV
*> &SCEVs
) {
198 VirtualUse VUse
= VirtualUse::create(UserStmt
, UserScope
, SrcVal
, true);
199 switch (VUse
.getKind()) {
200 case VirtualUse::Constant
:
201 // When accelerator-offloading, GlobalValue is a host address whose content
202 // must still be transferred to the GPU.
203 if (isa
<GlobalValue
>(SrcVal
))
204 Values
.insert(SrcVal
);
207 case VirtualUse::Synthesizable
:
208 SCEVs
.insert(VUse
.getScevExpr());
211 case VirtualUse::Block
:
212 case VirtualUse::ReadOnly
:
213 case VirtualUse::Hoisted
:
214 case VirtualUse::Intra
:
215 case VirtualUse::Inter
:
219 if (Value
*NewVal
= GlobalMap
.lookup(SrcVal
))
220 Values
.insert(NewVal
);
223 static void findReferencesInInst(Instruction
*Inst
, ScopStmt
*UserStmt
,
224 Loop
*UserScope
, const ValueMapT
&GlobalMap
,
225 SetVector
<Value
*> &Values
,
226 SetVector
<const SCEV
*> &SCEVs
) {
227 for (Use
&U
: Inst
->operands())
228 findReferencesByUse(U
.get(), UserStmt
, UserScope
, GlobalMap
, Values
, SCEVs
);
231 static void findReferencesInStmt(ScopStmt
*Stmt
, SetVector
<Value
*> &Values
,
232 ValueMapT
&GlobalMap
,
233 SetVector
<const SCEV
*> &SCEVs
) {
234 LoopInfo
*LI
= Stmt
->getParent()->getLI();
236 BasicBlock
*BB
= Stmt
->getBasicBlock();
237 Loop
*Scope
= LI
->getLoopFor(BB
);
238 for (Instruction
*Inst
: Stmt
->getInstructions())
239 findReferencesInInst(Inst
, Stmt
, Scope
, GlobalMap
, Values
, SCEVs
);
241 if (Stmt
->isRegionStmt()) {
242 for (BasicBlock
*BB
: Stmt
->getRegion()->blocks()) {
243 Loop
*Scope
= LI
->getLoopFor(BB
);
244 for (Instruction
&Inst
: *BB
)
245 findReferencesInInst(&Inst
, Stmt
, Scope
, GlobalMap
, Values
, SCEVs
);
250 void polly::addReferencesFromStmt(ScopStmt
*Stmt
, void *UserPtr
,
251 bool CreateScalarRefs
) {
252 auto &References
= *static_cast<SubtreeReferences
*>(UserPtr
);
254 findReferencesInStmt(Stmt
, References
.Values
, References
.GlobalMap
,
257 for (auto &Access
: *Stmt
) {
258 if (References
.ParamSpace
) {
259 isl::space ParamSpace
= Access
->getLatestAccessRelation().get_space();
260 (*References
.ParamSpace
) =
261 References
.ParamSpace
->align_params(ParamSpace
);
264 if (Access
->isLatestArrayKind()) {
265 auto *BasePtr
= Access
->getLatestScopArrayInfo()->getBasePtr();
266 if (Instruction
*OpInst
= dyn_cast
<Instruction
>(BasePtr
))
267 if (Stmt
->getParent()->contains(OpInst
))
270 References
.Values
.insert(BasePtr
);
274 if (CreateScalarRefs
)
275 References
.Values
.insert(References
.BlockGen
.getOrCreateAlloca(*Access
));
279 /// Extract the out-of-scop values and SCEVs referenced from a set describing
282 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
283 /// statement and the base pointers of the memory accesses. For scalar
284 /// statements we force the generation of alloca memory locations and list
285 /// these locations in the set of out-of-scop values as well.
287 /// @param Set A set which references the ScopStmt we are interested in.
288 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences
290 static void addReferencesFromStmtSet(isl::set Set
, SubtreeReferences
*UserPtr
) {
291 isl::id Id
= Set
.get_tuple_id();
292 auto *Stmt
= static_cast<ScopStmt
*>(Id
.get_user());
293 addReferencesFromStmt(Stmt
, UserPtr
);
296 /// Extract the out-of-scop values and SCEVs referenced from a union set
297 /// referencing multiple ScopStmts.
299 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
300 /// statement and the base pointers of the memory accesses. For scalar
301 /// statements we force the generation of alloca memory locations and list
302 /// these locations in the set of out-of-scop values as well.
304 /// @param USet A union set referencing the ScopStmts we are interested
306 /// @param References The SubtreeReferences data structure through which
307 /// results are returned and further information is
309 static void addReferencesFromStmtUnionSet(isl::union_set USet
,
310 SubtreeReferences
&References
) {
312 for (isl::set Set
: USet
.get_set_list())
313 addReferencesFromStmtSet(Set
, &References
);
317 IslNodeBuilder::getScheduleForAstNode(const isl::ast_node
&Node
) {
318 return IslAstInfo::getSchedule(Node
);
321 void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node
&For
,
322 SetVector
<Value
*> &Values
,
323 SetVector
<const Loop
*> &Loops
) {
324 SetVector
<const SCEV
*> SCEVs
;
325 SubtreeReferences References
= {
326 LI
, SE
, S
, ValueMap
, Values
, SCEVs
, getBlockGenerator(), nullptr};
328 for (const auto &I
: IDToValue
)
329 Values
.insert(I
.second
);
331 // NOTE: this is populated in IslNodeBuilder::addParameters
332 for (const auto &I
: OutsideLoopIterations
)
333 Values
.insert(cast
<SCEVUnknown
>(I
.second
)->getValue());
335 isl::union_set Schedule
= getScheduleForAstNode(For
).domain();
336 addReferencesFromStmtUnionSet(Schedule
, References
);
338 for (const SCEV
*Expr
: SCEVs
) {
339 findValues(Expr
, SE
, Values
);
340 findLoops(Expr
, Loops
);
343 Values
.remove_if([](const Value
*V
) { return isa
<GlobalValue
>(V
); });
345 /// Note: Code generation of induction variables of loops outside Scops
347 /// Remove loops that contain the scop or that are part of the scop, as they
348 /// are considered local. This leaves only loops that are before the scop, but
349 /// do not contain the scop itself.
350 /// We ignore loops perfectly contained in the Scop because these are already
351 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
352 /// whose induction variables are referred to by the Scop, but the Scop is not
353 /// fully contained in these Loops. Since there can be many of these,
354 /// we choose to codegen these on-demand.
355 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
356 Loops
.remove_if([this](const Loop
*L
) {
357 return S
.contains(L
) || L
->contains(S
.getEntry());
360 // Contains Values that may need to be replaced with other values
361 // due to replacements from the ValueMap. We should make sure
362 // that we return correctly remapped values.
363 // NOTE: this code path is tested by:
364 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
365 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
366 SetVector
<Value
*> ReplacedValues
;
367 for (Value
*V
: Values
) {
368 ReplacedValues
.insert(getLatestValue(V
));
370 Values
= ReplacedValues
;
373 Value
*IslNodeBuilder::getLatestValue(Value
*Original
) const {
374 auto It
= ValueMap
.find(Original
);
375 if (It
== ValueMap
.end())
380 void IslNodeBuilder::createMark(__isl_take isl_ast_node
*Node
) {
381 auto *Id
= isl_ast_node_mark_get_id(Node
);
382 auto Child
= isl_ast_node_mark_get_node(Node
);
383 isl_ast_node_free(Node
);
384 // If a child node of a 'SIMD mark' is a loop that has a single iteration,
385 // it will be optimized away and we should skip it.
386 if (strcmp(isl_id_get_name(Id
), "SIMD") == 0 &&
387 isl_ast_node_get_type(Child
) == isl_ast_node_for
) {
388 createForSequential(isl::manage(Child
).as
<isl::ast_node_for
>(), true);
393 BandAttr
*ChildLoopAttr
= getLoopAttr(isl::manage_copy(Id
));
394 BandAttr
*AncestorLoopAttr
;
396 // Save current LoopAttr environment to restore again when leaving this
397 // subtree. This means there was no loop between the ancestor LoopAttr and
398 // this mark, i.e. the ancestor LoopAttr did not directly mark a loop. This
399 // can happen e.g. if the AST build peeled or unrolled the loop.
400 AncestorLoopAttr
= Annotator
.getStagingAttrEnv();
402 Annotator
.getStagingAttrEnv() = ChildLoopAttr
;
408 assert(Annotator
.getStagingAttrEnv() == ChildLoopAttr
&&
409 "Nest must not overwrite loop attr environment");
410 Annotator
.getStagingAttrEnv() = AncestorLoopAttr
;
416 /// Restore the initial ordering of dimensions of the band node
418 /// In case the band node represents all the dimensions of the iteration
419 /// domain, recreate the band node to restore the initial ordering of the
422 /// @param Node The band node to be modified.
423 /// @return The modified schedule node.
424 static bool IsLoopVectorizerDisabled(isl::ast_node_for Node
) {
425 assert(isl_ast_node_get_type(Node
.get()) == isl_ast_node_for
);
426 isl::ast_node Body
= Node
.body();
427 if (isl_ast_node_get_type(Body
.get()) != isl_ast_node_mark
)
430 isl::ast_node_mark BodyMark
= Body
.as
<isl::ast_node_mark
>();
431 auto Id
= BodyMark
.id();
432 if (strcmp(Id
.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
437 void IslNodeBuilder::createForSequential(isl::ast_node_for For
,
439 Value
*ValueLB
, *ValueUB
, *ValueInc
;
441 BasicBlock
*ExitBlock
;
443 CmpInst::Predicate Predicate
;
445 bool LoopVectorizerDisabled
= IsLoopVectorizerDisabled(For
);
447 isl::ast_node Body
= For
.body();
449 // isl_ast_node_for_is_degenerate(For)
451 // TODO: For degenerated loops we could generate a plain assignment.
452 // However, for now we just reuse the logic for normal loops, which will
453 // create a loop with a single iteration.
455 isl::ast_expr Init
= For
.init();
456 isl::ast_expr Inc
= For
.inc();
457 isl::ast_expr Iterator
= For
.iterator();
458 isl::id IteratorID
= Iterator
.get_id();
459 isl::ast_expr UB
= getUpperBound(For
, Predicate
);
461 ValueLB
= ExprBuilder
.create(Init
.release());
462 ValueUB
= ExprBuilder
.create(UB
.release());
463 ValueInc
= ExprBuilder
.create(Inc
.release());
465 MaxType
= ExprBuilder
.getType(Iterator
.get());
466 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueLB
->getType());
467 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueUB
->getType());
468 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueInc
->getType());
470 if (MaxType
!= ValueLB
->getType())
471 ValueLB
= Builder
.CreateSExt(ValueLB
, MaxType
);
472 if (MaxType
!= ValueUB
->getType())
473 ValueUB
= Builder
.CreateSExt(ValueUB
, MaxType
);
474 if (MaxType
!= ValueInc
->getType())
475 ValueInc
= Builder
.CreateSExt(ValueInc
, MaxType
);
477 // If we can show that LB <Predicate> UB holds at least once, we can
478 // omit the GuardBB in front of the loop.
479 bool UseGuardBB
= !GenSE
->isKnownPredicate(Predicate
, GenSE
->getSCEV(ValueLB
),
480 GenSE
->getSCEV(ValueUB
));
481 IV
= createLoop(ValueLB
, ValueUB
, ValueInc
, Builder
, *GenLI
, *GenDT
,
482 ExitBlock
, Predicate
, &Annotator
, MarkParallel
, UseGuardBB
,
483 LoopVectorizerDisabled
);
484 IDToValue
[IteratorID
.get()] = IV
;
486 create(Body
.release());
488 Annotator
.popLoop(MarkParallel
);
490 IDToValue
.erase(IDToValue
.find(IteratorID
.get()));
492 Builder
.SetInsertPoint(&ExitBlock
->front());
497 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node
*For
) {
499 isl_ast_expr
*Init
, *Inc
, *Iterator
, *UB
;
501 Value
*ValueLB
, *ValueUB
, *ValueInc
;
504 CmpInst::Predicate Predicate
;
506 // The preamble of parallel code interacts different than normal code with
507 // e.g., scalar initialization. Therefore, we ensure the parallel code is
508 // separated from the last basic block.
509 BasicBlock
*ParBB
= SplitBlock(Builder
.GetInsertBlock(),
510 &*Builder
.GetInsertPoint(), &DT
, &LI
);
511 ParBB
->setName("polly.parallel.for");
512 Builder
.SetInsertPoint(&ParBB
->front());
514 Body
= isl_ast_node_for_get_body(For
);
515 Init
= isl_ast_node_for_get_init(For
);
516 Inc
= isl_ast_node_for_get_inc(For
);
517 Iterator
= isl_ast_node_for_get_iterator(For
);
518 IteratorID
= isl_ast_expr_get_id(Iterator
);
519 UB
= getUpperBound(isl::manage_copy(For
).as
<isl::ast_node_for
>(), Predicate
)
522 ValueLB
= ExprBuilder
.create(Init
);
523 ValueUB
= ExprBuilder
.create(UB
);
524 ValueInc
= ExprBuilder
.create(Inc
);
526 // OpenMP always uses SLE. In case the isl generated AST uses a SLT
527 // expression, we need to adjust the loop bound by one.
528 if (Predicate
== CmpInst::ICMP_SLT
)
529 ValueUB
= Builder
.CreateAdd(
530 ValueUB
, Builder
.CreateSExt(Builder
.getTrue(), ValueUB
->getType()));
532 MaxType
= ExprBuilder
.getType(Iterator
);
533 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueLB
->getType());
534 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueUB
->getType());
535 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueInc
->getType());
537 if (MaxType
!= ValueLB
->getType())
538 ValueLB
= Builder
.CreateSExt(ValueLB
, MaxType
);
539 if (MaxType
!= ValueUB
->getType())
540 ValueUB
= Builder
.CreateSExt(ValueUB
, MaxType
);
541 if (MaxType
!= ValueInc
->getType())
542 ValueInc
= Builder
.CreateSExt(ValueInc
, MaxType
);
544 BasicBlock::iterator LoopBody
;
546 SetVector
<Value
*> SubtreeValues
;
547 SetVector
<const Loop
*> Loops
;
549 getReferencesInSubtree(isl::manage_copy(For
), SubtreeValues
, Loops
);
551 // Create for all loops we depend on values that contain the current loop
552 // iteration. These values are necessary to generate code for SCEVs that
553 // depend on such loops. As a result we need to pass them to the subfunction.
554 // See [Code generation of induction variables of loops outside Scops]
555 for (const Loop
*L
: Loops
) {
556 Value
*LoopInductionVar
= materializeNonScopLoopInductionVariable(L
);
557 SubtreeValues
.insert(LoopInductionVar
);
562 std::unique_ptr
<ParallelLoopGenerator
> ParallelLoopGenPtr
;
564 switch (PollyOmpBackend
) {
565 case OpenMPBackend::GNU
:
566 ParallelLoopGenPtr
.reset(new ParallelLoopGeneratorGOMP(Builder
, DL
));
568 case OpenMPBackend::LLVM
:
569 ParallelLoopGenPtr
.reset(new ParallelLoopGeneratorKMP(Builder
, DL
));
573 IV
= ParallelLoopGenPtr
->createParallelLoop(
574 ValueLB
, ValueUB
, ValueInc
, SubtreeValues
, NewValues
, &LoopBody
);
575 BasicBlock::iterator AfterLoop
= Builder
.GetInsertPoint();
577 // Remember the parallel subfunction
578 Function
*SubFn
= LoopBody
->getFunction();
579 ParallelSubfunctions
.push_back(SubFn
);
581 // We start working on the outlined function. Since DominatorTree/LoopInfo are
582 // not an inter-procedural passes, we temporarily switch them out. Save the
584 Function
*CallerFn
= Builder
.GetInsertBlock()->getParent();
585 DominatorTree
*CallerDT
= GenDT
;
586 LoopInfo
*CallerLI
= GenLI
;
587 ScalarEvolution
*CallerSE
= GenSE
;
588 ValueMapT CallerGlobals
= ValueMap
;
589 IslExprBuilder::IDToValueTy IDToValueCopy
= IDToValue
;
591 // Get the analyses for the subfunction. ParallelLoopGenerator already create
592 // DominatorTree and LoopInfo for us.
593 DominatorTree
*SubDT
= ParallelLoopGenPtr
->getCalleeDominatorTree();
594 LoopInfo
*SubLI
= ParallelLoopGenPtr
->getCalleeLoopInfo();
596 // Create TargetLibraryInfo, AssumptionCachem and ScalarEvolution ourselves.
597 // TODO: Ideally, we would use the pass manager's TargetLibraryInfoPass and
598 // AssumptionAnalysis instead of our own. They contain more target-specific
599 // information than we have available here: TargetLibraryInfoImpl can be a
600 // derived class determined by TargetMachine, AssumptionCache can be
601 // configured using a TargetTransformInfo object also derived from
603 TargetLibraryInfoImpl
BaselineInfoImpl(
604 Triple(SubFn
->getParent()->getTargetTriple()));
605 TargetLibraryInfo
CalleeTLI(BaselineInfoImpl
, SubFn
);
606 AssumptionCache
CalleeAC(*SubFn
);
607 std::unique_ptr
<ScalarEvolution
> SubSE
= std::make_unique
<ScalarEvolution
>(
608 *SubFn
, CalleeTLI
, CalleeAC
, *SubDT
, *SubLI
);
610 // Switch to the subfunction
614 BlockGen
.switchGeneratedFunc(SubFn
, GenDT
, GenLI
, GenSE
);
615 ExprBuilder
.switchGeneratedFunc(SubFn
, GenDT
, GenLI
, GenSE
);
616 Builder
.SetInsertPoint(&*LoopBody
);
618 // Update the ValueMap to use instructions in the subfunction. Note that
619 // "GlobalMap" used in BlockGenerator/IslExprBuilder is a reference to this
621 for (auto &[OldVal
, NewVal
] : ValueMap
) {
622 NewVal
= NewValues
.lookup(NewVal
);
624 // Clean-up any value that getReferencesInSubtree thinks we do not need.
625 // DenseMap::erase only writes a tombstone (and destroys OldVal/NewVal), so
626 // does not invalidate our iterator.
628 ValueMap
.erase(OldVal
);
631 // This is for NewVals that do not appear in ValueMap (such as SCoP-invariant
632 // values whose original value can be reused as long as we are in the same
633 // function). No need to map the others.
634 for (auto &[NewVal
, NewNewVal
] : NewValues
) {
635 if (Instruction
*NewValInst
= dyn_cast
<Instruction
>((Value
*)NewVal
)) {
636 if (S
.contains(NewValInst
))
638 assert(NewValInst
->getFunction() == &S
.getFunction());
640 assert(!ValueMap
.contains(NewVal
));
641 ValueMap
[NewVal
] = NewNewVal
;
644 // Also update the IDToValue map to use instructions from the subfunction.
645 for (auto &[OldVal
, NewVal
] : IDToValue
) {
646 NewVal
= NewValues
.lookup(NewVal
);
649 IDToValue
[IteratorID
] = IV
;
652 // Check whether the maps now exclusively refer to SubFn values.
653 for (auto &[OldVal
, SubVal
] : ValueMap
) {
654 Instruction
*SubInst
= dyn_cast
<Instruction
>((Value
*)SubVal
);
655 assert(SubInst
->getFunction() == SubFn
&&
656 "Instructions from outside the subfn cannot be accessed within the "
659 for (auto &[Id
, SubVal
] : IDToValue
) {
660 Instruction
*SubInst
= dyn_cast
<Instruction
>((Value
*)SubVal
);
661 assert(SubInst
->getFunction() == SubFn
&&
662 "Instructions from outside the subfn cannot be accessed within the "
667 ValueMapT NewValuesReverse
;
668 for (auto P
: NewValues
)
669 NewValuesReverse
[P
.second
] = P
.first
;
671 Annotator
.addAlternativeAliasBases(NewValuesReverse
);
675 Annotator
.resetAlternativeAliasBases();
677 // Resume working on the caller function.
681 IDToValue
= std::move(IDToValueCopy
);
682 ValueMap
= std::move(CallerGlobals
);
683 ExprBuilder
.switchGeneratedFunc(CallerFn
, CallerDT
, CallerLI
, CallerSE
);
684 BlockGen
.switchGeneratedFunc(CallerFn
, CallerDT
, CallerLI
, CallerSE
);
685 Builder
.SetInsertPoint(&*AfterLoop
);
687 for (const Loop
*L
: Loops
)
688 OutsideLoopIterations
.erase(L
);
690 isl_ast_node_free(For
);
691 isl_ast_expr_free(Iterator
);
692 isl_id_free(IteratorID
);
697 void IslNodeBuilder::createFor(__isl_take isl_ast_node
*For
) {
698 if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For
))) {
699 createForParallel(For
);
702 bool Parallel
= (IslAstInfo::isParallel(isl::manage_copy(For
)) &&
703 !IslAstInfo::isReductionParallel(isl::manage_copy(For
)));
704 createForSequential(isl::manage(For
).as
<isl::ast_node_for
>(), Parallel
);
707 void IslNodeBuilder::createIf(__isl_take isl_ast_node
*If
) {
708 isl_ast_expr
*Cond
= isl_ast_node_if_get_cond(If
);
710 Function
*F
= Builder
.GetInsertBlock()->getParent();
711 LLVMContext
&Context
= F
->getContext();
713 BasicBlock
*CondBB
= SplitBlock(Builder
.GetInsertBlock(),
714 &*Builder
.GetInsertPoint(), GenDT
, GenLI
);
715 CondBB
->setName("polly.cond");
716 BasicBlock
*MergeBB
= SplitBlock(CondBB
, &CondBB
->front(), GenDT
, GenLI
);
717 MergeBB
->setName("polly.merge");
718 BasicBlock
*ThenBB
= BasicBlock::Create(Context
, "polly.then", F
);
719 BasicBlock
*ElseBB
= BasicBlock::Create(Context
, "polly.else", F
);
721 GenDT
->addNewBlock(ThenBB
, CondBB
);
722 GenDT
->addNewBlock(ElseBB
, CondBB
);
723 GenDT
->changeImmediateDominator(MergeBB
, CondBB
);
725 Loop
*L
= GenLI
->getLoopFor(CondBB
);
727 L
->addBasicBlockToLoop(ThenBB
, *GenLI
);
728 L
->addBasicBlockToLoop(ElseBB
, *GenLI
);
731 CondBB
->getTerminator()->eraseFromParent();
733 Builder
.SetInsertPoint(CondBB
);
734 Value
*Predicate
= ExprBuilder
.create(Cond
);
735 Builder
.CreateCondBr(Predicate
, ThenBB
, ElseBB
);
736 Builder
.SetInsertPoint(ThenBB
);
737 Builder
.CreateBr(MergeBB
);
738 Builder
.SetInsertPoint(ElseBB
);
739 Builder
.CreateBr(MergeBB
);
740 Builder
.SetInsertPoint(&ThenBB
->front());
742 create(isl_ast_node_if_get_then(If
));
744 Builder
.SetInsertPoint(&ElseBB
->front());
746 if (isl_ast_node_if_has_else(If
))
747 create(isl_ast_node_if_get_else(If
));
749 Builder
.SetInsertPoint(&MergeBB
->front());
751 isl_ast_node_free(If
);
756 __isl_give isl_id_to_ast_expr
*
757 IslNodeBuilder::createNewAccesses(ScopStmt
*Stmt
,
758 __isl_keep isl_ast_node
*Node
) {
759 isl::id_to_ast_expr NewAccesses
=
760 isl::id_to_ast_expr::alloc(Stmt
->getParent()->getIslCtx(), 0);
762 isl::ast_build Build
= IslAstInfo::getBuild(isl::manage_copy(Node
));
763 assert(!Build
.is_null() && "Could not obtain isl_ast_build from user node");
764 Stmt
->setAstBuild(Build
);
766 for (auto *MA
: *Stmt
) {
767 if (!MA
->hasNewAccessRelation()) {
768 if (PollyGenerateExpressions
) {
771 if (MA
->getLatestScopArrayInfo()->getBasePtrOriginSAI())
775 dyn_cast
<Instruction
>(MA
->getLatestScopArrayInfo()->getBasePtr());
776 if (BasePtr
&& Stmt
->getParent()->getRegion().contains(BasePtr
))
782 assert(MA
->isAffine() &&
783 "Only affine memory accesses can be code generated");
785 isl::union_map Schedule
= Build
.get_schedule();
789 auto Dom
= Stmt
->getDomain().release();
790 auto SchedDom
= isl_set_from_union_set(Schedule
.domain().release());
791 auto AccDom
= isl_map_domain(MA
->getAccessRelation().release());
792 Dom
= isl_set_intersect_params(Dom
,
793 Stmt
->getParent()->getContext().release());
794 SchedDom
= isl_set_intersect_params(
795 SchedDom
, Stmt
->getParent()->getContext().release());
796 assert(isl_set_is_subset(SchedDom
, AccDom
) &&
797 "Access relation not defined on full schedule domain");
798 assert(isl_set_is_subset(Dom
, AccDom
) &&
799 "Access relation not defined on full domain");
800 isl_set_free(AccDom
);
801 isl_set_free(SchedDom
);
806 isl::pw_multi_aff PWAccRel
= MA
->applyScheduleToAccessRelation(Schedule
);
808 // isl cannot generate an index expression for access-nothing accesses.
809 isl::set AccDomain
= PWAccRel
.domain();
810 isl::set Context
= S
.getContext();
811 AccDomain
= AccDomain
.intersect_params(Context
);
812 if (AccDomain
.is_empty())
815 isl::ast_expr AccessExpr
= Build
.access_from(PWAccRel
);
816 NewAccesses
= NewAccesses
.set(MA
->getId(), AccessExpr
);
819 return NewAccesses
.release();
822 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr
*Expr
,
823 ScopStmt
*Stmt
, LoopToScevMapT
<S
) {
824 assert(isl_ast_expr_get_type(Expr
) == isl_ast_expr_op
&&
825 "Expression of type 'op' expected");
826 assert(isl_ast_expr_get_op_type(Expr
) == isl_ast_op_call
&&
827 "Operation of type 'call' expected");
828 for (int i
= 0; i
< isl_ast_expr_get_op_n_arg(Expr
) - 1; ++i
) {
829 isl_ast_expr
*SubExpr
;
832 SubExpr
= isl_ast_expr_get_op_arg(Expr
, i
+ 1);
833 V
= ExprBuilder
.create(SubExpr
);
834 ScalarEvolution
*SE
= Stmt
->getParent()->getSE();
835 LTS
[Stmt
->getLoopForDimension(i
)] = SE
->getUnknown(V
);
838 isl_ast_expr_free(Expr
);
841 void IslNodeBuilder::createSubstitutionsVector(
842 __isl_take isl_ast_expr
*Expr
, ScopStmt
*Stmt
,
843 std::vector
<LoopToScevMapT
> &VLTS
, std::vector
<Value
*> &IVS
,
844 __isl_take isl_id
*IteratorID
) {
847 Value
*OldValue
= IDToValue
[IteratorID
];
848 for (Value
*IV
: IVS
) {
849 IDToValue
[IteratorID
] = IV
;
850 createSubstitutions(isl_ast_expr_copy(Expr
), Stmt
, VLTS
[i
]);
854 IDToValue
[IteratorID
] = OldValue
;
855 isl_id_free(IteratorID
);
856 isl_ast_expr_free(Expr
);
859 void IslNodeBuilder::generateCopyStmt(
860 ScopStmt
*Stmt
, __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
861 assert(Stmt
->size() == 2);
862 auto ReadAccess
= Stmt
->begin();
863 auto WriteAccess
= ReadAccess
++;
864 assert((*ReadAccess
)->isRead() && (*WriteAccess
)->isMustWrite());
865 assert((*ReadAccess
)->getElementType() == (*WriteAccess
)->getElementType() &&
866 "Accesses use the same data type");
867 assert((*ReadAccess
)->isArrayKind() && (*WriteAccess
)->isArrayKind());
869 isl_id_to_ast_expr_get(NewAccesses
, (*ReadAccess
)->getId().release());
870 auto *LoadValue
= ExprBuilder
.create(AccessExpr
);
872 isl_id_to_ast_expr_get(NewAccesses
, (*WriteAccess
)->getId().release());
873 auto *StoreAddr
= ExprBuilder
.createAccessAddress(AccessExpr
).first
;
874 Builder
.CreateStore(LoadValue
, StoreAddr
);
877 Value
*IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop
*L
) {
878 assert(!OutsideLoopIterations
.contains(L
) &&
879 "trying to materialize loop induction variable twice");
880 const SCEV
*OuterLIV
= SE
.getAddRecExpr(SE
.getUnknown(Builder
.getInt64(0)),
881 SE
.getUnknown(Builder
.getInt64(1)), L
,
883 Value
*V
= generateSCEV(OuterLIV
);
884 OutsideLoopIterations
[L
] = SE
.getUnknown(V
);
888 void IslNodeBuilder::createUser(__isl_take isl_ast_node
*User
) {
893 isl_ast_expr
*Expr
= isl_ast_node_user_get_expr(User
);
894 isl_ast_expr
*StmtExpr
= isl_ast_expr_get_op_arg(Expr
, 0);
895 Id
= isl_ast_expr_get_id(StmtExpr
);
896 isl_ast_expr_free(StmtExpr
);
898 LTS
.insert(OutsideLoopIterations
.begin(), OutsideLoopIterations
.end());
900 Stmt
= (ScopStmt
*)isl_id_get_user(Id
);
901 auto *NewAccesses
= createNewAccesses(Stmt
, User
);
902 if (Stmt
->isCopyStmt()) {
903 generateCopyStmt(Stmt
, NewAccesses
);
904 isl_ast_expr_free(Expr
);
906 createSubstitutions(Expr
, Stmt
, LTS
);
908 if (Stmt
->isBlockStmt())
909 BlockGen
.copyStmt(*Stmt
, LTS
, NewAccesses
);
911 RegionGen
.copyStmt(*Stmt
, LTS
, NewAccesses
);
914 isl_id_to_ast_expr_free(NewAccesses
);
915 isl_ast_node_free(User
);
919 void IslNodeBuilder::createBlock(__isl_take isl_ast_node
*Block
) {
920 isl_ast_node_list
*List
= isl_ast_node_block_get_children(Block
);
922 for (int i
= 0; i
< isl_ast_node_list_n_ast_node(List
); ++i
)
923 create(isl_ast_node_list_get_ast_node(List
, i
));
925 isl_ast_node_free(Block
);
926 isl_ast_node_list_free(List
);
929 void IslNodeBuilder::create(__isl_take isl_ast_node
*Node
) {
930 switch (isl_ast_node_get_type(Node
)) {
931 case isl_ast_node_error
:
932 llvm_unreachable("code generation error");
933 case isl_ast_node_mark
:
936 case isl_ast_node_for
:
939 case isl_ast_node_if
:
942 case isl_ast_node_user
:
945 case isl_ast_node_block
:
950 llvm_unreachable("Unknown isl_ast_node type");
953 bool IslNodeBuilder::materializeValue(__isl_take isl_id
*Id
) {
954 // If the Id is already mapped, skip it.
955 if (!IDToValue
.count(Id
)) {
956 auto *ParamSCEV
= (const SCEV
*)isl_id_get_user(Id
);
959 // Parameters could refer to invariant loads that need to be
960 // preloaded before we can generate code for the parameter. Thus,
961 // check if any value referred to in ParamSCEV is an invariant load
962 // and if so make sure its equivalence class is preloaded.
963 SetVector
<Value
*> Values
;
964 findValues(ParamSCEV
, SE
, Values
);
965 for (auto *Val
: Values
) {
966 // Check if the value is an instruction in a dead block within the SCoP
967 // and if so do not code generate it.
968 if (auto *Inst
= dyn_cast
<Instruction
>(Val
)) {
969 if (S
.contains(Inst
)) {
972 // Check for "undef" loads first, then if there is a statement for
973 // the parent of Inst and lastly if the parent of Inst has an empty
974 // domain. In the first and last case the instruction is dead but if
975 // there is a statement or the domain is not empty Inst is not dead.
976 auto MemInst
= MemAccInst::dyn_cast(Inst
);
977 auto Address
= MemInst
? MemInst
.getPointerOperand() : nullptr;
978 if (Address
&& SE
.getUnknown(UndefValue::get(Address
->getType())) ==
979 SE
.getPointerBase(SE
.getSCEV(Address
))) {
980 } else if (S
.getStmtFor(Inst
)) {
983 auto *Domain
= S
.getDomainConditions(Inst
->getParent()).release();
984 IsDead
= isl_set_is_empty(Domain
);
985 isl_set_free(Domain
);
989 V
= UndefValue::get(ParamSCEV
->getType());
995 if (auto *IAClass
= S
.lookupInvariantEquivClass(Val
)) {
996 // Check if this invariant access class is empty, hence if we never
997 // actually added a loads instruction to it. In that case it has no
998 // (meaningful) users and we should not try to code generate it.
999 if (IAClass
->InvariantAccesses
.empty())
1000 V
= UndefValue::get(ParamSCEV
->getType());
1002 if (!preloadInvariantEquivClass(*IAClass
)) {
1009 V
= V
? V
: generateSCEV(ParamSCEV
);
1017 bool IslNodeBuilder::materializeParameters(__isl_take isl_set
*Set
) {
1018 for (unsigned i
= 0, e
= isl_set_dim(Set
, isl_dim_param
); i
< e
; ++i
) {
1019 if (!isl_set_involves_dims(Set
, isl_dim_param
, i
, 1))
1021 isl_id
*Id
= isl_set_get_dim_id(Set
, isl_dim_param
, i
);
1022 if (!materializeValue(Id
))
1028 bool IslNodeBuilder::materializeParameters() {
1029 for (const SCEV
*Param
: S
.parameters()) {
1030 isl_id
*Id
= S
.getIdForParam(Param
).release();
1031 if (!materializeValue(Id
))
1037 Value
*IslNodeBuilder::preloadUnconditionally(__isl_take isl_set
*AccessRange
,
1038 isl_ast_build
*Build
,
1039 Instruction
*AccInst
) {
1040 isl_pw_multi_aff
*PWAccRel
= isl_pw_multi_aff_from_set(AccessRange
);
1041 isl_ast_expr
*Access
=
1042 isl_ast_build_access_from_pw_multi_aff(Build
, PWAccRel
);
1043 auto *Address
= isl_ast_expr_address_of(Access
);
1044 auto *AddressValue
= ExprBuilder
.create(Address
);
1047 // Correct the type as the SAI might have a different type than the user
1048 // expects, especially if the base pointer is a struct.
1049 Type
*Ty
= AccInst
->getType();
1051 auto *Ptr
= AddressValue
;
1052 auto Name
= Ptr
->getName();
1053 PreloadVal
= Builder
.CreateLoad(Ty
, Ptr
, Name
+ ".load");
1054 if (LoadInst
*PreloadInst
= dyn_cast
<LoadInst
>(PreloadVal
))
1055 PreloadInst
->setAlignment(cast
<LoadInst
>(AccInst
)->getAlign());
1057 // TODO: This is only a hot fix for SCoP sequences that use the same load
1058 // instruction contained and hoisted by one of the SCoPs.
1059 if (SE
.isSCEVable(Ty
))
1060 SE
.forgetValue(AccInst
);
1065 Value
*IslNodeBuilder::preloadInvariantLoad(const MemoryAccess
&MA
,
1066 __isl_take isl_set
*Domain
) {
1067 isl_set
*AccessRange
= isl_map_range(MA
.getAddressFunction().release());
1068 AccessRange
= isl_set_gist_params(AccessRange
, S
.getContext().release());
1070 if (!materializeParameters(AccessRange
)) {
1071 isl_set_free(AccessRange
);
1072 isl_set_free(Domain
);
1077 isl_ast_build_from_context(isl_set_universe(S
.getParamSpace().release()));
1078 isl_set
*Universe
= isl_set_universe(isl_set_get_space(Domain
));
1079 bool AlwaysExecuted
= isl_set_is_equal(Domain
, Universe
);
1080 isl_set_free(Universe
);
1082 Instruction
*AccInst
= MA
.getAccessInstruction();
1083 Type
*AccInstTy
= AccInst
->getType();
1085 Value
*PreloadVal
= nullptr;
1086 if (AlwaysExecuted
) {
1087 PreloadVal
= preloadUnconditionally(AccessRange
, Build
, AccInst
);
1088 isl_ast_build_free(Build
);
1089 isl_set_free(Domain
);
1093 if (!materializeParameters(Domain
)) {
1094 isl_ast_build_free(Build
);
1095 isl_set_free(AccessRange
);
1096 isl_set_free(Domain
);
1100 isl_ast_expr
*DomainCond
= isl_ast_build_expr_from_set(Build
, Domain
);
1103 ExprBuilder
.setTrackOverflow(true);
1104 Value
*Cond
= ExprBuilder
.create(DomainCond
);
1105 Value
*OverflowHappened
= Builder
.CreateNot(ExprBuilder
.getOverflowState(),
1106 "polly.preload.cond.overflown");
1107 Cond
= Builder
.CreateAnd(Cond
, OverflowHappened
, "polly.preload.cond.result");
1108 ExprBuilder
.setTrackOverflow(false);
1110 if (!Cond
->getType()->isIntegerTy(1))
1111 Cond
= Builder
.CreateIsNotNull(Cond
);
1113 BasicBlock
*CondBB
= SplitBlock(Builder
.GetInsertBlock(),
1114 &*Builder
.GetInsertPoint(), GenDT
, GenLI
);
1115 CondBB
->setName("polly.preload.cond");
1117 BasicBlock
*MergeBB
= SplitBlock(CondBB
, &CondBB
->front(), GenDT
, GenLI
);
1118 MergeBB
->setName("polly.preload.merge");
1120 Function
*F
= Builder
.GetInsertBlock()->getParent();
1121 LLVMContext
&Context
= F
->getContext();
1122 BasicBlock
*ExecBB
= BasicBlock::Create(Context
, "polly.preload.exec", F
);
1124 GenDT
->addNewBlock(ExecBB
, CondBB
);
1125 if (Loop
*L
= GenLI
->getLoopFor(CondBB
))
1126 L
->addBasicBlockToLoop(ExecBB
, *GenLI
);
1128 auto *CondBBTerminator
= CondBB
->getTerminator();
1129 Builder
.SetInsertPoint(CondBBTerminator
);
1130 Builder
.CreateCondBr(Cond
, ExecBB
, MergeBB
);
1131 CondBBTerminator
->eraseFromParent();
1133 Builder
.SetInsertPoint(ExecBB
);
1134 Builder
.CreateBr(MergeBB
);
1136 Builder
.SetInsertPoint(ExecBB
->getTerminator());
1137 Value
*PreAccInst
= preloadUnconditionally(AccessRange
, Build
, AccInst
);
1138 Builder
.SetInsertPoint(MergeBB
->getTerminator());
1139 auto *MergePHI
= Builder
.CreatePHI(
1140 AccInstTy
, 2, "polly.preload." + AccInst
->getName() + ".merge");
1141 PreloadVal
= MergePHI
;
1144 PreloadVal
= nullptr;
1145 PreAccInst
= UndefValue::get(AccInstTy
);
1148 MergePHI
->addIncoming(PreAccInst
, ExecBB
);
1149 MergePHI
->addIncoming(Constant::getNullValue(AccInstTy
), CondBB
);
1151 isl_ast_build_free(Build
);
1155 bool IslNodeBuilder::preloadInvariantEquivClass(
1156 InvariantEquivClassTy
&IAClass
) {
1157 // For an equivalence class of invariant loads we pre-load the representing
1158 // element with the unified execution context. However, we have to map all
1159 // elements of the class to the one preloaded load as they are referenced
1160 // during the code generation and therefor need to be mapped.
1161 const MemoryAccessList
&MAs
= IAClass
.InvariantAccesses
;
1165 MemoryAccess
*MA
= MAs
.front();
1166 assert(MA
->isArrayKind() && MA
->isRead());
1168 // If the access function was already mapped, the preload of this equivalence
1169 // class was triggered earlier already and doesn't need to be done again.
1170 if (ValueMap
.count(MA
->getAccessInstruction()))
1173 // Check for recursion which can be caused by additional constraints, e.g.,
1174 // non-finite loop constraints. In such a case we have to bail out and insert
1175 // a "false" runtime check that will cause the original code to be executed.
1176 auto PtrId
= std::make_pair(IAClass
.IdentifyingPointer
, IAClass
.AccessType
);
1177 if (!PreloadedPtrs
.insert(PtrId
).second
)
1180 // The execution context of the IAClass.
1181 isl::set
&ExecutionCtx
= IAClass
.ExecutionContext
;
1183 // If the base pointer of this class is dependent on another one we have to
1184 // make sure it was preloaded already.
1185 auto *SAI
= MA
->getScopArrayInfo();
1186 if (auto *BaseIAClass
= S
.lookupInvariantEquivClass(SAI
->getBasePtr())) {
1187 if (!preloadInvariantEquivClass(*BaseIAClass
))
1190 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1191 // we need to refine the ExecutionCtx.
1192 isl::set BaseExecutionCtx
= BaseIAClass
->ExecutionContext
;
1193 ExecutionCtx
= ExecutionCtx
.intersect(BaseExecutionCtx
);
1196 // If the size of a dimension is dependent on another class, make sure it is
1198 for (unsigned i
= 1, e
= SAI
->getNumberOfDimensions(); i
< e
; ++i
) {
1199 const SCEV
*Dim
= SAI
->getDimensionSize(i
);
1200 SetVector
<Value
*> Values
;
1201 findValues(Dim
, SE
, Values
);
1202 for (auto *Val
: Values
) {
1203 if (auto *BaseIAClass
= S
.lookupInvariantEquivClass(Val
)) {
1204 if (!preloadInvariantEquivClass(*BaseIAClass
))
1207 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1208 // and we need to refine the ExecutionCtx.
1209 isl::set BaseExecutionCtx
= BaseIAClass
->ExecutionContext
;
1210 ExecutionCtx
= ExecutionCtx
.intersect(BaseExecutionCtx
);
1215 Instruction
*AccInst
= MA
->getAccessInstruction();
1216 Type
*AccInstTy
= AccInst
->getType();
1218 Value
*PreloadVal
= preloadInvariantLoad(*MA
, ExecutionCtx
.copy());
1222 for (const MemoryAccess
*MA
: MAs
) {
1223 Instruction
*MAAccInst
= MA
->getAccessInstruction();
1224 assert(PreloadVal
->getType() == MAAccInst
->getType());
1225 ValueMap
[MAAccInst
] = PreloadVal
;
1228 if (SE
.isSCEVable(AccInstTy
)) {
1229 isl_id
*ParamId
= S
.getIdForParam(SE
.getSCEV(AccInst
)).release();
1231 IDToValue
[ParamId
] = PreloadVal
;
1232 isl_id_free(ParamId
);
1235 BasicBlock
*EntryBB
= &Builder
.GetInsertBlock()->getParent()->getEntryBlock();
1236 auto *Alloca
= new AllocaInst(AccInstTy
, DL
.getAllocaAddrSpace(),
1237 AccInst
->getName() + ".preload.s2a",
1238 EntryBB
->getFirstInsertionPt());
1239 Builder
.CreateStore(PreloadVal
, Alloca
);
1240 ValueMapT PreloadedPointer
;
1241 PreloadedPointer
[PreloadVal
] = AccInst
;
1242 Annotator
.addAlternativeAliasBases(PreloadedPointer
);
1244 for (auto *DerivedSAI
: SAI
->getDerivedSAIs()) {
1245 Value
*BasePtr
= DerivedSAI
->getBasePtr();
1247 for (const MemoryAccess
*MA
: MAs
) {
1248 // As the derived SAI information is quite coarse, any load from the
1249 // current SAI could be the base pointer of the derived SAI, however we
1250 // should only change the base pointer of the derived SAI if we actually
1252 if (BasePtr
== MA
->getOriginalBaseAddr()) {
1253 assert(BasePtr
->getType() == PreloadVal
->getType());
1254 DerivedSAI
->setBasePtr(PreloadVal
);
1257 // For scalar derived SAIs we remap the alloca used for the derived value.
1258 if (BasePtr
== MA
->getAccessInstruction())
1259 ScalarMap
[DerivedSAI
] = Alloca
;
1263 for (const MemoryAccess
*MA
: MAs
) {
1264 Instruction
*MAAccInst
= MA
->getAccessInstruction();
1265 // Use the escape system to get the correct value to users outside the SCoP.
1266 BlockGenerator::EscapeUserVectorTy EscapeUsers
;
1267 for (auto *U
: MAAccInst
->users())
1268 if (Instruction
*UI
= dyn_cast
<Instruction
>(U
))
1269 if (!S
.contains(UI
))
1270 EscapeUsers
.push_back(UI
);
1272 if (EscapeUsers
.empty())
1275 EscapeMap
[MA
->getAccessInstruction()] =
1276 std::make_pair(Alloca
, std::move(EscapeUsers
));
1282 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks
) {
1283 for (auto &SAI
: S
.arrays()) {
1284 if (SAI
->getBasePtr())
1287 assert(SAI
->getNumberOfDimensions() > 0 && SAI
->getDimensionSize(0) &&
1288 "The size of the outermost dimension is used to declare newly "
1289 "created arrays that require memory allocation.");
1291 Type
*NewArrayType
= nullptr;
1293 // Get the size of the array = size(dim_1)*...*size(dim_n)
1294 uint64_t ArraySizeInt
= 1;
1295 for (int i
= SAI
->getNumberOfDimensions() - 1; i
>= 0; i
--) {
1296 auto *DimSize
= SAI
->getDimensionSize(i
);
1297 unsigned UnsignedDimSize
= static_cast<const SCEVConstant
*>(DimSize
)
1302 NewArrayType
= SAI
->getElementType();
1304 NewArrayType
= ArrayType::get(NewArrayType
, UnsignedDimSize
);
1305 ArraySizeInt
*= UnsignedDimSize
;
1308 if (SAI
->isOnHeap()) {
1309 LLVMContext
&Ctx
= NewArrayType
->getContext();
1311 // Get the IntPtrTy from the Datalayout
1312 auto IntPtrTy
= DL
.getIntPtrType(Ctx
);
1314 // Get the size of the element type in bits
1315 unsigned Size
= SAI
->getElemSizeInBytes();
1317 // Insert the malloc call at polly.start
1318 Builder
.SetInsertPoint(std::get
<0>(StartExitBlocks
)->getTerminator());
1319 auto *CreatedArray
= Builder
.CreateMalloc(
1320 IntPtrTy
, SAI
->getElementType(),
1321 ConstantInt::get(Type::getInt64Ty(Ctx
), Size
),
1322 ConstantInt::get(Type::getInt64Ty(Ctx
), ArraySizeInt
), nullptr,
1325 SAI
->setBasePtr(CreatedArray
);
1327 // Insert the free call at polly.exiting
1328 Builder
.SetInsertPoint(std::get
<1>(StartExitBlocks
)->getTerminator());
1329 Builder
.CreateFree(CreatedArray
);
1331 auto InstIt
= Builder
.GetInsertBlock()
1337 auto *CreatedArray
= new AllocaInst(NewArrayType
, DL
.getAllocaAddrSpace(),
1338 SAI
->getName(), InstIt
);
1339 if (PollyTargetFirstLevelCacheLineSize
)
1340 CreatedArray
->setAlignment(Align(PollyTargetFirstLevelCacheLineSize
));
1341 SAI
->setBasePtr(CreatedArray
);
1346 bool IslNodeBuilder::preloadInvariantLoads() {
1347 auto &InvariantEquivClasses
= S
.getInvariantAccesses();
1348 if (InvariantEquivClasses
.empty())
1351 BasicBlock
*PreLoadBB
= SplitBlock(Builder
.GetInsertBlock(),
1352 &*Builder
.GetInsertPoint(), GenDT
, GenLI
);
1353 PreLoadBB
->setName("polly.preload.begin");
1354 Builder
.SetInsertPoint(&PreLoadBB
->front());
1356 for (auto &IAClass
: InvariantEquivClasses
)
1357 if (!preloadInvariantEquivClass(IAClass
))
1363 void IslNodeBuilder::addParameters(__isl_take isl_set
*Context
) {
1364 // Materialize values for the parameters of the SCoP.
1365 materializeParameters();
1367 // Generate values for the current loop iteration for all surrounding loops.
1369 // We may also reference loops outside of the scop which do not contain the
1370 // scop itself, but as the number of such scops may be arbitrarily large we do
1371 // not generate code for them here, but only at the point of code generation
1372 // where these values are needed.
1373 Loop
*L
= LI
.getLoopFor(S
.getEntry());
1375 while (L
!= nullptr && S
.contains(L
))
1376 L
= L
->getParentLoop();
1378 while (L
!= nullptr) {
1379 materializeNonScopLoopInductionVariable(L
);
1380 L
= L
->getParentLoop();
1383 isl_set_free(Context
);
1386 Value
*IslNodeBuilder::generateSCEV(const SCEV
*Expr
) {
1387 /// We pass the insert location of our Builder, as Polly ensures during IR
1388 /// generation that there is always a valid CFG into which instructions are
1389 /// inserted. As a result, the insertpoint is known to be always followed by a
1390 /// terminator instruction. This means the insert point may be specified by a
1391 /// terminator instruction, but it can never point to an ->end() iterator
1392 /// which does not have a corresponding instruction. Hence, dereferencing
1393 /// the insertpoint to obtain an instruction is known to be save.
1395 /// We also do not need to update the Builder here, as new instructions are
1396 /// always inserted _before_ the given InsertLocation. As a result, the
1397 /// insert location remains valid.
1398 assert(Builder
.GetInsertBlock()->end() != Builder
.GetInsertPoint() &&
1399 "Insert location points after last valid instruction");
1400 Instruction
*InsertLocation
= &*Builder
.GetInsertPoint();
1402 return expandCodeFor(S
, SE
, Builder
.GetInsertBlock()->getParent(), *GenSE
, DL
,
1403 "polly", Expr
, Expr
->getType(), InsertLocation
,
1404 &ValueMap
, /*LoopToScevMap*/ nullptr,
1405 StartBlock
->getSinglePredecessor());
1408 /// The AST expression we generate to perform the run-time check assumes
1409 /// computations on integer types of infinite size. As we only use 64-bit
1410 /// arithmetic we check for overflows, in case of which we set the result
1411 /// of this run-time check to false to be conservatively correct,
1412 Value
*IslNodeBuilder::createRTC(isl_ast_expr
*Condition
) {
1413 auto ExprBuilder
= getExprBuilder();
1415 // In case the AST expression has integers larger than 64 bit, bail out. The
1416 // resulting LLVM-IR will contain operations on types that use more than 64
1417 // bits. These are -- in case wrapping intrinsics are used -- translated to
1418 // runtime library calls that are not available on all systems (e.g., Android)
1419 // and consequently will result in linker errors.
1420 if (ExprBuilder
.hasLargeInts(isl::manage_copy(Condition
))) {
1421 isl_ast_expr_free(Condition
);
1422 return Builder
.getFalse();
1425 ExprBuilder
.setTrackOverflow(true);
1426 Value
*RTC
= ExprBuilder
.create(Condition
);
1427 if (!RTC
->getType()->isIntegerTy(1))
1428 RTC
= Builder
.CreateIsNotNull(RTC
);
1429 Value
*OverflowHappened
=
1430 Builder
.CreateNot(ExprBuilder
.getOverflowState(), "polly.rtc.overflown");
1432 if (PollyGenerateRTCPrint
) {
1433 auto *F
= Builder
.GetInsertBlock()->getParent();
1434 RuntimeDebugBuilder::createCPUPrinter(
1436 "F: " + F
->getName().str() + " R: " + S
.getRegion().getNameStr() +
1438 RTC
, " Overflow: ", OverflowHappened
,
1440 " (0 failed, -1 succeeded)\n"
1441 " (if one or both are 0 falling back to original code, if both are -1 "
1442 "executing Polly code)\n");
1445 RTC
= Builder
.CreateAnd(RTC
, OverflowHappened
, "polly.rtc.result");
1446 ExprBuilder
.setTrackOverflow(false);
1448 if (!isa
<ConstantInt
>(RTC
))