1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the IslNodeBuilder, a class to translate an isl AST into
12 //===----------------------------------------------------------------------===//
14 #include "polly/CodeGen/IslNodeBuilder.h"
15 #include "polly/CodeGen/BlockGenerators.h"
16 #include "polly/CodeGen/CodeGeneration.h"
17 #include "polly/CodeGen/IslAst.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
20 #include "polly/CodeGen/LoopGeneratorsKMP.h"
21 #include "polly/CodeGen/RuntimeDebugBuilder.h"
22 #include "polly/Options.h"
23 #include "polly/ScopInfo.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/Support/SCEVValidator.h"
26 #include "polly/Support/ScopHelper.h"
27 #include "polly/Support/VirtualInstruction.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/RegionInfo.h"
35 #include "llvm/Analysis/ScalarEvolution.h"
36 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DataLayout.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/InstrTypes.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
54 #include "isl/aff_type.h"
56 #include "isl/ast_build.h"
57 #include "isl/isl-noexceptions.h"
60 #include "isl/union_map.h"
61 #include "isl/union_set.h"
72 using namespace polly
;
74 #define DEBUG_TYPE "polly-codegen"
76 STATISTIC(VersionedScops
, "Number of SCoPs that required versioning.");
78 STATISTIC(SequentialLoops
, "Number of generated sequential for-loops");
79 STATISTIC(ParallelLoops
, "Number of generated parallel for-loops");
80 STATISTIC(IfConditions
, "Number of generated if-conditions");
82 /// OpenMP backend options
83 enum class OpenMPBackend
{ GNU
, LLVM
};
85 static cl::opt
<bool> PollyGenerateRTCPrint(
86 "polly-codegen-emit-rtc-print",
87 cl::desc("Emit code that prints the runtime check result dynamically."),
88 cl::Hidden
, cl::cat(PollyCategory
));
90 // If this option is set we always use the isl AST generator to regenerate
91 // memory accesses. Without this option set we regenerate expressions using the
92 // original SCEV expressions and only generate new expressions in case the
93 // access relation has been changed and consequently must be regenerated.
94 static cl::opt
<bool> PollyGenerateExpressions(
95 "polly-codegen-generate-expressions",
96 cl::desc("Generate AST expressions for unmodified and modified accesses"),
97 cl::Hidden
, cl::cat(PollyCategory
));
99 static cl::opt
<int> PollyTargetFirstLevelCacheLineSize(
100 "polly-target-first-level-cache-line-size",
101 cl::desc("The size of the first level cache line size specified in bytes."),
102 cl::Hidden
, cl::init(64), cl::cat(PollyCategory
));
104 static cl::opt
<OpenMPBackend
> PollyOmpBackend(
105 "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"),
106 cl::values(clEnumValN(OpenMPBackend::GNU
, "GNU", "GNU OpenMP"),
107 clEnumValN(OpenMPBackend::LLVM
, "LLVM", "LLVM OpenMP")),
108 cl::Hidden
, cl::init(OpenMPBackend::GNU
), cl::cat(PollyCategory
));
110 isl::ast_expr
IslNodeBuilder::getUpperBound(isl::ast_node_for For
,
111 ICmpInst::Predicate
&Predicate
) {
112 isl::ast_expr Cond
= For
.cond();
113 isl::ast_expr Iterator
= For
.iterator();
114 assert(isl_ast_expr_get_type(Cond
.get()) == isl_ast_expr_op
&&
115 "conditional expression is not an atomic upper bound");
117 isl_ast_op_type OpType
= isl_ast_expr_get_op_type(Cond
.get());
121 Predicate
= ICmpInst::ICMP_SLE
;
124 Predicate
= ICmpInst::ICMP_SLT
;
127 llvm_unreachable("Unexpected comparison type in loop condition");
130 isl::ast_expr Arg0
= Cond
.get_op_arg(0);
132 assert(isl_ast_expr_get_type(Arg0
.get()) == isl_ast_expr_id
&&
133 "conditional expression is not an atomic upper bound");
135 isl::id UBID
= Arg0
.get_id();
137 assert(isl_ast_expr_get_type(Iterator
.get()) == isl_ast_expr_id
&&
138 "Could not get the iterator");
140 isl::id IteratorID
= Iterator
.get_id();
142 assert(UBID
.get() == IteratorID
.get() &&
143 "conditional expression is not an atomic upper bound");
145 return Cond
.get_op_arg(1);
148 int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For
) {
149 assert(isl_ast_node_get_type(For
.get()) == isl_ast_node_for
);
150 isl::ast_node Body
= For
.body();
152 // First, check if we can actually handle this code.
153 switch (isl_ast_node_get_type(Body
.get())) {
154 case isl_ast_node_user
:
156 case isl_ast_node_block
: {
157 isl::ast_node_block BodyBlock
= Body
.as
<isl::ast_node_block
>();
158 isl::ast_node_list List
= BodyBlock
.children();
159 for (isl::ast_node Node
: List
) {
160 isl_ast_node_type NodeType
= isl_ast_node_get_type(Node
.get());
161 if (NodeType
!= isl_ast_node_user
)
170 isl::ast_expr Init
= For
.init();
171 if (!Init
.isa
<isl::ast_expr_int
>() || !Init
.val().is_zero())
173 isl::ast_expr Inc
= For
.inc();
174 if (!Inc
.isa
<isl::ast_expr_int
>() || !Inc
.val().is_one())
176 CmpInst::Predicate Predicate
;
177 isl::ast_expr UB
= getUpperBound(For
, Predicate
);
178 if (!UB
.isa
<isl::ast_expr_int
>())
180 isl::val UpVal
= UB
.get_val();
181 int NumberIterations
= UpVal
.get_num_si();
182 if (NumberIterations
< 0)
184 if (Predicate
== CmpInst::ICMP_SLT
)
185 return NumberIterations
;
187 return NumberIterations
+ 1;
190 static void findReferencesByUse(Value
*SrcVal
, ScopStmt
*UserStmt
,
191 Loop
*UserScope
, const ValueMapT
&GlobalMap
,
192 SetVector
<Value
*> &Values
,
193 SetVector
<const SCEV
*> &SCEVs
) {
194 VirtualUse VUse
= VirtualUse::create(UserStmt
, UserScope
, SrcVal
, true);
195 switch (VUse
.getKind()) {
196 case VirtualUse::Constant
:
197 // When accelerator-offloading, GlobalValue is a host address whose content
198 // must still be transferred to the GPU.
199 if (isa
<GlobalValue
>(SrcVal
))
200 Values
.insert(SrcVal
);
203 case VirtualUse::Synthesizable
:
204 SCEVs
.insert(VUse
.getScevExpr());
207 case VirtualUse::Block
:
208 case VirtualUse::ReadOnly
:
209 case VirtualUse::Hoisted
:
210 case VirtualUse::Intra
:
211 case VirtualUse::Inter
:
215 if (Value
*NewVal
= GlobalMap
.lookup(SrcVal
))
216 Values
.insert(NewVal
);
219 static void findReferencesInInst(Instruction
*Inst
, ScopStmt
*UserStmt
,
220 Loop
*UserScope
, const ValueMapT
&GlobalMap
,
221 SetVector
<Value
*> &Values
,
222 SetVector
<const SCEV
*> &SCEVs
) {
223 for (Use
&U
: Inst
->operands())
224 findReferencesByUse(U
.get(), UserStmt
, UserScope
, GlobalMap
, Values
, SCEVs
);
227 static void findReferencesInStmt(ScopStmt
*Stmt
, SetVector
<Value
*> &Values
,
228 ValueMapT
&GlobalMap
,
229 SetVector
<const SCEV
*> &SCEVs
) {
230 LoopInfo
*LI
= Stmt
->getParent()->getLI();
232 BasicBlock
*BB
= Stmt
->getBasicBlock();
233 Loop
*Scope
= LI
->getLoopFor(BB
);
234 for (Instruction
*Inst
: Stmt
->getInstructions())
235 findReferencesInInst(Inst
, Stmt
, Scope
, GlobalMap
, Values
, SCEVs
);
237 if (Stmt
->isRegionStmt()) {
238 for (BasicBlock
*BB
: Stmt
->getRegion()->blocks()) {
239 Loop
*Scope
= LI
->getLoopFor(BB
);
240 for (Instruction
&Inst
: *BB
)
241 findReferencesInInst(&Inst
, Stmt
, Scope
, GlobalMap
, Values
, SCEVs
);
246 void polly::addReferencesFromStmt(ScopStmt
*Stmt
, void *UserPtr
,
247 bool CreateScalarRefs
) {
248 auto &References
= *static_cast<SubtreeReferences
*>(UserPtr
);
250 findReferencesInStmt(Stmt
, References
.Values
, References
.GlobalMap
,
253 for (auto &Access
: *Stmt
) {
254 if (References
.ParamSpace
) {
255 isl::space ParamSpace
= Access
->getLatestAccessRelation().get_space();
256 (*References
.ParamSpace
) =
257 References
.ParamSpace
->align_params(ParamSpace
);
260 if (Access
->isLatestArrayKind()) {
261 auto *BasePtr
= Access
->getLatestScopArrayInfo()->getBasePtr();
262 if (Instruction
*OpInst
= dyn_cast
<Instruction
>(BasePtr
))
263 if (Stmt
->getParent()->contains(OpInst
))
266 References
.Values
.insert(BasePtr
);
270 if (CreateScalarRefs
)
271 References
.Values
.insert(References
.BlockGen
.getOrCreateAlloca(*Access
));
275 /// Extract the out-of-scop values and SCEVs referenced from a set describing
278 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
279 /// statement and the base pointers of the memory accesses. For scalar
280 /// statements we force the generation of alloca memory locations and list
281 /// these locations in the set of out-of-scop values as well.
283 /// @param Set A set which references the ScopStmt we are interested in.
284 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences
286 static void addReferencesFromStmtSet(isl::set Set
, SubtreeReferences
*UserPtr
) {
287 isl::id Id
= Set
.get_tuple_id();
288 auto *Stmt
= static_cast<ScopStmt
*>(Id
.get_user());
289 addReferencesFromStmt(Stmt
, UserPtr
);
292 /// Extract the out-of-scop values and SCEVs referenced from a union set
293 /// referencing multiple ScopStmts.
295 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
296 /// statement and the base pointers of the memory accesses. For scalar
297 /// statements we force the generation of alloca memory locations and list
298 /// these locations in the set of out-of-scop values as well.
300 /// @param USet A union set referencing the ScopStmts we are interested
302 /// @param References The SubtreeReferences data structure through which
303 /// results are returned and further information is
305 static void addReferencesFromStmtUnionSet(isl::union_set USet
,
306 SubtreeReferences
&References
) {
308 for (isl::set Set
: USet
.get_set_list())
309 addReferencesFromStmtSet(Set
, &References
);
313 IslNodeBuilder::getScheduleForAstNode(const isl::ast_node
&Node
) {
314 return IslAstInfo::getSchedule(Node
);
317 void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node
&For
,
318 SetVector
<Value
*> &Values
,
319 SetVector
<const Loop
*> &Loops
) {
320 SetVector
<const SCEV
*> SCEVs
;
321 SubtreeReferences References
= {
322 LI
, SE
, S
, ValueMap
, Values
, SCEVs
, getBlockGenerator(), nullptr};
324 for (const auto &I
: IDToValue
)
325 Values
.insert(I
.second
);
327 // NOTE: this is populated in IslNodeBuilder::addParameters
328 for (const auto &I
: OutsideLoopIterations
)
329 Values
.insert(cast
<SCEVUnknown
>(I
.second
)->getValue());
331 isl::union_set Schedule
= getScheduleForAstNode(For
).domain();
332 addReferencesFromStmtUnionSet(Schedule
, References
);
334 for (const SCEV
*Expr
: SCEVs
) {
335 findValues(Expr
, SE
, Values
);
336 findLoops(Expr
, Loops
);
339 Values
.remove_if([](const Value
*V
) { return isa
<GlobalValue
>(V
); });
341 /// Note: Code generation of induction variables of loops outside Scops
343 /// Remove loops that contain the scop or that are part of the scop, as they
344 /// are considered local. This leaves only loops that are before the scop, but
345 /// do not contain the scop itself.
346 /// We ignore loops perfectly contained in the Scop because these are already
347 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
348 /// whose induction variables are referred to by the Scop, but the Scop is not
349 /// fully contained in these Loops. Since there can be many of these,
350 /// we choose to codegen these on-demand.
351 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
352 Loops
.remove_if([this](const Loop
*L
) {
353 return S
.contains(L
) || L
->contains(S
.getEntry());
356 // Contains Values that may need to be replaced with other values
357 // due to replacements from the ValueMap. We should make sure
358 // that we return correctly remapped values.
359 // NOTE: this code path is tested by:
360 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
361 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
362 SetVector
<Value
*> ReplacedValues
;
363 for (Value
*V
: Values
) {
364 ReplacedValues
.insert(getLatestValue(V
));
366 Values
= ReplacedValues
;
369 void IslNodeBuilder::updateValues(ValueMapT
&NewValues
) {
370 SmallPtrSet
<Value
*, 5> Inserted
;
372 for (const auto &I
: IDToValue
) {
373 IDToValue
[I
.first
] = NewValues
[I
.second
];
374 Inserted
.insert(I
.second
);
377 for (const auto &I
: NewValues
) {
378 if (Inserted
.count(I
.first
))
381 ValueMap
[I
.first
] = I
.second
;
385 Value
*IslNodeBuilder::getLatestValue(Value
*Original
) const {
386 auto It
= ValueMap
.find(Original
);
387 if (It
== ValueMap
.end())
392 void IslNodeBuilder::createMark(__isl_take isl_ast_node
*Node
) {
393 auto *Id
= isl_ast_node_mark_get_id(Node
);
394 auto Child
= isl_ast_node_mark_get_node(Node
);
395 isl_ast_node_free(Node
);
396 // If a child node of a 'SIMD mark' is a loop that has a single iteration,
397 // it will be optimized away and we should skip it.
398 if (strcmp(isl_id_get_name(Id
), "SIMD") == 0 &&
399 isl_ast_node_get_type(Child
) == isl_ast_node_for
) {
400 createForSequential(isl::manage(Child
).as
<isl::ast_node_for
>(), true);
405 BandAttr
*ChildLoopAttr
= getLoopAttr(isl::manage_copy(Id
));
406 BandAttr
*AncestorLoopAttr
;
408 // Save current LoopAttr environment to restore again when leaving this
409 // subtree. This means there was no loop between the ancestor LoopAttr and
410 // this mark, i.e. the ancestor LoopAttr did not directly mark a loop. This
411 // can happen e.g. if the AST build peeled or unrolled the loop.
412 AncestorLoopAttr
= Annotator
.getStagingAttrEnv();
414 Annotator
.getStagingAttrEnv() = ChildLoopAttr
;
420 assert(Annotator
.getStagingAttrEnv() == ChildLoopAttr
&&
421 "Nest must not overwrite loop attr environment");
422 Annotator
.getStagingAttrEnv() = AncestorLoopAttr
;
428 /// Restore the initial ordering of dimensions of the band node
430 /// In case the band node represents all the dimensions of the iteration
431 /// domain, recreate the band node to restore the initial ordering of the
434 /// @param Node The band node to be modified.
435 /// @return The modified schedule node.
436 static bool IsLoopVectorizerDisabled(isl::ast_node_for Node
) {
437 assert(isl_ast_node_get_type(Node
.get()) == isl_ast_node_for
);
438 isl::ast_node Body
= Node
.body();
439 if (isl_ast_node_get_type(Body
.get()) != isl_ast_node_mark
)
442 isl::ast_node_mark BodyMark
= Body
.as
<isl::ast_node_mark
>();
443 auto Id
= BodyMark
.id();
444 if (strcmp(Id
.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
449 void IslNodeBuilder::createForSequential(isl::ast_node_for For
,
451 Value
*ValueLB
, *ValueUB
, *ValueInc
;
453 BasicBlock
*ExitBlock
;
455 CmpInst::Predicate Predicate
;
457 bool LoopVectorizerDisabled
= IsLoopVectorizerDisabled(For
);
459 isl::ast_node Body
= For
.body();
461 // isl_ast_node_for_is_degenerate(For)
463 // TODO: For degenerated loops we could generate a plain assignment.
464 // However, for now we just reuse the logic for normal loops, which will
465 // create a loop with a single iteration.
467 isl::ast_expr Init
= For
.init();
468 isl::ast_expr Inc
= For
.inc();
469 isl::ast_expr Iterator
= For
.iterator();
470 isl::id IteratorID
= Iterator
.get_id();
471 isl::ast_expr UB
= getUpperBound(For
, Predicate
);
473 ValueLB
= ExprBuilder
.create(Init
.release());
474 ValueUB
= ExprBuilder
.create(UB
.release());
475 ValueInc
= ExprBuilder
.create(Inc
.release());
477 MaxType
= ExprBuilder
.getType(Iterator
.get());
478 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueLB
->getType());
479 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueUB
->getType());
480 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueInc
->getType());
482 if (MaxType
!= ValueLB
->getType())
483 ValueLB
= Builder
.CreateSExt(ValueLB
, MaxType
);
484 if (MaxType
!= ValueUB
->getType())
485 ValueUB
= Builder
.CreateSExt(ValueUB
, MaxType
);
486 if (MaxType
!= ValueInc
->getType())
487 ValueInc
= Builder
.CreateSExt(ValueInc
, MaxType
);
489 // If we can show that LB <Predicate> UB holds at least once, we can
490 // omit the GuardBB in front of the loop.
492 !SE
.isKnownPredicate(Predicate
, SE
.getSCEV(ValueLB
), SE
.getSCEV(ValueUB
));
493 IV
= createLoop(ValueLB
, ValueUB
, ValueInc
, Builder
, LI
, DT
, ExitBlock
,
494 Predicate
, &Annotator
, MarkParallel
, UseGuardBB
,
495 LoopVectorizerDisabled
);
496 IDToValue
[IteratorID
.get()] = IV
;
498 create(Body
.release());
500 Annotator
.popLoop(MarkParallel
);
502 IDToValue
.erase(IDToValue
.find(IteratorID
.get()));
504 Builder
.SetInsertPoint(&ExitBlock
->front());
509 /// Remove the BBs contained in a (sub)function from the dominator tree.
511 /// This function removes the basic blocks that are part of a subfunction from
512 /// the dominator tree. Specifically, when generating code it may happen that at
513 /// some point the code generation continues in a new sub-function (e.g., when
514 /// generating OpenMP code). The basic blocks that are created in this
515 /// sub-function are then still part of the dominator tree of the original
516 /// function, such that the dominator tree reaches over function boundaries.
517 /// This is not only incorrect, but also causes crashes. This function now
518 /// removes from the dominator tree all basic blocks that are dominated (and
519 /// consequently reachable) from the entry block of this (sub)function.
521 /// FIXME: A LLVM (function or region) pass should not touch anything outside of
522 /// the function/region it runs on. Hence, the pure need for this function shows
523 /// that we do not comply to this rule. At the moment, this does not cause any
524 /// issues, but we should be aware that such issues may appear. Unfortunately
525 /// the current LLVM pass infrastructure does not allow to make Polly a module
526 /// or call-graph pass to solve this issue, as such a pass would not have access
527 /// to the per-function analyses passes needed by Polly. A future pass manager
528 /// infrastructure is supposed to enable such kind of access possibly allowing
529 /// us to create a cleaner solution here.
531 /// FIXME: Instead of adding the dominance information and then dropping it
532 /// later on, we should try to just not add it in the first place. This requires
533 /// some careful testing to make sure this does not break in interaction with
534 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
535 /// which may try to update it.
537 /// @param F The function which contains the BBs to removed.
538 /// @param DT The dominator tree from which to remove the BBs.
539 static void removeSubFuncFromDomTree(Function
*F
, DominatorTree
&DT
) {
540 DomTreeNode
*N
= DT
.getNode(&F
->getEntryBlock());
541 std::vector
<BasicBlock
*> Nodes
;
543 // We can only remove an element from the dominator tree, if all its children
544 // have been removed. To ensure this we obtain the list of nodes to remove
545 // using a post-order tree traversal.
546 for (po_iterator
<DomTreeNode
*> I
= po_begin(N
), E
= po_end(N
); I
!= E
; ++I
)
547 Nodes
.push_back(I
->getBlock());
549 for (BasicBlock
*BB
: Nodes
)
553 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node
*For
) {
555 isl_ast_expr
*Init
, *Inc
, *Iterator
, *UB
;
557 Value
*ValueLB
, *ValueUB
, *ValueInc
;
560 CmpInst::Predicate Predicate
;
562 // The preamble of parallel code interacts different than normal code with
563 // e.g., scalar initialization. Therefore, we ensure the parallel code is
564 // separated from the last basic block.
565 BasicBlock
*ParBB
= SplitBlock(Builder
.GetInsertBlock(),
566 &*Builder
.GetInsertPoint(), &DT
, &LI
);
567 ParBB
->setName("polly.parallel.for");
568 Builder
.SetInsertPoint(&ParBB
->front());
570 Body
= isl_ast_node_for_get_body(For
);
571 Init
= isl_ast_node_for_get_init(For
);
572 Inc
= isl_ast_node_for_get_inc(For
);
573 Iterator
= isl_ast_node_for_get_iterator(For
);
574 IteratorID
= isl_ast_expr_get_id(Iterator
);
575 UB
= getUpperBound(isl::manage_copy(For
).as
<isl::ast_node_for
>(), Predicate
)
578 ValueLB
= ExprBuilder
.create(Init
);
579 ValueUB
= ExprBuilder
.create(UB
);
580 ValueInc
= ExprBuilder
.create(Inc
);
582 // OpenMP always uses SLE. In case the isl generated AST uses a SLT
583 // expression, we need to adjust the loop bound by one.
584 if (Predicate
== CmpInst::ICMP_SLT
)
585 ValueUB
= Builder
.CreateAdd(
586 ValueUB
, Builder
.CreateSExt(Builder
.getTrue(), ValueUB
->getType()));
588 MaxType
= ExprBuilder
.getType(Iterator
);
589 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueLB
->getType());
590 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueUB
->getType());
591 MaxType
= ExprBuilder
.getWidestType(MaxType
, ValueInc
->getType());
593 if (MaxType
!= ValueLB
->getType())
594 ValueLB
= Builder
.CreateSExt(ValueLB
, MaxType
);
595 if (MaxType
!= ValueUB
->getType())
596 ValueUB
= Builder
.CreateSExt(ValueUB
, MaxType
);
597 if (MaxType
!= ValueInc
->getType())
598 ValueInc
= Builder
.CreateSExt(ValueInc
, MaxType
);
600 BasicBlock::iterator LoopBody
;
602 SetVector
<Value
*> SubtreeValues
;
603 SetVector
<const Loop
*> Loops
;
605 getReferencesInSubtree(isl::manage_copy(For
), SubtreeValues
, Loops
);
607 // Create for all loops we depend on values that contain the current loop
608 // iteration. These values are necessary to generate code for SCEVs that
609 // depend on such loops. As a result we need to pass them to the subfunction.
610 // See [Code generation of induction variables of loops outside Scops]
611 for (const Loop
*L
: Loops
) {
612 Value
*LoopInductionVar
= materializeNonScopLoopInductionVariable(L
);
613 SubtreeValues
.insert(LoopInductionVar
);
618 std::unique_ptr
<ParallelLoopGenerator
> ParallelLoopGenPtr
;
620 switch (PollyOmpBackend
) {
621 case OpenMPBackend::GNU
:
622 ParallelLoopGenPtr
.reset(
623 new ParallelLoopGeneratorGOMP(Builder
, LI
, DT
, DL
));
625 case OpenMPBackend::LLVM
:
626 ParallelLoopGenPtr
.reset(new ParallelLoopGeneratorKMP(Builder
, LI
, DT
, DL
));
630 IV
= ParallelLoopGenPtr
->createParallelLoop(
631 ValueLB
, ValueUB
, ValueInc
, SubtreeValues
, NewValues
, &LoopBody
);
632 BasicBlock::iterator AfterLoop
= Builder
.GetInsertPoint();
633 Builder
.SetInsertPoint(&*LoopBody
);
635 // Remember the parallel subfunction
636 ParallelSubfunctions
.push_back(LoopBody
->getFunction());
638 // Save the current values.
639 auto ValueMapCopy
= ValueMap
;
640 IslExprBuilder::IDToValueTy IDToValueCopy
= IDToValue
;
642 updateValues(NewValues
);
643 IDToValue
[IteratorID
] = IV
;
645 ValueMapT NewValuesReverse
;
647 for (auto P
: NewValues
)
648 NewValuesReverse
[P
.second
] = P
.first
;
650 Annotator
.addAlternativeAliasBases(NewValuesReverse
);
654 Annotator
.resetAlternativeAliasBases();
655 // Restore the original values.
656 ValueMap
= ValueMapCopy
;
657 IDToValue
= IDToValueCopy
;
659 Builder
.SetInsertPoint(&*AfterLoop
);
660 removeSubFuncFromDomTree((*LoopBody
).getParent()->getParent(), DT
);
662 for (const Loop
*L
: Loops
)
663 OutsideLoopIterations
.erase(L
);
665 isl_ast_node_free(For
);
666 isl_ast_expr_free(Iterator
);
667 isl_id_free(IteratorID
);
672 void IslNodeBuilder::createFor(__isl_take isl_ast_node
*For
) {
673 if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For
))) {
674 createForParallel(For
);
677 bool Parallel
= (IslAstInfo::isParallel(isl::manage_copy(For
)) &&
678 !IslAstInfo::isReductionParallel(isl::manage_copy(For
)));
679 createForSequential(isl::manage(For
).as
<isl::ast_node_for
>(), Parallel
);
682 void IslNodeBuilder::createIf(__isl_take isl_ast_node
*If
) {
683 isl_ast_expr
*Cond
= isl_ast_node_if_get_cond(If
);
685 Function
*F
= Builder
.GetInsertBlock()->getParent();
686 LLVMContext
&Context
= F
->getContext();
688 BasicBlock
*CondBB
= SplitBlock(Builder
.GetInsertBlock(),
689 &*Builder
.GetInsertPoint(), &DT
, &LI
);
690 CondBB
->setName("polly.cond");
691 BasicBlock
*MergeBB
= SplitBlock(CondBB
, &CondBB
->front(), &DT
, &LI
);
692 MergeBB
->setName("polly.merge");
693 BasicBlock
*ThenBB
= BasicBlock::Create(Context
, "polly.then", F
);
694 BasicBlock
*ElseBB
= BasicBlock::Create(Context
, "polly.else", F
);
696 DT
.addNewBlock(ThenBB
, CondBB
);
697 DT
.addNewBlock(ElseBB
, CondBB
);
698 DT
.changeImmediateDominator(MergeBB
, CondBB
);
700 Loop
*L
= LI
.getLoopFor(CondBB
);
702 L
->addBasicBlockToLoop(ThenBB
, LI
);
703 L
->addBasicBlockToLoop(ElseBB
, LI
);
706 CondBB
->getTerminator()->eraseFromParent();
708 Builder
.SetInsertPoint(CondBB
);
709 Value
*Predicate
= ExprBuilder
.create(Cond
);
710 Builder
.CreateCondBr(Predicate
, ThenBB
, ElseBB
);
711 Builder
.SetInsertPoint(ThenBB
);
712 Builder
.CreateBr(MergeBB
);
713 Builder
.SetInsertPoint(ElseBB
);
714 Builder
.CreateBr(MergeBB
);
715 Builder
.SetInsertPoint(&ThenBB
->front());
717 create(isl_ast_node_if_get_then(If
));
719 Builder
.SetInsertPoint(&ElseBB
->front());
721 if (isl_ast_node_if_has_else(If
))
722 create(isl_ast_node_if_get_else(If
));
724 Builder
.SetInsertPoint(&MergeBB
->front());
726 isl_ast_node_free(If
);
731 __isl_give isl_id_to_ast_expr
*
732 IslNodeBuilder::createNewAccesses(ScopStmt
*Stmt
,
733 __isl_keep isl_ast_node
*Node
) {
734 isl::id_to_ast_expr NewAccesses
=
735 isl::id_to_ast_expr::alloc(Stmt
->getParent()->getIslCtx(), 0);
737 isl::ast_build Build
= IslAstInfo::getBuild(isl::manage_copy(Node
));
738 assert(!Build
.is_null() && "Could not obtain isl_ast_build from user node");
739 Stmt
->setAstBuild(Build
);
741 for (auto *MA
: *Stmt
) {
742 if (!MA
->hasNewAccessRelation()) {
743 if (PollyGenerateExpressions
) {
746 if (MA
->getLatestScopArrayInfo()->getBasePtrOriginSAI())
750 dyn_cast
<Instruction
>(MA
->getLatestScopArrayInfo()->getBasePtr());
751 if (BasePtr
&& Stmt
->getParent()->getRegion().contains(BasePtr
))
757 assert(MA
->isAffine() &&
758 "Only affine memory accesses can be code generated");
760 isl::union_map Schedule
= Build
.get_schedule();
764 auto Dom
= Stmt
->getDomain().release();
765 auto SchedDom
= isl_set_from_union_set(Schedule
.domain().release());
766 auto AccDom
= isl_map_domain(MA
->getAccessRelation().release());
767 Dom
= isl_set_intersect_params(Dom
,
768 Stmt
->getParent()->getContext().release());
769 SchedDom
= isl_set_intersect_params(
770 SchedDom
, Stmt
->getParent()->getContext().release());
771 assert(isl_set_is_subset(SchedDom
, AccDom
) &&
772 "Access relation not defined on full schedule domain");
773 assert(isl_set_is_subset(Dom
, AccDom
) &&
774 "Access relation not defined on full domain");
775 isl_set_free(AccDom
);
776 isl_set_free(SchedDom
);
781 isl::pw_multi_aff PWAccRel
= MA
->applyScheduleToAccessRelation(Schedule
);
783 // isl cannot generate an index expression for access-nothing accesses.
784 isl::set AccDomain
= PWAccRel
.domain();
785 isl::set Context
= S
.getContext();
786 AccDomain
= AccDomain
.intersect_params(Context
);
787 if (AccDomain
.is_empty())
790 isl::ast_expr AccessExpr
= Build
.access_from(PWAccRel
);
791 NewAccesses
= NewAccesses
.set(MA
->getId(), AccessExpr
);
794 return NewAccesses
.release();
797 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr
*Expr
,
798 ScopStmt
*Stmt
, LoopToScevMapT
<S
) {
799 assert(isl_ast_expr_get_type(Expr
) == isl_ast_expr_op
&&
800 "Expression of type 'op' expected");
801 assert(isl_ast_expr_get_op_type(Expr
) == isl_ast_op_call
&&
802 "Operation of type 'call' expected");
803 for (int i
= 0; i
< isl_ast_expr_get_op_n_arg(Expr
) - 1; ++i
) {
804 isl_ast_expr
*SubExpr
;
807 SubExpr
= isl_ast_expr_get_op_arg(Expr
, i
+ 1);
808 V
= ExprBuilder
.create(SubExpr
);
809 ScalarEvolution
*SE
= Stmt
->getParent()->getSE();
810 LTS
[Stmt
->getLoopForDimension(i
)] = SE
->getUnknown(V
);
813 isl_ast_expr_free(Expr
);
816 void IslNodeBuilder::createSubstitutionsVector(
817 __isl_take isl_ast_expr
*Expr
, ScopStmt
*Stmt
,
818 std::vector
<LoopToScevMapT
> &VLTS
, std::vector
<Value
*> &IVS
,
819 __isl_take isl_id
*IteratorID
) {
822 Value
*OldValue
= IDToValue
[IteratorID
];
823 for (Value
*IV
: IVS
) {
824 IDToValue
[IteratorID
] = IV
;
825 createSubstitutions(isl_ast_expr_copy(Expr
), Stmt
, VLTS
[i
]);
829 IDToValue
[IteratorID
] = OldValue
;
830 isl_id_free(IteratorID
);
831 isl_ast_expr_free(Expr
);
834 void IslNodeBuilder::generateCopyStmt(
835 ScopStmt
*Stmt
, __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
836 assert(Stmt
->size() == 2);
837 auto ReadAccess
= Stmt
->begin();
838 auto WriteAccess
= ReadAccess
++;
839 assert((*ReadAccess
)->isRead() && (*WriteAccess
)->isMustWrite());
840 assert((*ReadAccess
)->getElementType() == (*WriteAccess
)->getElementType() &&
841 "Accesses use the same data type");
842 assert((*ReadAccess
)->isArrayKind() && (*WriteAccess
)->isArrayKind());
844 isl_id_to_ast_expr_get(NewAccesses
, (*ReadAccess
)->getId().release());
845 auto *LoadValue
= ExprBuilder
.create(AccessExpr
);
847 isl_id_to_ast_expr_get(NewAccesses
, (*WriteAccess
)->getId().release());
848 auto *StoreAddr
= ExprBuilder
.createAccessAddress(AccessExpr
).first
;
849 Builder
.CreateStore(LoadValue
, StoreAddr
);
852 Value
*IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop
*L
) {
853 assert(!OutsideLoopIterations
.contains(L
) &&
854 "trying to materialize loop induction variable twice");
855 const SCEV
*OuterLIV
= SE
.getAddRecExpr(SE
.getUnknown(Builder
.getInt64(0)),
856 SE
.getUnknown(Builder
.getInt64(1)), L
,
858 Value
*V
= generateSCEV(OuterLIV
);
859 OutsideLoopIterations
[L
] = SE
.getUnknown(V
);
863 void IslNodeBuilder::createUser(__isl_take isl_ast_node
*User
) {
868 isl_ast_expr
*Expr
= isl_ast_node_user_get_expr(User
);
869 isl_ast_expr
*StmtExpr
= isl_ast_expr_get_op_arg(Expr
, 0);
870 Id
= isl_ast_expr_get_id(StmtExpr
);
871 isl_ast_expr_free(StmtExpr
);
873 LTS
.insert(OutsideLoopIterations
.begin(), OutsideLoopIterations
.end());
875 Stmt
= (ScopStmt
*)isl_id_get_user(Id
);
876 auto *NewAccesses
= createNewAccesses(Stmt
, User
);
877 if (Stmt
->isCopyStmt()) {
878 generateCopyStmt(Stmt
, NewAccesses
);
879 isl_ast_expr_free(Expr
);
881 createSubstitutions(Expr
, Stmt
, LTS
);
883 if (Stmt
->isBlockStmt())
884 BlockGen
.copyStmt(*Stmt
, LTS
, NewAccesses
);
886 RegionGen
.copyStmt(*Stmt
, LTS
, NewAccesses
);
889 isl_id_to_ast_expr_free(NewAccesses
);
890 isl_ast_node_free(User
);
894 void IslNodeBuilder::createBlock(__isl_take isl_ast_node
*Block
) {
895 isl_ast_node_list
*List
= isl_ast_node_block_get_children(Block
);
897 for (int i
= 0; i
< isl_ast_node_list_n_ast_node(List
); ++i
)
898 create(isl_ast_node_list_get_ast_node(List
, i
));
900 isl_ast_node_free(Block
);
901 isl_ast_node_list_free(List
);
904 void IslNodeBuilder::create(__isl_take isl_ast_node
*Node
) {
905 switch (isl_ast_node_get_type(Node
)) {
906 case isl_ast_node_error
:
907 llvm_unreachable("code generation error");
908 case isl_ast_node_mark
:
911 case isl_ast_node_for
:
914 case isl_ast_node_if
:
917 case isl_ast_node_user
:
920 case isl_ast_node_block
:
925 llvm_unreachable("Unknown isl_ast_node type");
928 bool IslNodeBuilder::materializeValue(__isl_take isl_id
*Id
) {
929 // If the Id is already mapped, skip it.
930 if (!IDToValue
.count(Id
)) {
931 auto *ParamSCEV
= (const SCEV
*)isl_id_get_user(Id
);
934 // Parameters could refer to invariant loads that need to be
935 // preloaded before we can generate code for the parameter. Thus,
936 // check if any value referred to in ParamSCEV is an invariant load
937 // and if so make sure its equivalence class is preloaded.
938 SetVector
<Value
*> Values
;
939 findValues(ParamSCEV
, SE
, Values
);
940 for (auto *Val
: Values
) {
941 // Check if the value is an instruction in a dead block within the SCoP
942 // and if so do not code generate it.
943 if (auto *Inst
= dyn_cast
<Instruction
>(Val
)) {
944 if (S
.contains(Inst
)) {
947 // Check for "undef" loads first, then if there is a statement for
948 // the parent of Inst and lastly if the parent of Inst has an empty
949 // domain. In the first and last case the instruction is dead but if
950 // there is a statement or the domain is not empty Inst is not dead.
951 auto MemInst
= MemAccInst::dyn_cast(Inst
);
952 auto Address
= MemInst
? MemInst
.getPointerOperand() : nullptr;
953 if (Address
&& SE
.getUnknown(UndefValue::get(Address
->getType())) ==
954 SE
.getPointerBase(SE
.getSCEV(Address
))) {
955 } else if (S
.getStmtFor(Inst
)) {
958 auto *Domain
= S
.getDomainConditions(Inst
->getParent()).release();
959 IsDead
= isl_set_is_empty(Domain
);
960 isl_set_free(Domain
);
964 V
= UndefValue::get(ParamSCEV
->getType());
970 if (auto *IAClass
= S
.lookupInvariantEquivClass(Val
)) {
971 // Check if this invariant access class is empty, hence if we never
972 // actually added a loads instruction to it. In that case it has no
973 // (meaningful) users and we should not try to code generate it.
974 if (IAClass
->InvariantAccesses
.empty())
975 V
= UndefValue::get(ParamSCEV
->getType());
977 if (!preloadInvariantEquivClass(*IAClass
)) {
984 V
= V
? V
: generateSCEV(ParamSCEV
);
992 bool IslNodeBuilder::materializeParameters(__isl_take isl_set
*Set
) {
993 for (unsigned i
= 0, e
= isl_set_dim(Set
, isl_dim_param
); i
< e
; ++i
) {
994 if (!isl_set_involves_dims(Set
, isl_dim_param
, i
, 1))
996 isl_id
*Id
= isl_set_get_dim_id(Set
, isl_dim_param
, i
);
997 if (!materializeValue(Id
))
1003 bool IslNodeBuilder::materializeParameters() {
1004 for (const SCEV
*Param
: S
.parameters()) {
1005 isl_id
*Id
= S
.getIdForParam(Param
).release();
1006 if (!materializeValue(Id
))
1012 Value
*IslNodeBuilder::preloadUnconditionally(__isl_take isl_set
*AccessRange
,
1013 isl_ast_build
*Build
,
1014 Instruction
*AccInst
) {
1015 isl_pw_multi_aff
*PWAccRel
= isl_pw_multi_aff_from_set(AccessRange
);
1016 isl_ast_expr
*Access
=
1017 isl_ast_build_access_from_pw_multi_aff(Build
, PWAccRel
);
1018 auto *Address
= isl_ast_expr_address_of(Access
);
1019 auto *AddressValue
= ExprBuilder
.create(Address
);
1022 // Correct the type as the SAI might have a different type than the user
1023 // expects, especially if the base pointer is a struct.
1024 Type
*Ty
= AccInst
->getType();
1026 auto *Ptr
= AddressValue
;
1027 auto Name
= Ptr
->getName();
1028 auto AS
= Ptr
->getType()->getPointerAddressSpace();
1029 Ptr
= Builder
.CreatePointerCast(Ptr
, Ty
->getPointerTo(AS
), Name
+ ".cast");
1030 PreloadVal
= Builder
.CreateLoad(Ty
, Ptr
, Name
+ ".load");
1031 if (LoadInst
*PreloadInst
= dyn_cast
<LoadInst
>(PreloadVal
))
1032 PreloadInst
->setAlignment(cast
<LoadInst
>(AccInst
)->getAlign());
1034 // TODO: This is only a hot fix for SCoP sequences that use the same load
1035 // instruction contained and hoisted by one of the SCoPs.
1036 if (SE
.isSCEVable(Ty
))
1037 SE
.forgetValue(AccInst
);
1042 Value
*IslNodeBuilder::preloadInvariantLoad(const MemoryAccess
&MA
,
1043 __isl_take isl_set
*Domain
) {
1044 isl_set
*AccessRange
= isl_map_range(MA
.getAddressFunction().release());
1045 AccessRange
= isl_set_gist_params(AccessRange
, S
.getContext().release());
1047 if (!materializeParameters(AccessRange
)) {
1048 isl_set_free(AccessRange
);
1049 isl_set_free(Domain
);
1054 isl_ast_build_from_context(isl_set_universe(S
.getParamSpace().release()));
1055 isl_set
*Universe
= isl_set_universe(isl_set_get_space(Domain
));
1056 bool AlwaysExecuted
= isl_set_is_equal(Domain
, Universe
);
1057 isl_set_free(Universe
);
1059 Instruction
*AccInst
= MA
.getAccessInstruction();
1060 Type
*AccInstTy
= AccInst
->getType();
1062 Value
*PreloadVal
= nullptr;
1063 if (AlwaysExecuted
) {
1064 PreloadVal
= preloadUnconditionally(AccessRange
, Build
, AccInst
);
1065 isl_ast_build_free(Build
);
1066 isl_set_free(Domain
);
1070 if (!materializeParameters(Domain
)) {
1071 isl_ast_build_free(Build
);
1072 isl_set_free(AccessRange
);
1073 isl_set_free(Domain
);
1077 isl_ast_expr
*DomainCond
= isl_ast_build_expr_from_set(Build
, Domain
);
1080 ExprBuilder
.setTrackOverflow(true);
1081 Value
*Cond
= ExprBuilder
.create(DomainCond
);
1082 Value
*OverflowHappened
= Builder
.CreateNot(ExprBuilder
.getOverflowState(),
1083 "polly.preload.cond.overflown");
1084 Cond
= Builder
.CreateAnd(Cond
, OverflowHappened
, "polly.preload.cond.result");
1085 ExprBuilder
.setTrackOverflow(false);
1087 if (!Cond
->getType()->isIntegerTy(1))
1088 Cond
= Builder
.CreateIsNotNull(Cond
);
1090 BasicBlock
*CondBB
= SplitBlock(Builder
.GetInsertBlock(),
1091 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1092 CondBB
->setName("polly.preload.cond");
1094 BasicBlock
*MergeBB
= SplitBlock(CondBB
, &CondBB
->front(), &DT
, &LI
);
1095 MergeBB
->setName("polly.preload.merge");
1097 Function
*F
= Builder
.GetInsertBlock()->getParent();
1098 LLVMContext
&Context
= F
->getContext();
1099 BasicBlock
*ExecBB
= BasicBlock::Create(Context
, "polly.preload.exec", F
);
1101 DT
.addNewBlock(ExecBB
, CondBB
);
1102 if (Loop
*L
= LI
.getLoopFor(CondBB
))
1103 L
->addBasicBlockToLoop(ExecBB
, LI
);
1105 auto *CondBBTerminator
= CondBB
->getTerminator();
1106 Builder
.SetInsertPoint(CondBBTerminator
);
1107 Builder
.CreateCondBr(Cond
, ExecBB
, MergeBB
);
1108 CondBBTerminator
->eraseFromParent();
1110 Builder
.SetInsertPoint(ExecBB
);
1111 Builder
.CreateBr(MergeBB
);
1113 Builder
.SetInsertPoint(ExecBB
->getTerminator());
1114 Value
*PreAccInst
= preloadUnconditionally(AccessRange
, Build
, AccInst
);
1115 Builder
.SetInsertPoint(MergeBB
->getTerminator());
1116 auto *MergePHI
= Builder
.CreatePHI(
1117 AccInstTy
, 2, "polly.preload." + AccInst
->getName() + ".merge");
1118 PreloadVal
= MergePHI
;
1121 PreloadVal
= nullptr;
1122 PreAccInst
= UndefValue::get(AccInstTy
);
1125 MergePHI
->addIncoming(PreAccInst
, ExecBB
);
1126 MergePHI
->addIncoming(Constant::getNullValue(AccInstTy
), CondBB
);
1128 isl_ast_build_free(Build
);
1132 bool IslNodeBuilder::preloadInvariantEquivClass(
1133 InvariantEquivClassTy
&IAClass
) {
1134 // For an equivalence class of invariant loads we pre-load the representing
1135 // element with the unified execution context. However, we have to map all
1136 // elements of the class to the one preloaded load as they are referenced
1137 // during the code generation and therefor need to be mapped.
1138 const MemoryAccessList
&MAs
= IAClass
.InvariantAccesses
;
1142 MemoryAccess
*MA
= MAs
.front();
1143 assert(MA
->isArrayKind() && MA
->isRead());
1145 // If the access function was already mapped, the preload of this equivalence
1146 // class was triggered earlier already and doesn't need to be done again.
1147 if (ValueMap
.count(MA
->getAccessInstruction()))
1150 // Check for recursion which can be caused by additional constraints, e.g.,
1151 // non-finite loop constraints. In such a case we have to bail out and insert
1152 // a "false" runtime check that will cause the original code to be executed.
1153 auto PtrId
= std::make_pair(IAClass
.IdentifyingPointer
, IAClass
.AccessType
);
1154 if (!PreloadedPtrs
.insert(PtrId
).second
)
1157 // The execution context of the IAClass.
1158 isl::set
&ExecutionCtx
= IAClass
.ExecutionContext
;
1160 // If the base pointer of this class is dependent on another one we have to
1161 // make sure it was preloaded already.
1162 auto *SAI
= MA
->getScopArrayInfo();
1163 if (auto *BaseIAClass
= S
.lookupInvariantEquivClass(SAI
->getBasePtr())) {
1164 if (!preloadInvariantEquivClass(*BaseIAClass
))
1167 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1168 // we need to refine the ExecutionCtx.
1169 isl::set BaseExecutionCtx
= BaseIAClass
->ExecutionContext
;
1170 ExecutionCtx
= ExecutionCtx
.intersect(BaseExecutionCtx
);
1173 // If the size of a dimension is dependent on another class, make sure it is
1175 for (unsigned i
= 1, e
= SAI
->getNumberOfDimensions(); i
< e
; ++i
) {
1176 const SCEV
*Dim
= SAI
->getDimensionSize(i
);
1177 SetVector
<Value
*> Values
;
1178 findValues(Dim
, SE
, Values
);
1179 for (auto *Val
: Values
) {
1180 if (auto *BaseIAClass
= S
.lookupInvariantEquivClass(Val
)) {
1181 if (!preloadInvariantEquivClass(*BaseIAClass
))
1184 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1185 // and we need to refine the ExecutionCtx.
1186 isl::set BaseExecutionCtx
= BaseIAClass
->ExecutionContext
;
1187 ExecutionCtx
= ExecutionCtx
.intersect(BaseExecutionCtx
);
1192 Instruction
*AccInst
= MA
->getAccessInstruction();
1193 Type
*AccInstTy
= AccInst
->getType();
1195 Value
*PreloadVal
= preloadInvariantLoad(*MA
, ExecutionCtx
.copy());
1199 for (const MemoryAccess
*MA
: MAs
) {
1200 Instruction
*MAAccInst
= MA
->getAccessInstruction();
1201 assert(PreloadVal
->getType() == MAAccInst
->getType());
1202 ValueMap
[MAAccInst
] = PreloadVal
;
1205 if (SE
.isSCEVable(AccInstTy
)) {
1206 isl_id
*ParamId
= S
.getIdForParam(SE
.getSCEV(AccInst
)).release();
1208 IDToValue
[ParamId
] = PreloadVal
;
1209 isl_id_free(ParamId
);
1212 BasicBlock
*EntryBB
= &Builder
.GetInsertBlock()->getParent()->getEntryBlock();
1213 auto *Alloca
= new AllocaInst(AccInstTy
, DL
.getAllocaAddrSpace(),
1214 AccInst
->getName() + ".preload.s2a",
1215 &*EntryBB
->getFirstInsertionPt());
1216 Builder
.CreateStore(PreloadVal
, Alloca
);
1217 ValueMapT PreloadedPointer
;
1218 PreloadedPointer
[PreloadVal
] = AccInst
;
1219 Annotator
.addAlternativeAliasBases(PreloadedPointer
);
1221 for (auto *DerivedSAI
: SAI
->getDerivedSAIs()) {
1222 Value
*BasePtr
= DerivedSAI
->getBasePtr();
1224 for (const MemoryAccess
*MA
: MAs
) {
1225 // As the derived SAI information is quite coarse, any load from the
1226 // current SAI could be the base pointer of the derived SAI, however we
1227 // should only change the base pointer of the derived SAI if we actually
1229 if (BasePtr
== MA
->getOriginalBaseAddr()) {
1230 assert(BasePtr
->getType() == PreloadVal
->getType());
1231 DerivedSAI
->setBasePtr(PreloadVal
);
1234 // For scalar derived SAIs we remap the alloca used for the derived value.
1235 if (BasePtr
== MA
->getAccessInstruction())
1236 ScalarMap
[DerivedSAI
] = Alloca
;
1240 for (const MemoryAccess
*MA
: MAs
) {
1241 Instruction
*MAAccInst
= MA
->getAccessInstruction();
1242 // Use the escape system to get the correct value to users outside the SCoP.
1243 BlockGenerator::EscapeUserVectorTy EscapeUsers
;
1244 for (auto *U
: MAAccInst
->users())
1245 if (Instruction
*UI
= dyn_cast
<Instruction
>(U
))
1246 if (!S
.contains(UI
))
1247 EscapeUsers
.push_back(UI
);
1249 if (EscapeUsers
.empty())
1252 EscapeMap
[MA
->getAccessInstruction()] =
1253 std::make_pair(Alloca
, std::move(EscapeUsers
));
1259 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks
) {
1260 for (auto &SAI
: S
.arrays()) {
1261 if (SAI
->getBasePtr())
1264 assert(SAI
->getNumberOfDimensions() > 0 && SAI
->getDimensionSize(0) &&
1265 "The size of the outermost dimension is used to declare newly "
1266 "created arrays that require memory allocation.");
1268 Type
*NewArrayType
= nullptr;
1270 // Get the size of the array = size(dim_1)*...*size(dim_n)
1271 uint64_t ArraySizeInt
= 1;
1272 for (int i
= SAI
->getNumberOfDimensions() - 1; i
>= 0; i
--) {
1273 auto *DimSize
= SAI
->getDimensionSize(i
);
1274 unsigned UnsignedDimSize
= static_cast<const SCEVConstant
*>(DimSize
)
1279 NewArrayType
= SAI
->getElementType();
1281 NewArrayType
= ArrayType::get(NewArrayType
, UnsignedDimSize
);
1282 ArraySizeInt
*= UnsignedDimSize
;
1285 if (SAI
->isOnHeap()) {
1286 LLVMContext
&Ctx
= NewArrayType
->getContext();
1288 // Get the IntPtrTy from the Datalayout
1289 auto IntPtrTy
= DL
.getIntPtrType(Ctx
);
1291 // Get the size of the element type in bits
1292 unsigned Size
= SAI
->getElemSizeInBytes();
1294 // Insert the malloc call at polly.start
1295 Builder
.SetInsertPoint(std::get
<0>(StartExitBlocks
)->getTerminator());
1296 auto *CreatedArray
= Builder
.CreateMalloc(
1297 IntPtrTy
, SAI
->getElementType(),
1298 ConstantInt::get(Type::getInt64Ty(Ctx
), Size
),
1299 ConstantInt::get(Type::getInt64Ty(Ctx
), ArraySizeInt
), nullptr,
1302 SAI
->setBasePtr(CreatedArray
);
1304 // Insert the free call at polly.exiting
1305 Builder
.SetInsertPoint(std::get
<1>(StartExitBlocks
)->getTerminator());
1306 Builder
.CreateFree(CreatedArray
);
1308 auto InstIt
= Builder
.GetInsertBlock()
1313 auto *CreatedArray
= new AllocaInst(NewArrayType
, DL
.getAllocaAddrSpace(),
1314 SAI
->getName(), &*InstIt
);
1315 if (PollyTargetFirstLevelCacheLineSize
)
1316 CreatedArray
->setAlignment(Align(PollyTargetFirstLevelCacheLineSize
));
1317 SAI
->setBasePtr(CreatedArray
);
1322 bool IslNodeBuilder::preloadInvariantLoads() {
1323 auto &InvariantEquivClasses
= S
.getInvariantAccesses();
1324 if (InvariantEquivClasses
.empty())
1327 BasicBlock
*PreLoadBB
= SplitBlock(Builder
.GetInsertBlock(),
1328 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1329 PreLoadBB
->setName("polly.preload.begin");
1330 Builder
.SetInsertPoint(&PreLoadBB
->front());
1332 for (auto &IAClass
: InvariantEquivClasses
)
1333 if (!preloadInvariantEquivClass(IAClass
))
1339 void IslNodeBuilder::addParameters(__isl_take isl_set
*Context
) {
1340 // Materialize values for the parameters of the SCoP.
1341 materializeParameters();
1343 // Generate values for the current loop iteration for all surrounding loops.
1345 // We may also reference loops outside of the scop which do not contain the
1346 // scop itself, but as the number of such scops may be arbitrarily large we do
1347 // not generate code for them here, but only at the point of code generation
1348 // where these values are needed.
1349 Loop
*L
= LI
.getLoopFor(S
.getEntry());
1351 while (L
!= nullptr && S
.contains(L
))
1352 L
= L
->getParentLoop();
1354 while (L
!= nullptr) {
1355 materializeNonScopLoopInductionVariable(L
);
1356 L
= L
->getParentLoop();
1359 isl_set_free(Context
);
1362 Value
*IslNodeBuilder::generateSCEV(const SCEV
*Expr
) {
1363 /// We pass the insert location of our Builder, as Polly ensures during IR
1364 /// generation that there is always a valid CFG into which instructions are
1365 /// inserted. As a result, the insertpoint is known to be always followed by a
1366 /// terminator instruction. This means the insert point may be specified by a
1367 /// terminator instruction, but it can never point to an ->end() iterator
1368 /// which does not have a corresponding instruction. Hence, dereferencing
1369 /// the insertpoint to obtain an instruction is known to be save.
1371 /// We also do not need to update the Builder here, as new instructions are
1372 /// always inserted _before_ the given InsertLocation. As a result, the
1373 /// insert location remains valid.
1374 assert(Builder
.GetInsertBlock()->end() != Builder
.GetInsertPoint() &&
1375 "Insert location points after last valid instruction");
1376 Instruction
*InsertLocation
= &*Builder
.GetInsertPoint();
1377 return expandCodeFor(S
, SE
, DL
, "polly", Expr
, Expr
->getType(),
1378 InsertLocation
, &ValueMap
,
1379 StartBlock
->getSinglePredecessor());
1382 /// The AST expression we generate to perform the run-time check assumes
1383 /// computations on integer types of infinite size. As we only use 64-bit
1384 /// arithmetic we check for overflows, in case of which we set the result
1385 /// of this run-time check to false to be conservatively correct,
1386 Value
*IslNodeBuilder::createRTC(isl_ast_expr
*Condition
) {
1387 auto ExprBuilder
= getExprBuilder();
1389 // In case the AST expression has integers larger than 64 bit, bail out. The
1390 // resulting LLVM-IR will contain operations on types that use more than 64
1391 // bits. These are -- in case wrapping intrinsics are used -- translated to
1392 // runtime library calls that are not available on all systems (e.g., Android)
1393 // and consequently will result in linker errors.
1394 if (ExprBuilder
.hasLargeInts(isl::manage_copy(Condition
))) {
1395 isl_ast_expr_free(Condition
);
1396 return Builder
.getFalse();
1399 ExprBuilder
.setTrackOverflow(true);
1400 Value
*RTC
= ExprBuilder
.create(Condition
);
1401 if (!RTC
->getType()->isIntegerTy(1))
1402 RTC
= Builder
.CreateIsNotNull(RTC
);
1403 Value
*OverflowHappened
=
1404 Builder
.CreateNot(ExprBuilder
.getOverflowState(), "polly.rtc.overflown");
1406 if (PollyGenerateRTCPrint
) {
1407 auto *F
= Builder
.GetInsertBlock()->getParent();
1408 RuntimeDebugBuilder::createCPUPrinter(
1410 "F: " + F
->getName().str() + " R: " + S
.getRegion().getNameStr() +
1412 RTC
, " Overflow: ", OverflowHappened
,
1414 " (0 failed, -1 succeeded)\n"
1415 " (if one or both are 0 falling back to original code, if both are -1 "
1416 "executing Polly code)\n");
1419 RTC
= Builder
.CreateAnd(RTC
, OverflowHappened
, "polly.rtc.result");
1420 ExprBuilder
.setTrackOverflow(false);
1422 if (!isa
<ConstantInt
>(RTC
))