1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the BlockGenerator and VectorBlockGenerator classes,
10 // which generate sequential code and vectorized code for a polyhedral
11 // statement, respectively.
13 //===----------------------------------------------------------------------===//
15 #include "polly/CodeGen/BlockGenerators.h"
16 #include "polly/CodeGen/IslExprBuilder.h"
17 #include "polly/CodeGen/RuntimeDebugBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/ScopHelper.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "llvm/Analysis/DomTreeUpdater.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/RegionInfo.h"
26 #include "llvm/Analysis/ScalarEvolution.h"
27 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
28 #include "llvm/Transforms/Utils/Local.h"
33 using namespace polly
;
35 static cl::opt
<bool> Aligned("enable-polly-aligned",
36 cl::desc("Assumed aligned memory accesses."),
37 cl::Hidden
, cl::cat(PollyCategory
));
39 bool PollyDebugPrinting
;
40 static cl::opt
<bool, true> DebugPrintingX(
41 "polly-codegen-add-debug-printing",
42 cl::desc("Add printf calls that show the values loaded/stored."),
43 cl::location(PollyDebugPrinting
), cl::Hidden
, cl::cat(PollyCategory
));
45 static cl::opt
<bool> TraceStmts(
46 "polly-codegen-trace-stmts",
47 cl::desc("Add printf calls that print the statement being executed"),
48 cl::Hidden
, cl::cat(PollyCategory
));
50 static cl::opt
<bool> TraceScalars(
51 "polly-codegen-trace-scalars",
52 cl::desc("Add printf calls that print the values of all scalar values "
53 "used in a statement. Requires -polly-codegen-trace-stmts."),
54 cl::Hidden
, cl::cat(PollyCategory
));
56 BlockGenerator::BlockGenerator(
57 PollyIRBuilder
&B
, LoopInfo
&LI
, ScalarEvolution
&SE
, DominatorTree
&DT
,
58 AllocaMapTy
&ScalarMap
, EscapeUsersAllocaMapTy
&EscapeMap
,
59 ValueMapT
&GlobalMap
, IslExprBuilder
*ExprBuilder
, BasicBlock
*StartBlock
)
60 : Builder(B
), LI(LI
), SE(SE
), ExprBuilder(ExprBuilder
), DT(DT
),
61 EntryBB(nullptr), ScalarMap(ScalarMap
), EscapeMap(EscapeMap
),
62 GlobalMap(GlobalMap
), StartBlock(StartBlock
) {}
64 Value
*BlockGenerator::trySynthesizeNewValue(ScopStmt
&Stmt
, Value
*Old
,
68 if (!SE
.isSCEVable(Old
->getType()))
71 const SCEV
*Scev
= SE
.getSCEVAtScope(Old
, L
);
75 if (isa
<SCEVCouldNotCompute
>(Scev
))
78 const SCEV
*NewScev
= SCEVLoopAddRecRewriter::rewrite(Scev
, LTS
, SE
);
80 VTV
.insert(BBMap
.begin(), BBMap
.end());
81 VTV
.insert(GlobalMap
.begin(), GlobalMap
.end());
83 Scop
&S
= *Stmt
.getParent();
84 const DataLayout
&DL
= S
.getFunction().getDataLayout();
85 auto IP
= Builder
.GetInsertPoint();
87 assert(IP
!= Builder
.GetInsertBlock()->end() &&
88 "Only instructions can be insert points for SCEVExpander");
90 expandCodeFor(S
, SE
, DL
, "polly", NewScev
, Old
->getType(), &*IP
, &VTV
,
91 StartBlock
->getSinglePredecessor());
93 BBMap
[Old
] = Expanded
;
97 Value
*BlockGenerator::getNewValue(ScopStmt
&Stmt
, Value
*Old
, ValueMapT
&BBMap
,
98 LoopToScevMapT
<S
, Loop
*L
) const {
100 auto lookupGlobally
= [this](Value
*Old
) -> Value
* {
101 Value
*New
= GlobalMap
.lookup(Old
);
106 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
107 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
108 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
109 // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
110 // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
111 // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
112 // GlobalMap should be a mapping from (value in original SCoP) to (copied
113 // value in generated SCoP), without intermediate mappings, which might
114 // easily require transitiveness as well.
115 if (Value
*NewRemapped
= GlobalMap
.lookup(New
))
118 // No test case for this code.
119 if (Old
->getType()->getScalarSizeInBits() <
120 New
->getType()->getScalarSizeInBits())
121 New
= Builder
.CreateTruncOrBitCast(New
, Old
->getType());
126 Value
*New
= nullptr;
127 auto VUse
= VirtualUse::create(&Stmt
, L
, Old
, true);
128 switch (VUse
.getKind()) {
129 case VirtualUse::Block
:
130 // BasicBlock are constants, but the BlockGenerator copies them.
131 New
= BBMap
.lookup(Old
);
134 case VirtualUse::Constant
:
136 // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
137 // Constants should not be redefined. In this case, the GlobalMap just
138 // contains a mapping to the same constant, which is unnecessary, but
140 if ((New
= lookupGlobally(Old
)))
143 assert(!BBMap
.count(Old
));
147 case VirtualUse::ReadOnly
:
148 assert(!GlobalMap
.count(Old
));
151 // * Isl/CodeGen/MemAccess/create_arrays.ll
152 // * Isl/CodeGen/read-only-scalars.ll
153 // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
154 // For some reason these reload a read-only value. The reloaded value ends
155 // up in BBMap, buts its value should be identical.
158 // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
159 // The parallel subfunctions need to reference the read-only value from the
160 // parent function, this is done by reloading them locally.
161 if ((New
= BBMap
.lookup(Old
)))
167 case VirtualUse::Synthesizable
:
169 // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
170 // * Isl/CodeGen/OpenMP/recomputed-srem.ll
171 // * Isl/CodeGen/OpenMP/reference-other-bb.ll
172 // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
173 // For some reason synthesizable values end up in GlobalMap. Their values
174 // are the same as trySynthesizeNewValue would return. The legacy
175 // implementation prioritized GlobalMap, so this is what we do here as well.
176 // Ideally, synthesizable values should not end up in GlobalMap.
177 if ((New
= lookupGlobally(Old
)))
181 // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
182 // * Isl/CodeGen/getNumberOfIterations.ll
183 // * Isl/CodeGen/non_affine_float_compare.ll
184 // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
185 // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
186 // not precomputed (SCEVExpander has its own caching mechanism).
187 // These tests fail without this, but I think trySynthesizeNewValue would
188 // just re-synthesize the same instructions.
189 if ((New
= BBMap
.lookup(Old
)))
192 New
= trySynthesizeNewValue(Stmt
, Old
, BBMap
, LTS
, L
);
195 case VirtualUse::Hoisted
:
196 // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
197 // redefined locally (which will be ignored anyway). That is, the following
198 // assertion should apply: assert(!BBMap.count(Old))
200 New
= lookupGlobally(Old
);
203 case VirtualUse::Intra
:
204 case VirtualUse::Inter
:
205 assert(!GlobalMap
.count(Old
) &&
206 "Intra and inter-stmt values are never global");
207 New
= BBMap
.lookup(Old
);
210 assert(New
&& "Unexpected scalar dependence in region!");
214 void BlockGenerator::copyInstScalar(ScopStmt
&Stmt
, Instruction
*Inst
,
215 ValueMapT
&BBMap
, LoopToScevMapT
<S
) {
216 // We do not generate debug intrinsics as we did not investigate how to
217 // copy them correctly. At the current state, they just crash the code
218 // generation as the meta-data operands are not correctly copied.
219 if (isa
<DbgInfoIntrinsic
>(Inst
))
222 Instruction
*NewInst
= Inst
->clone();
224 // Replace old operands with the new ones.
225 for (Value
*OldOperand
: Inst
->operands()) {
227 getNewValue(Stmt
, OldOperand
, BBMap
, LTS
, getLoopForStmt(Stmt
));
230 assert(!isa
<StoreInst
>(NewInst
) &&
231 "Store instructions are always needed!");
232 NewInst
->deleteValue();
236 NewInst
->replaceUsesOfWith(OldOperand
, NewOperand
);
239 Builder
.Insert(NewInst
);
240 BBMap
[Inst
] = NewInst
;
242 assert(NewInst
->getModule() == Inst
->getModule() &&
243 "Expecting instructions to be in the same module");
245 if (!NewInst
->getType()->isVoidTy())
246 NewInst
->setName("p_" + Inst
->getName());
250 BlockGenerator::generateLocationAccessed(ScopStmt
&Stmt
, MemAccInst Inst
,
251 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
252 isl_id_to_ast_expr
*NewAccesses
) {
253 const MemoryAccess
&MA
= Stmt
.getArrayAccessFor(Inst
);
254 return generateLocationAccessed(
255 Stmt
, getLoopForStmt(Stmt
),
256 Inst
.isNull() ? nullptr : Inst
.getPointerOperand(), BBMap
, LTS
,
257 NewAccesses
, MA
.getId().release(), MA
.getAccessValue()->getType());
260 Value
*BlockGenerator::generateLocationAccessed(
261 ScopStmt
&Stmt
, Loop
*L
, Value
*Pointer
, ValueMapT
&BBMap
,
262 LoopToScevMapT
<S
, isl_id_to_ast_expr
*NewAccesses
, __isl_take isl_id
*Id
,
263 Type
*ExpectedType
) {
264 isl_ast_expr
*AccessExpr
= isl_id_to_ast_expr_get(NewAccesses
, Id
);
267 AccessExpr
= isl_ast_expr_address_of(AccessExpr
);
268 return ExprBuilder
->create(AccessExpr
);
272 "If expression was not generated, must use the original pointer value");
273 return getNewValue(Stmt
, Pointer
, BBMap
, LTS
, L
);
277 BlockGenerator::getImplicitAddress(MemoryAccess
&Access
, Loop
*L
,
278 LoopToScevMapT
<S
, ValueMapT
&BBMap
,
279 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
280 if (Access
.isLatestArrayKind())
281 return generateLocationAccessed(*Access
.getStatement(), L
, nullptr, BBMap
,
282 LTS
, NewAccesses
, Access
.getId().release(),
283 Access
.getAccessValue()->getType());
285 return getOrCreateAlloca(Access
);
288 Loop
*BlockGenerator::getLoopForStmt(const ScopStmt
&Stmt
) const {
289 auto *StmtBB
= Stmt
.getEntryBlock();
290 return LI
.getLoopFor(StmtBB
);
293 Value
*BlockGenerator::generateArrayLoad(ScopStmt
&Stmt
, LoadInst
*Load
,
294 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
295 isl_id_to_ast_expr
*NewAccesses
) {
296 if (Value
*PreloadLoad
= GlobalMap
.lookup(Load
))
300 generateLocationAccessed(Stmt
, Load
, BBMap
, LTS
, NewAccesses
);
302 Builder
.CreateAlignedLoad(Load
->getType(), NewPointer
, Load
->getAlign(),
303 Load
->getName() + "_p_scalar_");
305 if (PollyDebugPrinting
)
306 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Load from ", NewPointer
,
307 ": ", ScalarLoad
, "\n");
312 void BlockGenerator::generateArrayStore(ScopStmt
&Stmt
, StoreInst
*Store
,
313 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
314 isl_id_to_ast_expr
*NewAccesses
) {
315 MemoryAccess
&MA
= Stmt
.getArrayAccessFor(Store
);
316 isl::set AccDom
= MA
.getAccessRelation().domain();
317 std::string Subject
= MA
.getId().get_name();
319 generateConditionalExecution(Stmt
, AccDom
, Subject
.c_str(), [&, this]() {
321 generateLocationAccessed(Stmt
, Store
, BBMap
, LTS
, NewAccesses
);
322 Value
*ValueOperand
= getNewValue(Stmt
, Store
->getValueOperand(), BBMap
,
323 LTS
, getLoopForStmt(Stmt
));
325 if (PollyDebugPrinting
)
326 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Store to ", NewPointer
,
327 ": ", ValueOperand
, "\n");
329 Builder
.CreateAlignedStore(ValueOperand
, NewPointer
, Store
->getAlign());
333 bool BlockGenerator::canSyntheziseInStmt(ScopStmt
&Stmt
, Instruction
*Inst
) {
334 Loop
*L
= getLoopForStmt(Stmt
);
335 return (Stmt
.isBlockStmt() || !Stmt
.getRegion()->contains(L
)) &&
336 canSynthesize(Inst
, *Stmt
.getParent(), &SE
, L
);
339 void BlockGenerator::copyInstruction(ScopStmt
&Stmt
, Instruction
*Inst
,
340 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
341 isl_id_to_ast_expr
*NewAccesses
) {
342 // Terminator instructions control the control flow. They are explicitly
343 // expressed in the clast and do not need to be copied.
344 if (Inst
->isTerminator())
347 // Synthesizable statements will be generated on-demand.
348 if (canSyntheziseInStmt(Stmt
, Inst
))
351 if (auto *Load
= dyn_cast
<LoadInst
>(Inst
)) {
352 Value
*NewLoad
= generateArrayLoad(Stmt
, Load
, BBMap
, LTS
, NewAccesses
);
353 // Compute NewLoad before its insertion in BBMap to make the insertion
355 BBMap
[Load
] = NewLoad
;
359 if (auto *Store
= dyn_cast
<StoreInst
>(Inst
)) {
360 // Identified as redundant by -polly-simplify.
361 if (!Stmt
.getArrayAccessOrNULLFor(Store
))
364 generateArrayStore(Stmt
, Store
, BBMap
, LTS
, NewAccesses
);
368 if (auto *PHI
= dyn_cast
<PHINode
>(Inst
)) {
369 copyPHIInstruction(Stmt
, PHI
, BBMap
, LTS
);
373 // Skip some special intrinsics for which we do not adjust the semantics to
374 // the new schedule. All others are handled like every other instruction.
375 if (isIgnoredIntrinsic(Inst
))
378 copyInstScalar(Stmt
, Inst
, BBMap
, LTS
);
381 void BlockGenerator::removeDeadInstructions(BasicBlock
*BB
, ValueMapT
&BBMap
) {
382 auto NewBB
= Builder
.GetInsertBlock();
383 for (auto I
= NewBB
->rbegin(); I
!= NewBB
->rend(); I
++) {
384 Instruction
*NewInst
= &*I
;
386 if (!isInstructionTriviallyDead(NewInst
))
389 for (auto Pair
: BBMap
)
390 if (Pair
.second
== NewInst
) {
391 BBMap
.erase(Pair
.first
);
394 NewInst
->eraseFromParent();
399 void BlockGenerator::copyStmt(ScopStmt
&Stmt
, LoopToScevMapT
<S
,
400 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
401 assert(Stmt
.isBlockStmt() &&
402 "Only block statements can be copied by the block generator");
406 BasicBlock
*BB
= Stmt
.getBasicBlock();
407 copyBB(Stmt
, BB
, BBMap
, LTS
, NewAccesses
);
408 removeDeadInstructions(BB
, BBMap
);
411 BasicBlock
*BlockGenerator::splitBB(BasicBlock
*BB
) {
412 BasicBlock
*CopyBB
= SplitBlock(Builder
.GetInsertBlock(),
413 &*Builder
.GetInsertPoint(), &DT
, &LI
);
414 CopyBB
->setName("polly.stmt." + BB
->getName());
418 BasicBlock
*BlockGenerator::copyBB(ScopStmt
&Stmt
, BasicBlock
*BB
,
419 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
420 isl_id_to_ast_expr
*NewAccesses
) {
421 BasicBlock
*CopyBB
= splitBB(BB
);
422 Builder
.SetInsertPoint(&CopyBB
->front());
423 generateScalarLoads(Stmt
, LTS
, BBMap
, NewAccesses
);
424 generateBeginStmtTrace(Stmt
, LTS
, BBMap
);
426 copyBB(Stmt
, BB
, CopyBB
, BBMap
, LTS
, NewAccesses
);
428 // After a basic block was copied store all scalars that escape this block in
430 generateScalarStores(Stmt
, LTS
, BBMap
, NewAccesses
);
434 void BlockGenerator::copyBB(ScopStmt
&Stmt
, BasicBlock
*BB
, BasicBlock
*CopyBB
,
435 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
436 isl_id_to_ast_expr
*NewAccesses
) {
437 EntryBB
= &CopyBB
->getParent()->getEntryBlock();
439 // Block statements and the entry blocks of region statement are code
440 // generated from instruction lists. This allow us to optimize the
441 // instructions that belong to a certain scop statement. As the code
442 // structure of region statements might be arbitrary complex, optimizing the
443 // instruction list is not yet supported.
444 if (Stmt
.isBlockStmt() || (Stmt
.isRegionStmt() && Stmt
.getEntryBlock() == BB
))
445 for (Instruction
*Inst
: Stmt
.getInstructions())
446 copyInstruction(Stmt
, Inst
, BBMap
, LTS
, NewAccesses
);
448 for (Instruction
&Inst
: *BB
)
449 copyInstruction(Stmt
, &Inst
, BBMap
, LTS
, NewAccesses
);
452 Value
*BlockGenerator::getOrCreateAlloca(const MemoryAccess
&Access
) {
453 assert(!Access
.isLatestArrayKind() && "Trying to get alloca for array kind");
455 return getOrCreateAlloca(Access
.getLatestScopArrayInfo());
458 Value
*BlockGenerator::getOrCreateAlloca(const ScopArrayInfo
*Array
) {
459 assert(!Array
->isArrayKind() && "Trying to get alloca for array kind");
461 auto &Addr
= ScalarMap
[Array
];
464 // Allow allocas to be (temporarily) redirected once by adding a new
465 // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
466 // is used for example by the OpenMP code generation where a first use
467 // of a scalar while still in the host code allocates a normal alloca with
468 // getOrCreateAlloca. When the values of this scalar are accessed during
469 // the generation of the parallel subfunction, these values are copied over
470 // to the parallel subfunction and each request for a scalar alloca slot
471 // must be forwarded to the temporary in-subfunction slot. This mapping is
472 // removed when the subfunction has been generated and again normal host
473 // code is generated. Due to the following reasons it is not possible to
474 // perform the GlobalMap lookup right after creating the alloca below, but
475 // instead we need to check GlobalMap at each call to getOrCreateAlloca:
477 // 1) GlobalMap may be changed multiple times (for each parallel loop),
478 // 2) The temporary mapping is commonly only known after the initial
479 // alloca has already been generated, and
480 // 3) The original alloca value must be restored after leaving the
482 if (Value
*NewAddr
= GlobalMap
.lookup(&*Addr
))
487 Type
*Ty
= Array
->getElementType();
488 Value
*ScalarBase
= Array
->getBasePtr();
490 if (Array
->isPHIKind())
495 const DataLayout
&DL
= Builder
.GetInsertBlock()->getDataLayout();
498 new AllocaInst(Ty
, DL
.getAllocaAddrSpace(), nullptr,
499 DL
.getPrefTypeAlign(Ty
), ScalarBase
->getName() + NameExt
);
500 EntryBB
= &Builder
.GetInsertBlock()->getParent()->getEntryBlock();
501 Addr
->insertBefore(&*EntryBB
->getFirstInsertionPt());
506 void BlockGenerator::handleOutsideUsers(const Scop
&S
, ScopArrayInfo
*Array
) {
507 Instruction
*Inst
= cast
<Instruction
>(Array
->getBasePtr());
509 // If there are escape users we get the alloca for this instruction and put it
510 // in the EscapeMap for later finalization. Lastly, if the instruction was
511 // copied multiple times we already did this and can exit.
512 if (EscapeMap
.count(Inst
))
515 EscapeUserVectorTy EscapeUsers
;
516 for (User
*U
: Inst
->users()) {
518 // Non-instruction user will never escape.
519 Instruction
*UI
= dyn_cast
<Instruction
>(U
);
526 EscapeUsers
.push_back(UI
);
529 // Exit if no escape uses were found.
530 if (EscapeUsers
.empty())
533 // Get or create an escape alloca for this instruction.
534 auto *ScalarAddr
= getOrCreateAlloca(Array
);
536 // Remember that this instruction has escape uses and the escape alloca.
537 EscapeMap
[Inst
] = std::make_pair(ScalarAddr
, std::move(EscapeUsers
));
540 void BlockGenerator::generateScalarLoads(
541 ScopStmt
&Stmt
, LoopToScevMapT
<S
, ValueMapT
&BBMap
,
542 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
543 for (MemoryAccess
*MA
: Stmt
) {
544 if (MA
->isOriginalArrayKind() || MA
->isWrite())
549 Stmt
.getDomain().intersect_params(Stmt
.getParent()->getContext());
550 auto AccDom
= MA
->getAccessRelation().domain();
551 assert(!StmtDom
.is_subset(AccDom
).is_false() &&
552 "Scalar must be loaded in all statement instances");
556 getImplicitAddress(*MA
, getLoopForStmt(Stmt
), LTS
, BBMap
, NewAccesses
);
557 assert((!isa
<Instruction
>(Address
) ||
558 DT
.dominates(cast
<Instruction
>(Address
)->getParent(),
559 Builder
.GetInsertBlock())) &&
560 "Domination violation");
561 BBMap
[MA
->getAccessValue()] = Builder
.CreateLoad(
562 MA
->getElementType(), Address
, Address
->getName() + ".reload");
566 Value
*BlockGenerator::buildContainsCondition(ScopStmt
&Stmt
,
567 const isl::set
&Subdomain
) {
568 isl::ast_build AstBuild
= Stmt
.getAstBuild();
569 isl::set Domain
= Stmt
.getDomain();
571 isl::union_map USchedule
= AstBuild
.get_schedule();
572 USchedule
= USchedule
.intersect_domain(Domain
);
574 assert(!USchedule
.is_empty());
575 isl::map Schedule
= isl::map::from_union_map(USchedule
);
577 isl::set ScheduledDomain
= Schedule
.range();
578 isl::set ScheduledSet
= Subdomain
.apply(Schedule
);
580 isl::ast_build RestrictedBuild
= AstBuild
.restrict(ScheduledDomain
);
582 isl::ast_expr IsInSet
= RestrictedBuild
.expr_from(ScheduledSet
);
583 Value
*IsInSetExpr
= ExprBuilder
->create(IsInSet
.copy());
584 IsInSetExpr
= Builder
.CreateICmpNE(
585 IsInSetExpr
, ConstantInt::get(IsInSetExpr
->getType(), 0));
590 void BlockGenerator::generateConditionalExecution(
591 ScopStmt
&Stmt
, const isl::set
&Subdomain
, StringRef Subject
,
592 const std::function
<void()> &GenThenFunc
) {
593 isl::set StmtDom
= Stmt
.getDomain();
595 // If the condition is a tautology, don't generate a condition around the
597 bool IsPartialWrite
=
598 !StmtDom
.intersect_params(Stmt
.getParent()->getContext())
599 .is_subset(Subdomain
);
600 if (!IsPartialWrite
) {
605 // Generate the condition.
606 Value
*Cond
= buildContainsCondition(Stmt
, Subdomain
);
608 // Don't call GenThenFunc if it is never executed. An ast index expression
609 // might not be defined in this case.
610 if (auto *Const
= dyn_cast
<ConstantInt
>(Cond
))
614 BasicBlock
*HeadBlock
= Builder
.GetInsertBlock();
615 StringRef BlockName
= HeadBlock
->getName();
617 // Generate the conditional block.
618 DomTreeUpdater
DTU(DT
, DomTreeUpdater::UpdateStrategy::Eager
);
619 SplitBlockAndInsertIfThen(Cond
, &*Builder
.GetInsertPoint(), false, nullptr,
621 BranchInst
*Branch
= cast
<BranchInst
>(HeadBlock
->getTerminator());
622 BasicBlock
*ThenBlock
= Branch
->getSuccessor(0);
623 BasicBlock
*TailBlock
= Branch
->getSuccessor(1);
625 // Assign descriptive names.
626 if (auto *CondInst
= dyn_cast
<Instruction
>(Cond
))
627 CondInst
->setName("polly." + Subject
+ ".cond");
628 ThenBlock
->setName(BlockName
+ "." + Subject
+ ".partial");
629 TailBlock
->setName(BlockName
+ ".cont");
631 // Put the client code into the conditional block and continue in the merge
633 Builder
.SetInsertPoint(ThenBlock
, ThenBlock
->getFirstInsertionPt());
635 Builder
.SetInsertPoint(TailBlock
, TailBlock
->getFirstInsertionPt());
638 static std::string
getInstName(Value
*Val
) {
640 raw_string_ostream
OS(Result
);
641 Val
->printAsOperand(OS
, false);
645 void BlockGenerator::generateBeginStmtTrace(ScopStmt
&Stmt
, LoopToScevMapT
<S
,
650 Scop
*S
= Stmt
.getParent();
651 const char *BaseName
= Stmt
.getBaseName();
653 isl::ast_build AstBuild
= Stmt
.getAstBuild();
654 isl::set Domain
= Stmt
.getDomain();
656 isl::union_map USchedule
= AstBuild
.get_schedule().intersect_domain(Domain
);
657 isl::map Schedule
= isl::map::from_union_map(USchedule
);
658 assert(Schedule
.is_empty().is_false() &&
659 "The stmt must have a valid instance");
661 isl::multi_pw_aff ScheduleMultiPwAff
=
662 isl::pw_multi_aff::from_map(Schedule
.reverse());
663 isl::ast_build RestrictedBuild
= AstBuild
.restrict(Schedule
.range());
665 // Sequence of strings to print.
666 SmallVector
<llvm::Value
*, 8> Values
;
668 // Print the name of the statement.
669 // TODO: Indent by the depth of the statement instance in the schedule tree.
670 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, BaseName
));
671 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, "("));
673 // Add the coordinate of the statement instance.
674 for (unsigned i
: rangeIslSize(0, ScheduleMultiPwAff
.dim(isl::dim::out
))) {
676 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, ","));
678 isl::ast_expr IsInSet
= RestrictedBuild
.expr_from(ScheduleMultiPwAff
.at(i
));
679 Values
.push_back(ExprBuilder
->create(IsInSet
.copy()));
683 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, ")"));
684 DenseSet
<Instruction
*> Encountered
;
686 // Add the value of each scalar (and the result of PHIs) used in the
688 // TODO: Values used in region-statements.
689 for (Instruction
*Inst
: Stmt
.insts()) {
690 if (!RuntimeDebugBuilder::isPrintable(Inst
->getType()))
693 if (isa
<PHINode
>(Inst
)) {
694 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, " "));
695 Values
.push_back(RuntimeDebugBuilder::getPrintableString(
696 Builder
, getInstName(Inst
)));
697 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, "="));
698 Values
.push_back(getNewValue(Stmt
, Inst
, BBMap
, LTS
,
699 LI
.getLoopFor(Inst
->getParent())));
701 for (Value
*Op
: Inst
->operand_values()) {
702 // Do not print values that cannot change during the execution of the
704 auto *OpInst
= dyn_cast
<Instruction
>(Op
);
707 if (!S
->contains(OpInst
))
710 // Print each scalar at most once, and exclude values defined in the
712 if (Encountered
.count(OpInst
))
716 RuntimeDebugBuilder::getPrintableString(Builder
, " "));
717 Values
.push_back(RuntimeDebugBuilder::getPrintableString(
718 Builder
, getInstName(OpInst
)));
720 RuntimeDebugBuilder::getPrintableString(Builder
, "="));
721 Values
.push_back(getNewValue(Stmt
, OpInst
, BBMap
, LTS
,
722 LI
.getLoopFor(Inst
->getParent())));
723 Encountered
.insert(OpInst
);
727 Encountered
.insert(Inst
);
730 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, "\n"));
732 Values
.push_back(RuntimeDebugBuilder::getPrintableString(Builder
, ")\n"));
735 RuntimeDebugBuilder::createCPUPrinter(Builder
, ArrayRef
<Value
*>(Values
));
738 void BlockGenerator::generateScalarStores(
739 ScopStmt
&Stmt
, LoopToScevMapT
<S
, ValueMapT
&BBMap
,
740 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
741 Loop
*L
= LI
.getLoopFor(Stmt
.getBasicBlock());
743 assert(Stmt
.isBlockStmt() &&
744 "Region statements need to use the generateScalarStores() function in "
745 "the RegionGenerator");
747 for (MemoryAccess
*MA
: Stmt
) {
748 if (MA
->isOriginalArrayKind() || MA
->isRead())
751 isl::set AccDom
= MA
->getAccessRelation().domain();
752 std::string Subject
= MA
->getId().get_name();
754 generateConditionalExecution(
755 Stmt
, AccDom
, Subject
.c_str(), [&, this, MA
]() {
756 Value
*Val
= MA
->getAccessValue();
757 if (MA
->isAnyPHIKind()) {
758 assert(MA
->getIncoming().size() >= 1 &&
759 "Block statements have exactly one exiting block, or "
761 "with same incoming block and value");
762 assert(std::all_of(MA
->getIncoming().begin(),
763 MA
->getIncoming().end(),
764 [&](std::pair
<BasicBlock
*, Value
*> p
) -> bool {
765 return p
.first
== Stmt
.getBasicBlock();
767 "Incoming block must be statement's block");
768 Val
= MA
->getIncoming()[0].second
;
770 auto Address
= getImplicitAddress(*MA
, getLoopForStmt(Stmt
), LTS
,
773 Val
= getNewValue(Stmt
, Val
, BBMap
, LTS
, L
);
774 assert((!isa
<Instruction
>(Val
) ||
775 DT
.dominates(cast
<Instruction
>(Val
)->getParent(),
776 Builder
.GetInsertBlock())) &&
777 "Domination violation");
778 assert((!isa
<Instruction
>(Address
) ||
779 DT
.dominates(cast
<Instruction
>(Address
)->getParent(),
780 Builder
.GetInsertBlock())) &&
781 "Domination violation");
783 // The new Val might have a different type than the old Val due to
784 // ScalarEvolution looking through bitcasts.
785 Address
= Builder
.CreateBitOrPointerCast(
786 Address
, Val
->getType()->getPointerTo(
787 Address
->getType()->getPointerAddressSpace()));
789 Builder
.CreateStore(Val
, Address
);
794 void BlockGenerator::createScalarInitialization(Scop
&S
) {
795 BasicBlock
*ExitBB
= S
.getExit();
796 BasicBlock
*PreEntryBB
= S
.getEnteringBlock();
798 Builder
.SetInsertPoint(&*StartBlock
->begin());
800 for (auto &Array
: S
.arrays()) {
801 if (Array
->getNumberOfDimensions() != 0)
803 if (Array
->isPHIKind()) {
804 // For PHI nodes, the only values we need to store are the ones that
805 // reach the PHI node from outside the region. In general there should
806 // only be one such incoming edge and this edge should enter through
808 auto PHI
= cast
<PHINode
>(Array
->getBasePtr());
810 for (auto BI
= PHI
->block_begin(), BE
= PHI
->block_end(); BI
!= BE
; BI
++)
811 if (!S
.contains(*BI
) && *BI
!= PreEntryBB
)
812 llvm_unreachable("Incoming edges from outside the scop should always "
813 "come from PreEntryBB");
815 int Idx
= PHI
->getBasicBlockIndex(PreEntryBB
);
819 Value
*ScalarValue
= PHI
->getIncomingValue(Idx
);
821 Builder
.CreateStore(ScalarValue
, getOrCreateAlloca(Array
));
825 auto *Inst
= dyn_cast
<Instruction
>(Array
->getBasePtr());
827 if (Inst
&& S
.contains(Inst
))
830 // PHI nodes that are not marked as such in their SAI object are either exit
831 // PHI nodes we model as common scalars but without initialization, or
832 // incoming phi nodes that need to be initialized. Check if the first is the
833 // case for Inst and do not create and initialize memory if so.
834 if (auto *PHI
= dyn_cast_or_null
<PHINode
>(Inst
))
835 if (!S
.hasSingleExitEdge() && PHI
->getBasicBlockIndex(ExitBB
) >= 0)
838 Builder
.CreateStore(Array
->getBasePtr(), getOrCreateAlloca(Array
));
842 void BlockGenerator::createScalarFinalization(Scop
&S
) {
843 // The exit block of the __unoptimized__ region.
844 BasicBlock
*ExitBB
= S
.getExitingBlock();
845 // The merge block __just after__ the region and the optimized region.
846 BasicBlock
*MergeBB
= S
.getExit();
848 // The exit block of the __optimized__ region.
849 BasicBlock
*OptExitBB
= *(pred_begin(MergeBB
));
850 if (OptExitBB
== ExitBB
)
851 OptExitBB
= *(++pred_begin(MergeBB
));
853 Builder
.SetInsertPoint(OptExitBB
->getTerminator());
854 for (const auto &EscapeMapping
: EscapeMap
) {
855 // Extract the escaping instruction and the escaping users as well as the
856 // alloca the instruction was demoted to.
857 Instruction
*EscapeInst
= EscapeMapping
.first
;
858 const auto &EscapeMappingValue
= EscapeMapping
.second
;
859 const EscapeUserVectorTy
&EscapeUsers
= EscapeMappingValue
.second
;
860 auto *ScalarAddr
= cast
<AllocaInst
>(&*EscapeMappingValue
.first
);
862 // Reload the demoted instruction in the optimized version of the SCoP.
863 Value
*EscapeInstReload
=
864 Builder
.CreateLoad(ScalarAddr
->getAllocatedType(), ScalarAddr
,
865 EscapeInst
->getName() + ".final_reload");
867 Builder
.CreateBitOrPointerCast(EscapeInstReload
, EscapeInst
->getType());
869 // Create the merge PHI that merges the optimized and unoptimized version.
870 PHINode
*MergePHI
= PHINode::Create(EscapeInst
->getType(), 2,
871 EscapeInst
->getName() + ".merge");
872 MergePHI
->insertBefore(&*MergeBB
->getFirstInsertionPt());
874 // Add the respective values to the merge PHI.
875 MergePHI
->addIncoming(EscapeInstReload
, OptExitBB
);
876 MergePHI
->addIncoming(EscapeInst
, ExitBB
);
878 // The information of scalar evolution about the escaping instruction needs
879 // to be revoked so the new merged instruction will be used.
880 if (SE
.isSCEVable(EscapeInst
->getType()))
881 SE
.forgetValue(EscapeInst
);
883 // Replace all uses of the demoted instruction with the merge PHI.
884 for (Instruction
*EUser
: EscapeUsers
)
885 EUser
->replaceUsesOfWith(EscapeInst
, MergePHI
);
889 void BlockGenerator::findOutsideUsers(Scop
&S
) {
890 for (auto &Array
: S
.arrays()) {
892 if (Array
->getNumberOfDimensions() != 0)
895 if (Array
->isPHIKind())
898 auto *Inst
= dyn_cast
<Instruction
>(Array
->getBasePtr());
903 // Scop invariant hoisting moves some of the base pointers out of the scop.
904 // We can ignore these, as the invariant load hoisting already registers the
905 // relevant outside users.
906 if (!S
.contains(Inst
))
909 handleOutsideUsers(S
, Array
);
913 void BlockGenerator::createExitPHINodeMerges(Scop
&S
) {
914 if (S
.hasSingleExitEdge())
917 auto *ExitBB
= S
.getExitingBlock();
918 auto *MergeBB
= S
.getExit();
919 auto *AfterMergeBB
= MergeBB
->getSingleSuccessor();
920 BasicBlock
*OptExitBB
= *(pred_begin(MergeBB
));
921 if (OptExitBB
== ExitBB
)
922 OptExitBB
= *(++pred_begin(MergeBB
));
924 Builder
.SetInsertPoint(OptExitBB
->getTerminator());
926 for (auto &SAI
: S
.arrays()) {
927 auto *Val
= SAI
->getBasePtr();
929 // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
930 // the original PHI's value or the reloaded incoming values from the
931 // generated code. An llvm::Value is merged between the original code's
932 // value or the generated one.
933 if (!SAI
->isExitPHIKind())
936 PHINode
*PHI
= dyn_cast
<PHINode
>(Val
);
940 if (PHI
->getParent() != AfterMergeBB
)
943 std::string Name
= PHI
->getName().str();
944 Value
*ScalarAddr
= getOrCreateAlloca(SAI
);
945 Value
*Reload
= Builder
.CreateLoad(SAI
->getElementType(), ScalarAddr
,
946 Name
+ ".ph.final_reload");
947 Reload
= Builder
.CreateBitOrPointerCast(Reload
, PHI
->getType());
948 Value
*OriginalValue
= PHI
->getIncomingValueForBlock(MergeBB
);
949 assert((!isa
<Instruction
>(OriginalValue
) ||
950 cast
<Instruction
>(OriginalValue
)->getParent() != MergeBB
) &&
951 "Original value must no be one we just generated.");
952 auto *MergePHI
= PHINode::Create(PHI
->getType(), 2, Name
+ ".ph.merge");
953 MergePHI
->insertBefore(&*MergeBB
->getFirstInsertionPt());
954 MergePHI
->addIncoming(Reload
, OptExitBB
);
955 MergePHI
->addIncoming(OriginalValue
, ExitBB
);
956 int Idx
= PHI
->getBasicBlockIndex(MergeBB
);
957 PHI
->setIncomingValue(Idx
, MergePHI
);
961 void BlockGenerator::invalidateScalarEvolution(Scop
&S
) {
963 if (Stmt
.isCopyStmt())
965 else if (Stmt
.isBlockStmt())
966 for (auto &Inst
: *Stmt
.getBasicBlock())
967 SE
.forgetValue(&Inst
);
968 else if (Stmt
.isRegionStmt())
969 for (auto *BB
: Stmt
.getRegion()->blocks())
970 for (auto &Inst
: *BB
)
971 SE
.forgetValue(&Inst
);
973 llvm_unreachable("Unexpected statement type found");
975 // Invalidate SCEV of loops surrounding the EscapeUsers.
976 for (const auto &EscapeMapping
: EscapeMap
) {
977 const EscapeUserVectorTy
&EscapeUsers
= EscapeMapping
.second
.second
;
978 for (Instruction
*EUser
: EscapeUsers
) {
979 if (Loop
*L
= LI
.getLoopFor(EUser
->getParent()))
982 L
= L
->getParentLoop();
988 void BlockGenerator::finalizeSCoP(Scop
&S
) {
990 createScalarInitialization(S
);
991 createExitPHINodeMerges(S
);
992 createScalarFinalization(S
);
993 invalidateScalarEvolution(S
);
996 BasicBlock
*RegionGenerator::repairDominance(BasicBlock
*BB
,
997 BasicBlock
*BBCopy
) {
999 BasicBlock
*BBIDom
= DT
.getNode(BB
)->getIDom()->getBlock();
1000 BasicBlock
*BBCopyIDom
= EndBlockMap
.lookup(BBIDom
);
1003 DT
.changeImmediateDominator(BBCopy
, BBCopyIDom
);
1005 return StartBlockMap
.lookup(BBIDom
);
1008 // This is to determine whether an llvm::Value (defined in @p BB) is usable when
1009 // leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
1010 // does not work in cases where the exit block has edges from outside the
1011 // region. In that case the llvm::Value would never be usable in in the exit
1012 // block. The RegionGenerator however creates an new exit block ('ExitBBCopy')
1013 // for the subregion's exiting edges only. We need to determine whether an
1014 // llvm::Value is usable in there. We do this by checking whether it dominates
1015 // all exiting blocks individually.
1016 static bool isDominatingSubregionExit(const DominatorTree
&DT
, Region
*R
,
1018 for (auto ExitingBB
: predecessors(R
->getExit())) {
1019 // Check for non-subregion incoming edges.
1020 if (!R
->contains(ExitingBB
))
1023 if (!DT
.dominates(BB
, ExitingBB
))
1030 // Find the direct dominator of the subregion's exit block if the subregion was
1032 static BasicBlock
*findExitDominator(DominatorTree
&DT
, Region
*R
) {
1033 BasicBlock
*Common
= nullptr;
1034 for (auto ExitingBB
: predecessors(R
->getExit())) {
1035 // Check for non-subregion incoming edges.
1036 if (!R
->contains(ExitingBB
))
1039 // First exiting edge.
1045 Common
= DT
.findNearestCommonDominator(Common
, ExitingBB
);
1048 assert(Common
&& R
->contains(Common
));
1052 void RegionGenerator::copyStmt(ScopStmt
&Stmt
, LoopToScevMapT
<S
,
1053 __isl_keep isl_id_to_ast_expr
*IdToAstExp
) {
1054 assert(Stmt
.isRegionStmt() &&
1055 "Only region statements can be copied by the region generator");
1057 // Forget all old mappings.
1058 StartBlockMap
.clear();
1059 EndBlockMap
.clear();
1061 IncompletePHINodeMap
.clear();
1063 // Collection of all values related to this subregion.
1066 // The region represented by the statement.
1067 Region
*R
= Stmt
.getRegion();
1069 // Create a dedicated entry for the region where we can reload all demoted
1071 BasicBlock
*EntryBB
= R
->getEntry();
1072 BasicBlock
*EntryBBCopy
= SplitBlock(Builder
.GetInsertBlock(),
1073 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1074 EntryBBCopy
->setName("polly.stmt." + EntryBB
->getName() + ".entry");
1075 Builder
.SetInsertPoint(&EntryBBCopy
->front());
1077 ValueMapT
&EntryBBMap
= RegionMaps
[EntryBBCopy
];
1078 generateScalarLoads(Stmt
, LTS
, EntryBBMap
, IdToAstExp
);
1079 generateBeginStmtTrace(Stmt
, LTS
, EntryBBMap
);
1081 for (auto PI
= pred_begin(EntryBB
), PE
= pred_end(EntryBB
); PI
!= PE
; ++PI
)
1082 if (!R
->contains(*PI
)) {
1083 StartBlockMap
[*PI
] = EntryBBCopy
;
1084 EndBlockMap
[*PI
] = EntryBBCopy
;
1087 // Iterate over all blocks in the region in a breadth-first search.
1088 std::deque
<BasicBlock
*> Blocks
;
1089 SmallSetVector
<BasicBlock
*, 8> SeenBlocks
;
1090 Blocks
.push_back(EntryBB
);
1091 SeenBlocks
.insert(EntryBB
);
1093 while (!Blocks
.empty()) {
1094 BasicBlock
*BB
= Blocks
.front();
1097 // First split the block and update dominance information.
1098 BasicBlock
*BBCopy
= splitBB(BB
);
1099 BasicBlock
*BBCopyIDom
= repairDominance(BB
, BBCopy
);
1101 // Get the mapping for this block and initialize it with either the scalar
1102 // loads from the generated entering block (which dominates all blocks of
1103 // this subregion) or the maps of the immediate dominator, if part of the
1104 // subregion. The latter necessarily includes the former.
1105 ValueMapT
*InitBBMap
;
1107 assert(RegionMaps
.count(BBCopyIDom
));
1108 InitBBMap
= &RegionMaps
[BBCopyIDom
];
1110 InitBBMap
= &EntryBBMap
;
1111 auto Inserted
= RegionMaps
.insert(std::make_pair(BBCopy
, *InitBBMap
));
1112 ValueMapT
&RegionMap
= Inserted
.first
->second
;
1114 // Copy the block with the BlockGenerator.
1115 Builder
.SetInsertPoint(&BBCopy
->front());
1116 copyBB(Stmt
, BB
, BBCopy
, RegionMap
, LTS
, IdToAstExp
);
1118 // In order to remap PHI nodes we store also basic block mappings.
1119 StartBlockMap
[BB
] = BBCopy
;
1120 EndBlockMap
[BB
] = Builder
.GetInsertBlock();
1122 // Add values to incomplete PHI nodes waiting for this block to be copied.
1123 for (const PHINodePairTy
&PHINodePair
: IncompletePHINodeMap
[BB
])
1124 addOperandToPHI(Stmt
, PHINodePair
.first
, PHINodePair
.second
, BB
, LTS
);
1125 IncompletePHINodeMap
[BB
].clear();
1127 // And continue with new successors inside the region.
1128 for (auto SI
= succ_begin(BB
), SE
= succ_end(BB
); SI
!= SE
; SI
++)
1129 if (R
->contains(*SI
) && SeenBlocks
.insert(*SI
))
1130 Blocks
.push_back(*SI
);
1132 // Remember value in case it is visible after this subregion.
1133 if (isDominatingSubregionExit(DT
, R
, BB
))
1134 ValueMap
.insert(RegionMap
.begin(), RegionMap
.end());
1137 // Now create a new dedicated region exit block and add it to the region map.
1138 BasicBlock
*ExitBBCopy
= SplitBlock(Builder
.GetInsertBlock(),
1139 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1140 ExitBBCopy
->setName("polly.stmt." + R
->getExit()->getName() + ".exit");
1141 StartBlockMap
[R
->getExit()] = ExitBBCopy
;
1142 EndBlockMap
[R
->getExit()] = ExitBBCopy
;
1144 BasicBlock
*ExitDomBBCopy
= EndBlockMap
.lookup(findExitDominator(DT
, R
));
1145 assert(ExitDomBBCopy
&&
1146 "Common exit dominator must be within region; at least the entry node "
1148 DT
.changeImmediateDominator(ExitBBCopy
, ExitDomBBCopy
);
1150 // As the block generator doesn't handle control flow we need to add the
1151 // region control flow by hand after all blocks have been copied.
1152 for (BasicBlock
*BB
: SeenBlocks
) {
1154 BasicBlock
*BBCopyStart
= StartBlockMap
[BB
];
1155 BasicBlock
*BBCopyEnd
= EndBlockMap
[BB
];
1156 Instruction
*TI
= BB
->getTerminator();
1157 if (isa
<UnreachableInst
>(TI
)) {
1158 while (!BBCopyEnd
->empty())
1159 BBCopyEnd
->begin()->eraseFromParent();
1160 new UnreachableInst(BBCopyEnd
->getContext(), BBCopyEnd
);
1164 Instruction
*BICopy
= BBCopyEnd
->getTerminator();
1166 ValueMapT
&RegionMap
= RegionMaps
[BBCopyStart
];
1167 RegionMap
.insert(StartBlockMap
.begin(), StartBlockMap
.end());
1169 Builder
.SetInsertPoint(BICopy
);
1170 copyInstScalar(Stmt
, TI
, RegionMap
, LTS
);
1171 BICopy
->eraseFromParent();
1174 // Add counting PHI nodes to all loops in the region that can be used as
1175 // replacement for SCEVs referring to the old loop.
1176 for (BasicBlock
*BB
: SeenBlocks
) {
1177 Loop
*L
= LI
.getLoopFor(BB
);
1178 if (L
== nullptr || L
->getHeader() != BB
|| !R
->contains(L
))
1181 BasicBlock
*BBCopy
= StartBlockMap
[BB
];
1182 Value
*NullVal
= Builder
.getInt32(0);
1184 PHINode::Create(Builder
.getInt32Ty(), 2, "polly.subregion.iv");
1185 Instruction
*LoopPHIInc
= BinaryOperator::CreateAdd(
1186 LoopPHI
, Builder
.getInt32(1), "polly.subregion.iv.inc");
1187 LoopPHI
->insertBefore(&BBCopy
->front());
1188 LoopPHIInc
->insertBefore(BBCopy
->getTerminator());
1190 for (auto *PredBB
: predecessors(BB
)) {
1191 if (!R
->contains(PredBB
))
1193 if (L
->contains(PredBB
))
1194 LoopPHI
->addIncoming(LoopPHIInc
, EndBlockMap
[PredBB
]);
1196 LoopPHI
->addIncoming(NullVal
, EndBlockMap
[PredBB
]);
1199 for (auto *PredBBCopy
: predecessors(BBCopy
))
1200 if (LoopPHI
->getBasicBlockIndex(PredBBCopy
) < 0)
1201 LoopPHI
->addIncoming(NullVal
, PredBBCopy
);
1203 LTS
[L
] = SE
.getUnknown(LoopPHI
);
1206 // Continue generating code in the exit block.
1207 Builder
.SetInsertPoint(&*ExitBBCopy
->getFirstInsertionPt());
1209 // Write values visible to other statements.
1210 generateScalarStores(Stmt
, LTS
, ValueMap
, IdToAstExp
);
1211 StartBlockMap
.clear();
1212 EndBlockMap
.clear();
1214 IncompletePHINodeMap
.clear();
1217 PHINode
*RegionGenerator::buildExitPHI(MemoryAccess
*MA
, LoopToScevMapT
<S
,
1218 ValueMapT
&BBMap
, Loop
*L
) {
1219 ScopStmt
*Stmt
= MA
->getStatement();
1220 Region
*SubR
= Stmt
->getRegion();
1221 auto Incoming
= MA
->getIncoming();
1223 PollyIRBuilder::InsertPointGuard
IPGuard(Builder
);
1224 PHINode
*OrigPHI
= cast
<PHINode
>(MA
->getAccessInstruction());
1225 BasicBlock
*NewSubregionExit
= Builder
.GetInsertBlock();
1227 // This can happen if the subregion is simplified after the ScopStmts
1228 // have been created; simplification happens as part of CodeGeneration.
1229 if (OrigPHI
->getParent() != SubR
->getExit()) {
1230 BasicBlock
*FormerExit
= SubR
->getExitingBlock();
1232 NewSubregionExit
= StartBlockMap
.lookup(FormerExit
);
1235 PHINode
*NewPHI
= PHINode::Create(OrigPHI
->getType(), Incoming
.size(),
1236 "polly." + OrigPHI
->getName(),
1237 NewSubregionExit
->getFirstNonPHIIt());
1239 // Add the incoming values to the PHI.
1240 for (auto &Pair
: Incoming
) {
1241 BasicBlock
*OrigIncomingBlock
= Pair
.first
;
1242 BasicBlock
*NewIncomingBlockStart
= StartBlockMap
.lookup(OrigIncomingBlock
);
1243 BasicBlock
*NewIncomingBlockEnd
= EndBlockMap
.lookup(OrigIncomingBlock
);
1244 Builder
.SetInsertPoint(NewIncomingBlockEnd
->getTerminator());
1245 assert(RegionMaps
.count(NewIncomingBlockStart
));
1246 assert(RegionMaps
.count(NewIncomingBlockEnd
));
1247 ValueMapT
*LocalBBMap
= &RegionMaps
[NewIncomingBlockStart
];
1249 Value
*OrigIncomingValue
= Pair
.second
;
1250 Value
*NewIncomingValue
=
1251 getNewValue(*Stmt
, OrigIncomingValue
, *LocalBBMap
, LTS
, L
);
1252 NewPHI
->addIncoming(NewIncomingValue
, NewIncomingBlockEnd
);
1258 Value
*RegionGenerator::getExitScalar(MemoryAccess
*MA
, LoopToScevMapT
<S
,
1260 ScopStmt
*Stmt
= MA
->getStatement();
1262 // TODO: Add some test cases that ensure this is really the right choice.
1263 Loop
*L
= LI
.getLoopFor(Stmt
->getRegion()->getExit());
1265 if (MA
->isAnyPHIKind()) {
1266 auto Incoming
= MA
->getIncoming();
1267 assert(!Incoming
.empty() &&
1268 "PHI WRITEs must have originate from at least one incoming block");
1270 // If there is only one incoming value, we do not need to create a PHI.
1271 if (Incoming
.size() == 1) {
1272 Value
*OldVal
= Incoming
[0].second
;
1273 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
1276 return buildExitPHI(MA
, LTS
, BBMap
, L
);
1279 // MemoryKind::Value accesses leaving the subregion must dominate the exit
1280 // block; just pass the copied value.
1281 Value
*OldVal
= MA
->getAccessValue();
1282 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
1285 void RegionGenerator::generateScalarStores(
1286 ScopStmt
&Stmt
, LoopToScevMapT
<S
, ValueMapT
&BBMap
,
1287 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
1288 assert(Stmt
.getRegion() &&
1289 "Block statements need to use the generateScalarStores() "
1290 "function in the BlockGenerator");
1292 // Get the exit scalar values before generating the writes.
1293 // This is necessary because RegionGenerator::getExitScalar may insert
1294 // PHINodes that depend on the region's exiting blocks. But
1295 // BlockGenerator::generateConditionalExecution may insert a new basic block
1296 // such that the current basic block is not a direct successor of the exiting
1297 // blocks anymore. Hence, build the PHINodes while the current block is still
1298 // the direct successor.
1299 SmallDenseMap
<MemoryAccess
*, Value
*> NewExitScalars
;
1300 for (MemoryAccess
*MA
: Stmt
) {
1301 if (MA
->isOriginalArrayKind() || MA
->isRead())
1304 Value
*NewVal
= getExitScalar(MA
, LTS
, BBMap
);
1305 NewExitScalars
[MA
] = NewVal
;
1308 for (MemoryAccess
*MA
: Stmt
) {
1309 if (MA
->isOriginalArrayKind() || MA
->isRead())
1312 isl::set AccDom
= MA
->getAccessRelation().domain();
1313 std::string Subject
= MA
->getId().get_name();
1314 generateConditionalExecution(
1315 Stmt
, AccDom
, Subject
.c_str(), [&, this, MA
]() {
1316 Value
*NewVal
= NewExitScalars
.lookup(MA
);
1317 assert(NewVal
&& "The exit scalar must be determined before");
1318 Value
*Address
= getImplicitAddress(*MA
, getLoopForStmt(Stmt
), LTS
,
1319 BBMap
, NewAccesses
);
1320 assert((!isa
<Instruction
>(NewVal
) ||
1321 DT
.dominates(cast
<Instruction
>(NewVal
)->getParent(),
1322 Builder
.GetInsertBlock())) &&
1323 "Domination violation");
1324 assert((!isa
<Instruction
>(Address
) ||
1325 DT
.dominates(cast
<Instruction
>(Address
)->getParent(),
1326 Builder
.GetInsertBlock())) &&
1327 "Domination violation");
1328 Builder
.CreateStore(NewVal
, Address
);
1333 void RegionGenerator::addOperandToPHI(ScopStmt
&Stmt
, PHINode
*PHI
,
1334 PHINode
*PHICopy
, BasicBlock
*IncomingBB
,
1335 LoopToScevMapT
<S
) {
1336 // If the incoming block was not yet copied mark this PHI as incomplete.
1337 // Once the block will be copied the incoming value will be added.
1338 BasicBlock
*BBCopyStart
= StartBlockMap
[IncomingBB
];
1339 BasicBlock
*BBCopyEnd
= EndBlockMap
[IncomingBB
];
1342 assert(Stmt
.represents(IncomingBB
) &&
1343 "Bad incoming block for PHI in non-affine region");
1344 IncompletePHINodeMap
[IncomingBB
].push_back(std::make_pair(PHI
, PHICopy
));
1348 assert(RegionMaps
.count(BBCopyStart
) &&
1349 "Incoming PHI block did not have a BBMap");
1350 ValueMapT
&BBCopyMap
= RegionMaps
[BBCopyStart
];
1352 Value
*OpCopy
= nullptr;
1354 if (Stmt
.represents(IncomingBB
)) {
1355 Value
*Op
= PHI
->getIncomingValueForBlock(IncomingBB
);
1357 // If the current insert block is different from the PHIs incoming block
1358 // change it, otherwise do not.
1359 auto IP
= Builder
.GetInsertPoint();
1360 if (IP
->getParent() != BBCopyEnd
)
1361 Builder
.SetInsertPoint(BBCopyEnd
->getTerminator());
1362 OpCopy
= getNewValue(Stmt
, Op
, BBCopyMap
, LTS
, getLoopForStmt(Stmt
));
1363 if (IP
->getParent() != BBCopyEnd
)
1364 Builder
.SetInsertPoint(&*IP
);
1366 // All edges from outside the non-affine region become a single edge
1367 // in the new copy of the non-affine region. Make sure to only add the
1368 // corresponding edge the first time we encounter a basic block from
1369 // outside the non-affine region.
1370 if (PHICopy
->getBasicBlockIndex(BBCopyEnd
) >= 0)
1373 // Get the reloaded value.
1374 OpCopy
= getNewValue(Stmt
, PHI
, BBCopyMap
, LTS
, getLoopForStmt(Stmt
));
1377 assert(OpCopy
&& "Incoming PHI value was not copied properly");
1378 PHICopy
->addIncoming(OpCopy
, BBCopyEnd
);
1381 void RegionGenerator::copyPHIInstruction(ScopStmt
&Stmt
, PHINode
*PHI
,
1383 LoopToScevMapT
<S
) {
1384 unsigned NumIncoming
= PHI
->getNumIncomingValues();
1386 Builder
.CreatePHI(PHI
->getType(), NumIncoming
, "polly." + PHI
->getName());
1387 PHICopy
->moveBefore(PHICopy
->getParent()->getFirstNonPHI());
1388 BBMap
[PHI
] = PHICopy
;
1390 for (BasicBlock
*IncomingBB
: PHI
->blocks())
1391 addOperandToPHI(Stmt
, PHI
, PHICopy
, IncomingBB
, LTS
);