//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool PollyDebugPrinting;
static cl::opt<bool, true> DebugPrintingX(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceStmts(
    "polly-codegen-trace-stmts",
    cl::desc("Add printf calls that print the statement being executed"),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceScalars(
    "polly-codegen-trace-scalars",
    cl::desc("Add printf calls that print the values of all scalar values "
             "used in a statement. Requires -polly-codegen-trace-stmts."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}

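// Try to recompute the value of 'Old' for use in the generated code instead
// of copying its defining instruction: the SCEV of 'Old' is rewritten in
// terms of the new loop induction variables (LTS) and expanded in front of
// the current insert point. The result is cached in BBMap. Returns nullptr if
// the value cannot be recomputed this way.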
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");

  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}

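// Look up the value to use for 'Old' inside the copied statement. Depending
// on the kind of virtual use, the value is taken from the block-local map
// (BBMap), the SCoP-global map (GlobalMap), reused as-is (constants and
// read-only values), or synthesized on demand via trySynthesizeNewValue.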
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  auto lookupGlobally = [this](Value *Old) -> Value * {
    Value *New = GlobalMap.lookup(Old);
    if (!New)
      return nullptr;

    // Required by:
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
    // value in generated SCoP), without intermediate mappings, which might
    // easily require transitiveness as well.
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;

    // No test case for this code.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  };

  Value *New = nullptr;
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
  switch (VUse.getKind()) {
  case VirtualUse::Block:
    // BasicBlock are constants, but the BlockGenerator copies them.
    New = BBMap.lookup(Old);
    break;

  case VirtualUse::Constant:
    // Used by:
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
    // Constants should not be redefined. In this case, the GlobalMap just
    // contains a mapping to the same constant, which is unnecessary, but
    // harmless.
    if ((New = lookupGlobally(Old)))
      break;

    assert(!BBMap.count(Old));
    New = Old;
    break;

  case VirtualUse::ReadOnly:
    assert(!GlobalMap.count(Old));

    // Required for:
    // * Isl/CodeGen/MemAccess/create_arrays.ll
    // * Isl/CodeGen/read-only-scalars.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // For some reason these reload a read-only value. The reloaded value ends
    // up in BBMap, but its value should be identical.
    //
    // Required for:
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
    // The parallel subfunctions need to reference the read-only value from the
    // parent function, this is done by reloading them locally.
    if ((New = BBMap.lookup(Old)))
      break;

    New = Old;
    break;

  case VirtualUse::Synthesizable:
    // Used by:
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
    // For some reason synthesizable values end up in GlobalMap. Their values
    // are the same as trySynthesizeNewValue would return. The legacy
    // implementation prioritized GlobalMap, so this is what we do here as well.
    // Ideally, synthesizable values should not end up in GlobalMap.
    if ((New = lookupGlobally(Old)))
      break;

    // Required for:
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
    // * Isl/CodeGen/getNumberOfIterations.ll
    // * Isl/CodeGen/non_affine_float_compare.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
    // not precomputed (SCEVExpander has its own caching mechanism).
    // These tests fail without this, but I think trySynthesizeNewValue would
    // just re-synthesize the same instructions.
    if ((New = BBMap.lookup(Old)))
      break;

    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
    break;

  case VirtualUse::Hoisted:
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
    // redefined locally (which will be ignored anyway). That is, the following
    // assertion should apply: assert(!BBMap.count(Old))

    New = lookupGlobally(Old);
    break;

  case VirtualUse::Intra:
  case VirtualUse::Inter:
    assert(!GlobalMap.count(Old) &&
           "Intra and inter-stmt values are never global");
    New = BBMap.lookup(Old);
    break;
  }

  assert(New && "Unexpected scalar dependence in region!");
  return New;
}

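// Create a scalar copy of 'Inst': clone the instruction, remap all of its
// operands through getNewValue, insert the clone at the current builder
// position and register it in BBMap. Debug intrinsics are skipped entirely.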
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      NewInst->deleteValue();
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // When copying the instruction onto the Module meant for the GPU,
  // debug metadata attached to an instruction causes all related
  // metadata to be pulled into the Module. This includes the DICompileUnit,
  // which will not be listed in llvm.dbg.cu of the Module since the Module
  // doesn't contain one. This fails the verification of the Module and the
  // subsequent generation of the ASM string.
  if (NewInst->getModule() != Inst->getModule())
    NewInst->setDebugLoc(llvm::DebugLoc());

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId().release(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }

  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}

Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId().release(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(NewPointer, Load->getAlign(),
                                                Load->getName() + "_p_scalar_");

  if (PollyDebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
  isl::set AccDom = MA.getAccessRelation().domain();
  std::string Subject = MA.getId().get_name();

  generateConditionalExecution(Stmt, AccDom, Subject.c_str(), [&, this]() {
    Value *NewPointer =
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                      LTS, getLoopForStmt(Stmt));

    if (PollyDebugPrinting)
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                            ": ", ValueOperand, "\n");

    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlign());
  });
}

bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

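// Copy a single instruction of a block statement. Terminators and values that
// can be re-synthesized from SCEV are skipped; loads, stores and PHI nodes are
// dispatched to their specialized generators, everything else is copied
// scalar-wise by copyInstScalar.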
void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    // Identified as redundant by -polly-simplify.
    if (!Stmt.getArrayAccessOrNULLFor(Store))
      return;

    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
        break;
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
  generateBeginStmtTrace(Stmt, LTS, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block in
  // their alloca.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);

  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  // Block statements and the entry blocks of region statements are code
  // generated from instruction lists. This allows us to optimize the
  // instructions that belong to a certain scop statement. As the code
  // structure of region statements might be arbitrarily complex, optimizing the
  // instruction list is not yet supported.
  if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB))
    for (Instruction *Inst : Stmt.getInstructions())
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
  else
    for (Instruction &Inst : *BB)
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied over
    // to the parallel subfunction and each request for a scalar alloca slot
    // must be forwarded to the temporary in-subfunction slot. This mapping is
    // removed when the subfunction has been generated and again normal host
    // code is generated. Due to the following reasons it is not possible to
    // perform the GlobalMap lookup right after creating the alloca below, but
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      generated parallel subfunction.
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();

  Addr =
      new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
                     DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}

void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If there are escape users we get the alloca for this instruction and put it
  // in the EscapeMap for later finalization. Lastly, if the instruction was
  // copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto StmtDom =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    auto AccDom = MA->getAccessRelation().domain();
    assert(!StmtDom.is_subset(AccDom).is_false() &&
           "Scalar must be loaded in all statement instances");
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}

Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
                                              const isl::set &Subdomain) {
  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule();
  USchedule = USchedule.intersect_domain(Domain);

  assert(!USchedule.is_empty());
  isl::map Schedule = isl::map::from_union_map(USchedule);

  isl::set ScheduledDomain = Schedule.range();
  isl::set ScheduledSet = Subdomain.apply(Schedule);

  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);

  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
  IsInSetExpr = Builder.CreateICmpNE(
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));

  return IsInSetExpr;
}

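// Generate code that executes GenThenFunc only for the statement instances
// contained in Subdomain. If the subdomain covers the whole statement domain,
// the code is emitted unconditionally; otherwise a conditional branch guarded
// by buildContainsCondition is created (used to implement partial writes).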
void BlockGenerator::generateConditionalExecution(
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
    const std::function<void()> &GenThenFunc) {
  isl::set StmtDom = Stmt.getDomain();

  // If the condition is a tautology, don't generate a condition around the
  // code.
  bool IsPartialWrite =
      !StmtDom.intersect_params(Stmt.getParent()->getContext())
           .is_subset(Subdomain);
  if (!IsPartialWrite) {
    GenThenFunc();
    return;
  }

  // Generate the condition.
  Value *Cond = buildContainsCondition(Stmt, Subdomain);

  // Don't call GenThenFunc if it is never executed. An ast index expression
  // might not be defined in this case.
  if (auto *Const = dyn_cast<ConstantInt>(Cond))
    if (Const->isZero())
      return;

  BasicBlock *HeadBlock = Builder.GetInsertBlock();
  StringRef BlockName = HeadBlock->getName();

  // Generate the conditional block.
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
                            &DT, &LI);
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *TailBlock = Branch->getSuccessor(1);

  // Assign descriptive names.
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
    CondInst->setName("polly." + Subject + ".cond");
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
  TailBlock->setName(BlockName + ".cont");

  // Put the client code into the conditional block and continue in the merge
  // block afterwards.
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
  GenThenFunc();
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
}

static std::string getInstName(Value *Val) {
  std::string Result;
  raw_string_ostream OS(Result);
  Val->printAsOperand(OS, false);
  return OS.str();
}

void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
  if (!TraceStmts)
    return;

  Scop *S = Stmt.getParent();
  const char *BaseName = Stmt.getBaseName();

  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain);
  isl::map Schedule = isl::map::from_union_map(USchedule);
  assert(Schedule.is_empty().is_false() &&
         "The stmt must have a valid instance");

  isl::multi_pw_aff ScheduleMultiPwAff =
      isl::pw_multi_aff::from_map(Schedule.reverse());
  isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range());

  // Sequence of strings to print.
  SmallVector<llvm::Value *, 8> Values;

  // Print the name of the statement.
  // TODO: Indent by the depth of the statement instance in the schedule tree.
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName));
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "("));

  // Add the coordinate of the statement instance.
  int DomDims = ScheduleMultiPwAff.dim(isl::dim::out);
  for (int i = 0; i < DomDims; i += 1) {
    if (i > 0)
      Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ","));

    isl::ast_expr IsInSet =
        RestrictedBuild.expr_from(ScheduleMultiPwAff.get_pw_aff(i));
    Values.push_back(ExprBuilder->create(IsInSet.copy()));
  }

  if (TraceScalars) {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")"));
    DenseSet<Instruction *> Encountered;

    // Add the value of each scalar (and the result of PHIs) used in the
    // statement.
    // TODO: Values used in region-statements.
    for (Instruction *Inst : Stmt.insts()) {
      if (!RuntimeDebugBuilder::isPrintable(Inst->getType()))
        continue;

      if (isa<PHINode>(Inst)) {
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " "));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(
            Builder, getInstName(Inst)));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "="));
        Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS,
                                     LI.getLoopFor(Inst->getParent())));
      } else {
        for (Value *Op : Inst->operand_values()) {
          // Do not print values that cannot change during the execution of the
          // SCoP.
          auto *OpInst = dyn_cast<Instruction>(Op);
          if (!OpInst)
            continue;
          if (!S->contains(OpInst))
            continue;

          // Print each scalar at most once, and exclude values defined in the
          // statement.
          if (Encountered.count(OpInst))
            continue;

          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, " "));
          Values.push_back(RuntimeDebugBuilder::getPrintableString(
              Builder, getInstName(OpInst)));
          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, "="));
          Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS,
                                       LI.getLoopFor(Inst->getParent())));
          Encountered.insert(OpInst);
        }
      }

      Encountered.insert(Inst);
    }

    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n"));
  } else {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n"));
  }

  RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef<Value *>(Values));
}

void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function in "
         "the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();

    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *Val = MA->getAccessValue();
          if (MA->isAnyPHIKind()) {
            assert(MA->getIncoming().size() >= 1 &&
                   "Block statements have exactly one exiting block, or "
                   "multiple but with same incoming block and value");
            assert(std::all_of(MA->getIncoming().begin(),
                               MA->getIncoming().end(),
                               [&](std::pair<BasicBlock *, Value *> p) -> bool {
                                 return p.first == Stmt.getBasicBlock();
                               }) &&
                   "Incoming block must be statement's block");
            Val = MA->getIncoming()[0].second;
          }
          auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                            BBMap, NewAccesses);

          Val = getNewValue(Stmt, Val, BBMap, LTS, L);
          assert((!isa<Instruction>(Val) ||
                  DT.dominates(cast<Instruction>(Val)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");

          // The new Val might have a different type than the old Val due to
          // ScalarEvolution looking through bitcasts.
          if (Val->getType() != Address->getType()->getPointerElementType())
            Address = Builder.CreateBitOrPointerCast(
                Address, Val->getType()->getPointerTo());

          Builder.CreateStore(Val, Address);
        });
  }
}

void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either exit
    // PHI nodes we model as common scalars but without initialization, or
    // incoming phi nodes that need to be initialized. Check if the first is the
    // case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}

void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}

void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {

    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers the
    // relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}

void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value is merged between the original code's
    // value and the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName().str();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}

void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");

  // Invalidate SCEV of loops surrounding the EscapeUsers.
  for (const auto &EscapeMapping : EscapeMap) {
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
    for (Instruction *EUser : EscapeUsers) {
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
        while (L) {
          SE.forgetLoop(L);
          L = L->getParentLoop();
        }
    }
  }
}

&S
) {
1005 findOutsideUsers(S
);
1006 createScalarInitialization(S
);
1007 createExitPHINodeMerges(S
);
1008 createScalarFinalization(S
);
1009 invalidateScalarEvolution(S
);
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}

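// Return the vector value of 'Old' for this statement. A value already present
// in VectorMap is reused; otherwise a vector is built by asking getNewValue for
// each lane's scalar value and inserting it into the result, which is then
// cached in VectorMap.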
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  auto *PointerTy = cast<PointerType>(Val->getType());
  unsigned AddrSpace = PointerTy->getAddressSpace();

  Type *ScalarType = PointerTy->getElementType();
  auto *FVTy = FixedVectorType::get(ScalarType, Width);

  return PointerType::get(FVTy, AddrSpace);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(Align(8));

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(Align(8));

  Constant *SplatVector = Constant::getNullValue(
      FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");

  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  auto *FVTy = FixedVectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl::manage_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl::manage_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl::manage_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(Align(8));
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

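// Make the scalar lanes of every vector operand of 'Inst' available in the
// per-lane scalar maps by emitting extractelement instructions. Returns true
// if at least one operand was a vector value.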
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

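// Vectorizing counterpart of BlockGenerator::copyInstruction: loads, stores,
// unary casts and binary operators with vector operands get dedicated vector
// code; every other instruction is scalarized once per vector lane.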
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}

void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}

void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating <vectorwidth> values at a
  // time), or (if the values are calculated using scalar operations) it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction *Inst : Stmt.getInstructions())
    copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}

BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable when
// leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
// does not work in cases where the exit block has edges from outside the
// region. In that case the llvm::Value would never be usable in the exit
// block. The RegionGenerator however creates a new exit block ('ExitBBCopy')
// for the subregion's exiting edges only. We need to determine whether an
// llvm::Value is usable in there. We do this by checking whether it dominates
// all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}

// Find the direct dominator of the subregion's exit block if the subregion was
// simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}

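// Copy a non-affine region statement. A dedicated entry block is generated for
// reloading demoted scalars, all blocks of the subregion are copied in a
// breadth-first traversal, the region-internal control flow and dominance are
// re-created by hand, counting PHIs are added as replacement induction
// variables for the copied loops, and finally the escaping scalars are stored
// from a new dedicated exit block.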
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
  generateBeginStmtTrace(Stmt, LTS, EntryBBMap);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we store also basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {
    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    Instruction *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}

PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}

*RegionGenerator::getExitScalar(MemoryAccess
*MA
, LoopToScevMapT
<S
,
1665 ScopStmt
*Stmt
= MA
->getStatement();
1667 // TODO: Add some test cases that ensure this is really the right choice.
1668 Loop
*L
= LI
.getLoopFor(Stmt
->getRegion()->getExit());
1670 if (MA
->isAnyPHIKind()) {
1671 auto Incoming
= MA
->getIncoming();
1672 assert(!Incoming
.empty() &&
1673 "PHI WRITEs must have originate from at least one incoming block");
1675 // If there is only one incoming value, we do not need to create a PHI.
1676 if (Incoming
.size() == 1) {
1677 Value
*OldVal
= Incoming
[0].second
;
1678 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
1681 return buildExitPHI(MA
, LTS
, BBMap
, L
);
1684 // MemoryKind::Value accesses leaving the subregion must dominate the exit
1685 // block; just pass the copied value.
1686 Value
*OldVal
= MA
->getAccessValue();
1687 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  // Get the exit scalar values before generating the writes.
  // This is necessary because RegionGenerator::getExitScalar may insert
  // PHINodes that depend on the region's exiting blocks. But
  // BlockGenerator::generateConditionalExecution may insert a new basic block
  // such that the current basic block is not a direct successor of the exiting
  // blocks anymore. Hence, build the PHINodes while the current block is still
  // the direct successor.
  SmallDenseMap<MemoryAccess *, Value *> NewExitScalars;
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    NewExitScalars[MA] = NewVal;
  }

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *NewVal = NewExitScalars.lookup(MA);
          assert(NewVal && "The exit scalar must be determined before");
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}

void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied mark this PHI as incomplete.
  // Once the block will be copied the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHIs incoming block
    // change it, otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}

void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}