1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Move instructions between statements.
11 //===----------------------------------------------------------------------===//
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
39 #include "isl/isl-noexceptions.h"
43 #define DEBUG_TYPE "polly-optree"
46 using namespace polly
;
49 AnalyzeKnown("polly-optree-analyze-known",
50 cl::desc("Analyze array contents for load forwarding"),
51 cl::cat(PollyCategory
), cl::init(true), cl::Hidden
);
54 NormalizePHIs("polly-optree-normalize-phi",
55 cl::desc("Replace PHIs by their incoming values"),
56 cl::cat(PollyCategory
), cl::init(false), cl::Hidden
);
58 static cl::opt
<unsigned>
59 MaxOps("polly-optree-max-ops",
60 cl::desc("Maximum number of ISL operations to invest for known "
61 "analysis; 0=no limit"),
62 cl::init(1000000), cl::cat(PollyCategory
), cl::Hidden
);
64 STATISTIC(KnownAnalyzed
, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota
,
66 "Analyses aborted because max_operations was reached");
68 STATISTIC(TotalInstructionsCopied
, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded
,
70 "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads
, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied
, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees
, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts
,
75 "Number of statements with at least one forwarded tree");
77 STATISTIC(ScopsModified
, "Number of SCoPs with at least one forwarded tree");
79 STATISTIC(NumValueWrites
, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops
,
81 "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops
,
84 "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites
, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops
,
87 "Number of singleton writes nested in affine loops after OpTree");
91 /// The state of whether an operand tree was/can be forwarded.
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision
{
97 /// An uninitialized value.
100 /// The root instruction or value cannot be forwarded at all.
103 /// The root instruction or value can be forwarded as a leaf of a larger
105 /// It does not make sense to move the value itself, it would just replace it
106 /// by a use of itself. For instance, a constant "5" used in a statement can
107 /// be forwarded, but it would just replace it by the same constant "5".
108 /// However, it makes sense to move as an operand of
112 /// where "5" is moved as part of a larger operand tree. "5" would be placed
113 /// (disregarding for a moment that literal constants don't have a location
114 /// and can be used anywhere) into the same statement as %add would.
117 /// The root instruction can be forwarded and doing so avoids a scalar
120 /// This can be either because the operand tree can be moved to the target
121 /// statement, or a memory access is redirected to read from a different
123 FD_CanForwardProfitably
,
125 /// A forwarding method cannot be applied to the operand tree.
126 /// The difference to FD_CannotForward is that there might be other methods
127 /// that can handle it.
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction
{
134 using KeyTy
= std::pair
<Value
*, ScopStmt
*>;
136 /// Evaluation of forwarding a value.
137 ForwardingDecision Decision
= FD_Unknown
;
139 /// Callback to execute the forwarding.
140 /// Returning true allows deleting the polly::MemoryAccess if the value is the
141 /// root of the operand tree (and its elimination the reason why the
142 /// forwarding is done). Return false if the MemoryAccess is reused or there
143 /// might be other users of the read accesses. In the letter case the
144 /// polly::SimplifyPass can remove dead MemoryAccesses.
145 std::function
<bool()> Execute
= []() -> bool {
146 llvm_unreachable("unspecified how to forward");
149 /// Other values that need to be forwarded if this action is executed. Their
150 /// actions are executed after this one.
151 SmallVector
<KeyTy
, 4> Depends
;
153 /// Named ctor: The method creating this object does not apply to the kind of
154 /// value, but other methods may.
155 static ForwardingAction
notApplicable() {
156 ForwardingAction Result
;
157 Result
.Decision
= FD_NotApplicable
;
161 /// Named ctor: The value cannot be forwarded.
162 static ForwardingAction
cannotForward() {
163 ForwardingAction Result
;
164 Result
.Decision
= FD_CannotForward
;
168 /// Named ctor: The value can just be used without any preparation.
169 static ForwardingAction
triviallyForwardable(bool IsProfitable
, Value
*Val
) {
170 ForwardingAction Result
;
172 IsProfitable
? FD_CanForwardProfitably
: FD_CanForwardLeaf
;
173 Result
.Execute
= [=]() {
174 LLVM_DEBUG(dbgs() << " trivially forwarded: " << *Val
<< "\n");
180 /// Name ctor: The value can be forwarded by executing an action.
181 static ForwardingAction
canForward(std::function
<bool()> Execute
,
182 ArrayRef
<KeyTy
> Depends
,
184 ForwardingAction Result
;
186 IsProfitable
? FD_CanForwardProfitably
: FD_CanForwardLeaf
;
187 Result
.Execute
= std::move(Execute
);
188 Result
.Depends
.append(Depends
.begin(), Depends
.end());
193 /// Implementation of operand tree forwarding for a specific SCoP.
195 /// For a statement that requires a scalar value (through a value read
196 /// MemoryAccess), see if its operand can be moved into the statement. If so,
197 /// the MemoryAccess is removed and the all the operand tree instructions are
198 /// moved into the statement. All original instructions are left in the source
199 /// statements. The simplification pass can clean these up.
200 class ForwardOpTreeImpl final
: ZoneAlgorithm
{
202 using MemoizationTy
= DenseMap
<ForwardingAction::KeyTy
, ForwardingAction
>;
204 /// Scope guard to limit the number of isl operations for this pass.
205 IslMaxOperationsGuard
&MaxOpGuard
;
207 /// How many instructions have been copied to other statements.
208 int NumInstructionsCopied
= 0;
210 /// Number of loads forwarded because their value was known.
211 int NumKnownLoadsForwarded
= 0;
213 /// Number of values reloaded from known array elements.
216 /// How many read-only accesses have been copied.
217 int NumReadOnlyCopied
= 0;
219 /// How many operand trees have been forwarded.
220 int NumForwardedTrees
= 0;
222 /// Number of statements with at least one forwarded operand tree.
223 int NumModifiedStmts
= 0;
225 /// Whether we carried out at least one change to the SCoP.
226 bool Modified
= false;
228 /// Cache of how to forward values.
229 /// The key of this map is the llvm::Value to be forwarded and the
230 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
231 /// can evaluate differently depending on where it is evaluate. For instance,
232 /// a synthesizable Scev represents a recurrence with an loop but the loop's
233 /// exit value if evaluated after the loop.
234 /// The cached results are only valid for the current TargetStmt.
235 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
236 /// exit value when instantiated outside of the loop. The primary concern is
237 /// ambiguity when crossing PHI nodes, which currently is not supported.
238 MemoizationTy ForwardingActions
;
240 /// Contains the zones where array elements are known to contain a specific
242 /// { [Element[] -> Zone[]] -> ValInst[] }
243 /// @see computeKnown()
244 isl::union_map Known
;
246 /// Translator for newly introduced ValInsts to already existing ValInsts such
247 /// that new introduced load instructions can reuse the Known analysis of its
248 /// original load. { ValInst[] -> ValInst[] }
249 isl::union_map Translator
;
251 /// Get list of array elements that do contain the same ValInst[] at Domain[].
253 /// @param ValInst { Domain[] -> ValInst[] }
254 /// The values for which we search for alternative locations,
255 /// per statement instance.
257 /// @return { Domain[] -> Element[] }
258 /// For each statement instance, the array elements that contain the
260 isl::union_map
findSameContentElements(isl::union_map ValInst
) {
261 assert(!ValInst
.is_single_valued().is_false());
264 isl::union_set Domain
= ValInst
.domain();
266 // { Domain[] -> Scatter[] }
267 isl::union_map Schedule
= getScatterFor(Domain
);
269 // { Element[] -> [Scatter[] -> ValInst[]] }
270 isl::union_map MustKnownCurried
=
271 convertZoneToTimepoints(Known
, isl::dim::in
, false, true).curry();
273 // { [Domain[] -> ValInst[]] -> Scatter[] }
274 isl::union_map DomValSched
= ValInst
.domain_map().apply_range(Schedule
);
276 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
277 isl::union_map SchedValDomVal
=
278 DomValSched
.range_product(ValInst
.range_map()).reverse();
280 // { Element[] -> [Domain[] -> ValInst[]] }
281 isl::union_map MustKnownInst
= MustKnownCurried
.apply_range(SchedValDomVal
);
283 // { Domain[] -> Element[] }
284 isl::union_map MustKnownMap
=
285 MustKnownInst
.uncurry().domain().unwrap().reverse();
286 simplify(MustKnownMap
);
291 /// Find a single array element for each statement instance, within a single
294 /// @param MustKnown { Domain[] -> Element[] }
295 /// Set of candidate array elements.
296 /// @param Domain { Domain[] }
297 /// The statement instance for which we need elements for.
299 /// @return { Domain[] -> Element[] }
300 /// For each statement instance, an array element out of @p MustKnown.
301 /// All array elements must be in the same array (Polly does not yet
302 /// support reading from different accesses using the same
303 /// MemoryAccess). If no mapping for all of @p Domain exists, returns
305 isl::map
singleLocation(isl::union_map MustKnown
, isl::set Domain
) {
306 // { Domain[] -> Element[] }
309 // Make irrelevant elements not interfere.
310 Domain
= Domain
.intersect_params(S
->getContext());
312 // MemoryAccesses can read only elements from a single array
313 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
314 // Look through all spaces until we find one that contains at least the
315 // wanted statement instance.s
316 for (isl::map Map
: MustKnown
.get_map_list()) {
317 // Get the array this is accessing.
318 isl::id ArrayId
= Map
.get_tuple_id(isl::dim::out
);
319 ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(ArrayId
.get_user());
321 // No support for generation of indirect array accesses.
322 if (SAI
->getBasePtrOriginSAI())
325 // Determine whether this map contains all wanted values.
326 isl::set MapDom
= Map
.domain();
327 if (!Domain
.is_subset(MapDom
).is_true())
330 // There might be multiple array elements that contain the same value, but
331 // choose only one of them. lexmin is used because it returns a one-value
332 // mapping, we do not care about which one.
333 // TODO: Get the simplest access function.
334 Result
= Map
.lexmin();
342 ForwardOpTreeImpl(Scop
*S
, LoopInfo
*LI
, IslMaxOperationsGuard
&MaxOpGuard
)
343 : ZoneAlgorithm("polly-optree", S
, LI
), MaxOpGuard(MaxOpGuard
) {}
345 /// Compute the zones of known array element contents.
347 /// @return True if the computed #Known is usable.
348 bool computeKnownValues() {
349 isl::union_map MustKnown
, KnownFromLoad
, KnownFromInit
;
351 // Check that nothing strange occurs.
352 collectCompatibleElts();
355 IslQuotaScope QuotaScope
= MaxOpGuard
.enter();
359 computeNormalizedPHIs();
360 Known
= computeKnown(true, true);
362 // Preexisting ValInsts use the known content analysis of themselves.
363 Translator
= makeIdentityMap(Known
.range(), false);
366 if (Known
.is_null() || Translator
.is_null() || NormalizeMap
.is_null()) {
367 assert(isl_ctx_last_error(IslCtx
.get()) == isl_error_quota
);
371 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
376 LLVM_DEBUG(dbgs() << "All known: " << Known
<< "\n");
381 void printStatistics(raw_ostream
&OS
, int Indent
= 0) {
382 OS
.indent(Indent
) << "Statistics {\n";
383 OS
.indent(Indent
+ 4) << "Instructions copied: " << NumInstructionsCopied
385 OS
.indent(Indent
+ 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
387 OS
.indent(Indent
+ 4) << "Reloads: " << NumReloads
<< '\n';
388 OS
.indent(Indent
+ 4) << "Read-only accesses copied: " << NumReadOnlyCopied
390 OS
.indent(Indent
+ 4) << "Operand trees forwarded: " << NumForwardedTrees
392 OS
.indent(Indent
+ 4) << "Statements with forwarded operand trees: "
393 << NumModifiedStmts
<< '\n';
394 OS
.indent(Indent
) << "}\n";
397 void printStatements(raw_ostream
&OS
, int Indent
= 0) const {
398 OS
.indent(Indent
) << "After statements {\n";
399 for (auto &Stmt
: *S
) {
400 OS
.indent(Indent
+ 4) << Stmt
.getBaseName() << "\n";
401 for (auto *MA
: Stmt
)
404 OS
.indent(Indent
+ 12);
405 Stmt
.printInstructions(OS
);
407 OS
.indent(Indent
) << "}\n";
410 /// Create a new MemoryAccess of type read and MemoryKind::Array.
412 /// @param Stmt The statement in which the access occurs.
413 /// @param LI The instruction that does the access.
414 /// @param AccessRelation The array element that each statement instance
417 /// @param The newly created access.
418 MemoryAccess
*makeReadArrayAccess(ScopStmt
*Stmt
, LoadInst
*LI
,
419 isl::map AccessRelation
) {
420 isl::id ArrayId
= AccessRelation
.get_tuple_id(isl::dim::out
);
421 ScopArrayInfo
*SAI
= reinterpret_cast<ScopArrayInfo
*>(ArrayId
.get_user());
423 // Create a dummy SCEV access, to be replaced anyway.
424 SmallVector
<const SCEV
*, 4> Sizes
;
425 Sizes
.reserve(SAI
->getNumberOfDimensions());
426 SmallVector
<const SCEV
*, 4> Subscripts
;
427 Subscripts
.reserve(SAI
->getNumberOfDimensions());
428 for (unsigned i
= 0; i
< SAI
->getNumberOfDimensions(); i
+= 1) {
429 Sizes
.push_back(SAI
->getDimensionSize(i
));
430 Subscripts
.push_back(nullptr);
433 MemoryAccess
*Access
=
434 new MemoryAccess(Stmt
, LI
, MemoryAccess::READ
, SAI
->getBasePtr(),
435 LI
->getType(), true, {}, Sizes
, LI
, MemoryKind::Array
);
436 S
->addAccessFunction(Access
);
437 Stmt
->addAccess(Access
, true);
439 Access
->setNewAccessRelation(AccessRelation
);
444 /// Forward a load by reading from an array element that contains the same
445 /// value. Typically the location it was loaded from.
447 /// @param TargetStmt The statement the operand tree will be copied to.
448 /// @param Inst The (possibly speculatable) instruction to forward.
449 /// @param UseStmt The statement that uses @p Inst.
450 /// @param UseLoop The loop @p Inst is used in.
451 /// @param DefStmt The statement @p Inst is defined in.
452 /// @param DefLoop The loop which contains @p Inst.
454 /// @return A ForwardingAction object describing the feasibility and
455 /// profitability evaluation and the callback carrying-out the value
457 ForwardingAction
forwardKnownLoad(ScopStmt
*TargetStmt
, Instruction
*Inst
,
458 ScopStmt
*UseStmt
, Loop
*UseLoop
,
459 ScopStmt
*DefStmt
, Loop
*DefLoop
) {
460 // Cannot do anything without successful known analysis.
461 if (Known
.is_null() || Translator
.is_null() ||
462 MaxOpGuard
.hasQuotaExceeded())
463 return ForwardingAction::notApplicable();
465 LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
);
467 return ForwardingAction::notApplicable();
469 ForwardingDecision OpDecision
=
470 forwardTree(TargetStmt
, LI
->getPointerOperand(), DefStmt
, DefLoop
);
471 switch (OpDecision
) {
472 case FD_CanForwardProfitably
:
473 case FD_CanForwardLeaf
:
475 case FD_CannotForward
:
476 return ForwardingAction::cannotForward();
478 llvm_unreachable("Shouldn't return this");
481 MemoryAccess
*Access
= TargetStmt
->getArrayAccessOrNULLFor(LI
);
483 // If the load is already in the statement, no forwarding is necessary.
484 // However, it might happen that the LoadInst is already present in the
485 // statement's instruction list. In that case we do as follows:
486 // - For the evaluation, we can trivially forward it as it is
487 // benefit of forwarding an already present instruction.
488 // - For the execution, prepend the instruction (to make it
489 // available to all instructions following in the instruction list), but
490 // do not add another MemoryAccess.
491 auto ExecAction
= [this, TargetStmt
, LI
, Access
]() -> bool {
492 TargetStmt
->prependInstruction(LI
);
494 dbgs() << " forwarded known load with preexisting MemoryAccess"
498 NumKnownLoadsForwarded
++;
499 TotalKnownLoadsForwarded
++;
502 return ForwardingAction::canForward(
503 ExecAction
, {{LI
->getPointerOperand(), DefStmt
}}, true);
506 // Allow the following Isl calculations (until we return the
507 // ForwardingAction, excluding the code inside the lambda that will be
508 // executed later) to fail.
509 IslQuotaScope QuotaScope
= MaxOpGuard
.enter();
511 // { DomainDef[] -> ValInst[] }
512 isl::map ExpectedVal
= makeValInst(Inst
, UseStmt
, UseLoop
);
513 assert(!isNormalized(ExpectedVal
).is_false() &&
514 "LoadInsts are always normalized");
516 // { DomainUse[] -> DomainTarget[] }
517 isl::map UseToTarget
= getDefToTarget(UseStmt
, TargetStmt
);
519 // { DomainTarget[] -> ValInst[] }
520 isl::map TargetExpectedVal
= ExpectedVal
.apply_domain(UseToTarget
);
521 isl::union_map TranslatedExpectedVal
=
522 isl::union_map(TargetExpectedVal
).apply_range(Translator
);
524 // { DomainTarget[] -> Element[] }
525 isl::union_map Candidates
= findSameContentElements(TranslatedExpectedVal
);
527 isl::map SameVal
= singleLocation(Candidates
, getDomainFor(TargetStmt
));
528 if (SameVal
.is_null())
529 return ForwardingAction::notApplicable();
531 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal
533 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates
537 isl::space ValInstSpace
= ExpectedVal
.get_space().range();
539 // After adding a new load to the SCoP, also update the Known content
540 // about it. The new load will have a known ValInst of
541 // { [DomainTarget[] -> Value[]] }
542 // but which -- because it is a copy of it -- has same value as the
543 // { [DomainDef[] -> Value[]] }
544 // that it replicates. Instead of cloning the known content of
545 // [DomainDef[] -> Value[]]
546 // for DomainTarget[], we add a 'translator' that maps
547 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
548 // before comparing to the known content.
549 // TODO: 'Translator' could also be used to map PHINodes to their incoming
551 isl::map LocalTranslator
;
552 if (!ValInstSpace
.is_wrapping().is_false()) {
553 // { DefDomain[] -> Value[] }
554 isl::map ValInsts
= ExpectedVal
.range().unwrap();
557 isl::set DefDomain
= ValInsts
.domain();
560 isl::space ValSpace
= ValInstSpace
.unwrap().range();
562 // { Value[] -> Value[] }
564 isl::map::identity(ValSpace
.map_from_domain_and_range(ValSpace
));
566 // { DomainDef[] -> DomainTarget[] }
567 isl::map DefToTarget
= getDefToTarget(DefStmt
, TargetStmt
);
569 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
570 LocalTranslator
= DefToTarget
.reverse().product(ValToVal
);
571 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator
574 if (LocalTranslator
.is_null())
575 return ForwardingAction::notApplicable();
578 auto ExecAction
= [this, TargetStmt
, LI
, SameVal
,
579 LocalTranslator
]() -> bool {
580 TargetStmt
->prependInstruction(LI
);
581 MemoryAccess
*Access
= makeReadArrayAccess(TargetStmt
, LI
, SameVal
);
582 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess"
586 if (!LocalTranslator
.is_null())
587 Translator
= Translator
.unite(LocalTranslator
);
589 NumKnownLoadsForwarded
++;
590 TotalKnownLoadsForwarded
++;
593 return ForwardingAction::canForward(
594 ExecAction
, {{LI
->getPointerOperand(), DefStmt
}}, true);
597 /// Forward a scalar by redirecting the access to an array element that stores
600 /// @param TargetStmt The statement the operand tree will be copied to.
601 /// @param Inst The scalar to forward.
602 /// @param UseStmt The statement that uses @p Inst.
603 /// @param UseLoop The loop @p Inst is used in.
604 /// @param DefStmt The statement @p Inst is defined in.
605 /// @param DefLoop The loop which contains @p Inst.
607 /// @return A ForwardingAction object describing the feasibility and
608 /// profitability evaluation and the callback carrying-out the value
610 ForwardingAction
reloadKnownContent(ScopStmt
*TargetStmt
, Instruction
*Inst
,
611 ScopStmt
*UseStmt
, Loop
*UseLoop
,
612 ScopStmt
*DefStmt
, Loop
*DefLoop
) {
613 // Cannot do anything without successful known analysis.
614 if (Known
.is_null() || Translator
.is_null() ||
615 MaxOpGuard
.hasQuotaExceeded())
616 return ForwardingAction::notApplicable();
618 // Don't spend too much time analyzing whether it can be reloaded.
619 IslQuotaScope QuotaScope
= MaxOpGuard
.enter();
621 // { DomainDef[] -> ValInst[] }
622 isl::union_map ExpectedVal
= makeNormalizedValInst(Inst
, UseStmt
, UseLoop
);
624 // { DomainUse[] -> DomainTarget[] }
625 isl::map UseToTarget
= getDefToTarget(UseStmt
, TargetStmt
);
627 // { DomainTarget[] -> ValInst[] }
628 isl::union_map TargetExpectedVal
= ExpectedVal
.apply_domain(UseToTarget
);
629 isl::union_map TranslatedExpectedVal
=
630 TargetExpectedVal
.apply_range(Translator
);
632 // { DomainTarget[] -> Element[] }
633 isl::union_map Candidates
= findSameContentElements(TranslatedExpectedVal
);
635 isl::map SameVal
= singleLocation(Candidates
, getDomainFor(TargetStmt
));
637 if (SameVal
.is_null())
638 return ForwardingAction::notApplicable();
640 auto ExecAction
= [this, TargetStmt
, Inst
, SameVal
]() {
641 MemoryAccess
*Access
= TargetStmt
->lookupInputAccessOf(Inst
);
643 Access
= TargetStmt
->ensureValueRead(Inst
);
644 Access
->setNewAccessRelation(SameVal
);
646 LLVM_DEBUG(dbgs() << " forwarded known content of " << *Inst
647 << " which is " << SameVal
<< "\n");
653 return ForwardingAction::canForward(ExecAction
, {}, true);
656 /// Forwards a speculatively executable instruction.
658 /// @param TargetStmt The statement the operand tree will be copied to.
659 /// @param UseInst The (possibly speculatable) instruction to forward.
660 /// @param DefStmt The statement @p UseInst is defined in.
661 /// @param DefLoop The loop which contains @p UseInst.
663 /// @return A ForwardingAction object describing the feasibility and
664 /// profitability evaluation and the callback carrying-out the value
666 ForwardingAction
forwardSpeculatable(ScopStmt
*TargetStmt
,
667 Instruction
*UseInst
, ScopStmt
*DefStmt
,
669 // PHIs, unless synthesizable, are not yet supported.
670 if (isa
<PHINode
>(UseInst
))
671 return ForwardingAction::notApplicable();
673 // Compatible instructions must satisfy the following conditions:
674 // 1. Idempotent (instruction will be copied, not moved; although its
675 // original instance might be removed by simplification)
676 // 2. Not access memory (There might be memory writes between)
677 // 3. Not cause undefined behaviour (we might copy to a location when the
678 // original instruction was no executed; this is currently not possible
679 // because we do not forward PHINodes)
680 // 4. Not leak memory if executed multiple times (i.e. malloc)
682 // Instruction::mayHaveSideEffects is not sufficient because it considers
683 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
684 // not sufficient because it allows memory accesses.
685 if (mayHaveNonDefUseDependency(*UseInst
))
686 return ForwardingAction::notApplicable();
688 SmallVector
<ForwardingAction::KeyTy
, 4> Depends
;
689 Depends
.reserve(UseInst
->getNumOperands());
690 for (Value
*OpVal
: UseInst
->operand_values()) {
691 ForwardingDecision OpDecision
=
692 forwardTree(TargetStmt
, OpVal
, DefStmt
, DefLoop
);
693 switch (OpDecision
) {
694 case FD_CannotForward
:
695 return ForwardingAction::cannotForward();
697 case FD_CanForwardLeaf
:
698 case FD_CanForwardProfitably
:
699 Depends
.emplace_back(OpVal
, DefStmt
);
702 case FD_NotApplicable
:
705 "forwardTree should never return FD_NotApplicable/FD_Unknown");
709 auto ExecAction
= [this, TargetStmt
, UseInst
]() {
710 // To ensure the right order, prepend this instruction before its
711 // operands. This ensures that its operands are inserted before the
712 // instruction using them.
713 TargetStmt
->prependInstruction(UseInst
);
715 LLVM_DEBUG(dbgs() << " forwarded speculable instruction: " << *UseInst
717 NumInstructionsCopied
++;
718 TotalInstructionsCopied
++;
721 return ForwardingAction::canForward(ExecAction
, Depends
, true);
724 /// Determines whether an operand tree can be forwarded and returns
725 /// instructions how to do so in the form of a ForwardingAction object.
727 /// @param TargetStmt The statement the operand tree will be copied to.
728 /// @param UseVal The value (usually an instruction) which is root of an
730 /// @param UseStmt The statement that uses @p UseVal.
731 /// @param UseLoop The loop @p UseVal is used in.
733 /// @return A ForwardingAction object describing the feasibility and
734 /// profitability evaluation and the callback carrying-out the value
736 ForwardingAction
forwardTreeImpl(ScopStmt
*TargetStmt
, Value
*UseVal
,
737 ScopStmt
*UseStmt
, Loop
*UseLoop
) {
738 ScopStmt
*DefStmt
= nullptr;
739 Loop
*DefLoop
= nullptr;
741 // { DefDomain[] -> TargetDomain[] }
742 isl::map DefToTarget
;
744 VirtualUse VUse
= VirtualUse::create(UseStmt
, UseLoop
, UseVal
, true);
745 switch (VUse
.getKind()) {
746 case VirtualUse::Constant
:
747 case VirtualUse::Block
:
748 case VirtualUse::Hoisted
:
749 // These can be used anywhere without special considerations.
750 return ForwardingAction::triviallyForwardable(false, UseVal
);
752 case VirtualUse::Synthesizable
: {
753 // Check if the value is synthesizable at the new location as well. This
754 // might be possible when leaving a loop for which ScalarEvolution is
755 // unable to derive the exit value for.
756 // TODO: If there is a LCSSA PHI at the loop exit, use that one.
757 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
758 // do not forward past its loop header. This would require us to use a
759 // previous loop induction variable instead the current one. We currently
760 // do not allow forwarding PHI nodes, thus this should never occur (the
761 // only exception where no phi is necessary being an unreachable loop
762 // without edge from the outside).
763 VirtualUse TargetUse
= VirtualUse::create(
764 S
, TargetStmt
, TargetStmt
->getSurroundingLoop(), UseVal
, true);
765 if (TargetUse
.getKind() == VirtualUse::Synthesizable
)
766 return ForwardingAction::triviallyForwardable(false, UseVal
);
769 dbgs() << " Synthesizable would not be synthesizable anymore: "
771 return ForwardingAction::cannotForward();
774 case VirtualUse::ReadOnly
: {
775 if (!ModelReadOnlyScalars
)
776 return ForwardingAction::triviallyForwardable(false, UseVal
);
778 // If we model read-only scalars, we need to create a MemoryAccess for it.
779 auto ExecAction
= [this, TargetStmt
, UseVal
]() {
780 TargetStmt
->ensureValueRead(UseVal
);
782 LLVM_DEBUG(dbgs() << " forwarded read-only value " << *UseVal
785 TotalReadOnlyCopied
++;
787 // Note that we cannot return true here. With a operand tree
788 // depth of 0, UseVal is the use in TargetStmt that we try to replace.
789 // With -polly-analyze-read-only-scalars=true we would ensure the
790 // existence of a MemoryAccess (which already exists for a leaf) and be
791 // removed again by tryForwardTree because it's goal is to remove this
792 // scalar MemoryAccess. It interprets FD_CanForwardTree as the
793 // permission to do so.
796 return ForwardingAction::canForward(ExecAction
, {}, false);
799 case VirtualUse::Intra
:
800 // Knowing that UseStmt and DefStmt are the same statement instance, just
801 // reuse the information about UseStmt for DefStmt
805 case VirtualUse::Inter
:
806 Instruction
*Inst
= cast
<Instruction
>(UseVal
);
809 DefStmt
= S
->getStmtFor(Inst
);
811 return ForwardingAction::cannotForward();
814 DefLoop
= LI
->getLoopFor(Inst
->getParent());
816 ForwardingAction SpeculativeResult
=
817 forwardSpeculatable(TargetStmt
, Inst
, DefStmt
, DefLoop
);
818 if (SpeculativeResult
.Decision
!= FD_NotApplicable
)
819 return SpeculativeResult
;
821 ForwardingAction KnownResult
= forwardKnownLoad(
822 TargetStmt
, Inst
, UseStmt
, UseLoop
, DefStmt
, DefLoop
);
823 if (KnownResult
.Decision
!= FD_NotApplicable
)
826 ForwardingAction ReloadResult
= reloadKnownContent(
827 TargetStmt
, Inst
, UseStmt
, UseLoop
, DefStmt
, DefLoop
);
828 if (ReloadResult
.Decision
!= FD_NotApplicable
)
831 // When no method is found to forward the operand tree, we effectively
833 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst
<< "\n");
834 return ForwardingAction::cannotForward();
837 llvm_unreachable("Case unhandled");
840 /// Determines whether an operand tree can be forwarded. Previous evaluations
843 /// @param TargetStmt The statement the operand tree will be copied to.
844 /// @param UseVal The value (usually an instruction) which is root of an
846 /// @param UseStmt The statement that uses @p UseVal.
847 /// @param UseLoop The loop @p UseVal is used in.
849 /// @return FD_CannotForward if @p UseVal cannot be forwarded.
850 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not
852 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to
854 ForwardingDecision
forwardTree(ScopStmt
*TargetStmt
, Value
*UseVal
,
855 ScopStmt
*UseStmt
, Loop
*UseLoop
) {
856 // Lookup any cached evaluation.
857 auto It
= ForwardingActions
.find({UseVal
, UseStmt
});
858 if (It
!= ForwardingActions
.end())
859 return It
->second
.Decision
;
861 // Make a new evaluation.
862 ForwardingAction Action
=
863 forwardTreeImpl(TargetStmt
, UseVal
, UseStmt
, UseLoop
);
864 ForwardingDecision Result
= Action
.Decision
;
866 // Remember for the next time.
867 assert(!ForwardingActions
.count({UseVal
, UseStmt
}) &&
868 "circular dependency?");
869 ForwardingActions
.insert({{UseVal
, UseStmt
}, std::move(Action
)});
874 /// Forward an operand tree using cached actions.
876 /// @param Stmt Statement the operand tree is moved into.
877 /// @param UseVal Root of the operand tree within @p Stmt.
878 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends
880 void applyForwardingActions(ScopStmt
*Stmt
, Value
*UseVal
, MemoryAccess
*RA
) {
882 decltype(std::declval
<ForwardingAction
>().Depends
.begin());
883 using EdgeTy
= std::pair
<ForwardingAction
*, ChildItTy
>;
885 DenseSet
<ForwardingAction::KeyTy
> Visited
;
886 SmallVector
<EdgeTy
, 32> Stack
;
887 SmallVector
<ForwardingAction
*, 32> Ordered
;
889 // Seed the tree search using the root value.
890 assert(ForwardingActions
.count({UseVal
, Stmt
}));
891 ForwardingAction
*RootAction
= &ForwardingActions
[{UseVal
, Stmt
}];
892 Stack
.emplace_back(RootAction
, RootAction
->Depends
.begin());
894 // Compute the postorder of the operand tree: all operands of an instruction
895 // must be visited before the instruction itself. As an additional
896 // requirement, the topological ordering must be 'compact': Any subtree node
897 // must not be interleaved with nodes from a non-shared subtree. This is
898 // because the same llvm::Instruction can be materialized multiple times as
899 // used at different ScopStmts which might be different values. Intersecting
900 // these lifetimes may result in miscompilations.
901 // FIXME: Intersecting lifetimes might still be possible for the roots
902 // themselves, since instructions are just prepended to a ScopStmt's
904 while (!Stack
.empty()) {
905 EdgeTy
&Top
= Stack
.back();
906 ForwardingAction
*TopAction
= Top
.first
;
907 ChildItTy
&TopEdge
= Top
.second
;
909 if (TopEdge
== TopAction
->Depends
.end()) {
911 Ordered
.push_back(TopAction
);
915 ForwardingAction::KeyTy Key
= *TopEdge
;
917 // Next edge for this level
920 auto VisitIt
= Visited
.insert(Key
);
924 assert(ForwardingActions
.count(Key
) &&
925 "Must not insert new actions during execution phase");
926 ForwardingAction
*ChildAction
= &ForwardingActions
[Key
];
927 Stack
.emplace_back(ChildAction
, ChildAction
->Depends
.begin());
930 // Actually, we need the reverse postorder because actions prepend new
931 // instructions. Therefore, the first one will always be the action for the
932 // operand tree's root.
933 assert(Ordered
.back() == RootAction
);
934 if (RootAction
->Execute())
935 Stmt
->removeSingleMemoryAccess(RA
);
937 for (auto DepAction
: reverse(Ordered
)) {
938 assert(DepAction
->Decision
!= FD_Unknown
&&
939 DepAction
->Decision
!= FD_CannotForward
);
940 assert(DepAction
!= RootAction
);
941 DepAction
->Execute();
945 /// Try to forward an operand tree rooted in @p RA.
946 bool tryForwardTree(MemoryAccess
*RA
) {
947 assert(RA
->isLatestScalarKind());
948 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA
<< "...\n");
950 ScopStmt
*Stmt
= RA
->getStatement();
951 Loop
*InLoop
= Stmt
->getSurroundingLoop();
953 isl::map TargetToUse
;
954 if (!Known
.is_null()) {
955 isl::space DomSpace
= Stmt
->getDomainSpace();
957 isl::map::identity(DomSpace
.map_from_domain_and_range(DomSpace
));
960 ForwardingDecision Assessment
=
961 forwardTree(Stmt
, RA
->getAccessValue(), Stmt
, InLoop
);
963 // If considered feasible and profitable, forward it.
964 bool Changed
= false;
965 if (Assessment
== FD_CanForwardProfitably
) {
966 applyForwardingActions(Stmt
, RA
->getAccessValue(), RA
);
970 ForwardingActions
.clear();
974 /// Return which SCoP this instance is processing.
975 Scop
*getScop() const { return S
; }
977 /// Run the algorithm: Use value read accesses as operand tree roots and try
978 /// to forward them into the statement.
979 bool forwardOperandTrees() {
980 for (ScopStmt
&Stmt
: *S
) {
981 bool StmtModified
= false;
983 // Because we are modifying the MemoryAccess list, collect them first to
984 // avoid iterator invalidation.
985 SmallVector
<MemoryAccess
*, 16> Accs(Stmt
.begin(), Stmt
.end());
987 for (MemoryAccess
*RA
: Accs
) {
990 if (!RA
->isLatestScalarKind())
993 if (tryForwardTree(RA
)) {
997 TotalForwardedTrees
++;
1003 TotalModifiedStmts
++;
1014 /// Print the pass result, performed transformations and the SCoP after the
1016 void print(raw_ostream
&OS
, int Indent
= 0) {
1017 printStatistics(OS
, Indent
);
1020 // This line can easily be checked in regression tests.
1021 OS
<< "ForwardOpTree executed, but did not modify anything\n";
1025 printStatements(OS
, Indent
);
1028 bool isModified() const { return Modified
; }
1031 static std::unique_ptr
<ForwardOpTreeImpl
> runForwardOpTree(Scop
&S
,
1033 std::unique_ptr
<ForwardOpTreeImpl
> Impl
;
1035 IslMaxOperationsGuard
MaxOpGuard(S
.getIslCtx().get(), MaxOps
, false);
1036 Impl
= std::make_unique
<ForwardOpTreeImpl
>(&S
, &LI
, MaxOpGuard
);
1039 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1040 Impl
->computeKnownValues();
1043 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1044 Impl
->forwardOperandTrees();
1046 if (MaxOpGuard
.hasQuotaExceeded()) {
1047 LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1048 "max_operations exceeded\n");
1053 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1054 LLVM_DEBUG(dbgs() << S
);
1056 // Update statistics
1057 Scop::ScopStatistics ScopStats
= S
.getStatistics();
1058 NumValueWrites
+= ScopStats
.NumValueWrites
;
1059 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
1060 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
1061 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
1062 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
1063 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
1068 static PreservedAnalyses
1069 runForwardOpTreeUsingNPM(Scop
&S
, ScopAnalysisManager
&SAM
,
1070 ScopStandardAnalysisResults
&SAR
, SPMUpdater
&U
,
1072 LoopInfo
&LI
= SAR
.LI
;
1074 std::unique_ptr
<ForwardOpTreeImpl
> Impl
= runForwardOpTree(S
, LI
);
1076 *OS
<< "Printing analysis 'Polly - Forward operand tree' for region: '"
1077 << S
.getName() << "' in function '" << S
.getFunction().getName()
1080 assert(Impl
->getScop() == &S
);
1086 if (!Impl
->isModified())
1087 return PreservedAnalyses::all();
1089 PreservedAnalyses PA
;
1090 PA
.preserveSet
<AllAnalysesOn
<Module
>>();
1091 PA
.preserveSet
<AllAnalysesOn
<Function
>>();
1092 PA
.preserveSet
<AllAnalysesOn
<Loop
>>();
1096 /// Pass that redirects scalar reads to array elements that are known to contain
1099 /// This reduces the number of scalar accesses and therefore potentially
1100 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1101 /// scalar definition are redirected (We currently do not care about removing
1102 /// the write in this case). This is also useful for the main DeLICM pass as
1103 /// there are less scalars to be mapped.
1104 class ForwardOpTreeWrapperPass final
: public ScopPass
{
1106 /// The pass implementation, also holding per-scop data.
1107 std::unique_ptr
<ForwardOpTreeImpl
> Impl
;
1112 explicit ForwardOpTreeWrapperPass() : ScopPass(ID
) {}
1113 ForwardOpTreeWrapperPass(const ForwardOpTreeWrapperPass
&) = delete;
1114 ForwardOpTreeWrapperPass
&
1115 operator=(const ForwardOpTreeWrapperPass
&) = delete;
1117 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1118 AU
.addRequiredTransitive
<ScopInfoRegionPass
>();
1119 AU
.addRequired
<LoopInfoWrapperPass
>();
1120 AU
.setPreservesAll();
1123 bool runOnScop(Scop
&S
) override
{
1124 // Free resources for previous SCoP's computation, if not yet done.
1127 LoopInfo
&LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1129 Impl
= runForwardOpTree(S
, LI
);
1134 void printScop(raw_ostream
&OS
, Scop
&S
) const override
{
1138 assert(Impl
->getScop() == &S
);
1142 void releaseMemory() override
{ Impl
.reset(); }
1143 }; // class ForwardOpTree
1145 char ForwardOpTreeWrapperPass::ID
;
1147 /// Print result from ForwardOpTreeWrapperPass.
1148 class ForwardOpTreePrinterLegacyPass final
: public ScopPass
{
1152 ForwardOpTreePrinterLegacyPass() : ForwardOpTreePrinterLegacyPass(outs()){};
1153 explicit ForwardOpTreePrinterLegacyPass(llvm::raw_ostream
&OS
)
1154 : ScopPass(ID
), OS(OS
) {}
1156 bool runOnScop(Scop
&S
) override
{
1157 ForwardOpTreeWrapperPass
&P
= getAnalysis
<ForwardOpTreeWrapperPass
>();
1159 OS
<< "Printing analysis '" << P
.getPassName() << "' for region: '"
1160 << S
.getRegion().getNameStr() << "' in function '"
1161 << S
.getFunction().getName() << "':\n";
1167 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1168 ScopPass::getAnalysisUsage(AU
);
1169 AU
.addRequired
<ForwardOpTreeWrapperPass
>();
1170 AU
.setPreservesAll();
1174 llvm::raw_ostream
&OS
;
1177 char ForwardOpTreePrinterLegacyPass::ID
= 0;
1180 Pass
*polly::createForwardOpTreeWrapperPass() {
1181 return new ForwardOpTreeWrapperPass();
1184 Pass
*polly::createForwardOpTreePrinterLegacyPass(llvm::raw_ostream
&OS
) {
1185 return new ForwardOpTreePrinterLegacyPass(OS
);
1188 llvm::PreservedAnalyses
ForwardOpTreePass::run(Scop
&S
,
1189 ScopAnalysisManager
&SAM
,
1190 ScopStandardAnalysisResults
&SAR
,
1192 return runForwardOpTreeUsingNPM(S
, SAM
, SAR
, U
, nullptr);
1195 llvm::PreservedAnalyses
1196 ForwardOpTreePrinterPass::run(Scop
&S
, ScopAnalysisManager
&SAM
,
1197 ScopStandardAnalysisResults
&SAR
, SPMUpdater
&U
) {
1198 return runForwardOpTreeUsingNPM(S
, SAM
, SAR
, U
, &OS
);
1201 INITIALIZE_PASS_BEGIN(ForwardOpTreeWrapperPass
, "polly-optree",
1202 "Polly - Forward operand tree", false, false)
1203 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
1204 INITIALIZE_PASS_END(ForwardOpTreeWrapperPass
, "polly-optree",
1205 "Polly - Forward operand tree", false, false)
1207 INITIALIZE_PASS_BEGIN(ForwardOpTreePrinterLegacyPass
, "polly-print-optree",
1208 "Polly - Print forward operand tree result", false, false)
1209 INITIALIZE_PASS_DEPENDENCY(ForwardOpTreeWrapperPass
)
1210 INITIALIZE_PASS_END(ForwardOpTreePrinterLegacyPass
, "polly-print-optree",
1211 "Polly - Print forward operand tree result", false, false)