[AMDGPU] Test codegen'ing True16 additions.
[llvm-project.git] / polly / lib / Transform / ForwardOpTree.cpp
blob2bed3e35412d76f5b9dd6480f5fbb169f30794a0
1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
11 //===----------------------------------------------------------------------===//
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
43 #define DEBUG_TYPE "polly-optree"
45 using namespace llvm;
46 using namespace polly;
48 static cl::opt<bool>
49 AnalyzeKnown("polly-optree-analyze-known",
50 cl::desc("Analyze array contents for load forwarding"),
51 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53 static cl::opt<bool>
54 NormalizePHIs("polly-optree-normalize-phi",
55 cl::desc("Replace PHIs by their incoming values"),
56 cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58 static cl::opt<unsigned>
59 MaxOps("polly-optree-max-ops",
60 cl::desc("Maximum number of ISL operations to invest for known "
61 "analysis; 0=no limit"),
62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66 "Analyses aborted because max_operations was reached");
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70 "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75 "Number of statements with at least one forwarded tree");
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81 "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84 "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87 "Number of singleton writes nested in affine loops after OpTree");
89 namespace {
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97 /// An uninitialized value.
98 FD_Unknown,
100 /// The root instruction or value cannot be forwarded at all.
101 FD_CannotForward,
103 /// The root instruction or value can be forwarded as a leaf of a larger
104 /// operand tree.
105 /// It does not make sense to move the value itself, it would just replace it
106 /// by a use of itself. For instance, a constant "5" used in a statement can
107 /// be forwarded, but it would just replace it by the same constant "5".
108 /// However, it makes sense to move as an operand of
110 /// %add = add 5, 5
112 /// where "5" is moved as part of a larger operand tree. "5" would be placed
113 /// (disregarding for a moment that literal constants don't have a location
114 /// and can be used anywhere) into the same statement as %add would.
115 FD_CanForwardLeaf,
117 /// The root instruction can be forwarded and doing so avoids a scalar
118 /// dependency.
120 /// This can be either because the operand tree can be moved to the target
121 /// statement, or a memory access is redirected to read from a different
122 /// location.
123 FD_CanForwardProfitably,
125 /// A forwarding method cannot be applied to the operand tree.
126 /// The difference to FD_CannotForward is that there might be other methods
127 /// that can handle it.
128 FD_NotApplicable
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction {
134 using KeyTy = std::pair<Value *, ScopStmt *>;
136 /// Evaluation of forwarding a value.
137 ForwardingDecision Decision = FD_Unknown;
139 /// Callback to execute the forwarding.
140 /// Returning true allows deleting the polly::MemoryAccess if the value is the
141 /// root of the operand tree (and its elimination the reason why the
142 /// forwarding is done). Return false if the MemoryAccess is reused or there
143 /// might be other users of the read accesses. In the letter case the
144 /// polly::SimplifyPass can remove dead MemoryAccesses.
145 std::function<bool()> Execute = []() -> bool {
146 llvm_unreachable("unspecified how to forward");
149 /// Other values that need to be forwarded if this action is executed. Their
150 /// actions are executed after this one.
151 SmallVector<KeyTy, 4> Depends;
153 /// Named ctor: The method creating this object does not apply to the kind of
154 /// value, but other methods may.
155 static ForwardingAction notApplicable() {
156 ForwardingAction Result;
157 Result.Decision = FD_NotApplicable;
158 return Result;
161 /// Named ctor: The value cannot be forwarded.
162 static ForwardingAction cannotForward() {
163 ForwardingAction Result;
164 Result.Decision = FD_CannotForward;
165 return Result;
168 /// Named ctor: The value can just be used without any preparation.
169 static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) {
170 ForwardingAction Result;
171 Result.Decision =
172 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
173 Result.Execute = [=]() {
174 LLVM_DEBUG(dbgs() << " trivially forwarded: " << *Val << "\n");
175 return true;
177 return Result;
180 /// Name ctor: The value can be forwarded by executing an action.
181 static ForwardingAction canForward(std::function<bool()> Execute,
182 ArrayRef<KeyTy> Depends,
183 bool IsProfitable) {
184 ForwardingAction Result;
185 Result.Decision =
186 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
187 Result.Execute = std::move(Execute);
188 Result.Depends.append(Depends.begin(), Depends.end());
189 return Result;
193 /// Implementation of operand tree forwarding for a specific SCoP.
195 /// For a statement that requires a scalar value (through a value read
196 /// MemoryAccess), see if its operand can be moved into the statement. If so,
197 /// the MemoryAccess is removed and the all the operand tree instructions are
198 /// moved into the statement. All original instructions are left in the source
199 /// statements. The simplification pass can clean these up.
200 class ForwardOpTreeImpl final : ZoneAlgorithm {
201 private:
202 using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
204 /// Scope guard to limit the number of isl operations for this pass.
205 IslMaxOperationsGuard &MaxOpGuard;
207 /// How many instructions have been copied to other statements.
208 int NumInstructionsCopied = 0;
210 /// Number of loads forwarded because their value was known.
211 int NumKnownLoadsForwarded = 0;
213 /// Number of values reloaded from known array elements.
214 int NumReloads = 0;
216 /// How many read-only accesses have been copied.
217 int NumReadOnlyCopied = 0;
219 /// How many operand trees have been forwarded.
220 int NumForwardedTrees = 0;
222 /// Number of statements with at least one forwarded operand tree.
223 int NumModifiedStmts = 0;
225 /// Whether we carried out at least one change to the SCoP.
226 bool Modified = false;
228 /// Cache of how to forward values.
229 /// The key of this map is the llvm::Value to be forwarded and the
230 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
231 /// can evaluate differently depending on where it is evaluate. For instance,
232 /// a synthesizable Scev represents a recurrence with an loop but the loop's
233 /// exit value if evaluated after the loop.
234 /// The cached results are only valid for the current TargetStmt.
235 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
236 /// exit value when instantiated outside of the loop. The primary concern is
237 /// ambiguity when crossing PHI nodes, which currently is not supported.
238 MemoizationTy ForwardingActions;
240 /// Contains the zones where array elements are known to contain a specific
241 /// value.
242 /// { [Element[] -> Zone[]] -> ValInst[] }
243 /// @see computeKnown()
244 isl::union_map Known;
246 /// Translator for newly introduced ValInsts to already existing ValInsts such
247 /// that new introduced load instructions can reuse the Known analysis of its
248 /// original load. { ValInst[] -> ValInst[] }
249 isl::union_map Translator;
251 /// Get list of array elements that do contain the same ValInst[] at Domain[].
253 /// @param ValInst { Domain[] -> ValInst[] }
254 /// The values for which we search for alternative locations,
255 /// per statement instance.
257 /// @return { Domain[] -> Element[] }
258 /// For each statement instance, the array elements that contain the
259 /// same ValInst.
260 isl::union_map findSameContentElements(isl::union_map ValInst) {
261 assert(!ValInst.is_single_valued().is_false());
263 // { Domain[] }
264 isl::union_set Domain = ValInst.domain();
266 // { Domain[] -> Scatter[] }
267 isl::union_map Schedule = getScatterFor(Domain);
269 // { Element[] -> [Scatter[] -> ValInst[]] }
270 isl::union_map MustKnownCurried =
271 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
273 // { [Domain[] -> ValInst[]] -> Scatter[] }
274 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
276 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
277 isl::union_map SchedValDomVal =
278 DomValSched.range_product(ValInst.range_map()).reverse();
280 // { Element[] -> [Domain[] -> ValInst[]] }
281 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
283 // { Domain[] -> Element[] }
284 isl::union_map MustKnownMap =
285 MustKnownInst.uncurry().domain().unwrap().reverse();
286 simplify(MustKnownMap);
288 return MustKnownMap;
291 /// Find a single array element for each statement instance, within a single
292 /// array.
294 /// @param MustKnown { Domain[] -> Element[] }
295 /// Set of candidate array elements.
296 /// @param Domain { Domain[] }
297 /// The statement instance for which we need elements for.
299 /// @return { Domain[] -> Element[] }
300 /// For each statement instance, an array element out of @p MustKnown.
301 /// All array elements must be in the same array (Polly does not yet
302 /// support reading from different accesses using the same
303 /// MemoryAccess). If no mapping for all of @p Domain exists, returns
304 /// null.
305 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
306 // { Domain[] -> Element[] }
307 isl::map Result;
309 // Make irrelevant elements not interfere.
310 Domain = Domain.intersect_params(S->getContext());
312 // MemoryAccesses can read only elements from a single array
313 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
314 // Look through all spaces until we find one that contains at least the
315 // wanted statement instance.s
316 for (isl::map Map : MustKnown.get_map_list()) {
317 // Get the array this is accessing.
318 isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
319 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
321 // No support for generation of indirect array accesses.
322 if (SAI->getBasePtrOriginSAI())
323 continue;
325 // Determine whether this map contains all wanted values.
326 isl::set MapDom = Map.domain();
327 if (!Domain.is_subset(MapDom).is_true())
328 continue;
330 // There might be multiple array elements that contain the same value, but
331 // choose only one of them. lexmin is used because it returns a one-value
332 // mapping, we do not care about which one.
333 // TODO: Get the simplest access function.
334 Result = Map.lexmin();
335 break;
338 return Result;
341 public:
342 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
343 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
345 /// Compute the zones of known array element contents.
347 /// @return True if the computed #Known is usable.
348 bool computeKnownValues() {
349 isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
351 // Check that nothing strange occurs.
352 collectCompatibleElts();
355 IslQuotaScope QuotaScope = MaxOpGuard.enter();
357 computeCommon();
358 if (NormalizePHIs)
359 computeNormalizedPHIs();
360 Known = computeKnown(true, true);
362 // Preexisting ValInsts use the known content analysis of themselves.
363 Translator = makeIdentityMap(Known.range(), false);
366 if (Known.is_null() || Translator.is_null() || NormalizeMap.is_null()) {
367 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
368 Known = {};
369 Translator = {};
370 NormalizeMap = {};
371 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
372 return false;
375 KnownAnalyzed++;
376 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
378 return true;
381 void printStatistics(raw_ostream &OS, int Indent = 0) {
382 OS.indent(Indent) << "Statistics {\n";
383 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
384 << '\n';
385 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
386 << '\n';
387 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
388 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
389 << '\n';
390 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
391 << '\n';
392 OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
393 << NumModifiedStmts << '\n';
394 OS.indent(Indent) << "}\n";
397 void printStatements(raw_ostream &OS, int Indent = 0) const {
398 OS.indent(Indent) << "After statements {\n";
399 for (auto &Stmt : *S) {
400 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
401 for (auto *MA : Stmt)
402 MA->print(OS);
404 OS.indent(Indent + 12);
405 Stmt.printInstructions(OS);
407 OS.indent(Indent) << "}\n";
410 /// Create a new MemoryAccess of type read and MemoryKind::Array.
412 /// @param Stmt The statement in which the access occurs.
413 /// @param LI The instruction that does the access.
414 /// @param AccessRelation The array element that each statement instance
415 /// accesses.
417 /// @param The newly created access.
418 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
419 isl::map AccessRelation) {
420 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
421 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
423 // Create a dummy SCEV access, to be replaced anyway.
424 SmallVector<const SCEV *, 4> Sizes;
425 Sizes.reserve(SAI->getNumberOfDimensions());
426 SmallVector<const SCEV *, 4> Subscripts;
427 Subscripts.reserve(SAI->getNumberOfDimensions());
428 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
429 Sizes.push_back(SAI->getDimensionSize(i));
430 Subscripts.push_back(nullptr);
433 MemoryAccess *Access =
434 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
435 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
436 S->addAccessFunction(Access);
437 Stmt->addAccess(Access, true);
439 Access->setNewAccessRelation(AccessRelation);
441 return Access;
444 /// Forward a load by reading from an array element that contains the same
445 /// value. Typically the location it was loaded from.
447 /// @param TargetStmt The statement the operand tree will be copied to.
448 /// @param Inst The (possibly speculatable) instruction to forward.
449 /// @param UseStmt The statement that uses @p Inst.
450 /// @param UseLoop The loop @p Inst is used in.
451 /// @param DefStmt The statement @p Inst is defined in.
452 /// @param DefLoop The loop which contains @p Inst.
454 /// @return A ForwardingAction object describing the feasibility and
455 /// profitability evaluation and the callback carrying-out the value
456 /// forwarding.
457 ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
458 ScopStmt *UseStmt, Loop *UseLoop,
459 ScopStmt *DefStmt, Loop *DefLoop) {
460 // Cannot do anything without successful known analysis.
461 if (Known.is_null() || Translator.is_null() ||
462 MaxOpGuard.hasQuotaExceeded())
463 return ForwardingAction::notApplicable();
465 LoadInst *LI = dyn_cast<LoadInst>(Inst);
466 if (!LI)
467 return ForwardingAction::notApplicable();
469 ForwardingDecision OpDecision =
470 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
471 switch (OpDecision) {
472 case FD_CanForwardProfitably:
473 case FD_CanForwardLeaf:
474 break;
475 case FD_CannotForward:
476 return ForwardingAction::cannotForward();
477 default:
478 llvm_unreachable("Shouldn't return this");
481 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
482 if (Access) {
483 // If the load is already in the statement, no forwarding is necessary.
484 // However, it might happen that the LoadInst is already present in the
485 // statement's instruction list. In that case we do as follows:
486 // - For the evaluation, we can trivially forward it as it is
487 // benefit of forwarding an already present instruction.
488 // - For the execution, prepend the instruction (to make it
489 // available to all instructions following in the instruction list), but
490 // do not add another MemoryAccess.
491 auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
492 TargetStmt->prependInstruction(LI);
493 LLVM_DEBUG(
494 dbgs() << " forwarded known load with preexisting MemoryAccess"
495 << Access << "\n");
496 (void)Access;
498 NumKnownLoadsForwarded++;
499 TotalKnownLoadsForwarded++;
500 return true;
502 return ForwardingAction::canForward(
503 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
506 // Allow the following Isl calculations (until we return the
507 // ForwardingAction, excluding the code inside the lambda that will be
508 // executed later) to fail.
509 IslQuotaScope QuotaScope = MaxOpGuard.enter();
511 // { DomainDef[] -> ValInst[] }
512 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
513 assert(!isNormalized(ExpectedVal).is_false() &&
514 "LoadInsts are always normalized");
516 // { DomainUse[] -> DomainTarget[] }
517 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
519 // { DomainTarget[] -> ValInst[] }
520 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
521 isl::union_map TranslatedExpectedVal =
522 isl::union_map(TargetExpectedVal).apply_range(Translator);
524 // { DomainTarget[] -> Element[] }
525 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
527 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
528 if (SameVal.is_null())
529 return ForwardingAction::notApplicable();
531 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal
532 << "\n");
533 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates
534 << "\n");
536 // { ValInst[] }
537 isl::space ValInstSpace = ExpectedVal.get_space().range();
539 // After adding a new load to the SCoP, also update the Known content
540 // about it. The new load will have a known ValInst of
541 // { [DomainTarget[] -> Value[]] }
542 // but which -- because it is a copy of it -- has same value as the
543 // { [DomainDef[] -> Value[]] }
544 // that it replicates. Instead of cloning the known content of
545 // [DomainDef[] -> Value[]]
546 // for DomainTarget[], we add a 'translator' that maps
547 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
548 // before comparing to the known content.
549 // TODO: 'Translator' could also be used to map PHINodes to their incoming
550 // ValInsts.
551 isl::map LocalTranslator;
552 if (!ValInstSpace.is_wrapping().is_false()) {
553 // { DefDomain[] -> Value[] }
554 isl::map ValInsts = ExpectedVal.range().unwrap();
556 // { DefDomain[] }
557 isl::set DefDomain = ValInsts.domain();
559 // { Value[] }
560 isl::space ValSpace = ValInstSpace.unwrap().range();
562 // { Value[] -> Value[] }
563 isl::map ValToVal =
564 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
566 // { DomainDef[] -> DomainTarget[] }
567 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
569 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
570 LocalTranslator = DefToTarget.reverse().product(ValToVal);
571 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator
572 << "\n");
574 if (LocalTranslator.is_null())
575 return ForwardingAction::notApplicable();
578 auto ExecAction = [this, TargetStmt, LI, SameVal,
579 LocalTranslator]() -> bool {
580 TargetStmt->prependInstruction(LI);
581 MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
582 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess"
583 << Access << "\n");
584 (void)Access;
586 if (!LocalTranslator.is_null())
587 Translator = Translator.unite(LocalTranslator);
589 NumKnownLoadsForwarded++;
590 TotalKnownLoadsForwarded++;
591 return true;
593 return ForwardingAction::canForward(
594 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
597 /// Forward a scalar by redirecting the access to an array element that stores
598 /// the same value.
600 /// @param TargetStmt The statement the operand tree will be copied to.
601 /// @param Inst The scalar to forward.
602 /// @param UseStmt The statement that uses @p Inst.
603 /// @param UseLoop The loop @p Inst is used in.
604 /// @param DefStmt The statement @p Inst is defined in.
605 /// @param DefLoop The loop which contains @p Inst.
607 /// @return A ForwardingAction object describing the feasibility and
608 /// profitability evaluation and the callback carrying-out the value
609 /// forwarding.
610 ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
611 ScopStmt *UseStmt, Loop *UseLoop,
612 ScopStmt *DefStmt, Loop *DefLoop) {
613 // Cannot do anything without successful known analysis.
614 if (Known.is_null() || Translator.is_null() ||
615 MaxOpGuard.hasQuotaExceeded())
616 return ForwardingAction::notApplicable();
618 // Don't spend too much time analyzing whether it can be reloaded.
619 IslQuotaScope QuotaScope = MaxOpGuard.enter();
621 // { DomainDef[] -> ValInst[] }
622 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
624 // { DomainUse[] -> DomainTarget[] }
625 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
627 // { DomainTarget[] -> ValInst[] }
628 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
629 isl::union_map TranslatedExpectedVal =
630 TargetExpectedVal.apply_range(Translator);
632 // { DomainTarget[] -> Element[] }
633 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
635 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
636 simplify(SameVal);
637 if (SameVal.is_null())
638 return ForwardingAction::notApplicable();
640 auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
641 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
642 if (!Access)
643 Access = TargetStmt->ensureValueRead(Inst);
644 Access->setNewAccessRelation(SameVal);
646 LLVM_DEBUG(dbgs() << " forwarded known content of " << *Inst
647 << " which is " << SameVal << "\n");
648 TotalReloads++;
649 NumReloads++;
650 return false;
653 return ForwardingAction::canForward(ExecAction, {}, true);
656 /// Forwards a speculatively executable instruction.
658 /// @param TargetStmt The statement the operand tree will be copied to.
659 /// @param UseInst The (possibly speculatable) instruction to forward.
660 /// @param DefStmt The statement @p UseInst is defined in.
661 /// @param DefLoop The loop which contains @p UseInst.
663 /// @return A ForwardingAction object describing the feasibility and
664 /// profitability evaluation and the callback carrying-out the value
665 /// forwarding.
666 ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
667 Instruction *UseInst, ScopStmt *DefStmt,
668 Loop *DefLoop) {
669 // PHIs, unless synthesizable, are not yet supported.
670 if (isa<PHINode>(UseInst))
671 return ForwardingAction::notApplicable();
673 // Compatible instructions must satisfy the following conditions:
674 // 1. Idempotent (instruction will be copied, not moved; although its
675 // original instance might be removed by simplification)
676 // 2. Not access memory (There might be memory writes between)
677 // 3. Not cause undefined behaviour (we might copy to a location when the
678 // original instruction was no executed; this is currently not possible
679 // because we do not forward PHINodes)
680 // 4. Not leak memory if executed multiple times (i.e. malloc)
682 // Instruction::mayHaveSideEffects is not sufficient because it considers
683 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
684 // not sufficient because it allows memory accesses.
685 if (mayHaveNonDefUseDependency(*UseInst))
686 return ForwardingAction::notApplicable();
688 SmallVector<ForwardingAction::KeyTy, 4> Depends;
689 Depends.reserve(UseInst->getNumOperands());
690 for (Value *OpVal : UseInst->operand_values()) {
691 ForwardingDecision OpDecision =
692 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
693 switch (OpDecision) {
694 case FD_CannotForward:
695 return ForwardingAction::cannotForward();
697 case FD_CanForwardLeaf:
698 case FD_CanForwardProfitably:
699 Depends.emplace_back(OpVal, DefStmt);
700 break;
702 case FD_NotApplicable:
703 case FD_Unknown:
704 llvm_unreachable(
705 "forwardTree should never return FD_NotApplicable/FD_Unknown");
709 auto ExecAction = [this, TargetStmt, UseInst]() {
710 // To ensure the right order, prepend this instruction before its
711 // operands. This ensures that its operands are inserted before the
712 // instruction using them.
713 TargetStmt->prependInstruction(UseInst);
715 LLVM_DEBUG(dbgs() << " forwarded speculable instruction: " << *UseInst
716 << "\n");
717 NumInstructionsCopied++;
718 TotalInstructionsCopied++;
719 return true;
721 return ForwardingAction::canForward(ExecAction, Depends, true);
724 /// Determines whether an operand tree can be forwarded and returns
725 /// instructions how to do so in the form of a ForwardingAction object.
727 /// @param TargetStmt The statement the operand tree will be copied to.
728 /// @param UseVal The value (usually an instruction) which is root of an
729 /// operand tree.
730 /// @param UseStmt The statement that uses @p UseVal.
731 /// @param UseLoop The loop @p UseVal is used in.
733 /// @return A ForwardingAction object describing the feasibility and
734 /// profitability evaluation and the callback carrying-out the value
735 /// forwarding.
736 ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
737 ScopStmt *UseStmt, Loop *UseLoop) {
738 ScopStmt *DefStmt = nullptr;
739 Loop *DefLoop = nullptr;
741 // { DefDomain[] -> TargetDomain[] }
742 isl::map DefToTarget;
744 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
745 switch (VUse.getKind()) {
746 case VirtualUse::Constant:
747 case VirtualUse::Block:
748 case VirtualUse::Hoisted:
749 // These can be used anywhere without special considerations.
750 return ForwardingAction::triviallyForwardable(false, UseVal);
752 case VirtualUse::Synthesizable: {
753 // Check if the value is synthesizable at the new location as well. This
754 // might be possible when leaving a loop for which ScalarEvolution is
755 // unable to derive the exit value for.
756 // TODO: If there is a LCSSA PHI at the loop exit, use that one.
757 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
758 // do not forward past its loop header. This would require us to use a
759 // previous loop induction variable instead the current one. We currently
760 // do not allow forwarding PHI nodes, thus this should never occur (the
761 // only exception where no phi is necessary being an unreachable loop
762 // without edge from the outside).
763 VirtualUse TargetUse = VirtualUse::create(
764 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
765 if (TargetUse.getKind() == VirtualUse::Synthesizable)
766 return ForwardingAction::triviallyForwardable(false, UseVal);
768 LLVM_DEBUG(
769 dbgs() << " Synthesizable would not be synthesizable anymore: "
770 << *UseVal << "\n");
771 return ForwardingAction::cannotForward();
774 case VirtualUse::ReadOnly: {
775 if (!ModelReadOnlyScalars)
776 return ForwardingAction::triviallyForwardable(false, UseVal);
778 // If we model read-only scalars, we need to create a MemoryAccess for it.
779 auto ExecAction = [this, TargetStmt, UseVal]() {
780 TargetStmt->ensureValueRead(UseVal);
782 LLVM_DEBUG(dbgs() << " forwarded read-only value " << *UseVal
783 << "\n");
784 NumReadOnlyCopied++;
785 TotalReadOnlyCopied++;
787 // Note that we cannot return true here. With a operand tree
788 // depth of 0, UseVal is the use in TargetStmt that we try to replace.
789 // With -polly-analyze-read-only-scalars=true we would ensure the
790 // existence of a MemoryAccess (which already exists for a leaf) and be
791 // removed again by tryForwardTree because it's goal is to remove this
792 // scalar MemoryAccess. It interprets FD_CanForwardTree as the
793 // permission to do so.
794 return false;
796 return ForwardingAction::canForward(ExecAction, {}, false);
799 case VirtualUse::Intra:
800 // Knowing that UseStmt and DefStmt are the same statement instance, just
801 // reuse the information about UseStmt for DefStmt
802 DefStmt = UseStmt;
804 [[fallthrough]];
805 case VirtualUse::Inter:
806 Instruction *Inst = cast<Instruction>(UseVal);
808 if (!DefStmt) {
809 DefStmt = S->getStmtFor(Inst);
810 if (!DefStmt)
811 return ForwardingAction::cannotForward();
814 DefLoop = LI->getLoopFor(Inst->getParent());
816 ForwardingAction SpeculativeResult =
817 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
818 if (SpeculativeResult.Decision != FD_NotApplicable)
819 return SpeculativeResult;
821 ForwardingAction KnownResult = forwardKnownLoad(
822 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
823 if (KnownResult.Decision != FD_NotApplicable)
824 return KnownResult;
826 ForwardingAction ReloadResult = reloadKnownContent(
827 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
828 if (ReloadResult.Decision != FD_NotApplicable)
829 return ReloadResult;
831 // When no method is found to forward the operand tree, we effectively
832 // cannot handle it.
833 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
834 return ForwardingAction::cannotForward();
837 llvm_unreachable("Case unhandled");
840 /// Determines whether an operand tree can be forwarded. Previous evaluations
841 /// are cached.
843 /// @param TargetStmt The statement the operand tree will be copied to.
844 /// @param UseVal The value (usually an instruction) which is root of an
845 /// operand tree.
846 /// @param UseStmt The statement that uses @p UseVal.
847 /// @param UseLoop The loop @p UseVal is used in.
849 /// @return FD_CannotForward if @p UseVal cannot be forwarded.
850 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not
851 /// profitable.
852 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to
853 /// do.
854 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
855 ScopStmt *UseStmt, Loop *UseLoop) {
856 // Lookup any cached evaluation.
857 auto It = ForwardingActions.find({UseVal, UseStmt});
858 if (It != ForwardingActions.end())
859 return It->second.Decision;
861 // Make a new evaluation.
862 ForwardingAction Action =
863 forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
864 ForwardingDecision Result = Action.Decision;
866 // Remember for the next time.
867 assert(!ForwardingActions.count({UseVal, UseStmt}) &&
868 "circular dependency?");
869 ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
871 return Result;
874 /// Forward an operand tree using cached actions.
876 /// @param Stmt Statement the operand tree is moved into.
877 /// @param UseVal Root of the operand tree within @p Stmt.
878 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends
879 /// to remove.
880 void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
881 using ChildItTy =
882 decltype(std::declval<ForwardingAction>().Depends.begin());
883 using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
885 DenseSet<ForwardingAction::KeyTy> Visited;
886 SmallVector<EdgeTy, 32> Stack;
887 SmallVector<ForwardingAction *, 32> Ordered;
889 // Seed the tree search using the root value.
890 assert(ForwardingActions.count({UseVal, Stmt}));
891 ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
892 Stack.emplace_back(RootAction, RootAction->Depends.begin());
894 // Compute the postorder of the operand tree: all operands of an instruction
895 // must be visited before the instruction itself. As an additional
896 // requirement, the topological ordering must be 'compact': Any subtree node
897 // must not be interleaved with nodes from a non-shared subtree. This is
898 // because the same llvm::Instruction can be materialized multiple times as
899 // used at different ScopStmts which might be different values. Intersecting
900 // these lifetimes may result in miscompilations.
901 // FIXME: Intersecting lifetimes might still be possible for the roots
902 // themselves, since instructions are just prepended to a ScopStmt's
903 // instruction list.
904 while (!Stack.empty()) {
905 EdgeTy &Top = Stack.back();
906 ForwardingAction *TopAction = Top.first;
907 ChildItTy &TopEdge = Top.second;
909 if (TopEdge == TopAction->Depends.end()) {
910 // Postorder sorting
911 Ordered.push_back(TopAction);
912 Stack.pop_back();
913 continue;
915 ForwardingAction::KeyTy Key = *TopEdge;
917 // Next edge for this level
918 ++TopEdge;
920 auto VisitIt = Visited.insert(Key);
921 if (!VisitIt.second)
922 continue;
924 assert(ForwardingActions.count(Key) &&
925 "Must not insert new actions during execution phase");
926 ForwardingAction *ChildAction = &ForwardingActions[Key];
927 Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
930 // Actually, we need the reverse postorder because actions prepend new
931 // instructions. Therefore, the first one will always be the action for the
932 // operand tree's root.
933 assert(Ordered.back() == RootAction);
934 if (RootAction->Execute())
935 Stmt->removeSingleMemoryAccess(RA);
936 Ordered.pop_back();
937 for (auto DepAction : reverse(Ordered)) {
938 assert(DepAction->Decision != FD_Unknown &&
939 DepAction->Decision != FD_CannotForward);
940 assert(DepAction != RootAction);
941 DepAction->Execute();
945 /// Try to forward an operand tree rooted in @p RA.
946 bool tryForwardTree(MemoryAccess *RA) {
947 assert(RA->isLatestScalarKind());
948 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
950 ScopStmt *Stmt = RA->getStatement();
951 Loop *InLoop = Stmt->getSurroundingLoop();
953 isl::map TargetToUse;
954 if (!Known.is_null()) {
955 isl::space DomSpace = Stmt->getDomainSpace();
956 TargetToUse =
957 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
960 ForwardingDecision Assessment =
961 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
963 // If considered feasible and profitable, forward it.
964 bool Changed = false;
965 if (Assessment == FD_CanForwardProfitably) {
966 applyForwardingActions(Stmt, RA->getAccessValue(), RA);
967 Changed = true;
970 ForwardingActions.clear();
971 return Changed;
974 /// Return which SCoP this instance is processing.
975 Scop *getScop() const { return S; }
977 /// Run the algorithm: Use value read accesses as operand tree roots and try
978 /// to forward them into the statement.
979 bool forwardOperandTrees() {
980 for (ScopStmt &Stmt : *S) {
981 bool StmtModified = false;
983 // Because we are modifying the MemoryAccess list, collect them first to
984 // avoid iterator invalidation.
985 SmallVector<MemoryAccess *, 16> Accs(Stmt.begin(), Stmt.end());
987 for (MemoryAccess *RA : Accs) {
988 if (!RA->isRead())
989 continue;
990 if (!RA->isLatestScalarKind())
991 continue;
993 if (tryForwardTree(RA)) {
994 Modified = true;
995 StmtModified = true;
996 NumForwardedTrees++;
997 TotalForwardedTrees++;
1001 if (StmtModified) {
1002 NumModifiedStmts++;
1003 TotalModifiedStmts++;
1007 if (Modified) {
1008 ScopsModified++;
1009 S->realignParams();
1011 return Modified;
1014 /// Print the pass result, performed transformations and the SCoP after the
1015 /// transformation.
1016 void print(raw_ostream &OS, int Indent = 0) {
1017 printStatistics(OS, Indent);
1019 if (!Modified) {
1020 // This line can easily be checked in regression tests.
1021 OS << "ForwardOpTree executed, but did not modify anything\n";
1022 return;
1025 printStatements(OS, Indent);
1028 bool isModified() const { return Modified; }
1031 static std::unique_ptr<ForwardOpTreeImpl> runForwardOpTree(Scop &S,
1032 LoopInfo &LI) {
1033 std::unique_ptr<ForwardOpTreeImpl> Impl;
1035 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1036 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1038 if (AnalyzeKnown) {
1039 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1040 Impl->computeKnownValues();
1043 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1044 Impl->forwardOperandTrees();
1046 if (MaxOpGuard.hasQuotaExceeded()) {
1047 LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1048 "max_operations exceeded\n");
1049 KnownOutOfQuota++;
1053 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1054 LLVM_DEBUG(dbgs() << S);
1056 // Update statistics
1057 Scop::ScopStatistics ScopStats = S.getStatistics();
1058 NumValueWrites += ScopStats.NumValueWrites;
1059 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1060 NumPHIWrites += ScopStats.NumPHIWrites;
1061 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1062 NumSingletonWrites += ScopStats.NumSingletonWrites;
1063 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1065 return Impl;
1068 static PreservedAnalyses
1069 runForwardOpTreeUsingNPM(Scop &S, ScopAnalysisManager &SAM,
1070 ScopStandardAnalysisResults &SAR, SPMUpdater &U,
1071 raw_ostream *OS) {
1072 LoopInfo &LI = SAR.LI;
1074 std::unique_ptr<ForwardOpTreeImpl> Impl = runForwardOpTree(S, LI);
1075 if (OS) {
1076 *OS << "Printing analysis 'Polly - Forward operand tree' for region: '"
1077 << S.getName() << "' in function '" << S.getFunction().getName()
1078 << "':\n";
1079 if (Impl) {
1080 assert(Impl->getScop() == &S);
1082 Impl->print(*OS);
1086 if (!Impl->isModified())
1087 return PreservedAnalyses::all();
1089 PreservedAnalyses PA;
1090 PA.preserveSet<AllAnalysesOn<Module>>();
1091 PA.preserveSet<AllAnalysesOn<Function>>();
1092 PA.preserveSet<AllAnalysesOn<Loop>>();
1093 return PA;
1096 /// Pass that redirects scalar reads to array elements that are known to contain
1097 /// the same value.
1099 /// This reduces the number of scalar accesses and therefore potentially
1100 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1101 /// scalar definition are redirected (We currently do not care about removing
1102 /// the write in this case). This is also useful for the main DeLICM pass as
1103 /// there are less scalars to be mapped.
1104 class ForwardOpTreeWrapperPass final : public ScopPass {
1105 private:
1106 /// The pass implementation, also holding per-scop data.
1107 std::unique_ptr<ForwardOpTreeImpl> Impl;
1109 public:
1110 static char ID;
1112 explicit ForwardOpTreeWrapperPass() : ScopPass(ID) {}
1113 ForwardOpTreeWrapperPass(const ForwardOpTreeWrapperPass &) = delete;
1114 ForwardOpTreeWrapperPass &
1115 operator=(const ForwardOpTreeWrapperPass &) = delete;
1117 void getAnalysisUsage(AnalysisUsage &AU) const override {
1118 AU.addRequiredTransitive<ScopInfoRegionPass>();
1119 AU.addRequired<LoopInfoWrapperPass>();
1120 AU.setPreservesAll();
1123 bool runOnScop(Scop &S) override {
1124 // Free resources for previous SCoP's computation, if not yet done.
1125 releaseMemory();
1127 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1129 Impl = runForwardOpTree(S, LI);
1131 return false;
1134 void printScop(raw_ostream &OS, Scop &S) const override {
1135 if (!Impl)
1136 return;
1138 assert(Impl->getScop() == &S);
1139 Impl->print(OS);
1142 void releaseMemory() override { Impl.reset(); }
1143 }; // class ForwardOpTree
1145 char ForwardOpTreeWrapperPass::ID;
1147 /// Print result from ForwardOpTreeWrapperPass.
1148 class ForwardOpTreePrinterLegacyPass final : public ScopPass {
1149 public:
1150 static char ID;
1152 ForwardOpTreePrinterLegacyPass() : ForwardOpTreePrinterLegacyPass(outs()){};
1153 explicit ForwardOpTreePrinterLegacyPass(llvm::raw_ostream &OS)
1154 : ScopPass(ID), OS(OS) {}
1156 bool runOnScop(Scop &S) override {
1157 ForwardOpTreeWrapperPass &P = getAnalysis<ForwardOpTreeWrapperPass>();
1159 OS << "Printing analysis '" << P.getPassName() << "' for region: '"
1160 << S.getRegion().getNameStr() << "' in function '"
1161 << S.getFunction().getName() << "':\n";
1162 P.printScop(OS, S);
1164 return false;
1167 void getAnalysisUsage(AnalysisUsage &AU) const override {
1168 ScopPass::getAnalysisUsage(AU);
1169 AU.addRequired<ForwardOpTreeWrapperPass>();
1170 AU.setPreservesAll();
1173 private:
1174 llvm::raw_ostream &OS;
1177 char ForwardOpTreePrinterLegacyPass::ID = 0;
1178 } // namespace
1180 Pass *polly::createForwardOpTreeWrapperPass() {
1181 return new ForwardOpTreeWrapperPass();
1184 Pass *polly::createForwardOpTreePrinterLegacyPass(llvm::raw_ostream &OS) {
1185 return new ForwardOpTreePrinterLegacyPass(OS);
1188 llvm::PreservedAnalyses ForwardOpTreePass::run(Scop &S,
1189 ScopAnalysisManager &SAM,
1190 ScopStandardAnalysisResults &SAR,
1191 SPMUpdater &U) {
1192 return runForwardOpTreeUsingNPM(S, SAM, SAR, U, nullptr);
1195 llvm::PreservedAnalyses
1196 ForwardOpTreePrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
1197 ScopStandardAnalysisResults &SAR, SPMUpdater &U) {
1198 return runForwardOpTreeUsingNPM(S, SAM, SAR, U, &OS);
1201 INITIALIZE_PASS_BEGIN(ForwardOpTreeWrapperPass, "polly-optree",
1202 "Polly - Forward operand tree", false, false)
1203 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1204 INITIALIZE_PASS_END(ForwardOpTreeWrapperPass, "polly-optree",
1205 "Polly - Forward operand tree", false, false)
1207 INITIALIZE_PASS_BEGIN(ForwardOpTreePrinterLegacyPass, "polly-print-optree",
1208 "Polly - Print forward operand tree result", false, false)
1209 INITIALIZE_PASS_DEPENDENCY(ForwardOpTreeWrapperPass)
1210 INITIALIZE_PASS_END(ForwardOpTreePrinterLegacyPass, "polly-print-optree",
1211 "Polly - Print forward operand tree result", false, false)