1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This Pass handles loop interchange transform.
10 // This pass interchanges loops to provide a more cache-friendly memory access
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Scalar/LoopInterchange.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Analysis/DependenceAnalysis.h"
22 #include "llvm/Analysis/LoopCacheAnalysis.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/LoopNestAnalysis.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
27 #include "llvm/Analysis/ScalarEvolution.h"
28 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/User.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Scalar/LoopPassManager.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/LoopUtils.h"
52 #define DEBUG_TYPE "loop-interchange"
54 STATISTIC(LoopsInterchanged
, "Number of loops interchanged");
56 static cl::opt
<int> LoopInterchangeCostThreshold(
57 "loop-interchange-threshold", cl::init(0), cl::Hidden
,
58 cl::desc("Interchange if you gain more than this number"));
62 using LoopVector
= SmallVector
<Loop
*, 8>;
64 // TODO: Check if we can use a sparse matrix here.
65 using CharMatrix
= std::vector
<std::vector
<char>>;
67 } // end anonymous namespace
69 // Maximum number of dependencies that can be handled in the dependency matrix.
70 static const unsigned MaxMemInstrCount
= 100;
72 // Maximum loop depth supported.
73 static const unsigned MaxLoopNestDepth
= 10;
76 static void printDepMatrix(CharMatrix
&DepMatrix
) {
77 for (auto &Row
: DepMatrix
) {
79 LLVM_DEBUG(dbgs() << D
<< " ");
80 LLVM_DEBUG(dbgs() << "\n");
85 static bool populateDependencyMatrix(CharMatrix
&DepMatrix
, unsigned Level
,
86 Loop
*L
, DependenceInfo
*DI
,
87 ScalarEvolution
*SE
) {
88 using ValueVector
= SmallVector
<Value
*, 16>;
93 for (BasicBlock
*BB
: L
->blocks()) {
94 // Scan the BB and collect legal loads and stores.
95 for (Instruction
&I
: *BB
) {
96 if (!isa
<Instruction
>(I
))
98 if (auto *Ld
= dyn_cast
<LoadInst
>(&I
)) {
101 MemInstr
.push_back(&I
);
102 } else if (auto *St
= dyn_cast
<StoreInst
>(&I
)) {
105 MemInstr
.push_back(&I
);
110 LLVM_DEBUG(dbgs() << "Found " << MemInstr
.size()
111 << " Loads and Stores to analyze\n");
113 ValueVector::iterator I
, IE
, J
, JE
;
116 for (I
= MemInstr
.begin(), IE
= MemInstr
.end(); I
!= IE
; ++I
) {
117 for (J
= I
, JE
= MemInstr
.end(); J
!= JE
; ++J
) {
118 std::vector
<char> Dep
;
119 Instruction
*Src
= cast
<Instruction
>(*I
);
120 Instruction
*Dst
= cast
<Instruction
>(*J
);
121 // Ignore Input dependencies.
122 if (isa
<LoadInst
>(Src
) && isa
<LoadInst
>(Dst
))
124 // Track Output, Flow, and Anti dependencies.
125 if (auto D
= DI
->depends(Src
, Dst
, true)) {
126 assert(D
->isOrdered() && "Expected an output, flow or anti dep.");
127 // If the direction vector is negative, normalize it to
128 // make it non-negative.
129 if (D
->normalize(SE
))
130 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
131 LLVM_DEBUG(StringRef DepType
=
132 D
->isFlow() ? "flow" : D
->isAnti() ? "anti" : "output";
133 dbgs() << "Found " << DepType
134 << " dependency between Src and Dst\n"
135 << " Src:" << *Src
<< "\n Dst:" << *Dst
<< '\n');
136 unsigned Levels
= D
->getLevels();
138 for (unsigned II
= 1; II
<= Levels
; ++II
) {
139 if (D
->isScalar(II
)) {
141 Dep
.push_back(Direction
);
143 unsigned Dir
= D
->getDirection(II
);
144 if (Dir
== Dependence::DVEntry::LT
||
145 Dir
== Dependence::DVEntry::LE
)
147 else if (Dir
== Dependence::DVEntry::GT
||
148 Dir
== Dependence::DVEntry::GE
)
150 else if (Dir
== Dependence::DVEntry::EQ
)
154 Dep
.push_back(Direction
);
157 while (Dep
.size() != Level
) {
161 // Make sure we only add unique entries to the dependency matrix.
162 if (Seen
.insert(StringRef(Dep
.data(), Dep
.size())).second
)
163 DepMatrix
.push_back(Dep
);
165 if (DepMatrix
.size() > MaxMemInstrCount
) {
166 LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
167 << " dependencies inside loop\n");
177 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
178 // matrix by exchanging the two columns.
179 static void interChangeDependencies(CharMatrix
&DepMatrix
, unsigned FromIndx
,
181 for (unsigned I
= 0, E
= DepMatrix
.size(); I
< E
; ++I
)
182 std::swap(DepMatrix
[I
][ToIndx
], DepMatrix
[I
][FromIndx
]);
185 // After interchanging, check if the direction vector is valid.
186 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
187 // if the direction matrix, after the same permutation is applied to its
188 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
189 static bool isLexicographicallyPositive(std::vector
<char> &DV
) {
190 for (unsigned char Direction
: DV
) {
191 if (Direction
== '<')
193 if (Direction
== '>' || Direction
== '*')
199 // Checks if it is legal to interchange 2 loops.
200 static bool isLegalToInterChangeLoops(CharMatrix
&DepMatrix
,
201 unsigned InnerLoopId
,
202 unsigned OuterLoopId
) {
203 unsigned NumRows
= DepMatrix
.size();
204 std::vector
<char> Cur
;
205 // For each row check if it is valid to interchange.
206 for (unsigned Row
= 0; Row
< NumRows
; ++Row
) {
207 // Create temporary DepVector check its lexicographical order
208 // before and after swapping OuterLoop vs InnerLoop
209 Cur
= DepMatrix
[Row
];
210 if (!isLexicographicallyPositive(Cur
))
212 std::swap(Cur
[InnerLoopId
], Cur
[OuterLoopId
]);
213 if (!isLexicographicallyPositive(Cur
))
219 static void populateWorklist(Loop
&L
, LoopVector
&LoopList
) {
220 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
221 << L
.getHeader()->getParent()->getName() << " Loop: %"
222 << L
.getHeader()->getName() << '\n');
223 assert(LoopList
.empty() && "LoopList should initially be empty!");
224 Loop
*CurrentLoop
= &L
;
225 const std::vector
<Loop
*> *Vec
= &CurrentLoop
->getSubLoops();
226 while (!Vec
->empty()) {
227 // The current loop has multiple subloops in it hence it is not tightly
229 // Discard all loops above it added into Worklist.
230 if (Vec
->size() != 1) {
235 LoopList
.push_back(CurrentLoop
);
236 CurrentLoop
= Vec
->front();
237 Vec
= &CurrentLoop
->getSubLoops();
239 LoopList
.push_back(CurrentLoop
);
242 static bool hasMinimumLoopDepth(SmallVectorImpl
<Loop
*> &LoopList
) {
243 unsigned LoopNestDepth
= LoopList
.size();
244 if (LoopNestDepth
< 2) {
245 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
252 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
253 class LoopInterchangeLegality
{
255 LoopInterchangeLegality(Loop
*Outer
, Loop
*Inner
, ScalarEvolution
*SE
,
256 OptimizationRemarkEmitter
*ORE
)
257 : OuterLoop(Outer
), InnerLoop(Inner
), SE(SE
), ORE(ORE
) {}
259 /// Check if the loops can be interchanged.
260 bool canInterchangeLoops(unsigned InnerLoopId
, unsigned OuterLoopId
,
261 CharMatrix
&DepMatrix
);
263 /// Discover induction PHIs in the header of \p L. Induction
264 /// PHIs are added to \p Inductions.
265 bool findInductions(Loop
*L
, SmallVectorImpl
<PHINode
*> &Inductions
);
267 /// Check if the loop structure is understood. We do not handle triangular
269 bool isLoopStructureUnderstood();
271 bool currentLimitations();
273 const SmallPtrSetImpl
<PHINode
*> &getOuterInnerReductions() const {
274 return OuterInnerReductions
;
277 const SmallVectorImpl
<PHINode
*> &getInnerLoopInductions() const {
278 return InnerLoopInductions
;
282 bool tightlyNested(Loop
*Outer
, Loop
*Inner
);
283 bool containsUnsafeInstructions(BasicBlock
*BB
);
285 /// Discover induction and reduction PHIs in the header of \p L. Induction
286 /// PHIs are added to \p Inductions, reductions are added to
287 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
288 /// to be passed as \p InnerLoop.
289 bool findInductionAndReductions(Loop
*L
,
290 SmallVector
<PHINode
*, 8> &Inductions
,
298 /// Interface to emit optimization remarks.
299 OptimizationRemarkEmitter
*ORE
;
301 /// Set of reduction PHIs taking part of a reduction across the inner and
303 SmallPtrSet
<PHINode
*, 4> OuterInnerReductions
;
305 /// Set of inner loop induction PHIs
306 SmallVector
<PHINode
*, 8> InnerLoopInductions
;
309 /// LoopInterchangeProfitability checks if it is profitable to interchange the
311 class LoopInterchangeProfitability
{
313 LoopInterchangeProfitability(Loop
*Outer
, Loop
*Inner
, ScalarEvolution
*SE
,
314 OptimizationRemarkEmitter
*ORE
)
315 : OuterLoop(Outer
), InnerLoop(Inner
), SE(SE
), ORE(ORE
) {}
317 /// Check if the loop interchange is profitable.
318 bool isProfitable(const Loop
*InnerLoop
, const Loop
*OuterLoop
,
319 unsigned InnerLoopId
, unsigned OuterLoopId
,
320 CharMatrix
&DepMatrix
,
321 const DenseMap
<const Loop
*, unsigned> &CostMap
,
322 std::unique_ptr
<CacheCost
> &CC
);
325 int getInstrOrderCost();
326 std::optional
<bool> isProfitablePerLoopCacheAnalysis(
327 const DenseMap
<const Loop
*, unsigned> &CostMap
,
328 std::unique_ptr
<CacheCost
> &CC
);
329 std::optional
<bool> isProfitablePerInstrOrderCost();
330 std::optional
<bool> isProfitableForVectorization(unsigned InnerLoopId
,
331 unsigned OuterLoopId
,
332 CharMatrix
&DepMatrix
);
339 /// Interface to emit optimization remarks.
340 OptimizationRemarkEmitter
*ORE
;
343 /// LoopInterchangeTransform interchanges the loop.
344 class LoopInterchangeTransform
{
346 LoopInterchangeTransform(Loop
*Outer
, Loop
*Inner
, ScalarEvolution
*SE
,
347 LoopInfo
*LI
, DominatorTree
*DT
,
348 const LoopInterchangeLegality
&LIL
)
349 : OuterLoop(Outer
), InnerLoop(Inner
), SE(SE
), LI(LI
), DT(DT
), LIL(LIL
) {}
351 /// Interchange OuterLoop and InnerLoop.
353 void restructureLoops(Loop
*NewInner
, Loop
*NewOuter
,
354 BasicBlock
*OrigInnerPreHeader
,
355 BasicBlock
*OrigOuterPreHeader
);
356 void removeChildLoop(Loop
*OuterLoop
, Loop
*InnerLoop
);
359 bool adjustLoopLinks();
360 bool adjustLoopBranches();
371 const LoopInterchangeLegality
&LIL
;
374 struct LoopInterchange
{
375 ScalarEvolution
*SE
= nullptr;
376 LoopInfo
*LI
= nullptr;
377 DependenceInfo
*DI
= nullptr;
378 DominatorTree
*DT
= nullptr;
379 std::unique_ptr
<CacheCost
> CC
= nullptr;
381 /// Interface to emit optimization remarks.
382 OptimizationRemarkEmitter
*ORE
;
384 LoopInterchange(ScalarEvolution
*SE
, LoopInfo
*LI
, DependenceInfo
*DI
,
385 DominatorTree
*DT
, std::unique_ptr
<CacheCost
> &CC
,
386 OptimizationRemarkEmitter
*ORE
)
387 : SE(SE
), LI(LI
), DI(DI
), DT(DT
), CC(std::move(CC
)), ORE(ORE
) {}
390 if (L
->getParentLoop())
392 SmallVector
<Loop
*, 8> LoopList
;
393 populateWorklist(*L
, LoopList
);
394 return processLoopList(LoopList
);
397 bool run(LoopNest
&LN
) {
398 SmallVector
<Loop
*, 8> LoopList(LN
.getLoops());
399 for (unsigned I
= 1; I
< LoopList
.size(); ++I
)
400 if (LoopList
[I
]->getParentLoop() != LoopList
[I
- 1])
402 return processLoopList(LoopList
);
405 bool isComputableLoopNest(ArrayRef
<Loop
*> LoopList
) {
406 for (Loop
*L
: LoopList
) {
407 const SCEV
*ExitCountOuter
= SE
->getBackedgeTakenCount(L
);
408 if (isa
<SCEVCouldNotCompute
>(ExitCountOuter
)) {
409 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
412 if (L
->getNumBackEdges() != 1) {
413 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
416 if (!L
->getExitingBlock()) {
417 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
424 unsigned selectLoopForInterchange(ArrayRef
<Loop
*> LoopList
) {
425 // TODO: Add a better heuristic to select the loop to be interchanged based
426 // on the dependence matrix. Currently we select the innermost loop.
427 return LoopList
.size() - 1;
430 bool processLoopList(SmallVectorImpl
<Loop
*> &LoopList
) {
431 bool Changed
= false;
433 // Ensure minimum loop nest depth.
434 assert(hasMinimumLoopDepth(LoopList
) && "Loop nest does not meet minimum depth.");
436 unsigned LoopNestDepth
= LoopList
.size();
437 if (LoopNestDepth
> MaxLoopNestDepth
) {
438 LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
439 << MaxLoopNestDepth
<< "\n");
442 if (!isComputableLoopNest(LoopList
)) {
443 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
447 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
450 CharMatrix DependencyMatrix
;
451 Loop
*OuterMostLoop
= *(LoopList
.begin());
452 if (!populateDependencyMatrix(DependencyMatrix
, LoopNestDepth
,
453 OuterMostLoop
, DI
, SE
)) {
454 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
458 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
459 printDepMatrix(DependencyMatrix
));
461 // Get the Outermost loop exit.
462 BasicBlock
*LoopNestExit
= OuterMostLoop
->getExitBlock();
464 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
468 unsigned SelecLoopId
= selectLoopForInterchange(LoopList
);
469 // Obtain the loop vector returned from loop cache analysis beforehand,
470 // and put each <Loop, index> pair into a map for constant time query
471 // later. Indices in loop vector reprsent the optimal order of the
472 // corresponding loop, e.g., given a loopnest with depth N, index 0
473 // indicates the loop should be placed as the outermost loop and index N
474 // indicates the loop should be placed as the innermost loop.
476 // For the old pass manager CacheCost would be null.
477 DenseMap
<const Loop
*, unsigned> CostMap
;
479 const auto &LoopCosts
= CC
->getLoopCosts();
480 for (unsigned i
= 0; i
< LoopCosts
.size(); i
++) {
481 CostMap
[LoopCosts
[i
].first
] = i
;
484 // We try to achieve the globally optimal memory access for the loopnest,
485 // and do interchange based on a bubble-sort fasion. We start from
486 // the innermost loop, move it outwards to the best possible position
487 // and repeat this process.
488 for (unsigned j
= SelecLoopId
; j
> 0; j
--) {
489 bool ChangedPerIter
= false;
490 for (unsigned i
= SelecLoopId
; i
> SelecLoopId
- j
; i
--) {
491 bool Interchanged
= processLoop(LoopList
[i
], LoopList
[i
- 1], i
, i
- 1,
492 DependencyMatrix
, CostMap
);
495 // Loops interchanged, update LoopList accordingly.
496 std::swap(LoopList
[i
- 1], LoopList
[i
]);
497 // Update the DependencyMatrix
498 interChangeDependencies(DependencyMatrix
, i
, i
- 1);
500 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
501 printDepMatrix(DependencyMatrix
));
503 ChangedPerIter
|= Interchanged
;
504 Changed
|= Interchanged
;
506 // Early abort if there was no interchange during an entire round of
507 // moving loops outwards.
514 bool processLoop(Loop
*InnerLoop
, Loop
*OuterLoop
, unsigned InnerLoopId
,
515 unsigned OuterLoopId
,
516 std::vector
<std::vector
<char>> &DependencyMatrix
,
517 const DenseMap
<const Loop
*, unsigned> &CostMap
) {
518 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
519 << " and OuterLoopId = " << OuterLoopId
<< "\n");
520 LoopInterchangeLegality
LIL(OuterLoop
, InnerLoop
, SE
, ORE
);
521 if (!LIL
.canInterchangeLoops(InnerLoopId
, OuterLoopId
, DependencyMatrix
)) {
522 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
525 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
526 LoopInterchangeProfitability
LIP(OuterLoop
, InnerLoop
, SE
, ORE
);
527 if (!LIP
.isProfitable(InnerLoop
, OuterLoop
, InnerLoopId
, OuterLoopId
,
528 DependencyMatrix
, CostMap
, CC
)) {
529 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
534 return OptimizationRemark(DEBUG_TYPE
, "Interchanged",
535 InnerLoop
->getStartLoc(),
536 InnerLoop
->getHeader())
537 << "Loop interchanged with enclosing loop.";
540 LoopInterchangeTransform
LIT(OuterLoop
, InnerLoop
, SE
, LI
, DT
, LIL
);
542 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
545 llvm::formLCSSARecursively(*OuterLoop
, *DT
, LI
, SE
);
550 } // end anonymous namespace
552 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock
*BB
) {
553 return any_of(*BB
, [](const Instruction
&I
) {
554 return I
.mayHaveSideEffects() || I
.mayReadFromMemory();
558 bool LoopInterchangeLegality::tightlyNested(Loop
*OuterLoop
, Loop
*InnerLoop
) {
559 BasicBlock
*OuterLoopHeader
= OuterLoop
->getHeader();
560 BasicBlock
*InnerLoopPreHeader
= InnerLoop
->getLoopPreheader();
561 BasicBlock
*OuterLoopLatch
= OuterLoop
->getLoopLatch();
563 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
565 // A perfectly nested loop will not have any branch in between the outer and
566 // inner block i.e. outer header will branch to either inner preheader and
568 BranchInst
*OuterLoopHeaderBI
=
569 dyn_cast
<BranchInst
>(OuterLoopHeader
->getTerminator());
570 if (!OuterLoopHeaderBI
)
573 for (BasicBlock
*Succ
: successors(OuterLoopHeaderBI
))
574 if (Succ
!= InnerLoopPreHeader
&& Succ
!= InnerLoop
->getHeader() &&
575 Succ
!= OuterLoopLatch
)
578 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
579 // We do not have any basic block in between now make sure the outer header
580 // and outer loop latch doesn't contain any unsafe instructions.
581 if (containsUnsafeInstructions(OuterLoopHeader
) ||
582 containsUnsafeInstructions(OuterLoopLatch
))
585 // Also make sure the inner loop preheader does not contain any unsafe
586 // instructions. Note that all instructions in the preheader will be moved to
587 // the outer loop header when interchanging.
588 if (InnerLoopPreHeader
!= OuterLoopHeader
&&
589 containsUnsafeInstructions(InnerLoopPreHeader
))
592 BasicBlock
*InnerLoopExit
= InnerLoop
->getExitBlock();
593 // Ensure the inner loop exit block flows to the outer loop latch possibly
594 // through empty blocks.
595 const BasicBlock
&SuccInner
=
596 LoopNest::skipEmptyBlockUntil(InnerLoopExit
, OuterLoopLatch
);
597 if (&SuccInner
!= OuterLoopLatch
) {
598 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
599 << " does not lead to the outer loop latch.\n";);
602 // The inner loop exit block does flow to the outer loop latch and not some
603 // other BBs, now make sure it contains safe instructions, since it will be
604 // moved into the (new) inner loop after interchange.
605 if (containsUnsafeInstructions(InnerLoopExit
))
608 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
609 // We have a perfect loop nest.
613 bool LoopInterchangeLegality::isLoopStructureUnderstood() {
614 BasicBlock
*InnerLoopPreheader
= InnerLoop
->getLoopPreheader();
615 for (PHINode
*InnerInduction
: InnerLoopInductions
) {
616 unsigned Num
= InnerInduction
->getNumOperands();
617 for (unsigned i
= 0; i
< Num
; ++i
) {
618 Value
*Val
= InnerInduction
->getOperand(i
);
619 if (isa
<Constant
>(Val
))
621 Instruction
*I
= dyn_cast
<Instruction
>(Val
);
624 // TODO: Handle triangular loops.
625 // e.g. for(int i=0;i<N;i++)
626 // for(int j=i;j<N;j++)
627 unsigned IncomBlockIndx
= PHINode::getIncomingValueNumForOperand(i
);
628 if (InnerInduction
->getIncomingBlock(IncomBlockIndx
) ==
629 InnerLoopPreheader
&&
630 !OuterLoop
->isLoopInvariant(I
)) {
636 // TODO: Handle triangular loops of another form.
637 // e.g. for(int i=0;i<N;i++)
638 // for(int j=0;j<i;j++)
640 // for(int i=0;i<N;i++)
641 // for(int j=0;j*i<N;j++)
642 BasicBlock
*InnerLoopLatch
= InnerLoop
->getLoopLatch();
643 BranchInst
*InnerLoopLatchBI
=
644 dyn_cast
<BranchInst
>(InnerLoopLatch
->getTerminator());
645 if (!InnerLoopLatchBI
->isConditional())
647 if (CmpInst
*InnerLoopCmp
=
648 dyn_cast
<CmpInst
>(InnerLoopLatchBI
->getCondition())) {
649 Value
*Op0
= InnerLoopCmp
->getOperand(0);
650 Value
*Op1
= InnerLoopCmp
->getOperand(1);
652 // LHS and RHS of the inner loop exit condition, e.g.,
653 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
654 Value
*Left
= nullptr;
655 Value
*Right
= nullptr;
657 // Check if V only involves inner loop induction variable.
658 // Return true if V is InnerInduction, or a cast from
659 // InnerInduction, or a binary operator that involves
660 // InnerInduction and a constant.
661 std::function
<bool(Value
*)> IsPathToInnerIndVar
;
662 IsPathToInnerIndVar
= [this, &IsPathToInnerIndVar
](const Value
*V
) -> bool {
663 if (llvm::is_contained(InnerLoopInductions
, V
))
665 if (isa
<Constant
>(V
))
667 const Instruction
*I
= dyn_cast
<Instruction
>(V
);
670 if (isa
<CastInst
>(I
))
671 return IsPathToInnerIndVar(I
->getOperand(0));
672 if (isa
<BinaryOperator
>(I
))
673 return IsPathToInnerIndVar(I
->getOperand(0)) &&
674 IsPathToInnerIndVar(I
->getOperand(1));
678 // In case of multiple inner loop indvars, it is okay if LHS and RHS
679 // are both inner indvar related variables.
680 if (IsPathToInnerIndVar(Op0
) && IsPathToInnerIndVar(Op1
))
683 // Otherwise we check if the cmp instruction compares an inner indvar
684 // related variable (Left) with a outer loop invariant (Right).
685 if (IsPathToInnerIndVar(Op0
) && !isa
<Constant
>(Op0
)) {
688 } else if (IsPathToInnerIndVar(Op1
) && !isa
<Constant
>(Op1
)) {
696 const SCEV
*S
= SE
->getSCEV(Right
);
697 if (!SE
->isLoopInvariant(S
, OuterLoop
))
704 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
706 static Value
*followLCSSA(Value
*SV
) {
707 PHINode
*PHI
= dyn_cast
<PHINode
>(SV
);
711 if (PHI
->getNumIncomingValues() != 1)
713 return followLCSSA(PHI
->getIncomingValue(0));
716 // Check V's users to see if it is involved in a reduction in L.
717 static PHINode
*findInnerReductionPhi(Loop
*L
, Value
*V
) {
718 // Reduction variables cannot be constants.
719 if (isa
<Constant
>(V
))
722 for (Value
*User
: V
->users()) {
723 if (PHINode
*PHI
= dyn_cast
<PHINode
>(User
)) {
724 if (PHI
->getNumIncomingValues() == 1)
726 RecurrenceDescriptor RD
;
727 if (RecurrenceDescriptor::isReductionPHI(PHI
, L
, RD
)) {
728 // Detect floating point reduction only when it can be reordered.
729 if (RD
.getExactFPMathInst() != nullptr)
740 bool LoopInterchangeLegality::findInductionAndReductions(
741 Loop
*L
, SmallVector
<PHINode
*, 8> &Inductions
, Loop
*InnerLoop
) {
742 if (!L
->getLoopLatch() || !L
->getLoopPredecessor())
744 for (PHINode
&PHI
: L
->getHeader()->phis()) {
745 InductionDescriptor ID
;
746 if (InductionDescriptor::isInductionPHI(&PHI
, L
, SE
, ID
))
747 Inductions
.push_back(&PHI
);
749 // PHIs in inner loops need to be part of a reduction in the outer loop,
750 // discovered when checking the PHIs of the outer loop earlier.
752 if (!OuterInnerReductions
.count(&PHI
)) {
753 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
754 "across the outer loop.\n");
758 assert(PHI
.getNumIncomingValues() == 2 &&
759 "Phis in loop header should have exactly 2 incoming values");
760 // Check if we have a PHI node in the outer loop that has a reduction
761 // result from the inner loop as an incoming value.
762 Value
*V
= followLCSSA(PHI
.getIncomingValueForBlock(L
->getLoopLatch()));
763 PHINode
*InnerRedPhi
= findInnerReductionPhi(InnerLoop
, V
);
765 !llvm::is_contained(InnerRedPhi
->incoming_values(), &PHI
)) {
768 << "Failed to recognize PHI as an induction or reduction.\n");
771 OuterInnerReductions
.insert(&PHI
);
772 OuterInnerReductions
.insert(InnerRedPhi
);
779 // This function indicates the current limitations in the transform as a result
780 // of which we do not proceed.
781 bool LoopInterchangeLegality::currentLimitations() {
782 BasicBlock
*InnerLoopLatch
= InnerLoop
->getLoopLatch();
784 // transform currently expects the loop latches to also be the exiting
786 if (InnerLoop
->getExitingBlock() != InnerLoopLatch
||
787 OuterLoop
->getExitingBlock() != OuterLoop
->getLoopLatch() ||
788 !isa
<BranchInst
>(InnerLoopLatch
->getTerminator()) ||
789 !isa
<BranchInst
>(OuterLoop
->getLoopLatch()->getTerminator())) {
791 dbgs() << "Loops where the latch is not the exiting block are not"
792 << " supported currently.\n");
794 return OptimizationRemarkMissed(DEBUG_TYPE
, "ExitingNotLatch",
795 OuterLoop
->getStartLoc(),
796 OuterLoop
->getHeader())
797 << "Loops where the latch is not the exiting block cannot be"
798 " interchange currently.";
803 SmallVector
<PHINode
*, 8> Inductions
;
804 if (!findInductionAndReductions(OuterLoop
, Inductions
, InnerLoop
)) {
806 dbgs() << "Only outer loops with induction or reduction PHI nodes "
807 << "are supported currently.\n");
809 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedPHIOuter",
810 OuterLoop
->getStartLoc(),
811 OuterLoop
->getHeader())
812 << "Only outer loops with induction or reduction PHI nodes can be"
813 " interchanged currently.";
819 // For multi-level loop nests, make sure that all phi nodes for inner loops
820 // at all levels can be recognized as a induction or reduction phi. Bail out
821 // if a phi node at a certain nesting level cannot be properly recognized.
822 Loop
*CurLevelLoop
= OuterLoop
;
823 while (!CurLevelLoop
->getSubLoops().empty()) {
824 // We already made sure that the loop nest is tightly nested.
825 CurLevelLoop
= CurLevelLoop
->getSubLoops().front();
826 if (!findInductionAndReductions(CurLevelLoop
, Inductions
, nullptr)) {
828 dbgs() << "Only inner loops with induction or reduction PHI nodes "
829 << "are supported currently.\n");
831 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedPHIInner",
832 CurLevelLoop
->getStartLoc(),
833 CurLevelLoop
->getHeader())
834 << "Only inner loops with induction or reduction PHI nodes can be"
835 " interchange currently.";
841 // TODO: Triangular loops are not handled for now.
842 if (!isLoopStructureUnderstood()) {
843 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
845 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedStructureInner",
846 InnerLoop
->getStartLoc(),
847 InnerLoop
->getHeader())
848 << "Inner loop structure not understood currently.";
856 bool LoopInterchangeLegality::findInductions(
857 Loop
*L
, SmallVectorImpl
<PHINode
*> &Inductions
) {
858 for (PHINode
&PHI
: L
->getHeader()->phis()) {
859 InductionDescriptor ID
;
860 if (InductionDescriptor::isInductionPHI(&PHI
, L
, SE
, ID
))
861 Inductions
.push_back(&PHI
);
863 return !Inductions
.empty();
866 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
867 // users are either reduction PHIs or PHIs outside the outer loop (which means
868 // the we are only interested in the final value after the loop).
870 areInnerLoopExitPHIsSupported(Loop
*InnerL
, Loop
*OuterL
,
871 SmallPtrSetImpl
<PHINode
*> &Reductions
) {
872 BasicBlock
*InnerExit
= OuterL
->getUniqueExitBlock();
873 for (PHINode
&PHI
: InnerExit
->phis()) {
874 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
875 if (PHI
.getNumIncomingValues() > 1)
877 if (any_of(PHI
.users(), [&Reductions
, OuterL
](User
*U
) {
878 PHINode
*PN
= dyn_cast
<PHINode
>(U
);
880 (!Reductions
.count(PN
) && OuterL
->contains(PN
->getParent()));
888 // We currently support LCSSA PHI nodes in the outer loop exit, if their
889 // incoming values do not come from the outer loop latch or if the
890 // outer loop latch has a single predecessor. In that case, the value will
891 // be available if both the inner and outer loop conditions are true, which
892 // will still be true after interchanging. If we have multiple predecessor,
893 // that may not be the case, e.g. because the outer loop latch may be executed
894 // if the inner loop is not executed.
895 static bool areOuterLoopExitPHIsSupported(Loop
*OuterLoop
, Loop
*InnerLoop
) {
896 BasicBlock
*LoopNestExit
= OuterLoop
->getUniqueExitBlock();
897 for (PHINode
&PHI
: LoopNestExit
->phis()) {
898 for (unsigned i
= 0; i
< PHI
.getNumIncomingValues(); i
++) {
899 Instruction
*IncomingI
= dyn_cast
<Instruction
>(PHI
.getIncomingValue(i
));
900 if (!IncomingI
|| IncomingI
->getParent() != OuterLoop
->getLoopLatch())
903 // The incoming value is defined in the outer loop latch. Currently we
904 // only support that in case the outer loop latch has a single predecessor.
905 // This guarantees that the outer loop latch is executed if and only if
906 // the inner loop is executed (because tightlyNested() guarantees that the
907 // outer loop header only branches to the inner loop or the outer loop
909 // FIXME: We could weaken this logic and allow multiple predecessors,
910 // if the values are produced outside the loop latch. We would need
911 // additional logic to update the PHI nodes in the exit block as
913 if (OuterLoop
->getLoopLatch()->getUniquePredecessor() == nullptr)
920 // In case of multi-level nested loops, it may occur that lcssa phis exist in
921 // the latch of InnerLoop, i.e., when defs of the incoming values are further
922 // inside the loopnest. Sometimes those incoming values are not available
923 // after interchange, since the original inner latch will become the new outer
924 // latch which may have predecessor paths that do not include those incoming
926 // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
927 // multi-level loop nests.
928 static bool areInnerLoopLatchPHIsSupported(Loop
*OuterLoop
, Loop
*InnerLoop
) {
929 if (InnerLoop
->getSubLoops().empty())
931 // If the original outer latch has only one predecessor, then values defined
932 // further inside the looploop, e.g., in the innermost loop, will be available
933 // at the new outer latch after interchange.
934 if (OuterLoop
->getLoopLatch()->getUniquePredecessor() != nullptr)
937 // The outer latch has more than one predecessors, i.e., the inner
938 // exit and the inner header.
939 // PHI nodes in the inner latch are lcssa phis where the incoming values
940 // are defined further inside the loopnest. Check if those phis are used
941 // in the original inner latch. If that is the case then bail out since
942 // those incoming values may not be available at the new outer latch.
943 BasicBlock
*InnerLoopLatch
= InnerLoop
->getLoopLatch();
944 for (PHINode
&PHI
: InnerLoopLatch
->phis()) {
945 for (auto *U
: PHI
.users()) {
946 Instruction
*UI
= cast
<Instruction
>(U
);
947 if (InnerLoopLatch
== UI
->getParent())
954 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId
,
955 unsigned OuterLoopId
,
956 CharMatrix
&DepMatrix
) {
957 if (!isLegalToInterChangeLoops(DepMatrix
, InnerLoopId
, OuterLoopId
)) {
958 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
959 << " and OuterLoopId = " << OuterLoopId
960 << " due to dependence\n");
962 return OptimizationRemarkMissed(DEBUG_TYPE
, "Dependence",
963 InnerLoop
->getStartLoc(),
964 InnerLoop
->getHeader())
965 << "Cannot interchange loops due to dependences.";
969 // Check if outer and inner loop contain legal instructions only.
970 for (auto *BB
: OuterLoop
->blocks())
971 for (Instruction
&I
: BB
->instructionsWithoutDebug())
972 if (CallInst
*CI
= dyn_cast
<CallInst
>(&I
)) {
973 // readnone functions do not prevent interchanging.
974 if (CI
->onlyWritesMemory())
977 dbgs() << "Loops with call instructions cannot be interchanged "
980 return OptimizationRemarkMissed(DEBUG_TYPE
, "CallInst",
983 << "Cannot interchange loops due to call instruction.";
989 if (!findInductions(InnerLoop
, InnerLoopInductions
)) {
990 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
994 if (!areInnerLoopLatchPHIsSupported(OuterLoop
, InnerLoop
)) {
995 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
997 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedInnerLatchPHI",
998 InnerLoop
->getStartLoc(),
999 InnerLoop
->getHeader())
1000 << "Cannot interchange loops because unsupported PHI nodes found "
1001 "in inner loop latch.";
1006 // TODO: The loops could not be interchanged due to current limitations in the
1007 // transform module.
1008 if (currentLimitations()) {
1009 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1013 // Check if the loops are tightly nested.
1014 if (!tightlyNested(OuterLoop
, InnerLoop
)) {
1015 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1017 return OptimizationRemarkMissed(DEBUG_TYPE
, "NotTightlyNested",
1018 InnerLoop
->getStartLoc(),
1019 InnerLoop
->getHeader())
1020 << "Cannot interchange loops because they are not tightly "
1026 if (!areInnerLoopExitPHIsSupported(OuterLoop
, InnerLoop
,
1027 OuterInnerReductions
)) {
1028 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1030 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedExitPHI",
1031 InnerLoop
->getStartLoc(),
1032 InnerLoop
->getHeader())
1033 << "Found unsupported PHI node in loop exit.";
1038 if (!areOuterLoopExitPHIsSupported(OuterLoop
, InnerLoop
)) {
1039 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1041 return OptimizationRemarkMissed(DEBUG_TYPE
, "UnsupportedExitPHI",
1042 OuterLoop
->getStartLoc(),
1043 OuterLoop
->getHeader())
1044 << "Found unsupported PHI node in loop exit.";
1052 int LoopInterchangeProfitability::getInstrOrderCost() {
1053 unsigned GoodOrder
, BadOrder
;
1054 BadOrder
= GoodOrder
= 0;
1055 for (BasicBlock
*BB
: InnerLoop
->blocks()) {
1056 for (Instruction
&Ins
: *BB
) {
1057 if (const GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(&Ins
)) {
1058 unsigned NumOp
= GEP
->getNumOperands();
1059 bool FoundInnerInduction
= false;
1060 bool FoundOuterInduction
= false;
1061 for (unsigned i
= 0; i
< NumOp
; ++i
) {
1062 // Skip operands that are not SCEV-able.
1063 if (!SE
->isSCEVable(GEP
->getOperand(i
)->getType()))
1066 const SCEV
*OperandVal
= SE
->getSCEV(GEP
->getOperand(i
));
1067 const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(OperandVal
);
1071 // If we find the inner induction after an outer induction e.g.
1072 // for(int i=0;i<N;i++)
1073 // for(int j=0;j<N;j++)
1074 // A[i][j] = A[i-1][j-1]+k;
1075 // then it is a good order.
1076 if (AR
->getLoop() == InnerLoop
) {
1077 // We found an InnerLoop induction after OuterLoop induction. It is
1079 FoundInnerInduction
= true;
1080 if (FoundOuterInduction
) {
1085 // If we find the outer induction after an inner induction e.g.
1086 // for(int i=0;i<N;i++)
1087 // for(int j=0;j<N;j++)
1088 // A[j][i] = A[j-1][i-1]+k;
1089 // then it is a bad order.
1090 if (AR
->getLoop() == OuterLoop
) {
1091 // We found an OuterLoop induction after InnerLoop induction. It is
1093 FoundOuterInduction
= true;
1094 if (FoundInnerInduction
) {
1103 return GoodOrder
- BadOrder
;
1107 LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1108 const DenseMap
<const Loop
*, unsigned> &CostMap
,
1109 std::unique_ptr
<CacheCost
> &CC
) {
1110 // This is the new cost model returned from loop cache analysis.
1111 // A smaller index means the loop should be placed an outer loop, and vice
1113 if (CostMap
.contains(InnerLoop
) && CostMap
.contains(OuterLoop
)) {
1114 unsigned InnerIndex
= 0, OuterIndex
= 0;
1115 InnerIndex
= CostMap
.find(InnerLoop
)->second
;
1116 OuterIndex
= CostMap
.find(OuterLoop
)->second
;
1117 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1118 << ", OuterIndex = " << OuterIndex
<< "\n");
1119 if (InnerIndex
< OuterIndex
)
1120 return std::optional
<bool>(true);
1121 assert(InnerIndex
!= OuterIndex
&& "CostMap should assign unique "
1122 "numbers to each loop");
1123 if (CC
->getLoopCost(*OuterLoop
) == CC
->getLoopCost(*InnerLoop
))
1124 return std::nullopt
;
1125 return std::optional
<bool>(false);
1127 return std::nullopt
;
1131 LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1132 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1133 // good and bad order of induction variables in the instruction and allows
1134 // reordering if number of bad orders is more than good.
1135 int Cost
= getInstrOrderCost();
1136 LLVM_DEBUG(dbgs() << "Cost = " << Cost
<< "\n");
1137 if (Cost
< 0 && Cost
< LoopInterchangeCostThreshold
)
1138 return std::optional
<bool>(true);
1140 return std::nullopt
;
1143 std::optional
<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1144 unsigned InnerLoopId
, unsigned OuterLoopId
, CharMatrix
&DepMatrix
) {
1145 for (auto &Row
: DepMatrix
) {
1146 // If the inner loop is loop independent or doesn't carry any dependency
1147 // it is not profitable to move this to outer position, since we are
1148 // likely able to do inner loop vectorization already.
1149 if (Row
[InnerLoopId
] == 'I' || Row
[InnerLoopId
] == '=')
1150 return std::optional
<bool>(false);
1152 // If the outer loop is not loop independent it is not profitable to move
1153 // this to inner position, since doing so would not enable inner loop
1155 if (Row
[OuterLoopId
] != 'I' && Row
[OuterLoopId
] != '=')
1156 return std::optional
<bool>(false);
1158 // If inner loop has dependence and outer loop is loop independent then it
1159 // is/ profitable to interchange to enable inner loop parallelism.
1160 // If there are no dependences, interchanging will not improve anything.
1161 return std::optional
<bool>(!DepMatrix
.empty());
1164 bool LoopInterchangeProfitability::isProfitable(
1165 const Loop
*InnerLoop
, const Loop
*OuterLoop
, unsigned InnerLoopId
,
1166 unsigned OuterLoopId
, CharMatrix
&DepMatrix
,
1167 const DenseMap
<const Loop
*, unsigned> &CostMap
,
1168 std::unique_ptr
<CacheCost
> &CC
) {
1169 // isProfitable() is structured to avoid endless loop interchange.
1170 // If loop cache analysis could decide the profitability then,
1171 // profitability check will stop and return the analysis result.
1172 // If cache analysis failed to analyze the loopnest (e.g.,
1173 // due to delinearization issues) then only check whether it is
1174 // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1175 // analysis the profitability then only, isProfitableForVectorization
1177 std::optional
<bool> shouldInterchange
=
1178 isProfitablePerLoopCacheAnalysis(CostMap
, CC
);
1179 if (!shouldInterchange
.has_value()) {
1180 shouldInterchange
= isProfitablePerInstrOrderCost();
1181 if (!shouldInterchange
.has_value())
1183 isProfitableForVectorization(InnerLoopId
, OuterLoopId
, DepMatrix
);
1185 if (!shouldInterchange
.has_value()) {
1187 return OptimizationRemarkMissed(DEBUG_TYPE
, "InterchangeNotProfitable",
1188 InnerLoop
->getStartLoc(),
1189 InnerLoop
->getHeader())
1190 << "Insufficient information to calculate the cost of loop for "
1194 } else if (!shouldInterchange
.value()) {
1196 return OptimizationRemarkMissed(DEBUG_TYPE
, "InterchangeNotProfitable",
1197 InnerLoop
->getStartLoc(),
1198 InnerLoop
->getHeader())
1199 << "Interchanging loops is not considered to improve cache "
1200 "locality nor vectorization.";
1207 void LoopInterchangeTransform::removeChildLoop(Loop
*OuterLoop
,
1209 for (Loop
*L
: *OuterLoop
)
1210 if (L
== InnerLoop
) {
1211 OuterLoop
->removeChildLoop(L
);
1214 llvm_unreachable("Couldn't find loop");
1217 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1218 /// new inner and outer loop after interchanging: NewInner is the original
1219 /// outer loop and NewOuter is the original inner loop.
1221 /// Before interchanging, we have the following structure
1231 // After interchanging:
1240 void LoopInterchangeTransform::restructureLoops(
1241 Loop
*NewInner
, Loop
*NewOuter
, BasicBlock
*OrigInnerPreHeader
,
1242 BasicBlock
*OrigOuterPreHeader
) {
1243 Loop
*OuterLoopParent
= OuterLoop
->getParentLoop();
1244 // The original inner loop preheader moves from the new inner loop to
1245 // the parent loop, if there is one.
1246 NewInner
->removeBlockFromLoop(OrigInnerPreHeader
);
1247 LI
->changeLoopFor(OrigInnerPreHeader
, OuterLoopParent
);
1249 // Switch the loop levels.
1250 if (OuterLoopParent
) {
1251 // Remove the loop from its parent loop.
1252 removeChildLoop(OuterLoopParent
, NewInner
);
1253 removeChildLoop(NewInner
, NewOuter
);
1254 OuterLoopParent
->addChildLoop(NewOuter
);
1256 removeChildLoop(NewInner
, NewOuter
);
1257 LI
->changeTopLevelLoop(NewInner
, NewOuter
);
1259 while (!NewOuter
->isInnermost())
1260 NewInner
->addChildLoop(NewOuter
->removeChildLoop(NewOuter
->begin()));
1261 NewOuter
->addChildLoop(NewInner
);
1263 // BBs from the original inner loop.
1264 SmallVector
<BasicBlock
*, 8> OrigInnerBBs(NewOuter
->blocks());
1266 // Add BBs from the original outer loop to the original inner loop (excluding
1267 // BBs already in inner loop)
1268 for (BasicBlock
*BB
: NewInner
->blocks())
1269 if (LI
->getLoopFor(BB
) == NewInner
)
1270 NewOuter
->addBlockEntry(BB
);
1272 // Now remove inner loop header and latch from the new inner loop and move
1273 // other BBs (the loop body) to the new inner loop.
1274 BasicBlock
*OuterHeader
= NewOuter
->getHeader();
1275 BasicBlock
*OuterLatch
= NewOuter
->getLoopLatch();
1276 for (BasicBlock
*BB
: OrigInnerBBs
) {
1277 // Nothing will change for BBs in child loops.
1278 if (LI
->getLoopFor(BB
) != NewOuter
)
1280 // Remove the new outer loop header and latch from the new inner loop.
1281 if (BB
== OuterHeader
|| BB
== OuterLatch
)
1282 NewInner
->removeBlockFromLoop(BB
);
1284 LI
->changeLoopFor(BB
, NewInner
);
1287 // The preheader of the original outer loop becomes part of the new
1289 NewOuter
->addBlockEntry(OrigOuterPreHeader
);
1290 LI
->changeLoopFor(OrigOuterPreHeader
, NewOuter
);
1292 // Tell SE that we move the loops around.
1293 SE
->forgetLoop(NewOuter
);
1296 bool LoopInterchangeTransform::transform() {
1297 bool Transformed
= false;
1299 if (InnerLoop
->getSubLoops().empty()) {
1300 BasicBlock
*InnerLoopPreHeader
= InnerLoop
->getLoopPreheader();
1301 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1302 auto &InductionPHIs
= LIL
.getInnerLoopInductions();
1303 if (InductionPHIs
.empty()) {
1304 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1308 SmallVector
<Instruction
*, 8> InnerIndexVarList
;
1309 for (PHINode
*CurInductionPHI
: InductionPHIs
) {
1310 if (CurInductionPHI
->getIncomingBlock(0) == InnerLoopPreHeader
)
1311 InnerIndexVarList
.push_back(
1312 dyn_cast
<Instruction
>(CurInductionPHI
->getIncomingValue(1)));
1314 InnerIndexVarList
.push_back(
1315 dyn_cast
<Instruction
>(CurInductionPHI
->getIncomingValue(0)));
1318 // Create a new latch block for the inner loop. We split at the
1319 // current latch's terminator and then move the condition and all
1320 // operands that are not either loop-invariant or the induction PHI into the
1322 BasicBlock
*NewLatch
=
1323 SplitBlock(InnerLoop
->getLoopLatch(),
1324 InnerLoop
->getLoopLatch()->getTerminator(), DT
, LI
);
1326 SmallSetVector
<Instruction
*, 4> WorkList
;
1328 auto MoveInstructions
= [&i
, &WorkList
, this, &InductionPHIs
, NewLatch
]() {
1329 for (; i
< WorkList
.size(); i
++) {
1330 // Duplicate instruction and move it the new latch. Update uses that
1332 Instruction
*NewI
= WorkList
[i
]->clone();
1333 NewI
->insertBefore(NewLatch
->getFirstNonPHI());
1334 assert(!NewI
->mayHaveSideEffects() &&
1335 "Moving instructions with side-effects may change behavior of "
1337 for (Use
&U
: llvm::make_early_inc_range(WorkList
[i
]->uses())) {
1338 Instruction
*UserI
= cast
<Instruction
>(U
.getUser());
1339 if (!InnerLoop
->contains(UserI
->getParent()) ||
1340 UserI
->getParent() == NewLatch
||
1341 llvm::is_contained(InductionPHIs
, UserI
))
1344 // Add operands of moved instruction to the worklist, except if they are
1345 // outside the inner loop or are the induction PHI.
1346 for (Value
*Op
: WorkList
[i
]->operands()) {
1347 Instruction
*OpI
= dyn_cast
<Instruction
>(Op
);
1349 this->LI
->getLoopFor(OpI
->getParent()) != this->InnerLoop
||
1350 llvm::is_contained(InductionPHIs
, OpI
))
1352 WorkList
.insert(OpI
);
1357 // FIXME: Should we interchange when we have a constant condition?
1358 Instruction
*CondI
= dyn_cast
<Instruction
>(
1359 cast
<BranchInst
>(InnerLoop
->getLoopLatch()->getTerminator())
1362 WorkList
.insert(CondI
);
1364 for (Instruction
*InnerIndexVar
: InnerIndexVarList
)
1365 WorkList
.insert(cast
<Instruction
>(InnerIndexVar
));
1369 // Ensure the inner loop phi nodes have a separate basic block.
1370 BasicBlock
*InnerLoopHeader
= InnerLoop
->getHeader();
1371 if (InnerLoopHeader
->getFirstNonPHI() != InnerLoopHeader
->getTerminator()) {
1372 SplitBlock(InnerLoopHeader
, InnerLoopHeader
->getFirstNonPHI(), DT
, LI
);
1373 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1376 // Instructions in the original inner loop preheader may depend on values
1377 // defined in the outer loop header. Move them there, because the original
1378 // inner loop preheader will become the entry into the interchanged loop nest.
1379 // Currently we move all instructions and rely on LICM to move invariant
1380 // instructions outside the loop nest.
1381 BasicBlock
*InnerLoopPreHeader
= InnerLoop
->getLoopPreheader();
1382 BasicBlock
*OuterLoopHeader
= OuterLoop
->getHeader();
1383 if (InnerLoopPreHeader
!= OuterLoopHeader
) {
1384 SmallPtrSet
<Instruction
*, 4> NeedsMoving
;
1385 for (Instruction
&I
:
1386 make_early_inc_range(make_range(InnerLoopPreHeader
->begin(),
1387 std::prev(InnerLoopPreHeader
->end()))))
1388 I
.moveBeforePreserving(OuterLoopHeader
->getTerminator());
1391 Transformed
|= adjustLoopLinks();
1393 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1400 /// \brief Move all instructions except the terminator from FromBB right before
1402 static void moveBBContents(BasicBlock
*FromBB
, Instruction
*InsertBefore
) {
1403 BasicBlock
*ToBB
= InsertBefore
->getParent();
1405 ToBB
->splice(InsertBefore
->getIterator(), FromBB
, FromBB
->begin(),
1406 FromBB
->getTerminator()->getIterator());
1409 /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1410 static void swapBBContents(BasicBlock
*BB1
, BasicBlock
*BB2
) {
1411 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1412 // from BB1 afterwards.
1413 auto Iter
= map_range(*BB1
, [](Instruction
&I
) { return &I
; });
1414 SmallVector
<Instruction
*, 4> TempInstrs(Iter
.begin(), std::prev(Iter
.end()));
1415 for (Instruction
*I
: TempInstrs
)
1416 I
->removeFromParent();
1418 // Move instructions from BB2 to BB1.
1419 moveBBContents(BB2
, BB1
->getTerminator());
1421 // Move instructions from TempInstrs to BB2.
1422 for (Instruction
*I
: TempInstrs
)
1423 I
->insertBefore(BB2
->getTerminator());
1426 // Update BI to jump to NewBB instead of OldBB. Records updates to the
1427 // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1428 // \p OldBB is exactly once in BI's successor list.
1429 static void updateSuccessor(BranchInst
*BI
, BasicBlock
*OldBB
,
1431 std::vector
<DominatorTree::UpdateType
> &DTUpdates
,
1432 bool MustUpdateOnce
= true) {
1433 assert((!MustUpdateOnce
||
1434 llvm::count_if(successors(BI
),
1435 [OldBB
](BasicBlock
*BB
) {
1437 }) == 1) && "BI must jump to OldBB exactly once.");
1438 bool Changed
= false;
1439 for (Use
&Op
: BI
->operands())
1446 DTUpdates
.push_back(
1447 {DominatorTree::UpdateKind::Insert
, BI
->getParent(), NewBB
});
1448 DTUpdates
.push_back(
1449 {DominatorTree::UpdateKind::Delete
, BI
->getParent(), OldBB
});
1451 assert(Changed
&& "Expected a successor to be updated");
1454 // Move Lcssa PHIs to the right place.
1455 static void moveLCSSAPhis(BasicBlock
*InnerExit
, BasicBlock
*InnerHeader
,
1456 BasicBlock
*InnerLatch
, BasicBlock
*OuterHeader
,
1457 BasicBlock
*OuterLatch
, BasicBlock
*OuterExit
,
1458 Loop
*InnerLoop
, LoopInfo
*LI
) {
1460 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1461 // defined either in the header or latch. Those blocks will become header and
1462 // latch of the new outer loop, and the only possible users can PHI nodes
1463 // in the exit block of the loop nest or the outer loop header (reduction
1464 // PHIs, in that case, the incoming value must be defined in the inner loop
1465 // header). We can just substitute the user with the incoming value and remove
1467 for (PHINode
&P
: make_early_inc_range(InnerExit
->phis())) {
1468 assert(P
.getNumIncomingValues() == 1 &&
1469 "Only loops with a single exit are supported!");
1471 // Incoming values are guaranteed be instructions currently.
1472 auto IncI
= cast
<Instruction
>(P
.getIncomingValueForBlock(InnerLatch
));
1473 // In case of multi-level nested loops, follow LCSSA to find the incoming
1474 // value defined from the innermost loop.
1475 auto IncIInnerMost
= cast
<Instruction
>(followLCSSA(IncI
));
1476 // Skip phis with incoming values from the inner loop body, excluding the
1477 // header and latch.
1478 if (IncIInnerMost
->getParent() != InnerLatch
&&
1479 IncIInnerMost
->getParent() != InnerHeader
)
1482 assert(all_of(P
.users(),
1483 [OuterHeader
, OuterExit
, IncI
, InnerHeader
](User
*U
) {
1484 return (cast
<PHINode
>(U
)->getParent() == OuterHeader
&&
1485 IncI
->getParent() == InnerHeader
) ||
1486 cast
<PHINode
>(U
)->getParent() == OuterExit
;
1488 "Can only replace phis iff the uses are in the loop nest exit or "
1489 "the incoming value is defined in the inner header (it will "
1490 "dominate all loop blocks after interchanging)");
1491 P
.replaceAllUsesWith(IncI
);
1492 P
.eraseFromParent();
1495 SmallVector
<PHINode
*, 8> LcssaInnerExit
;
1496 for (PHINode
&P
: InnerExit
->phis())
1497 LcssaInnerExit
.push_back(&P
);
1499 SmallVector
<PHINode
*, 8> LcssaInnerLatch
;
1500 for (PHINode
&P
: InnerLatch
->phis())
1501 LcssaInnerLatch
.push_back(&P
);
1503 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1504 // If a PHI node has users outside of InnerExit, it has a use outside the
1505 // interchanged loop and we have to preserve it. We move these to
1506 // InnerLatch, which will become the new exit block for the innermost
1507 // loop after interchanging.
1508 for (PHINode
*P
: LcssaInnerExit
)
1509 P
->moveBefore(InnerLatch
->getFirstNonPHI());
1511 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1512 // and we have to move them to the new inner latch.
1513 for (PHINode
*P
: LcssaInnerLatch
)
1514 P
->moveBefore(InnerExit
->getFirstNonPHI());
1516 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1517 // incoming values defined in the outer loop, we have to add a new PHI
1518 // in the inner loop latch, which became the exit block of the outer loop,
1519 // after interchanging.
1521 for (PHINode
&P
: OuterExit
->phis()) {
1522 if (P
.getNumIncomingValues() != 1)
1524 // Skip Phis with incoming values defined in the inner loop. Those should
1525 // already have been updated.
1526 auto I
= dyn_cast
<Instruction
>(P
.getIncomingValue(0));
1527 if (!I
|| LI
->getLoopFor(I
->getParent()) == InnerLoop
)
1530 PHINode
*NewPhi
= dyn_cast
<PHINode
>(P
.clone());
1531 NewPhi
->setIncomingValue(0, P
.getIncomingValue(0));
1532 NewPhi
->setIncomingBlock(0, OuterLatch
);
1533 // We might have incoming edges from other BBs, i.e., the original outer
1535 for (auto *Pred
: predecessors(InnerLatch
)) {
1536 if (Pred
== OuterLatch
)
1538 NewPhi
->addIncoming(P
.getIncomingValue(0), Pred
);
1540 NewPhi
->insertBefore(InnerLatch
->getFirstNonPHI());
1541 P
.setIncomingValue(0, NewPhi
);
1545 // Now adjust the incoming blocks for the LCSSA PHIs.
1546 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1547 // with the new latch.
1548 InnerLatch
->replacePhiUsesWith(InnerLatch
, OuterLatch
);
1551 bool LoopInterchangeTransform::adjustLoopBranches() {
1552 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1553 std::vector
<DominatorTree::UpdateType
> DTUpdates
;
1555 BasicBlock
*OuterLoopPreHeader
= OuterLoop
->getLoopPreheader();
1556 BasicBlock
*InnerLoopPreHeader
= InnerLoop
->getLoopPreheader();
1558 assert(OuterLoopPreHeader
!= OuterLoop
->getHeader() &&
1559 InnerLoopPreHeader
!= InnerLoop
->getHeader() && OuterLoopPreHeader
&&
1560 InnerLoopPreHeader
&& "Guaranteed by loop-simplify form");
1561 // Ensure that both preheaders do not contain PHI nodes and have single
1562 // predecessors. This allows us to move them easily. We use
1563 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1564 // preheaders do not satisfy those conditions.
1565 if (isa
<PHINode
>(OuterLoopPreHeader
->begin()) ||
1566 !OuterLoopPreHeader
->getUniquePredecessor())
1567 OuterLoopPreHeader
=
1568 InsertPreheaderForLoop(OuterLoop
, DT
, LI
, nullptr, true);
1569 if (InnerLoopPreHeader
== OuterLoop
->getHeader())
1570 InnerLoopPreHeader
=
1571 InsertPreheaderForLoop(InnerLoop
, DT
, LI
, nullptr, true);
1573 // Adjust the loop preheader
1574 BasicBlock
*InnerLoopHeader
= InnerLoop
->getHeader();
1575 BasicBlock
*OuterLoopHeader
= OuterLoop
->getHeader();
1576 BasicBlock
*InnerLoopLatch
= InnerLoop
->getLoopLatch();
1577 BasicBlock
*OuterLoopLatch
= OuterLoop
->getLoopLatch();
1578 BasicBlock
*OuterLoopPredecessor
= OuterLoopPreHeader
->getUniquePredecessor();
1579 BasicBlock
*InnerLoopLatchPredecessor
=
1580 InnerLoopLatch
->getUniquePredecessor();
1581 BasicBlock
*InnerLoopLatchSuccessor
;
1582 BasicBlock
*OuterLoopLatchSuccessor
;
1584 BranchInst
*OuterLoopLatchBI
=
1585 dyn_cast
<BranchInst
>(OuterLoopLatch
->getTerminator());
1586 BranchInst
*InnerLoopLatchBI
=
1587 dyn_cast
<BranchInst
>(InnerLoopLatch
->getTerminator());
1588 BranchInst
*OuterLoopHeaderBI
=
1589 dyn_cast
<BranchInst
>(OuterLoopHeader
->getTerminator());
1590 BranchInst
*InnerLoopHeaderBI
=
1591 dyn_cast
<BranchInst
>(InnerLoopHeader
->getTerminator());
1593 if (!OuterLoopPredecessor
|| !InnerLoopLatchPredecessor
||
1594 !OuterLoopLatchBI
|| !InnerLoopLatchBI
|| !OuterLoopHeaderBI
||
1598 BranchInst
*InnerLoopLatchPredecessorBI
=
1599 dyn_cast
<BranchInst
>(InnerLoopLatchPredecessor
->getTerminator());
1600 BranchInst
*OuterLoopPredecessorBI
=
1601 dyn_cast
<BranchInst
>(OuterLoopPredecessor
->getTerminator());
1603 if (!OuterLoopPredecessorBI
|| !InnerLoopLatchPredecessorBI
)
1605 BasicBlock
*InnerLoopHeaderSuccessor
= InnerLoopHeader
->getUniqueSuccessor();
1606 if (!InnerLoopHeaderSuccessor
)
1609 // Adjust Loop Preheader and headers.
1610 // The branches in the outer loop predecessor and the outer loop header can
1611 // be unconditional branches or conditional branches with duplicates. Consider
1612 // this when updating the successors.
1613 updateSuccessor(OuterLoopPredecessorBI
, OuterLoopPreHeader
,
1614 InnerLoopPreHeader
, DTUpdates
, /*MustUpdateOnce=*/false);
1615 // The outer loop header might or might not branch to the outer latch.
1616 // We are guaranteed to branch to the inner loop preheader.
1617 if (llvm::is_contained(OuterLoopHeaderBI
->successors(), OuterLoopLatch
)) {
1618 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1619 updateSuccessor(OuterLoopHeaderBI
, OuterLoopLatch
, InnerLoopLatch
,
1621 /*MustUpdateOnce=*/false);
1623 updateSuccessor(OuterLoopHeaderBI
, InnerLoopPreHeader
,
1624 InnerLoopHeaderSuccessor
, DTUpdates
,
1625 /*MustUpdateOnce=*/false);
1627 // Adjust reduction PHI's now that the incoming block has changed.
1628 InnerLoopHeaderSuccessor
->replacePhiUsesWith(InnerLoopHeader
,
1631 updateSuccessor(InnerLoopHeaderBI
, InnerLoopHeaderSuccessor
,
1632 OuterLoopPreHeader
, DTUpdates
);
1634 // -------------Adjust loop latches-----------
1635 if (InnerLoopLatchBI
->getSuccessor(0) == InnerLoopHeader
)
1636 InnerLoopLatchSuccessor
= InnerLoopLatchBI
->getSuccessor(1);
1638 InnerLoopLatchSuccessor
= InnerLoopLatchBI
->getSuccessor(0);
1640 updateSuccessor(InnerLoopLatchPredecessorBI
, InnerLoopLatch
,
1641 InnerLoopLatchSuccessor
, DTUpdates
);
1643 if (OuterLoopLatchBI
->getSuccessor(0) == OuterLoopHeader
)
1644 OuterLoopLatchSuccessor
= OuterLoopLatchBI
->getSuccessor(1);
1646 OuterLoopLatchSuccessor
= OuterLoopLatchBI
->getSuccessor(0);
1648 updateSuccessor(InnerLoopLatchBI
, InnerLoopLatchSuccessor
,
1649 OuterLoopLatchSuccessor
, DTUpdates
);
1650 updateSuccessor(OuterLoopLatchBI
, OuterLoopLatchSuccessor
, InnerLoopLatch
,
1653 DT
->applyUpdates(DTUpdates
);
1654 restructureLoops(OuterLoop
, InnerLoop
, InnerLoopPreHeader
,
1655 OuterLoopPreHeader
);
1657 moveLCSSAPhis(InnerLoopLatchSuccessor
, InnerLoopHeader
, InnerLoopLatch
,
1658 OuterLoopHeader
, OuterLoopLatch
, InnerLoop
->getExitBlock(),
1660 // For PHIs in the exit block of the outer loop, outer's latch has been
1661 // replaced by Inners'.
1662 OuterLoopLatchSuccessor
->replacePhiUsesWith(OuterLoopLatch
, InnerLoopLatch
);
1664 auto &OuterInnerReductions
= LIL
.getOuterInnerReductions();
1665 // Now update the reduction PHIs in the inner and outer loop headers.
1666 SmallVector
<PHINode
*, 4> InnerLoopPHIs
, OuterLoopPHIs
;
1667 for (PHINode
&PHI
: InnerLoopHeader
->phis())
1668 if (OuterInnerReductions
.contains(&PHI
))
1669 InnerLoopPHIs
.push_back(&PHI
);
1671 for (PHINode
&PHI
: OuterLoopHeader
->phis())
1672 if (OuterInnerReductions
.contains(&PHI
))
1673 OuterLoopPHIs
.push_back(&PHI
);
1675 // Now move the remaining reduction PHIs from outer to inner loop header and
1676 // vice versa. The PHI nodes must be part of a reduction across the inner and
1677 // outer loop and all the remains to do is and updating the incoming blocks.
1678 for (PHINode
*PHI
: OuterLoopPHIs
) {
1679 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI
->dump(););
1680 PHI
->moveBefore(InnerLoopHeader
->getFirstNonPHI());
1681 assert(OuterInnerReductions
.count(PHI
) && "Expected a reduction PHI node");
1683 for (PHINode
*PHI
: InnerLoopPHIs
) {
1684 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI
->dump(););
1685 PHI
->moveBefore(OuterLoopHeader
->getFirstNonPHI());
1686 assert(OuterInnerReductions
.count(PHI
) && "Expected a reduction PHI node");
1689 // Update the incoming blocks for moved PHI nodes.
1690 OuterLoopHeader
->replacePhiUsesWith(InnerLoopPreHeader
, OuterLoopPreHeader
);
1691 OuterLoopHeader
->replacePhiUsesWith(InnerLoopLatch
, OuterLoopLatch
);
1692 InnerLoopHeader
->replacePhiUsesWith(OuterLoopPreHeader
, InnerLoopPreHeader
);
1693 InnerLoopHeader
->replacePhiUsesWith(OuterLoopLatch
, InnerLoopLatch
);
1695 // Values defined in the outer loop header could be used in the inner loop
1696 // latch. In that case, we need to create LCSSA phis for them, because after
1697 // interchanging they will be defined in the new inner loop and used in the
1699 SmallVector
<Instruction
*, 4> MayNeedLCSSAPhis
;
1700 for (Instruction
&I
:
1701 make_range(OuterLoopHeader
->begin(), std::prev(OuterLoopHeader
->end())))
1702 MayNeedLCSSAPhis
.push_back(&I
);
1703 formLCSSAForInstructions(MayNeedLCSSAPhis
, *DT
, *LI
, SE
);
1708 bool LoopInterchangeTransform::adjustLoopLinks() {
1709 // Adjust all branches in the inner and outer loop.
1710 bool Changed
= adjustLoopBranches();
1712 // We have interchanged the preheaders so we need to interchange the data in
1713 // the preheaders as well. This is because the content of the inner
1714 // preheader was previously executed inside the outer loop.
1715 BasicBlock
*OuterLoopPreHeader
= OuterLoop
->getLoopPreheader();
1716 BasicBlock
*InnerLoopPreHeader
= InnerLoop
->getLoopPreheader();
1717 swapBBContents(OuterLoopPreHeader
, InnerLoopPreHeader
);
1722 PreservedAnalyses
LoopInterchangePass::run(LoopNest
&LN
,
1723 LoopAnalysisManager
&AM
,
1724 LoopStandardAnalysisResults
&AR
,
1726 Function
&F
= *LN
.getParent();
1727 SmallVector
<Loop
*, 8> LoopList(LN
.getLoops());
1728 // Ensure minimum depth of the loop nest to do the interchange.
1729 if (!hasMinimumLoopDepth(LoopList
))
1730 return PreservedAnalyses::all();
1732 DependenceInfo
DI(&F
, &AR
.AA
, &AR
.SE
, &AR
.LI
);
1733 std::unique_ptr
<CacheCost
> CC
=
1734 CacheCost::getCacheCost(LN
.getOutermostLoop(), AR
, DI
);
1735 OptimizationRemarkEmitter
ORE(&F
);
1736 if (!LoopInterchange(&AR
.SE
, &AR
.LI
, &DI
, &AR
.DT
, CC
, &ORE
).run(LN
))
1737 return PreservedAnalyses::all();
1738 U
.markLoopNestChanged(true);
1739 return getLoopPassPreservedAnalyses();