[ARM] MVE integer min and max
[llvm-complete.git] / lib / Target / Hexagon / HexagonHardwareLoops.cpp
blobcecbaedb6d70e7ad59d63ec9175d19c2f0844a20
1 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the Hexagon hardware
10 // loop instruction. The hardware loop can perform loop branches with a
11 // zero-cycle overhead.
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 // The pattern detected by this phase is due to running Strength Reduction.
19 // Criteria for hardware loops:
20 // - Countable loops (w/ ind. var for a trip count)
21 // - Assumes loops are normalized by IndVarSimplify
22 // - Try inner-most loops first
23 // - No function calls in loops.
25 //===----------------------------------------------------------------------===//
27 #include "HexagonInstrInfo.h"
28 #include "HexagonSubtarget.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/CodeGen/MachineBasicBlock.h"
36 #include "llvm/CodeGen/MachineDominators.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineFunctionPass.h"
39 #include "llvm/CodeGen/MachineInstr.h"
40 #include "llvm/CodeGen/MachineInstrBuilder.h"
41 #include "llvm/CodeGen/MachineLoopInfo.h"
42 #include "llvm/CodeGen/MachineOperand.h"
43 #include "llvm/CodeGen/MachineRegisterInfo.h"
44 #include "llvm/CodeGen/TargetRegisterInfo.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MathExtras.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <cassert>
54 #include <cstdint>
55 #include <cstdlib>
56 #include <iterator>
57 #include <map>
58 #include <set>
59 #include <string>
60 #include <utility>
61 #include <vector>
63 using namespace llvm;
65 #define DEBUG_TYPE "hwloops"
67 #ifndef NDEBUG
68 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70 // Option to create preheader only for a specific function.
71 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
72 cl::init(""));
73 #endif
75 // Option to create a preheader if one doesn't exist.
76 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
77 cl::Hidden, cl::init(true),
78 cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80 // Turn it off by default. If a preheader block is not created here, the
81 // software pipeliner may be unable to find a block suitable to serve as
82 // a preheader. In that case SWP will not run.
83 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
84 cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
85 "instructions"));
87 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89 namespace llvm {
91 FunctionPass *createHexagonHardwareLoops();
92 void initializeHexagonHardwareLoopsPass(PassRegistry&);
94 } // end namespace llvm
96 namespace {
98 class CountValue;
100 struct HexagonHardwareLoops : public MachineFunctionPass {
101 MachineLoopInfo *MLI;
102 MachineRegisterInfo *MRI;
103 MachineDominatorTree *MDT;
104 const HexagonInstrInfo *TII;
105 const HexagonRegisterInfo *TRI;
106 #ifndef NDEBUG
107 static int Counter;
108 #endif
110 public:
111 static char ID;
113 HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115 bool runOnMachineFunction(MachineFunction &MF) override;
117 StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119 void getAnalysisUsage(AnalysisUsage &AU) const override {
120 AU.addRequired<MachineDominatorTree>();
121 AU.addRequired<MachineLoopInfo>();
122 MachineFunctionPass::getAnalysisUsage(AU);
125 private:
126 using LoopFeederMap = std::map<unsigned, MachineInstr *>;
128 /// Kinds of comparisons in the compare instructions.
129 struct Comparison {
130 enum Kind {
131 EQ = 0x01,
132 NE = 0x02,
133 L = 0x04,
134 G = 0x08,
135 U = 0x40,
136 LTs = L,
137 LEs = L | EQ,
138 GTs = G,
139 GEs = G | EQ,
140 LTu = L | U,
141 LEu = L | EQ | U,
142 GTu = G | U,
143 GEu = G | EQ | U
146 static Kind getSwappedComparison(Kind Cmp) {
147 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
148 if ((Cmp & L) || (Cmp & G))
149 return (Kind)(Cmp ^ (L|G));
150 return Cmp;
153 static Kind getNegatedComparison(Kind Cmp) {
154 if ((Cmp & L) || (Cmp & G))
155 return (Kind)((Cmp ^ (L | G)) ^ EQ);
156 if ((Cmp & NE) || (Cmp & EQ))
157 return (Kind)(Cmp ^ (EQ | NE));
158 return (Kind)0;
161 static bool isSigned(Kind Cmp) {
162 return (Cmp & (L | G) && !(Cmp & U));
165 static bool isUnsigned(Kind Cmp) {
166 return (Cmp & U);
170 /// Find the register that contains the loop controlling
171 /// induction variable.
172 /// If successful, it will return true and set the \p Reg, \p IVBump
173 /// and \p IVOp arguments. Otherwise it will return false.
174 /// The returned induction register is the register R that follows the
175 /// following induction pattern:
176 /// loop:
177 /// R = phi ..., [ R.next, LatchBlock ]
178 /// R.next = R + #bump
179 /// if (R.next < #N) goto loop
180 /// IVBump is the immediate value added to R, and IVOp is the instruction
181 /// "R.next = R + #bump".
182 bool findInductionRegister(MachineLoop *L, unsigned &Reg,
183 int64_t &IVBump, MachineInstr *&IVOp) const;
185 /// Return the comparison kind for the specified opcode.
186 Comparison::Kind getComparisonKind(unsigned CondOpc,
187 MachineOperand *InitialValue,
188 const MachineOperand *Endvalue,
189 int64_t IVBump) const;
191 /// Analyze the statements in a loop to determine if the loop
192 /// has a computable trip count and, if so, return a value that represents
193 /// the trip count expression.
194 CountValue *getLoopTripCount(MachineLoop *L,
195 SmallVectorImpl<MachineInstr *> &OldInsts);
197 /// Return the expression that represents the number of times
198 /// a loop iterates. The function takes the operands that represent the
199 /// loop start value, loop end value, and induction value. Based upon
200 /// these operands, the function attempts to compute the trip count.
201 /// If the trip count is not directly available (as an immediate value,
202 /// or a register), the function will attempt to insert computation of it
203 /// to the loop's preheader.
204 CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
205 const MachineOperand *End, unsigned IVReg,
206 int64_t IVBump, Comparison::Kind Cmp) const;
208 /// Return true if the instruction is not valid within a hardware
209 /// loop.
210 bool isInvalidLoopOperation(const MachineInstr *MI,
211 bool IsInnerHWLoop) const;
213 /// Return true if the loop contains an instruction that inhibits
214 /// using the hardware loop.
215 bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217 /// Given a loop, check if we can convert it to a hardware loop.
218 /// If so, then perform the conversion and return true.
219 bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221 /// Return true if the instruction is now dead.
222 bool isDead(const MachineInstr *MI,
223 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225 /// Remove the instruction if it is now dead.
226 void removeIfDead(MachineInstr *MI);
228 /// Make sure that the "bump" instruction executes before the
229 /// compare. We need that for the IV fixup, so that the compare
230 /// instruction would not use a bumped value that has not yet been
231 /// defined. If the instructions are out of order, try to reorder them.
232 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234 /// Return true if MO and MI pair is visited only once. If visited
235 /// more than once, this indicates there is recursion. In such a case,
236 /// return false.
237 bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
238 const MachineOperand *MO,
239 LoopFeederMap &LoopFeederPhi) const;
241 /// Return true if the Phi may generate a value that may underflow,
242 /// or may wrap.
243 bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
244 MachineBasicBlock *MBB, MachineLoop *L,
245 LoopFeederMap &LoopFeederPhi) const;
247 /// Return true if the induction variable may underflow an unsigned
248 /// value in the first iteration.
249 bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
250 const MachineOperand *EndVal,
251 MachineBasicBlock *MBB, MachineLoop *L,
252 LoopFeederMap &LoopFeederPhi) const;
254 /// Check if the given operand has a compile-time known constant
255 /// value. Return true if yes, and false otherwise. When returning true, set
256 /// Val to the corresponding constant value.
257 bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259 /// Check if the operand has a compile-time known constant value.
260 bool isImmediate(const MachineOperand &MO) const {
261 int64_t V;
262 return checkForImmediate(MO, V);
265 /// Return the immediate for the specified operand.
266 int64_t getImmediate(const MachineOperand &MO) const {
267 int64_t V;
268 if (!checkForImmediate(MO, V))
269 llvm_unreachable("Invalid operand");
270 return V;
273 /// Reset the given machine operand to now refer to a new immediate
274 /// value. Assumes that the operand was already referencing an immediate
275 /// value, either directly, or via a register.
276 void setImmediate(MachineOperand &MO, int64_t Val);
278 /// Fix the data flow of the induction variable.
279 /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
280 /// |
281 /// +-> back to phi
282 /// where "bump" is the increment of the induction variable:
283 /// iv = iv + #const.
284 /// Due to some prior code transformations, the actual flow may look
285 /// like this:
286 /// phi -+-> bump ---> back to phi
287 /// |
288 /// +-> comparison-in-latch (against upper_bound-bump),
289 /// i.e. the comparison that controls the loop execution may be using
290 /// the value of the induction variable from before the increment.
292 /// Return true if the loop's flow is the desired one (i.e. it's
293 /// either been fixed, or no fixing was necessary).
294 /// Otherwise, return false. This can happen if the induction variable
295 /// couldn't be identified, or if the value in the latch's comparison
296 /// cannot be adjusted to reflect the post-bump value.
297 bool fixupInductionVariable(MachineLoop *L);
299 /// Given a loop, if it does not have a preheader, create one.
300 /// Return the block that is the preheader.
301 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
304 char HexagonHardwareLoops::ID = 0;
305 #ifndef NDEBUG
306 int HexagonHardwareLoops::Counter = 0;
307 #endif
309 /// Abstraction for a trip count of a loop. A smaller version
310 /// of the MachineOperand class without the concerns of changing the
311 /// operand representation.
312 class CountValue {
313 public:
314 enum CountValueType {
315 CV_Register,
316 CV_Immediate
319 private:
320 CountValueType Kind;
321 union Values {
322 struct {
323 unsigned Reg;
324 unsigned Sub;
325 } R;
326 unsigned ImmVal;
327 } Contents;
329 public:
330 explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
331 Kind = t;
332 if (Kind == CV_Register) {
333 Contents.R.Reg = v;
334 Contents.R.Sub = u;
335 } else {
336 Contents.ImmVal = v;
340 bool isReg() const { return Kind == CV_Register; }
341 bool isImm() const { return Kind == CV_Immediate; }
343 unsigned getReg() const {
344 assert(isReg() && "Wrong CountValue accessor");
345 return Contents.R.Reg;
348 unsigned getSubReg() const {
349 assert(isReg() && "Wrong CountValue accessor");
350 return Contents.R.Sub;
353 unsigned getImm() const {
354 assert(isImm() && "Wrong CountValue accessor");
355 return Contents.ImmVal;
358 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
359 if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
360 if (isImm()) { OS << Contents.ImmVal; }
364 } // end anonymous namespace
366 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
367 "Hexagon Hardware Loops", false, false)
368 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
369 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
370 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
371 "Hexagon Hardware Loops", false, false)
373 FunctionPass *llvm::createHexagonHardwareLoops() {
374 return new HexagonHardwareLoops();
377 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
378 LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
379 if (skipFunction(MF.getFunction()))
380 return false;
382 bool Changed = false;
384 MLI = &getAnalysis<MachineLoopInfo>();
385 MRI = &MF.getRegInfo();
386 MDT = &getAnalysis<MachineDominatorTree>();
387 const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
388 TII = HST.getInstrInfo();
389 TRI = HST.getRegisterInfo();
391 for (auto &L : *MLI)
392 if (!L->getParentLoop()) {
393 bool L0Used = false;
394 bool L1Used = false;
395 Changed |= convertToHardwareLoop(L, L0Used, L1Used);
398 return Changed;
401 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
402 unsigned &Reg,
403 int64_t &IVBump,
404 MachineInstr *&IVOp
405 ) const {
406 MachineBasicBlock *Header = L->getHeader();
407 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
408 MachineBasicBlock *Latch = L->getLoopLatch();
409 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
410 if (!Header || !Preheader || !Latch || !ExitingBlock)
411 return false;
413 // This pair represents an induction register together with an immediate
414 // value that will be added to it in each loop iteration.
415 using RegisterBump = std::pair<unsigned, int64_t>;
417 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
418 // from an induction operation
419 // R.next = R + bump
420 // where bump is an immediate value.
421 using InductionMap = std::map<unsigned, RegisterBump>;
423 InductionMap IndMap;
425 using instr_iterator = MachineBasicBlock::instr_iterator;
427 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
428 I != E && I->isPHI(); ++I) {
429 MachineInstr *Phi = &*I;
431 // Have a PHI instruction. Get the operand that corresponds to the
432 // latch block, and see if is a result of an addition of form "reg+imm",
433 // where the "reg" is defined by the PHI node we are looking at.
434 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
435 if (Phi->getOperand(i+1).getMBB() != Latch)
436 continue;
438 unsigned PhiOpReg = Phi->getOperand(i).getReg();
439 MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
441 if (DI->getDesc().isAdd()) {
442 // If the register operand to the add is the PHI we're looking at, this
443 // meets the induction pattern.
444 unsigned IndReg = DI->getOperand(1).getReg();
445 MachineOperand &Opnd2 = DI->getOperand(2);
446 int64_t V;
447 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
448 unsigned UpdReg = DI->getOperand(0).getReg();
449 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
452 } // for (i)
453 } // for (instr)
455 SmallVector<MachineOperand,2> Cond;
456 MachineBasicBlock *TB = nullptr, *FB = nullptr;
457 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
458 if (NotAnalyzed)
459 return false;
461 unsigned PredR, PredPos, PredRegFlags;
462 if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
463 return false;
465 MachineInstr *PredI = MRI->getVRegDef(PredR);
466 if (!PredI->isCompare())
467 return false;
469 unsigned CmpReg1 = 0, CmpReg2 = 0;
470 int CmpImm = 0, CmpMask = 0;
471 bool CmpAnalyzed =
472 TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
473 // Fail if the compare was not analyzed, or it's not comparing a register
474 // with an immediate value. Not checking the mask here, since we handle
475 // the individual compare opcodes (including A4_cmpb*) later on.
476 if (!CmpAnalyzed)
477 return false;
479 // Exactly one of the input registers to the comparison should be among
480 // the induction registers.
481 InductionMap::iterator IndMapEnd = IndMap.end();
482 InductionMap::iterator F = IndMapEnd;
483 if (CmpReg1 != 0) {
484 InductionMap::iterator F1 = IndMap.find(CmpReg1);
485 if (F1 != IndMapEnd)
486 F = F1;
488 if (CmpReg2 != 0) {
489 InductionMap::iterator F2 = IndMap.find(CmpReg2);
490 if (F2 != IndMapEnd) {
491 if (F != IndMapEnd)
492 return false;
493 F = F2;
496 if (F == IndMapEnd)
497 return false;
499 Reg = F->second.first;
500 IVBump = F->second.second;
501 IVOp = MRI->getVRegDef(F->first);
502 return true;
505 // Return the comparison kind for the specified opcode.
506 HexagonHardwareLoops::Comparison::Kind
507 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
508 MachineOperand *InitialValue,
509 const MachineOperand *EndValue,
510 int64_t IVBump) const {
511 Comparison::Kind Cmp = (Comparison::Kind)0;
512 switch (CondOpc) {
513 case Hexagon::C2_cmpeq:
514 case Hexagon::C2_cmpeqi:
515 case Hexagon::C2_cmpeqp:
516 Cmp = Comparison::EQ;
517 break;
518 case Hexagon::C4_cmpneq:
519 case Hexagon::C4_cmpneqi:
520 Cmp = Comparison::NE;
521 break;
522 case Hexagon::C2_cmplt:
523 Cmp = Comparison::LTs;
524 break;
525 case Hexagon::C2_cmpltu:
526 Cmp = Comparison::LTu;
527 break;
528 case Hexagon::C4_cmplte:
529 case Hexagon::C4_cmpltei:
530 Cmp = Comparison::LEs;
531 break;
532 case Hexagon::C4_cmplteu:
533 case Hexagon::C4_cmplteui:
534 Cmp = Comparison::LEu;
535 break;
536 case Hexagon::C2_cmpgt:
537 case Hexagon::C2_cmpgti:
538 case Hexagon::C2_cmpgtp:
539 Cmp = Comparison::GTs;
540 break;
541 case Hexagon::C2_cmpgtu:
542 case Hexagon::C2_cmpgtui:
543 case Hexagon::C2_cmpgtup:
544 Cmp = Comparison::GTu;
545 break;
546 case Hexagon::C2_cmpgei:
547 Cmp = Comparison::GEs;
548 break;
549 case Hexagon::C2_cmpgeui:
550 Cmp = Comparison::GEs;
551 break;
552 default:
553 return (Comparison::Kind)0;
555 return Cmp;
558 /// Analyze the statements in a loop to determine if the loop has
559 /// a computable trip count and, if so, return a value that represents
560 /// the trip count expression.
562 /// This function iterates over the phi nodes in the loop to check for
563 /// induction variable patterns that are used in the calculation for
564 /// the number of time the loop is executed.
565 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
566 SmallVectorImpl<MachineInstr *> &OldInsts) {
567 MachineBasicBlock *TopMBB = L->getTopBlock();
568 MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
569 assert(PI != TopMBB->pred_end() &&
570 "Loop must have more than one incoming edge!");
571 MachineBasicBlock *Backedge = *PI++;
572 if (PI == TopMBB->pred_end()) // dead loop?
573 return nullptr;
574 MachineBasicBlock *Incoming = *PI++;
575 if (PI != TopMBB->pred_end()) // multiple backedges?
576 return nullptr;
578 // Make sure there is one incoming and one backedge and determine which
579 // is which.
580 if (L->contains(Incoming)) {
581 if (L->contains(Backedge))
582 return nullptr;
583 std::swap(Incoming, Backedge);
584 } else if (!L->contains(Backedge))
585 return nullptr;
587 // Look for the cmp instruction to determine if we can get a useful trip
588 // count. The trip count can be either a register or an immediate. The
589 // location of the value depends upon the type (reg or imm).
590 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
591 if (!ExitingBlock)
592 return nullptr;
594 unsigned IVReg = 0;
595 int64_t IVBump = 0;
596 MachineInstr *IVOp;
597 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
598 if (!FoundIV)
599 return nullptr;
601 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
603 MachineOperand *InitialValue = nullptr;
604 MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
605 MachineBasicBlock *Latch = L->getLoopLatch();
606 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
607 MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
608 if (MBB == Preheader)
609 InitialValue = &IV_Phi->getOperand(i);
610 else if (MBB == Latch)
611 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
613 if (!InitialValue)
614 return nullptr;
616 SmallVector<MachineOperand,2> Cond;
617 MachineBasicBlock *TB = nullptr, *FB = nullptr;
618 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
619 if (NotAnalyzed)
620 return nullptr;
622 MachineBasicBlock *Header = L->getHeader();
623 // TB must be non-null. If FB is also non-null, one of them must be
624 // the header. Otherwise, branch to TB could be exiting the loop, and
625 // the fall through can go to the header.
626 assert (TB && "Exit block without a branch?");
627 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
628 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
629 SmallVector<MachineOperand,2> LCond;
630 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
631 if (NotAnalyzed)
632 return nullptr;
633 if (TB == Latch)
634 TB = (LTB == Header) ? LTB : LFB;
635 else
636 FB = (LTB == Header) ? LTB: LFB;
638 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
639 if (!TB || (FB && TB != Header && FB != Header))
640 return nullptr;
642 // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
643 // to put imm(0), followed by P in the vector Cond.
644 // If TB is not the header, it means that the "not-taken" path must lead
645 // to the header.
646 bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
647 unsigned PredReg, PredPos, PredRegFlags;
648 if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
649 return nullptr;
650 MachineInstr *CondI = MRI->getVRegDef(PredReg);
651 unsigned CondOpc = CondI->getOpcode();
653 unsigned CmpReg1 = 0, CmpReg2 = 0;
654 int Mask = 0, ImmValue = 0;
655 bool AnalyzedCmp =
656 TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
657 if (!AnalyzedCmp)
658 return nullptr;
660 // The comparison operator type determines how we compute the loop
661 // trip count.
662 OldInsts.push_back(CondI);
663 OldInsts.push_back(IVOp);
665 // Sadly, the following code gets information based on the position
666 // of the operands in the compare instruction. This has to be done
667 // this way, because the comparisons check for a specific relationship
668 // between the operands (e.g. is-less-than), rather than to find out
669 // what relationship the operands are in (as on PPC).
670 Comparison::Kind Cmp;
671 bool isSwapped = false;
672 const MachineOperand &Op1 = CondI->getOperand(1);
673 const MachineOperand &Op2 = CondI->getOperand(2);
674 const MachineOperand *EndValue = nullptr;
676 if (Op1.isReg()) {
677 if (Op2.isImm() || Op1.getReg() == IVReg)
678 EndValue = &Op2;
679 else {
680 EndValue = &Op1;
681 isSwapped = true;
685 if (!EndValue)
686 return nullptr;
688 Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
689 if (!Cmp)
690 return nullptr;
691 if (Negated)
692 Cmp = Comparison::getNegatedComparison(Cmp);
693 if (isSwapped)
694 Cmp = Comparison::getSwappedComparison(Cmp);
696 if (InitialValue->isReg()) {
697 unsigned R = InitialValue->getReg();
698 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
699 if (!MDT->properlyDominates(DefBB, Header)) {
700 int64_t V;
701 if (!checkForImmediate(*InitialValue, V))
702 return nullptr;
704 OldInsts.push_back(MRI->getVRegDef(R));
706 if (EndValue->isReg()) {
707 unsigned R = EndValue->getReg();
708 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
709 if (!MDT->properlyDominates(DefBB, Header)) {
710 int64_t V;
711 if (!checkForImmediate(*EndValue, V))
712 return nullptr;
714 OldInsts.push_back(MRI->getVRegDef(R));
717 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
720 /// Helper function that returns the expression that represents the
721 /// number of times a loop iterates. The function takes the operands that
722 /// represent the loop start value, loop end value, and induction value.
723 /// Based upon these operands, the function attempts to compute the trip count.
724 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
725 const MachineOperand *Start,
726 const MachineOperand *End,
727 unsigned IVReg,
728 int64_t IVBump,
729 Comparison::Kind Cmp) const {
730 // Cannot handle comparison EQ, i.e. while (A == B).
731 if (Cmp == Comparison::EQ)
732 return nullptr;
734 // Check if either the start or end values are an assignment of an immediate.
735 // If so, use the immediate value rather than the register.
736 if (Start->isReg()) {
737 const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
738 if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
739 StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
740 Start = &StartValInstr->getOperand(1);
742 if (End->isReg()) {
743 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
744 if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
745 EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
746 End = &EndValInstr->getOperand(1);
749 if (!Start->isReg() && !Start->isImm())
750 return nullptr;
751 if (!End->isReg() && !End->isImm())
752 return nullptr;
754 bool CmpLess = Cmp & Comparison::L;
755 bool CmpGreater = Cmp & Comparison::G;
756 bool CmpHasEqual = Cmp & Comparison::EQ;
758 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
759 if (CmpLess && IVBump < 0)
760 // Loop going while iv is "less" with the iv value going down. Must wrap.
761 return nullptr;
763 if (CmpGreater && IVBump > 0)
764 // Loop going while iv is "greater" with the iv value going up. Must wrap.
765 return nullptr;
767 // Phis that may feed into the loop.
768 LoopFeederMap LoopFeederPhi;
770 // Check if the initial value may be zero and can be decremented in the first
771 // iteration. If the value is zero, the endloop instruction will not decrement
772 // the loop counter, so we shouldn't generate a hardware loop in this case.
773 if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
774 LoopFeederPhi))
775 return nullptr;
777 if (Start->isImm() && End->isImm()) {
778 // Both, start and end are immediates.
779 int64_t StartV = Start->getImm();
780 int64_t EndV = End->getImm();
781 int64_t Dist = EndV - StartV;
782 if (Dist == 0)
783 return nullptr;
785 bool Exact = (Dist % IVBump) == 0;
787 if (Cmp == Comparison::NE) {
788 if (!Exact)
789 return nullptr;
790 if ((Dist < 0) ^ (IVBump < 0))
791 return nullptr;
794 // For comparisons that include the final value (i.e. include equality
795 // with the final value), we need to increase the distance by 1.
796 if (CmpHasEqual)
797 Dist = Dist > 0 ? Dist+1 : Dist-1;
799 // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
800 // CmpGreater should imply Dist < 0. These conditions could actually
801 // fail, for example, in unreachable code (which may still appear to be
802 // reachable in the CFG).
803 if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
804 return nullptr;
806 // "Normalized" distance, i.e. with the bump set to +-1.
807 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
808 : (-Dist + (-IVBump - 1)) / (-IVBump);
809 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
811 uint64_t Count = Dist1;
813 if (Count > 0xFFFFFFFFULL)
814 return nullptr;
816 return new CountValue(CountValue::CV_Immediate, Count);
819 // A general case: Start and End are some values, but the actual
820 // iteration count may not be available. If it is not, insert
821 // a computation of it into the preheader.
823 // If the induction variable bump is not a power of 2, quit.
824 // Othwerise we'd need a general integer division.
825 if (!isPowerOf2_64(std::abs(IVBump)))
826 return nullptr;
828 MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
829 assert (PH && "Should have a preheader by now");
830 MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
831 DebugLoc DL;
832 if (InsertPos != PH->end())
833 DL = InsertPos->getDebugLoc();
835 // If Start is an immediate and End is a register, the trip count
836 // will be "reg - imm". Hexagon's "subtract immediate" instruction
837 // is actually "reg + -imm".
839 // If the loop IV is going downwards, i.e. if the bump is negative,
840 // then the iteration count (computed as End-Start) will need to be
841 // negated. To avoid the negation, just swap Start and End.
842 if (IVBump < 0) {
843 std::swap(Start, End);
844 IVBump = -IVBump;
846 // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
847 // Signedness, and "including equality" are preserved.
849 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
850 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
852 int64_t StartV = 0, EndV = 0;
853 if (Start->isImm())
854 StartV = Start->getImm();
855 if (End->isImm())
856 EndV = End->getImm();
858 int64_t AdjV = 0;
859 // To compute the iteration count, we would need this computation:
860 // Count = (End - Start + (IVBump-1)) / IVBump
861 // or, when CmpHasEqual:
862 // Count = (End - Start + (IVBump-1)+1) / IVBump
863 // The "IVBump-1" part is the adjustment (AdjV). We can avoid
864 // generating an instruction specifically to add it if we can adjust
865 // the immediate values for Start or End.
867 if (CmpHasEqual) {
868 // Need to add 1 to the total iteration count.
869 if (Start->isImm())
870 StartV--;
871 else if (End->isImm())
872 EndV++;
873 else
874 AdjV += 1;
877 if (Cmp != Comparison::NE) {
878 if (Start->isImm())
879 StartV -= (IVBump-1);
880 else if (End->isImm())
881 EndV += (IVBump-1);
882 else
883 AdjV += (IVBump-1);
886 unsigned R = 0, SR = 0;
887 if (Start->isReg()) {
888 R = Start->getReg();
889 SR = Start->getSubReg();
890 } else {
891 R = End->getReg();
892 SR = End->getSubReg();
894 const TargetRegisterClass *RC = MRI->getRegClass(R);
895 // Hardware loops cannot handle 64-bit registers. If it's a double
896 // register, it has to have a subregister.
897 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
898 return nullptr;
899 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
901 // Compute DistR (register with the distance between Start and End).
902 unsigned DistR, DistSR;
904 // Avoid special case, where the start value is an imm(0).
905 if (Start->isImm() && StartV == 0) {
906 DistR = End->getReg();
907 DistSR = End->getSubReg();
908 } else {
909 const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
910 (RegToImm ? TII->get(Hexagon::A2_subri) :
911 TII->get(Hexagon::A2_addi));
912 if (RegToReg || RegToImm) {
913 unsigned SubR = MRI->createVirtualRegister(IntRC);
914 MachineInstrBuilder SubIB =
915 BuildMI(*PH, InsertPos, DL, SubD, SubR);
917 if (RegToReg)
918 SubIB.addReg(End->getReg(), 0, End->getSubReg())
919 .addReg(Start->getReg(), 0, Start->getSubReg());
920 else
921 SubIB.addImm(EndV)
922 .addReg(Start->getReg(), 0, Start->getSubReg());
923 DistR = SubR;
924 } else {
925 // If the loop has been unrolled, we should use the original loop count
926 // instead of recalculating the value. This will avoid additional
927 // 'Add' instruction.
928 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
929 if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
930 EndValInstr->getOperand(1).getSubReg() == 0 &&
931 EndValInstr->getOperand(2).getImm() == StartV) {
932 DistR = EndValInstr->getOperand(1).getReg();
933 } else {
934 unsigned SubR = MRI->createVirtualRegister(IntRC);
935 MachineInstrBuilder SubIB =
936 BuildMI(*PH, InsertPos, DL, SubD, SubR);
937 SubIB.addReg(End->getReg(), 0, End->getSubReg())
938 .addImm(-StartV);
939 DistR = SubR;
942 DistSR = 0;
945 // From DistR, compute AdjR (register with the adjusted distance).
946 unsigned AdjR, AdjSR;
948 if (AdjV == 0) {
949 AdjR = DistR;
950 AdjSR = DistSR;
951 } else {
952 // Generate CountR = ADD DistR, AdjVal
953 unsigned AddR = MRI->createVirtualRegister(IntRC);
954 MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
955 BuildMI(*PH, InsertPos, DL, AddD, AddR)
956 .addReg(DistR, 0, DistSR)
957 .addImm(AdjV);
959 AdjR = AddR;
960 AdjSR = 0;
963 // From AdjR, compute CountR (register with the final count).
964 unsigned CountR, CountSR;
966 if (IVBump == 1) {
967 CountR = AdjR;
968 CountSR = AdjSR;
969 } else {
970 // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
971 unsigned Shift = Log2_32(IVBump);
973 // Generate NormR = LSR DistR, Shift.
974 unsigned LsrR = MRI->createVirtualRegister(IntRC);
975 const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
976 BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
977 .addReg(AdjR, 0, AdjSR)
978 .addImm(Shift);
980 CountR = LsrR;
981 CountSR = 0;
984 return new CountValue(CountValue::CV_Register, CountR, CountSR);
987 /// Return true if the operation is invalid within hardware loop.
988 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
989 bool IsInnerHWLoop) const {
990 // Call is not allowed because the callee may use a hardware loop except for
991 // the case when the call never returns.
992 if (MI->getDesc().isCall())
993 return !TII->doesNotReturn(*MI);
995 // Check if the instruction defines a hardware loop register.
996 using namespace Hexagon;
998 static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
999 static const unsigned Regs1[] = { LC1, SA1 };
1000 auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
1001 : makeArrayRef(Regs1, array_lengthof(Regs1));
1002 for (unsigned R : CheckRegs)
1003 if (MI->modifiesRegister(R, TRI))
1004 return true;
1006 return false;
1009 /// Return true if the loop contains an instruction that inhibits
1010 /// the use of the hardware loop instruction.
1011 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1012 bool IsInnerHWLoop) const {
1013 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1014 << printMBBReference(**L->block_begin()));
1015 for (MachineBasicBlock *MBB : L->getBlocks()) {
1016 for (MachineBasicBlock::iterator
1017 MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
1018 const MachineInstr *MI = &*MII;
1019 if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
1020 LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1021 MI->dump(););
1022 return true;
1026 return false;
1029 /// Returns true if the instruction is dead. This was essentially
1030 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1031 /// for inline asm, physical registers and instructions with side effects
1032 /// removed.
1033 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1034 SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1035 // Examine each operand.
1036 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1037 const MachineOperand &MO = MI->getOperand(i);
1038 if (!MO.isReg() || !MO.isDef())
1039 continue;
1041 unsigned Reg = MO.getReg();
1042 if (MRI->use_nodbg_empty(Reg))
1043 continue;
1045 using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1047 // This instruction has users, but if the only user is the phi node for the
1048 // parent block, and the only use of that phi node is this instruction, then
1049 // this instruction is dead: both it (and the phi node) can be removed.
1050 use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1051 use_nodbg_iterator End = MRI->use_nodbg_end();
1052 if (std::next(I) != End || !I->getParent()->isPHI())
1053 return false;
1055 MachineInstr *OnePhi = I->getParent();
1056 for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1057 const MachineOperand &OPO = OnePhi->getOperand(j);
1058 if (!OPO.isReg() || !OPO.isDef())
1059 continue;
1061 unsigned OPReg = OPO.getReg();
1062 use_nodbg_iterator nextJ;
1063 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1064 J != End; J = nextJ) {
1065 nextJ = std::next(J);
1066 MachineOperand &Use = *J;
1067 MachineInstr *UseMI = Use.getParent();
1069 // If the phi node has a user that is not MI, bail.
1070 if (MI != UseMI)
1071 return false;
1074 DeadPhis.push_back(OnePhi);
1077 // If there are no defs with uses, the instruction is dead.
1078 return true;
1081 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1082 // This procedure was essentially copied from DeadMachineInstructionElim.
1084 SmallVector<MachineInstr*, 1> DeadPhis;
1085 if (isDead(MI, DeadPhis)) {
1086 LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1088 // It is possible that some DBG_VALUE instructions refer to this
1089 // instruction. Examine each def operand for such references;
1090 // if found, mark the DBG_VALUE as undef (but don't delete it).
1091 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1092 const MachineOperand &MO = MI->getOperand(i);
1093 if (!MO.isReg() || !MO.isDef())
1094 continue;
1095 unsigned Reg = MO.getReg();
1096 MachineRegisterInfo::use_iterator nextI;
1097 for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1098 E = MRI->use_end(); I != E; I = nextI) {
1099 nextI = std::next(I); // I is invalidated by the setReg
1100 MachineOperand &Use = *I;
1101 MachineInstr *UseMI = I->getParent();
1102 if (UseMI == MI)
1103 continue;
1104 if (Use.isDebug())
1105 UseMI->getOperand(0).setReg(0U);
1109 MI->eraseFromParent();
1110 for (unsigned i = 0; i < DeadPhis.size(); ++i)
1111 DeadPhis[i]->eraseFromParent();
1115 /// Check if the loop is a candidate for converting to a hardware
1116 /// loop. If so, then perform the transformation.
1118 /// This function works on innermost loops first. A loop can be converted
1119 /// if it is a counting loop; either a register value or an immediate.
1121 /// The code makes several assumptions about the representation of the loop
1122 /// in llvm.
1123 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1124 bool &RecL0used,
1125 bool &RecL1used) {
1126 // This is just for sanity.
1127 assert(L->getHeader() && "Loop without a header?");
1129 bool Changed = false;
1130 bool L0Used = false;
1131 bool L1Used = false;
1133 // Process nested loops first.
1134 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1135 Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1136 L0Used |= RecL0used;
1137 L1Used |= RecL1used;
1140 // If a nested loop has been converted, then we can't convert this loop.
1141 if (Changed && L0Used && L1Used)
1142 return Changed;
1144 unsigned LOOP_i;
1145 unsigned LOOP_r;
1146 unsigned ENDLOOP;
1148 // Flag used to track loopN instruction:
1149 // 1 - Hardware loop is being generated for the inner most loop.
1150 // 0 - Hardware loop is being generated for the outer loop.
1151 unsigned IsInnerHWLoop = 1;
1153 if (L0Used) {
1154 LOOP_i = Hexagon::J2_loop1i;
1155 LOOP_r = Hexagon::J2_loop1r;
1156 ENDLOOP = Hexagon::ENDLOOP1;
1157 IsInnerHWLoop = 0;
1158 } else {
1159 LOOP_i = Hexagon::J2_loop0i;
1160 LOOP_r = Hexagon::J2_loop0r;
1161 ENDLOOP = Hexagon::ENDLOOP0;
1164 #ifndef NDEBUG
1165 // Stop trying after reaching the limit (if any).
1166 int Limit = HWLoopLimit;
1167 if (Limit >= 0) {
1168 if (Counter >= HWLoopLimit)
1169 return false;
1170 Counter++;
1172 #endif
1174 // Does the loop contain any invalid instructions?
1175 if (containsInvalidInstruction(L, IsInnerHWLoop))
1176 return false;
1178 MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1179 // Don't generate hw loop if the loop has more than one exit.
1180 if (!LastMBB)
1181 return false;
1183 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1184 if (LastI == LastMBB->end())
1185 return false;
1187 // Is the induction variable bump feeding the latch condition?
1188 if (!fixupInductionVariable(L))
1189 return false;
1191 // Ensure the loop has a preheader: the loop instruction will be
1192 // placed there.
1193 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1194 if (!Preheader) {
1195 Preheader = createPreheaderForLoop(L);
1196 if (!Preheader)
1197 return false;
1200 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1202 SmallVector<MachineInstr*, 2> OldInsts;
1203 // Are we able to determine the trip count for the loop?
1204 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1205 if (!TripCount)
1206 return false;
1208 // Is the trip count available in the preheader?
1209 if (TripCount->isReg()) {
1210 // There will be a use of the register inserted into the preheader,
1211 // so make sure that the register is actually defined at that point.
1212 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1213 MachineBasicBlock *BBDef = TCDef->getParent();
1214 if (!MDT->dominates(BBDef, Preheader))
1215 return false;
1218 // Determine the loop start.
1219 MachineBasicBlock *TopBlock = L->getTopBlock();
1220 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1221 MachineBasicBlock *LoopStart = nullptr;
1222 if (ExitingBlock != L->getLoopLatch()) {
1223 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1224 SmallVector<MachineOperand, 2> Cond;
1226 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1227 return false;
1229 if (L->contains(TB))
1230 LoopStart = TB;
1231 else if (L->contains(FB))
1232 LoopStart = FB;
1233 else
1234 return false;
1236 else
1237 LoopStart = TopBlock;
1239 // Convert the loop to a hardware loop.
1240 LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1241 DebugLoc DL;
1242 if (InsertPos != Preheader->end())
1243 DL = InsertPos->getDebugLoc();
1245 if (TripCount->isReg()) {
1246 // Create a copy of the loop count register.
1247 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1248 BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1249 .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1250 // Add the Loop instruction to the beginning of the loop.
1251 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1252 .addReg(CountReg);
1253 } else {
1254 assert(TripCount->isImm() && "Expecting immediate value for trip count");
1255 // Add the Loop immediate instruction to the beginning of the loop,
1256 // if the immediate fits in the instructions. Otherwise, we need to
1257 // create a new virtual register.
1258 int64_t CountImm = TripCount->getImm();
1259 if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1260 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1261 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1262 .addImm(CountImm);
1263 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1264 .addMBB(LoopStart).addReg(CountReg);
1265 } else
1266 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1267 .addMBB(LoopStart).addImm(CountImm);
1270 // Make sure the loop start always has a reference in the CFG. We need
1271 // to create a BlockAddress operand to get this mechanism to work both the
1272 // MachineBasicBlock and BasicBlock objects need the flag set.
1273 LoopStart->setHasAddressTaken();
1274 // This line is needed to set the hasAddressTaken flag on the BasicBlock
1275 // object.
1276 BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1278 // Replace the loop branch with an endloop instruction.
1279 DebugLoc LastIDL = LastI->getDebugLoc();
1280 BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1282 // The loop ends with either:
1283 // - a conditional branch followed by an unconditional branch, or
1284 // - a conditional branch to the loop start.
1285 if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1286 LastI->getOpcode() == Hexagon::J2_jumpf) {
1287 // Delete one and change/add an uncond. branch to out of the loop.
1288 MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1289 LastI = LastMBB->erase(LastI);
1290 if (!L->contains(BranchTarget)) {
1291 if (LastI != LastMBB->end())
1292 LastI = LastMBB->erase(LastI);
1293 SmallVector<MachineOperand, 0> Cond;
1294 TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1296 } else {
1297 // Conditional branch to loop start; just delete it.
1298 LastMBB->erase(LastI);
1300 delete TripCount;
1302 // The induction operation and the comparison may now be
1303 // unneeded. If these are unneeded, then remove them.
1304 for (unsigned i = 0; i < OldInsts.size(); ++i)
1305 removeIfDead(OldInsts[i]);
1307 ++NumHWLoops;
1309 // Set RecL1used and RecL0used only after hardware loop has been
1310 // successfully generated. Doing it earlier can cause wrong loop instruction
1311 // to be used.
1312 if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1313 RecL1used = true;
1314 else
1315 RecL0used = true;
1317 return true;
1320 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1321 MachineInstr *CmpI) {
1322 assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1324 MachineBasicBlock *BB = BumpI->getParent();
1325 if (CmpI->getParent() != BB)
1326 return false;
1328 using instr_iterator = MachineBasicBlock::instr_iterator;
1330 // Check if things are in order to begin with.
1331 for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1332 if (&*I == CmpI)
1333 return true;
1335 // Out of order.
1336 unsigned PredR = CmpI->getOperand(0).getReg();
1337 bool FoundBump = false;
1338 instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1339 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1340 MachineInstr *In = &*I;
1341 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1342 MachineOperand &MO = In->getOperand(i);
1343 if (MO.isReg() && MO.isUse()) {
1344 if (MO.getReg() == PredR) // Found an intervening use of PredR.
1345 return false;
1349 if (In == BumpI) {
1350 BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1351 FoundBump = true;
1352 break;
1355 assert (FoundBump && "Cannot determine instruction order");
1356 return FoundBump;
1359 /// This function is required to break recursion. Visiting phis in a loop may
1360 /// result in recursion during compilation. We break the recursion by making
1361 /// sure that we visit a MachineOperand and its definition in a
1362 /// MachineInstruction only once. If we attempt to visit more than once, then
1363 /// there is recursion, and will return false.
1364 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1365 MachineInstr *MI,
1366 const MachineOperand *MO,
1367 LoopFeederMap &LoopFeederPhi) const {
1368 if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1369 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1370 << printMBBReference(**L->block_begin()));
1371 // Ignore all BBs that form Loop.
1372 for (MachineBasicBlock *MBB : L->getBlocks()) {
1373 if (A == MBB)
1374 return false;
1376 MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1377 LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1378 return true;
1379 } else
1380 // Already visited node.
1381 return false;
1384 /// Return true if a Phi may generate a value that can underflow.
1385 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1386 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1387 MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1388 MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1389 assert(Phi->isPHI() && "Expecting a Phi.");
1390 // Walk through each Phi, and its used operands. Make sure that
1391 // if there is recursion in Phi, we won't generate hardware loops.
1392 for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1393 if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1394 if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1395 Phi->getParent(), L, LoopFeederPhi))
1396 return true;
1397 return false;
1400 /// Return true if the induction variable can underflow in the first iteration.
1401 /// An example, is an initial unsigned value that is 0 and is decrement in the
1402 /// first itertion of a do-while loop. In this case, we cannot generate a
1403 /// hardware loop because the endloop instruction does not decrement the loop
1404 /// counter if it is <= 1. We only need to perform this analysis if the
1405 /// initial value is a register.
1407 /// This function assumes the initial value may underfow unless proven
1408 /// otherwise. If the type is signed, then we don't care because signed
1409 /// underflow is undefined. We attempt to prove the initial value is not
1410 /// zero by perfoming a crude analysis of the loop counter. This function
1411 /// checks if the initial value is used in any comparison prior to the loop
1412 /// and, if so, assumes the comparison is a range check. This is inexact,
1413 /// but will catch the simple cases.
1414 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1415 const MachineOperand *InitVal, const MachineOperand *EndVal,
1416 MachineBasicBlock *MBB, MachineLoop *L,
1417 LoopFeederMap &LoopFeederPhi) const {
1418 // Only check register values since they are unknown.
1419 if (!InitVal->isReg())
1420 return false;
1422 if (!EndVal->isImm())
1423 return false;
1425 // A register value that is assigned an immediate is a known value, and it
1426 // won't underflow in the first iteration.
1427 int64_t Imm;
1428 if (checkForImmediate(*InitVal, Imm))
1429 return (EndVal->getImm() == Imm);
1431 unsigned Reg = InitVal->getReg();
1433 // We don't know the value of a physical register.
1434 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1435 return true;
1437 MachineInstr *Def = MRI->getVRegDef(Reg);
1438 if (!Def)
1439 return true;
1441 // If the initial value is a Phi or copy and the operands may not underflow,
1442 // then the definition cannot be underflow either.
1443 if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1444 L, LoopFeederPhi))
1445 return false;
1446 if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1447 EndVal, Def->getParent(),
1448 L, LoopFeederPhi))
1449 return false;
1451 // Iterate over the uses of the initial value. If the initial value is used
1452 // in a compare, then we assume this is a range check that ensures the loop
1453 // doesn't underflow. This is not an exact test and should be improved.
1454 for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1455 E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1456 MachineInstr *MI = &*I;
1457 unsigned CmpReg1 = 0, CmpReg2 = 0;
1458 int CmpMask = 0, CmpValue = 0;
1460 if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1461 continue;
1463 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1464 SmallVector<MachineOperand, 2> Cond;
1465 if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1466 continue;
1468 Comparison::Kind Cmp =
1469 getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1470 if (Cmp == 0)
1471 continue;
1472 if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1473 Cmp = Comparison::getNegatedComparison(Cmp);
1474 if (CmpReg2 != 0 && CmpReg2 == Reg)
1475 Cmp = Comparison::getSwappedComparison(Cmp);
1477 // Signed underflow is undefined.
1478 if (Comparison::isSigned(Cmp))
1479 return false;
1481 // Check if there is a comparison of the initial value. If the initial value
1482 // is greater than or not equal to another value, then assume this is a
1483 // range check.
1484 if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1485 return false;
1488 // OK - this is a hack that needs to be improved. We really need to analyze
1489 // the instructions performed on the initial value. This works on the simplest
1490 // cases only.
1491 if (!Def->isCopy() && !Def->isPHI())
1492 return false;
1494 return true;
1497 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1498 int64_t &Val) const {
1499 if (MO.isImm()) {
1500 Val = MO.getImm();
1501 return true;
1503 if (!MO.isReg())
1504 return false;
1506 // MO is a register. Check whether it is defined as an immediate value,
1507 // and if so, get the value of it in TV. That value will then need to be
1508 // processed to handle potential subregisters in MO.
1509 int64_t TV;
1511 unsigned R = MO.getReg();
1512 if (!TargetRegisterInfo::isVirtualRegister(R))
1513 return false;
1514 MachineInstr *DI = MRI->getVRegDef(R);
1515 unsigned DOpc = DI->getOpcode();
1516 switch (DOpc) {
1517 case TargetOpcode::COPY:
1518 case Hexagon::A2_tfrsi:
1519 case Hexagon::A2_tfrpi:
1520 case Hexagon::CONST32:
1521 case Hexagon::CONST64:
1522 // Call recursively to avoid an extra check whether operand(1) is
1523 // indeed an immediate (it could be a global address, for example),
1524 // plus we can handle COPY at the same time.
1525 if (!checkForImmediate(DI->getOperand(1), TV))
1526 return false;
1527 break;
1528 case Hexagon::A2_combineii:
1529 case Hexagon::A4_combineir:
1530 case Hexagon::A4_combineii:
1531 case Hexagon::A4_combineri:
1532 case Hexagon::A2_combinew: {
1533 const MachineOperand &S1 = DI->getOperand(1);
1534 const MachineOperand &S2 = DI->getOperand(2);
1535 int64_t V1, V2;
1536 if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1537 return false;
1538 TV = V2 | (static_cast<uint64_t>(V1) << 32);
1539 break;
1541 case TargetOpcode::REG_SEQUENCE: {
1542 const MachineOperand &S1 = DI->getOperand(1);
1543 const MachineOperand &S3 = DI->getOperand(3);
1544 int64_t V1, V3;
1545 if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1546 return false;
1547 unsigned Sub2 = DI->getOperand(2).getImm();
1548 unsigned Sub4 = DI->getOperand(4).getImm();
1549 if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1550 TV = V1 | (V3 << 32);
1551 else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1552 TV = V3 | (V1 << 32);
1553 else
1554 llvm_unreachable("Unexpected form of REG_SEQUENCE");
1555 break;
1558 default:
1559 return false;
1562 // By now, we should have successfully obtained the immediate value defining
1563 // the register referenced in MO. Handle a potential use of a subregister.
1564 switch (MO.getSubReg()) {
1565 case Hexagon::isub_lo:
1566 Val = TV & 0xFFFFFFFFULL;
1567 break;
1568 case Hexagon::isub_hi:
1569 Val = (TV >> 32) & 0xFFFFFFFFULL;
1570 break;
1571 default:
1572 Val = TV;
1573 break;
1575 return true;
1578 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1579 if (MO.isImm()) {
1580 MO.setImm(Val);
1581 return;
1584 assert(MO.isReg());
1585 unsigned R = MO.getReg();
1586 MachineInstr *DI = MRI->getVRegDef(R);
1588 const TargetRegisterClass *RC = MRI->getRegClass(R);
1589 unsigned NewR = MRI->createVirtualRegister(RC);
1590 MachineBasicBlock &B = *DI->getParent();
1591 DebugLoc DL = DI->getDebugLoc();
1592 BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1593 MO.setReg(NewR);
1596 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1597 // These two instructions are not extendable.
1598 if (CmpOpc == Hexagon::A4_cmpbeqi)
1599 return isUInt<8>(Imm);
1600 if (CmpOpc == Hexagon::A4_cmpbgti)
1601 return isInt<8>(Imm);
1602 // The rest of the comparison-with-immediate instructions are extendable.
1603 return true;
1606 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1607 MachineBasicBlock *Header = L->getHeader();
1608 MachineBasicBlock *Latch = L->getLoopLatch();
1609 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1611 if (!(Header && Latch && ExitingBlock))
1612 return false;
1614 // These data structures follow the same concept as the corresponding
1615 // ones in findInductionRegister (where some comments are).
1616 using RegisterBump = std::pair<unsigned, int64_t>;
1617 using RegisterInduction = std::pair<unsigned, RegisterBump>;
1618 using RegisterInductionSet = std::set<RegisterInduction>;
1620 // Register candidates for induction variables, with their associated bumps.
1621 RegisterInductionSet IndRegs;
1623 // Look for induction patterns:
1624 // %1 = PHI ..., [ latch, %2 ]
1625 // %2 = ADD %1, imm
1626 using instr_iterator = MachineBasicBlock::instr_iterator;
1628 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1629 I != E && I->isPHI(); ++I) {
1630 MachineInstr *Phi = &*I;
1632 // Have a PHI instruction.
1633 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1634 if (Phi->getOperand(i+1).getMBB() != Latch)
1635 continue;
1637 unsigned PhiReg = Phi->getOperand(i).getReg();
1638 MachineInstr *DI = MRI->getVRegDef(PhiReg);
1640 if (DI->getDesc().isAdd()) {
1641 // If the register operand to the add/sub is the PHI we are looking
1642 // at, this meets the induction pattern.
1643 unsigned IndReg = DI->getOperand(1).getReg();
1644 MachineOperand &Opnd2 = DI->getOperand(2);
1645 int64_t V;
1646 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1647 unsigned UpdReg = DI->getOperand(0).getReg();
1648 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1651 } // for (i)
1652 } // for (instr)
1654 if (IndRegs.empty())
1655 return false;
1657 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1658 SmallVector<MachineOperand,2> Cond;
1659 // AnalyzeBranch returns true if it fails to analyze branch.
1660 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1661 if (NotAnalyzed || Cond.empty())
1662 return false;
1664 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1665 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1666 SmallVector<MachineOperand,2> LCond;
1667 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1668 if (NotAnalyzed)
1669 return false;
1671 // Since latch is not the exiting block, the latch branch should be an
1672 // unconditional branch to the loop header.
1673 if (TB == Latch)
1674 TB = (LTB == Header) ? LTB : LFB;
1675 else
1676 FB = (LTB == Header) ? LTB : LFB;
1678 if (TB != Header) {
1679 if (FB != Header) {
1680 // The latch/exit block does not go back to the header.
1681 return false;
1683 // FB is the header (i.e., uncond. jump to branch header)
1684 // In this case, the LoopBody -> TB should not be a back edge otherwise
1685 // it could result in an infinite loop after conversion to hw_loop.
1686 // This case can happen when the Latch has two jumps like this:
1687 // Jmp_c OuterLoopHeader <-- TB
1688 // Jmp InnerLoopHeader <-- FB
1689 if (MDT->dominates(TB, FB))
1690 return false;
1693 // Expecting a predicate register as a condition. It won't be a hardware
1694 // predicate register at this point yet, just a vreg.
1695 // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1696 // into Cond, followed by the predicate register. For non-negated branches
1697 // it's just the register.
1698 unsigned CSz = Cond.size();
1699 if (CSz != 1 && CSz != 2)
1700 return false;
1702 if (!Cond[CSz-1].isReg())
1703 return false;
1705 unsigned P = Cond[CSz-1].getReg();
1706 MachineInstr *PredDef = MRI->getVRegDef(P);
1708 if (!PredDef->isCompare())
1709 return false;
1711 SmallSet<unsigned,2> CmpRegs;
1712 MachineOperand *CmpImmOp = nullptr;
1714 // Go over all operands to the compare and look for immediate and register
1715 // operands. Assume that if the compare has a single register use and a
1716 // single immediate operand, then the register is being compared with the
1717 // immediate value.
1718 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1719 MachineOperand &MO = PredDef->getOperand(i);
1720 if (MO.isReg()) {
1721 // Skip all implicit references. In one case there was:
1722 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1723 if (MO.isImplicit())
1724 continue;
1725 if (MO.isUse()) {
1726 if (!isImmediate(MO)) {
1727 CmpRegs.insert(MO.getReg());
1728 continue;
1730 // Consider the register to be the "immediate" operand.
1731 if (CmpImmOp)
1732 return false;
1733 CmpImmOp = &MO;
1735 } else if (MO.isImm()) {
1736 if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1737 return false;
1738 CmpImmOp = &MO;
1742 if (CmpRegs.empty())
1743 return false;
1745 // Check if the compared register follows the order we want. Fix if needed.
1746 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1747 I != E; ++I) {
1748 // This is a success. If the register used in the comparison is one that
1749 // we have identified as a bumped (updated) induction register, there is
1750 // nothing to do.
1751 if (CmpRegs.count(I->first))
1752 return true;
1754 // Otherwise, if the register being compared comes out of a PHI node,
1755 // and has been recognized as following the induction pattern, and is
1756 // compared against an immediate, we can fix it.
1757 const RegisterBump &RB = I->second;
1758 if (CmpRegs.count(RB.first)) {
1759 if (!CmpImmOp) {
1760 // If both operands to the compare instruction are registers, see if
1761 // it can be changed to use induction register as one of the operands.
1762 MachineInstr *IndI = nullptr;
1763 MachineInstr *nonIndI = nullptr;
1764 MachineOperand *IndMO = nullptr;
1765 MachineOperand *nonIndMO = nullptr;
1767 for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1768 MachineOperand &MO = PredDef->getOperand(i);
1769 if (MO.isReg() && MO.getReg() == RB.first) {
1770 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1771 << ") = " << *(MRI->getVRegDef(I->first)));
1772 if (IndI)
1773 return false;
1775 IndI = MRI->getVRegDef(I->first);
1776 IndMO = &MO;
1777 } else if (MO.isReg()) {
1778 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1779 << ") = " << *(MRI->getVRegDef(MO.getReg())));
1780 if (nonIndI)
1781 return false;
1783 nonIndI = MRI->getVRegDef(MO.getReg());
1784 nonIndMO = &MO;
1787 if (IndI && nonIndI &&
1788 nonIndI->getOpcode() == Hexagon::A2_addi &&
1789 nonIndI->getOperand(2).isImm() &&
1790 nonIndI->getOperand(2).getImm() == - RB.second) {
1791 bool Order = orderBumpCompare(IndI, PredDef);
1792 if (Order) {
1793 IndMO->setReg(I->first);
1794 nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1795 return true;
1798 return false;
1801 // It is not valid to do this transformation on an unsigned comparison
1802 // because it may underflow.
1803 Comparison::Kind Cmp =
1804 getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1805 if (!Cmp || Comparison::isUnsigned(Cmp))
1806 return false;
1808 // If the register is being compared against an immediate, try changing
1809 // the compare instruction to use induction register and adjust the
1810 // immediate operand.
1811 int64_t CmpImm = getImmediate(*CmpImmOp);
1812 int64_t V = RB.second;
1813 // Handle Overflow (64-bit).
1814 if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1815 ((V < 0) && (CmpImm < INT64_MIN - V)))
1816 return false;
1817 CmpImm += V;
1818 // Most comparisons of register against an immediate value allow
1819 // the immediate to be constant-extended. There are some exceptions
1820 // though. Make sure the new combination will work.
1821 if (CmpImmOp->isImm())
1822 if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1823 return false;
1825 // Make sure that the compare happens after the bump. Otherwise,
1826 // after the fixup, the compare would use a yet-undefined register.
1827 MachineInstr *BumpI = MRI->getVRegDef(I->first);
1828 bool Order = orderBumpCompare(BumpI, PredDef);
1829 if (!Order)
1830 return false;
1832 // Finally, fix the compare instruction.
1833 setImmediate(*CmpImmOp, CmpImm);
1834 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1835 MachineOperand &MO = PredDef->getOperand(i);
1836 if (MO.isReg() && MO.getReg() == RB.first) {
1837 MO.setReg(I->first);
1838 return true;
1844 return false;
1847 /// createPreheaderForLoop - Create a preheader for a given loop.
1848 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1849 MachineLoop *L) {
1850 if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1851 return TmpPH;
1852 if (!HWCreatePreheader)
1853 return nullptr;
1855 MachineBasicBlock *Header = L->getHeader();
1856 MachineBasicBlock *Latch = L->getLoopLatch();
1857 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1858 MachineFunction *MF = Header->getParent();
1859 DebugLoc DL;
1861 #ifndef NDEBUG
1862 if ((!PHFn.empty()) && (PHFn != MF->getName()))
1863 return nullptr;
1864 #endif
1866 if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1867 return nullptr;
1869 using instr_iterator = MachineBasicBlock::instr_iterator;
1871 // Verify that all existing predecessors have analyzable branches
1872 // (or no branches at all).
1873 using MBBVector = std::vector<MachineBasicBlock *>;
1875 MBBVector Preds(Header->pred_begin(), Header->pred_end());
1876 SmallVector<MachineOperand,2> Tmp1;
1877 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1879 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1880 return nullptr;
1882 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1883 MachineBasicBlock *PB = *I;
1884 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1885 if (NotAnalyzed)
1886 return nullptr;
1889 MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1890 MF->insert(Header->getIterator(), NewPH);
1892 if (Header->pred_size() > 2) {
1893 // Ensure that the header has only two predecessors: the preheader and
1894 // the loop latch. Any additional predecessors of the header should
1895 // join at the newly created preheader. Inspect all PHI nodes from the
1896 // header and create appropriate corresponding PHI nodes in the preheader.
1898 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1899 I != E && I->isPHI(); ++I) {
1900 MachineInstr *PN = &*I;
1902 const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1903 MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1904 NewPH->insert(NewPH->end(), NewPN);
1906 unsigned PR = PN->getOperand(0).getReg();
1907 const TargetRegisterClass *RC = MRI->getRegClass(PR);
1908 unsigned NewPR = MRI->createVirtualRegister(RC);
1909 NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1911 // Copy all non-latch operands of a header's PHI node to the newly
1912 // created PHI node in the preheader.
1913 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1914 unsigned PredR = PN->getOperand(i).getReg();
1915 unsigned PredRSub = PN->getOperand(i).getSubReg();
1916 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1917 if (PredB == Latch)
1918 continue;
1920 MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1921 MO.setSubReg(PredRSub);
1922 NewPN->addOperand(MO);
1923 NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1926 // Remove copied operands from the old PHI node and add the value
1927 // coming from the preheader's PHI.
1928 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1929 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1930 if (PredB != Latch) {
1931 PN->RemoveOperand(i+1);
1932 PN->RemoveOperand(i);
1935 PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1936 PN->addOperand(MachineOperand::CreateMBB(NewPH));
1938 } else {
1939 assert(Header->pred_size() == 2);
1941 // The header has only two predecessors, but the non-latch predecessor
1942 // is not a preheader (e.g. it has other successors, etc.)
1943 // In such a case we don't need any extra PHI nodes in the new preheader,
1944 // all we need is to adjust existing PHIs in the header to now refer to
1945 // the new preheader.
1946 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1947 I != E && I->isPHI(); ++I) {
1948 MachineInstr *PN = &*I;
1949 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1950 MachineOperand &MO = PN->getOperand(i+1);
1951 if (MO.getMBB() != Latch)
1952 MO.setMBB(NewPH);
1957 // "Reroute" the CFG edges to link in the new preheader.
1958 // If any of the predecessors falls through to the header, insert a branch
1959 // to the new preheader in that place.
1960 SmallVector<MachineOperand,1> Tmp2;
1961 SmallVector<MachineOperand,1> EmptyCond;
1963 TB = FB = nullptr;
1965 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1966 MachineBasicBlock *PB = *I;
1967 if (PB != Latch) {
1968 Tmp2.clear();
1969 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1970 (void)NotAnalyzed; // suppress compiler warning
1971 assert (!NotAnalyzed && "Should be analyzable!");
1972 if (TB != Header && (Tmp2.empty() || FB != Header))
1973 TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1974 PB->ReplaceUsesOfBlockWith(Header, NewPH);
1978 // It can happen that the latch block will fall through into the header.
1979 // Insert an unconditional branch to the header.
1980 TB = FB = nullptr;
1981 bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1982 (void)LatchNotAnalyzed; // suppress compiler warning
1983 assert (!LatchNotAnalyzed && "Should be analyzable!");
1984 if (!TB && !FB)
1985 TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1987 // Finally, the branch from the preheader to the header.
1988 TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1989 NewPH->addSuccessor(Header);
1991 MachineLoop *ParentLoop = L->getParentLoop();
1992 if (ParentLoop)
1993 ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1995 // Update the dominator information with the new preheader.
1996 if (MDT) {
1997 if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1998 if (MachineDomTreeNode *DHN = HN->getIDom()) {
1999 MDT->addNewBlock(NewPH, DHN->getBlock());
2000 MDT->changeImmediateDominator(Header, NewPH);
2005 return NewPH;