AMDGPU: Mark test as XFAIL in expensive_checks builds
[llvm-project.git] / llvm / lib / Target / ARM / MVETPAndVPTOptimisationsPass.cpp
blob12fd8c7924565dfd5dd5d2145133e16e8c485567
1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MVETailPredUtils.h"
21 #include "Thumb2InstrInfo.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineLoopInfo.h"
29 #include "llvm/InitializePasses.h"
30 #include "llvm/Support/Debug.h"
31 #include <cassert>
using namespace llvm;

#define DEBUG_TYPE "arm-mve-vpt-opts"

// Command-line switch gating the merge of t2LoopDec + t2LoopEnd into a single
// t2LoopEndDec (and the lowering of WhileLoopSetup/Start pairs). On by
// default; used as an escape hatch for debugging.
static cl::opt<bool>
MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
    cl::desc("Enable merging Loop End and Dec instructions."),
    cl::init(true));

// Command-line switch gating the rewrite of MVE instruction predicate
// operands to the loop-count PHI register (see ConvertTailPredLoop).
static cl::opt<bool>
SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
    cl::desc("Enable setting lr as a predicate in tail predication regions."),
    cl::init(true));
namespace {
// Pre-regalloc machine function pass performing the tail-predication and VPT
// block optimisations described in the \file comment at the top of this file.
class MVETPAndVPTOptimisations : public MachineFunctionPass {
public:
  static char ID;
  // Target instruction info and register info, cached for use by the
  // transformation helpers below.
  const Thumb2InstrInfo *TII;
  MachineRegisterInfo *MRI;

  MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  // The transforms need (and preserve) loop structure and dominance
  // information; everything else is invalidated.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineLoopInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return "ARM MVE TailPred and VPT Optimisation Pass";
  }

private:
  // Per-loop transforms (see the definitions below for details).
  bool LowerWhileLoopStart(MachineLoop *ML);
  bool MergeLoopEnd(MachineLoop *ML);
  bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
  // Per-block VCCR/VPT transforms.
  MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
                                            MachineInstr &Instr,
                                            MachineOperand &User,
                                            Register Target);
  bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
  bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
  bool ConvertVPSEL(MachineBasicBlock &MBB);
  bool HintDoLoopStartReg(MachineBasicBlock &MBB);
  MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
                                            MachineInstr *LoopStart);
};

char MVETPAndVPTOptimisations::ID = 0;

} // end anonymous namespace
// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
                      "ARM MVE TailPred and VPT Optimisations pass", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
                    "ARM MVE TailPred and VPT Optimisations pass", false, false)
99 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
100 MachineRegisterInfo *MRI) {
101 while (MI && MI->getOpcode() == TargetOpcode::COPY &&
102 MI->getOperand(1).getReg().isVirtual())
103 MI = MRI->getVRegDef(MI->getOperand(1).getReg());
104 return MI;
// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
// at the moment, returning a t2DoLoopStart in LoopStart.
//
// All four out-parameters are only valid when true is returned. Each lookup
// looks through intervening COPYs of virtual registers.
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
                               MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
                               MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
  MachineBasicBlock *Header = ML->getHeader();
  MachineBasicBlock *Latch = ML->getLoopLatch();
  if (!Header || !Latch) {
    LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
    return false;
  }

  // Find the loop end from the terminators. The branch-target MBB operand is
  // at index 1 for t2LoopEnd and index 2 for t2LoopEndDec (which also has a
  // def operand at index 0).
  LoopEnd = nullptr;
  for (auto &T : Latch->terminators()) {
    if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
      LoopEnd = &T;
      break;
    }
    if (T.getOpcode() == ARM::t2LoopEndDec &&
        T.getOperand(2).getMBB() == Header) {
      LoopEnd = &T;
      break;
    }
  }
  if (!LoopEnd) {
    LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);

  // Find the dec from the use of the end. There may be copies between
  // instructions. We expect the loop to loop like:
  //   $vs = t2DoLoopStart ...
  // loop:
  //   $vp = phi [ $vs ], [ $vd ]
  //   ...
  //   $vd = t2LoopDec $vp
  //   ...
  //   t2LoopEnd $vd, loop
  if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
    LoopDec = LoopEnd; // Already merged: the end instruction is its own dec.
  else {
    LoopDec =
        LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
      LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
      return false;
    }
  }
  LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);

  // The dec's input should come from a two-input PHI in the header:
  // operands are (def, val0, mbb0, val1, mbb1), hence NumOperands == 5, and
  // exactly one of the incoming blocks must be the latch.
  LoopPhi =
      LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
  if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
      LoopPhi->getNumOperands() != 5 ||
      (LoopPhi->getOperand(2).getMBB() != Latch &&
       LoopPhi->getOperand(4).getMBB() != Latch)) {
    LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);

  // The PHI value coming from the non-latch predecessor is the loop start.
  Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
                          ? LoopPhi->getOperand(3).getReg()
                          : LoopPhi->getOperand(1).getReg();
  LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
  if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
                     LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
                     LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
    LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);

  return true;
}
// Revert a t2WhileLoopSetup (and, if present in the same block, its paired
// t2WhileLoopStart) back to plain Thumb2 instructions: the setup becomes a
// flag-setting SUBS of #0 and the start becomes a conditional branch. Erases
// MI when done.
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
  MachineBasicBlock *MBB = MI->getParent();
  assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
         "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");

  // Subs: dst = src - 0, setting CPSR so the branch below can test for zero.
  MachineInstrBuilder MIB =
      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
  MIB.add(MI->getOperand(0));
  MIB.add(MI->getOperand(1));
  MIB.addImm(0);
  MIB.addImm(ARMCC::AL);
  MIB.addReg(ARM::NoRegister);
  MIB.addReg(ARM::CPSR, RegState::Define);

  // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
  for (MachineInstr &I : MBB->terminators()) {
    if (I.getOpcode() == ARM::t2WhileLoopStart) {
      MachineInstrBuilder MIB =
          BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
      // NOTE(review): operand 1 of the t2WhileLoopSetup (MI) is its count
      // input; the comment says "branch target" — confirm this shouldn't be
      // I.getOperand(1) (the t2WhileLoopStart's target MBB) instead.
      MIB.add(MI->getOperand(1)); // branch target
      MIB.addImm(ARMCC::EQ);      // condition code: branch if count was zero
      MIB.addReg(ARM::CPSR);
      I.eraseFromParent();
      break;
    }
  }

  MI->eraseFromParent();
}
217 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
218 // start of a while loop:
219 // %a:gprlr = t2WhileLoopSetup %Cnt
220 // t2WhileLoopStart %a, %BB
221 // We want to convert those to a single instruction which, like t2LoopEndDec and
222 // t2DoLoopStartTP is both a terminator and produces a value:
223 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
226 // t2WhileLoopStart are not valid past regalloc.
227 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
228 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
229 << ML->getHeader()->getName() << "\n");
231 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
232 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
233 return false;
235 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
236 return false;
238 Register LR = LoopStart->getOperand(0).getReg();
239 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
240 return MI.getOpcode() == ARM::t2WhileLoopStart;
242 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
243 RevertWhileLoopSetup(LoopStart, TII);
244 RevertLoopDec(LoopStart, TII);
245 RevertLoopEnd(LoopStart, TII);
246 return true;
249 MachineInstrBuilder MI =
250 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
251 TII->get(ARM::t2WhileLoopStartLR), LR)
252 .add(LoopStart->getOperand(1))
253 .add(WLSIt->getOperand(1));
254 (void)MI;
255 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257 WLSIt->eraseFromParent();
258 LoopStart->eraseFromParent();
259 return true;
262 // Return true if this instruction is invalid in a low overhead loop, usually
263 // because it clobbers LR.
264 static bool IsInvalidTPInstruction(MachineInstr &MI) {
265 return MI.isCall() || isLoopStart(MI);
// Starting from PreHeader, search for invalid instructions back until the
// LoopStart block is reached. If invalid instructions are found, the loop start
// is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
// return the new DLS LoopStart if updated.
MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
    MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
  // Backwards CFG walk over predecessors, bounded by the block containing
  // LoopStart (pre-seeded into Visited).
  SmallVector<MachineBasicBlock *> Worklist;
  SmallPtrSet<MachineBasicBlock *, 4> Visited;
  Worklist.push_back(PreHeader);
  Visited.insert(LoopStart->getParent());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    if (Visited.count(MBB))
      continue;

    for (MachineInstr &MI : *MBB) {
      if (!IsInvalidTPInstruction(MI))
        continue;

      LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);

      // Create a t2DoLoopStart at the end of the preheader.
      MachineInstrBuilder MIB =
          BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
                  LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
      MIB.add(LoopStart->getOperand(0));
      MIB.add(LoopStart->getOperand(1));

      // Make sure to remove the kill flags, to prevent them from being invalid.
      // (The count operand is now also read by the new t2DoLoopStart above.)
      LoopStart->getOperand(1).setIsKill(false);

      // Revert the t2WhileLoopStartLR to a CMP and Br.
      RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
      return MIB;
    }

    Visited.insert(MBB);
    for (auto *Pred : MBB->predecessors())
      Worklist.push_back(Pred);
  }
  // No invalid instruction found; the original LoopStart stands.
  return LoopStart;
}
// This function converts loops with t2LoopDec and t2LoopEnd instructions into
// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
// will be valid to be used for the low overhead loop, which means nothing else
// is using LR (especially calls) and there are no superfluous copies in the
// loop. The t2LoopEndDec is a branching terminator that produces a value (the
// decrement) around the loop edge, which means we need to be careful that they
// will be valid to allocate without any spilling.
bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
  if (!MergeEndDec)
    return false;

  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
                    << "\n");

  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;

  // Check if there is an illegal instruction (a call) in the low overhead loop
  // and if so revert it now before we get any further. While loops also need to
  // check the preheaders, but can be reverted to a DLS loop if needed.
  auto *PreHeader = ML->getLoopPreheader();
  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
    LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);

  for (MachineBasicBlock *MBB : ML->blocks()) {
    for (MachineInstr &MI : *MBB) {
      if (IsInvalidTPInstruction(MI)) {
        LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
        if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
          RevertDoLoopStart(LoopStart, TII);
        else
          RevertWhileLoopStartLR(LoopStart, TII);
        RevertLoopDec(LoopDec, TII);
        RevertLoopEnd(LoopEnd, TII);
        return true;
      }
    }
  }

  // Remove any copies from the loop, to ensure the phi that remains is both
  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
  // that cannot spill, we need to be careful what remains in the loop.
  Register PhiReg = LoopPhi->getOperand(0).getReg();
  Register DecReg = LoopDec->getOperand(0).getReg();
  Register StartReg = LoopStart->getOperand(0).getReg();
  // Ensure the uses are expected, and collect any copies we want to remove.
  // CheckUsers transitively follows COPYs from BaseReg; any use that is not
  // in ExpectedUsers and not a virtual-to-virtual COPY disqualifies the merge.
  SmallVector<MachineInstr *, 4> Copies;
  auto CheckUsers = [&Copies](Register BaseReg,
                              ArrayRef<MachineInstr *> ExpectedUsers,
                              MachineRegisterInfo *MRI) {
    SmallVector<Register, 4> Worklist;
    Worklist.push_back(BaseReg);
    while (!Worklist.empty()) {
      Register Reg = Worklist.pop_back_val();
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (llvm::is_contained(ExpectedUsers, &MI))
          continue;
        if (MI.getOpcode() != TargetOpcode::COPY ||
            !MI.getOperand(0).getReg().isVirtual()) {
          LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
          return false;
        }
        Worklist.push_back(MI.getOperand(0).getReg());
        Copies.push_back(&MI);
      }
    }
    return true;
  };
  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
      !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
      !CheckUsers(StartReg, {LoopPhi}, MRI)) {
    // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
    if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
      RevertWhileLoopStartLR(LoopStart, TII);
      RevertLoopDec(LoopDec, TII);
      RevertLoopEnd(LoopEnd, TII);
      return true;
    }
    return false;
  }

  // All three values must live in the LR-capable class for the low overhead
  // loop to be formed.
  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);

  // Rewrite the PHI to read the start/dec registers directly, bypassing the
  // COPYs collected above (which are erased at the end).
  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
    LoopPhi->getOperand(3).setReg(StartReg);
    LoopPhi->getOperand(1).setReg(DecReg);
  } else {
    LoopPhi->getOperand(1).setReg(StartReg);
    LoopPhi->getOperand(3).setReg(DecReg);
  }

  SmallVector<MachineOperand, 4> Cond;              // For analyzeBranch.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
  if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
    // If the LoopEnd falls through, need to insert a t2B to the fall-through
    // block so that the non-analyzable t2LoopEndDec doesn't fall through.
    MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
    BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
        .addMBB(&*MBBI)
        .add(predOps(ARMCC::AL));
  }

  // Replace the loop dec and loop end as a single instruction.
  MachineInstrBuilder MI =
      BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
              TII->get(ARM::t2LoopEndDec), DecReg)
          .addReg(PhiReg)
          .add(LoopEnd->getOperand(1));
  (void)MI;
  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());

  LoopDec->eraseFromParent();
  LoopEnd->eraseFromParent();
  for (auto *MI : Copies)
    MI->eraseFromParent();
  return true;
}
// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
// instruction, making the backend ARMLowOverheadLoops passes job of finding the
// VCTP operand much simpler.
bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
                                                   MachineDominatorTree *DT) {
  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
                    << ML->getHeader()->getName() << "\n");

  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
  // in the loop. Requires an already-merged t2LoopEndDec (LoopDec == LoopEnd).
  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;
  if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
                             LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
    return false;

  // Collect the VCTPs and, separately, all VPT-predicated MVE instructions
  // (whose predicate operand may be rewritten to LR below).
  SmallVector<MachineInstr *, 4> VCTPs;
  SmallVector<MachineInstr *, 4> MVEInstrs;
  for (MachineBasicBlock *BB : ML->blocks()) {
    for (MachineInstr &MI : *BB)
      if (isVCTP(&MI))
        VCTPs.push_back(&MI);
      else if (findFirstVPTPredOperandIdx(MI) != -1)
        MVEInstrs.push_back(&MI);
  }

  if (VCTPs.empty()) {
    LLVM_DEBUG(dbgs() << "  no VCTPs\n");
    return false;
  }

  // Check all VCTPs are the same.
  MachineInstr *FirstVCTP = *VCTPs.begin();
  for (MachineInstr *VCTP : VCTPs) {
    LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
    if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
        VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
      return false;
    }
  }

  // Check for the register being used can be setup before the loop. We expect
  // this to be:
  //   $vx = ...
  // loop:
  //   $vp = PHI [ $vx ], [ $vd ]
  //   ..
  //   $vpr = VCTP $vp
  //   ..
  //   $vd = t2SUBri $vp, #n
  //   ..
  Register CountReg = FirstVCTP->getOperand(1).getReg();
  if (!CountReg.isVirtual()) {
    LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
    return false;
  }
  // The count must come from a two-input header PHI (operand layout:
  // def, val0, mbb0, val1, mbb1) with one edge from the latch.
  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
      Phi->getNumOperands() != 5 ||
      (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
       Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
    LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
    return false;
  }
  // Use the PHI's loop-entry value (the non-latch incoming edge).
  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
                 ? Phi->getOperand(3).getReg()
                 : Phi->getOperand(1).getReg();

  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
  // the preheader and add the new CountReg to it. We attempt to place it late
  // in the preheader, but may need to move that earlier based on uses.
  MachineBasicBlock *MBB = LoopStart->getParent();
  MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
  for (MachineInstr &Use :
       MRI->use_instructions(LoopStart->getOperand(0).getReg()))
    if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
        !DT->dominates(ML->getHeader(), Use.getParent())) {
      LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
      return false;
    }

  unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
                        ? ARM::t2DoLoopStartTP
                        : ARM::t2WhileLoopStartTP;
  MachineInstrBuilder MI =
      BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
          .add(LoopStart->getOperand(0))
          .add(LoopStart->getOperand(1))
          .addReg(CountReg);
  if (NewOpc == ARM::t2WhileLoopStartTP)
    MI.add(LoopStart->getOperand(2)); // While loops also carry the exit MBB.
  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
                    << *MI.getInstr());
  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
  LoopStart->eraseFromParent();

  if (SetLRPredicate) {
    // Each instruction in the loop needs to be using LR as the predicate from
    // the Phi as the predicate.
    Register LR = LoopPhi->getOperand(0).getReg();
    for (MachineInstr *MI : MVEInstrs) {
      int Idx = findFirstVPTPredOperandIdx(*MI);
      MI->getOperand(Idx + 2).setReg(LR);
    }
  }

  return true;
}
545 // Returns true if Opcode is any VCMP Opcode.
546 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
548 // Returns true if a VCMP with this Opcode can have its operands swapped.
549 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
550 // and VCMPr instructions (since the r is always on the right).
551 static bool CanHaveSwappedOperands(unsigned Opcode) {
552 switch (Opcode) {
553 default:
554 return true;
555 case ARM::MVE_VCMPf32:
556 case ARM::MVE_VCMPf16:
557 case ARM::MVE_VCMPf32r:
558 case ARM::MVE_VCMPf16r:
559 case ARM::MVE_VCMPi8r:
560 case ARM::MVE_VCMPi16r:
561 case ARM::MVE_VCMPi32r:
562 case ARM::MVE_VCMPu8r:
563 case ARM::MVE_VCMPu16r:
564 case ARM::MVE_VCMPu32r:
565 case ARM::MVE_VCMPs8r:
566 case ARM::MVE_VCMPs16r:
567 case ARM::MVE_VCMPs32r:
568 return false;
572 // Returns the CondCode of a VCMP Instruction.
573 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
574 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
575 return ARMCC::CondCodes(Instr.getOperand(3).getImm());
578 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
579 // Prev. Cond and Prev must be VCMPs.
580 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
581 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
583 // Opcodes must match.
584 if (Cond.getOpcode() != Prev.getOpcode())
585 return false;
587 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
588 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
590 // If the VCMP has the opposite condition with the same operands, we can
591 // replace it with a VPNOT
592 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
593 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
594 if (ExpectedCode == GetCondCode(Prev))
595 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
596 return true;
597 // Check again with operands swapped if possible
598 if (!CanHaveSwappedOperands(Cond.getOpcode()))
599 return false;
600 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
601 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
602 CondOP2.isIdenticalTo(PrevOP1);
605 // Returns true if Instr writes to VCCR.
606 static bool IsWritingToVCCR(MachineInstr &Instr) {
607 if (Instr.getNumOperands() == 0)
608 return false;
609 MachineOperand &Dst = Instr.getOperand(0);
610 if (!Dst.isReg())
611 return false;
612 Register DstReg = Dst.getReg();
613 if (!DstReg.isVirtual())
614 return false;
615 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
616 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
617 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
// Transforms
//    <Instr that uses %A ('User' Operand)>
// Into
//    %K = VPNOT %Target
//    <Instr that uses %K ('User' Operand)>
// And returns the newly inserted VPNOT.
// This optimization is done in the hopes of preventing spills/reloads of VPR by
// reducing the number of VCCR values with overlapping lifetimes.
MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
    MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
    Register Target) {
  // The new VPNOT result gets the same register class as Target (VCCR).
  Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));

  // Insert the (unpredicated) VPNOT immediately before the user instruction.
  MachineInstrBuilder MIBuilder =
      BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
          .addDef(NewResult)
          .addReg(Target);
  addUnpredicatedMveVpredNOp(MIBuilder);

  // Make the user use NewResult instead, and clear its kill flag.
  User.setReg(NewResult);
  User.setIsKill(false);

  LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
             MIBuilder.getInstr()->dump());

  return *MIBuilder.getInstr();
}
// Moves a VPNOT before its first user if an instruction that uses Reg is found
// in-between the VPNOT and its user.
// Returns true if there is at least one user of the VPNOT in the block.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Iter,
                                     Register Reg) {
  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
  assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
         "The VPNOT cannot be predicated");

  MachineInstr &VPNOT = *Iter;
  Register VPNOTResult = VPNOT.getOperand(0).getReg();
  Register VPNOTOperand = VPNOT.getOperand(1).getReg();

  // Whether the VPNOT will need to be moved, and whether we found a user of the
  // VPNOT.
  bool MustMove = false, HasUser = false;
  MachineOperand *VPNOTOperandKiller = nullptr;
  // Scan forward from the VPNOT to the end of the block.
  for (; Iter != MBB.end(); ++Iter) {
    if (MachineOperand *MO =
            Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
                                         /*isKill*/ true)) {
      // If we find the operand that kills the VPNOTOperand's result, save it.
      VPNOTOperandKiller = MO;
    }

    // A use of Reg between the VPNOT and its first user forces the move.
    if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
      MustMove = true;
      continue;
    }

    // Keep scanning until the first user of the VPNOT's result.
    if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
      continue;

    HasUser = true;
    if (!MustMove)
      break;

    // Move the VPNOT right before Iter
    LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
               Iter->dump());
    MBB.splice(Iter, &MBB, VPNOT.getIterator());
    // If we move the instr, and its operand was killed earlier, remove the kill
    // flag (the VPNOT now reads it after that kill point).
    if (VPNOTOperandKiller)
      VPNOTOperandKiller->setIsKill(false);

    break;
  }
  return HasUser;
}
// This optimisation attempts to reduce the number of overlapping lifetimes of
// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
// this replaces
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %Bar = (some op that uses %A)
// With
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %A)
bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
  SmallVector<MachineInstr *, 4> DeadInstructions;
  bool Modified = false;

  while (Iter != End) {
    Register VCCRValue, OppositeVCCRValue;
    // The first loop looks for 2 unpredicated instructions:
    //    %A:vccr = (instr)     ; A is stored in VCCRValue
    //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
    for (; Iter != End; ++Iter) {
      // We're only interested in unpredicated instructions that write to VCCR.
      if (!IsWritingToVCCR(*Iter) ||
          getVPTInstrPredicate(*Iter) != ARMVCC::None)
        continue;
      Register Dst = Iter->getOperand(0).getReg();

      // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
      // found what we were looking for.
      if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
          Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
        // Move the VPNOT closer to its first user if needed, and ignore if it
        // has no users.
        if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
          continue;

        OppositeVCCRValue = Dst;
        ++Iter;
        break;
      }

      // Else, just set VCCRValue.
      VCCRValue = Dst;
    }

    // If the first inner loop didn't find anything, stop here.
    if (Iter == End)
      break;

    assert(VCCRValue && OppositeVCCRValue &&
           "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
           "stopped before the end of the block!");
    assert(VCCRValue != OppositeVCCRValue &&
           "VCCRValue should not be equal to OppositeVCCRValue!");

    // LastVPNOTResult always contains the same value as OppositeVCCRValue.
    Register LastVPNOTResult = OppositeVCCRValue;

    // This second loop tries to optimize the remaining instructions.
    // Invariant maintained throughout: LastVPNOTResult always holds the same
    // value as OppositeVCCRValue (the logical NOT of VCCRValue).
    for (; Iter != End; ++Iter) {
      bool IsInteresting = false;

      if (MachineOperand *MO =
              Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
        IsInteresting = true;

        // - If the instruction is a VPNOT, it can be removed, and we can just
        //   replace its uses with LastVPNOTResult.
        // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
        if (Iter->getOpcode() == ARM::MVE_VPNOT) {
          Register Result = Iter->getOperand(0).getReg();

          MRI->replaceRegWith(Result, LastVPNOTResult);
          DeadInstructions.push_back(&*Iter);
          Modified = true;

          LLVM_DEBUG(dbgs()
                     << "Replacing all uses of '" << printReg(Result)
                     << "' with '" << printReg(LastVPNOTResult) << "'\n");
        } else {
          MachineInstr &VPNOT =
              ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
          Modified = true;

          // The new VPNOT inverted the value again, so the roles of the two
          // registers swap.
          LastVPNOTResult = VPNOT.getOperand(0).getReg();
          std::swap(VCCRValue, OppositeVCCRValue);

          LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
                            << "' with '" << printReg(LastVPNOTResult)
                            << "' in instr: " << *Iter);
        }
      } else {
        // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
        // instead as they contain the same value.
        if (MachineOperand *MO = Iter->findRegisterUseOperand(
                OppositeVCCRValue, /*TRI=*/nullptr)) {
          IsInteresting = true;

          // This is pointless if LastVPNOTResult == OppositeVCCRValue.
          if (LastVPNOTResult != OppositeVCCRValue) {
            LLVM_DEBUG(dbgs() << "Replacing usage of '"
                              << printReg(OppositeVCCRValue) << "' with '"
                              << printReg(LastVPNOTResult) << " for instr: ";
                       Iter->dump());
            MO->setReg(LastVPNOTResult);
            Modified = true;
          }

          // The original use may have been further extended by the rewrite.
          MO->setIsKill(false);
        }

        // If this is an unpredicated VPNOT on
        // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
        if (Iter->getOpcode() == ARM::MVE_VPNOT &&
            getVPTInstrPredicate(*Iter) == ARMVCC::None) {
          Register VPNOTOperand = Iter->getOperand(1).getReg();
          if (VPNOTOperand == LastVPNOTResult ||
              VPNOTOperand == OppositeVCCRValue) {
            IsInteresting = true;

            std::swap(VCCRValue, OppositeVCCRValue);
            LastVPNOTResult = Iter->getOperand(0).getReg();
          }
        }
      }

      // If this instruction was not interesting, and it writes to VCCR, stop.
      if (!IsInteresting && IsWritingToVCCR(*Iter))
        break;
    }
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return Modified;
}
// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or when
  // a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;

  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO =
              Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
                                           /*TRI=*/nullptr, /*isKill*/ true)) {
        // If we come across the instr that kills PrevVCMP's result, record it
        // so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    // Remember this VCMP as the candidate if it isn't the inverse of the
    // previous one.
    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses, and kills PrevVCMP's result,
    // remove the kill flag (the new VPNOT extends that value's live range).
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}
917 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
918 MachineDominatorTree *DT) {
919 // Scan through the block, looking for instructions that use constants moves
920 // into VPR that are the negative of one another. These are expected to be
921 // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
922 // mask is kept it or and VPNOT's of it are added or reused as we scan through
923 // the function.
924 unsigned LastVPTImm = 0;
925 Register LastVPTReg = 0;
926 SmallSet<MachineInstr *, 4> DeadInstructions;
928 for (MachineInstr &Instr : MBB.instrs()) {
929 // Look for predicated MVE instructions.
930 int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
931 if (PIdx == -1)
932 continue;
933 Register VPR = Instr.getOperand(PIdx + 1).getReg();
934 if (!VPR.isVirtual())
935 continue;
937 // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
938 MachineInstr *Copy = MRI->getVRegDef(VPR);
939 if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
940 !Copy->getOperand(1).getReg().isVirtual() ||
941 MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
942 LastVPTReg = 0;
943 continue;
945 Register GPR = Copy->getOperand(1).getReg();
947 // Find the Immediate used by the copy.
948 auto getImm = [&](Register GPR) -> unsigned {
949 MachineInstr *Def = MRI->getVRegDef(GPR);
950 if (Def && (Def->getOpcode() == ARM::t2MOVi ||
951 Def->getOpcode() == ARM::t2MOVi16))
952 return Def->getOperand(1).getImm();
953 return -1U;
955 unsigned Imm = getImm(GPR);
956 if (Imm == -1U) {
957 LastVPTReg = 0;
958 continue;
961 unsigned NotImm = ~Imm & 0xffff;
962 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
963 MRI->clearKillFlags(LastVPTReg);
964 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
965 if (MRI->use_empty(VPR)) {
966 DeadInstructions.insert(Copy);
967 if (MRI->hasOneUse(GPR))
968 DeadInstructions.insert(MRI->getVRegDef(GPR));
970 LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
971 VPR = LastVPTReg;
972 } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
973 // We have found the not of a previous constant. Create a VPNot of the
974 // earlier predicate reg and use it instead of the copy.
975 Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
976 auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
977 TII->get(ARM::MVE_VPNOT), NewVPR)
978 .addReg(LastVPTReg);
979 addUnpredicatedMveVpredNOp(VPNot);
981 // Use the new register and check if the def is now dead.
982 Instr.getOperand(PIdx + 1).setReg(NewVPR);
983 if (MRI->use_empty(VPR)) {
984 DeadInstructions.insert(Copy);
985 if (MRI->hasOneUse(GPR))
986 DeadInstructions.insert(MRI->getVRegDef(GPR));
988 LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
989 << Instr);
990 VPR = NewVPR;
993 LastVPTImm = Imm;
994 LastVPTReg = VPR;
997 for (MachineInstr *DI : DeadInstructions)
998 DI->eraseFromParent();
1000 return !DeadInstructions.empty();
1003 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1004 // somewhat blunt approximation to allow tail predicated with vpsel
1005 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1006 // different semantics under tail predication. Until that is modelled we just
1007 // convert to a VMOVT (via a predicated VORR) instead.
1008 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1009 bool HasVCTP = false;
1010 SmallVector<MachineInstr *, 4> DeadInstructions;
1012 for (MachineInstr &MI : MBB.instrs()) {
1013 if (isVCTP(&MI)) {
1014 HasVCTP = true;
1015 continue;
1018 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1019 continue;
1021 MachineInstrBuilder MIBuilder =
1022 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1023 .add(MI.getOperand(0))
1024 .add(MI.getOperand(1))
1025 .add(MI.getOperand(1))
1026 .addImm(ARMVCC::Then)
1027 .add(MI.getOperand(4))
1028 .add(MI.getOperand(5))
1029 .add(MI.getOperand(2));
1030 // Silence unused variable warning in release builds.
1031 (void)MIBuilder;
1032 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1033 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1034 DeadInstructions.push_back(&MI);
1037 for (MachineInstr *DeadInstruction : DeadInstructions)
1038 DeadInstruction->eraseFromParent();
1040 return !DeadInstructions.empty();
1043 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1044 // the instruction may be removable as a noop.
1045 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1046 bool Changed = false;
1047 for (MachineInstr &MI : MBB.instrs()) {
1048 if (MI.getOpcode() != ARM::t2DoLoopStart)
1049 continue;
1050 Register R = MI.getOperand(1).getReg();
1051 MachineFunction *MF = MI.getParent()->getParent();
1052 MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1053 Changed = true;
1055 return Changed;
1058 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1059 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1061 if (!STI.isThumb2() || !STI.hasLOB())
1062 return false;
1064 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1065 MRI = &Fn.getRegInfo();
1066 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1067 MachineDominatorTree *DT =
1068 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1070 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1071 << "********** Function: " << Fn.getName() << '\n');
1073 bool Modified = false;
1074 for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
1075 Modified |= LowerWhileLoopStart(ML);
1076 Modified |= MergeLoopEnd(ML);
1077 Modified |= ConvertTailPredLoop(ML, DT);
1080 for (MachineBasicBlock &MBB : Fn) {
1081 Modified |= HintDoLoopStartReg(MBB);
1082 Modified |= ReplaceConstByVPNOTs(MBB, DT);
1083 Modified |= ReplaceVCMPsByVPNOTs(MBB);
1084 Modified |= ReduceOldVCCRValueUses(MBB);
1085 Modified |= ConvertVPSEL(MBB);
1088 LLVM_DEBUG(dbgs() << "**************************************\n");
1089 return Modified;
1092 /// createMVETPAndVPTOptimisationsPass
1093 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1094 return new MVETPAndVPTOptimisations();