[ARM] Better patterns for fp <> predicate vectors
[llvm-complete.git] / lib / Target / ARM / Thumb2ITBlockPass.cpp
blob2087fd59906031fafc059053c4dfcffdf25444b4
1 //===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ARM.h"
10 #include "ARMMachineFunctionInfo.h"
11 #include "ARMSubtarget.h"
12 #include "MCTargetDesc/ARMBaseInfo.h"
13 #include "Thumb2InstrInfo.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineInstrBundle.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include <cassert>
29 #include <new>
31 using namespace llvm;
33 #define DEBUG_TYPE "thumb2-it"
34 #define PASS_NAME "Thumb IT blocks insertion pass"
36 STATISTIC(NumITs, "Number of IT blocks inserted");
37 STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
39 using RegisterSet = SmallSet<unsigned, 4>;
41 namespace {
43 class Thumb2ITBlock : public MachineFunctionPass {
44 public:
45 static char ID;
47 bool restrictIT;
48 const Thumb2InstrInfo *TII;
49 const TargetRegisterInfo *TRI;
50 ARMFunctionInfo *AFI;
52 Thumb2ITBlock() : MachineFunctionPass(ID) {}
54 bool runOnMachineFunction(MachineFunction &Fn) override;
56 MachineFunctionProperties getRequiredProperties() const override {
57 return MachineFunctionProperties().set(
58 MachineFunctionProperties::Property::NoVRegs);
61 StringRef getPassName() const override {
62 return PASS_NAME;
65 private:
66 bool MoveCopyOutOfITBlock(MachineInstr *MI,
67 ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
68 RegisterSet &Defs, RegisterSet &Uses);
69 bool InsertITInstructions(MachineBasicBlock &Block);
72 char Thumb2ITBlock::ID = 0;
74 } // end anonymous namespace
76 INITIALIZE_PASS(Thumb2ITBlock, DEBUG_TYPE, PASS_NAME, false, false)
78 /// TrackDefUses - Tracking what registers are being defined and used by
79 /// instructions in the IT block. This also tracks "dependencies", i.e. uses
80 /// in the IT block that are defined before the IT instruction.
81 static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses,
82 const TargetRegisterInfo *TRI) {
83 using RegList = SmallVector<unsigned, 4>;
84 RegList LocalDefs;
85 RegList LocalUses;
87 for (auto &MO : MI->operands()) {
88 if (!MO.isReg())
89 continue;
90 unsigned Reg = MO.getReg();
91 if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
92 continue;
93 if (MO.isUse())
94 LocalUses.push_back(Reg);
95 else
96 LocalDefs.push_back(Reg);
99 auto InsertUsesDefs = [&](RegList &Regs, RegisterSet &UsesDefs) {
100 for (unsigned Reg : Regs)
101 for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
102 Subreg.isValid(); ++Subreg)
103 UsesDefs.insert(*Subreg);
106 InsertUsesDefs(LocalDefs, Defs);
107 InsertUsesDefs(LocalUses, Uses);
110 /// Clear kill flags for any uses in the given set. This will likely
111 /// conservatively remove more kill flags than are necessary, but removing them
112 /// is safer than incorrect kill flags remaining on instructions.
113 static void ClearKillFlags(MachineInstr *MI, RegisterSet &Uses) {
114 for (MachineOperand &MO : MI->operands()) {
115 if (!MO.isReg() || MO.isDef() || !MO.isKill())
116 continue;
117 if (!Uses.count(MO.getReg()))
118 continue;
119 MO.setIsKill(false);
123 static bool isCopy(MachineInstr *MI) {
124 switch (MI->getOpcode()) {
125 default:
126 return false;
127 case ARM::MOVr:
128 case ARM::MOVr_TC:
129 case ARM::tMOVr:
130 case ARM::t2MOVr:
131 return true;
135 bool
136 Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI,
137 ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
138 RegisterSet &Defs, RegisterSet &Uses) {
139 if (!isCopy(MI))
140 return false;
141 // llvm models select's as two-address instructions. That means a copy
142 // is inserted before a t2MOVccr, etc. If the copy is scheduled in
143 // between selects we would end up creating multiple IT blocks.
144 assert(MI->getOperand(0).getSubReg() == 0 &&
145 MI->getOperand(1).getSubReg() == 0 &&
146 "Sub-register indices still around?");
148 unsigned DstReg = MI->getOperand(0).getReg();
149 unsigned SrcReg = MI->getOperand(1).getReg();
151 // First check if it's safe to move it.
152 if (Uses.count(DstReg) || Defs.count(SrcReg))
153 return false;
155 // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
156 // if we have:
158 // movs r1, r1
159 // rsb r1, 0
160 // movs r2, r2
161 // rsb r2, 0
163 // we don't want this to be converted to:
165 // movs r1, r1
166 // movs r2, r2
167 // itt mi
168 // rsb r1, 0
169 // rsb r2, 0
171 const MCInstrDesc &MCID = MI->getDesc();
172 if (MI->hasOptionalDef() &&
173 MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
174 return false;
176 // Then peek at the next instruction to see if it's predicated on CC or OCC.
177 // If not, then there is nothing to be gained by moving the copy.
178 MachineBasicBlock::iterator I = MI;
179 ++I;
180 MachineBasicBlock::iterator E = MI->getParent()->end();
182 while (I != E && I->isDebugInstr())
183 ++I;
185 if (I != E) {
186 unsigned NPredReg = 0;
187 ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
188 if (NCC == CC || NCC == OCC)
189 return true;
191 return false;
194 bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
195 bool Modified = false;
196 RegisterSet Defs, Uses;
197 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
199 while (MBBI != E) {
200 MachineInstr *MI = &*MBBI;
201 DebugLoc dl = MI->getDebugLoc();
202 unsigned PredReg = 0;
203 ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
204 if (CC == ARMCC::AL) {
205 ++MBBI;
206 continue;
209 Defs.clear();
210 Uses.clear();
211 TrackDefUses(MI, Defs, Uses, TRI);
213 // Insert an IT instruction.
214 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
215 .addImm(CC);
217 // Add implicit use of ITSTATE to IT block instructions.
218 MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
219 true/*isImp*/, false/*isKill*/));
221 MachineInstr *LastITMI = MI;
222 MachineBasicBlock::iterator InsertPos = MIB.getInstr();
223 ++MBBI;
225 // Form IT block.
226 ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
227 unsigned Mask = 0, Pos = 3;
229 // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
230 // is set: skip the loop
231 if (!restrictIT) {
232 // Branches, including tricky ones like LDM_RET, need to end an IT
233 // block so check the instruction we just put in the block.
234 for (; MBBI != E && Pos &&
235 (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
236 if (MBBI->isDebugInstr())
237 continue;
239 MachineInstr *NMI = &*MBBI;
240 MI = NMI;
242 unsigned NPredReg = 0;
243 ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
244 if (NCC == CC || NCC == OCC) {
245 Mask |= ((NCC ^ CC) & 1) << Pos;
246 // Add implicit use of ITSTATE.
247 NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
248 true/*isImp*/, false/*isKill*/));
249 LastITMI = NMI;
250 } else {
251 if (NCC == ARMCC::AL &&
252 MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
253 --MBBI;
254 MBB.remove(NMI);
255 MBB.insert(InsertPos, NMI);
256 ClearKillFlags(MI, Uses);
257 ++NumMovedInsts;
258 continue;
260 break;
262 TrackDefUses(NMI, Defs, Uses, TRI);
263 --Pos;
267 // Finalize IT mask.
268 Mask |= (1 << Pos);
269 MIB.addImm(Mask);
271 // Last instruction in IT block kills ITSTATE.
272 LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
274 // Finalize the bundle.
275 finalizeBundle(MBB, InsertPos.getInstrIterator(),
276 ++LastITMI->getIterator());
278 Modified = true;
279 ++NumITs;
282 return Modified;
285 bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
286 const ARMSubtarget &STI =
287 static_cast<const ARMSubtarget &>(Fn.getSubtarget());
288 if (!STI.isThumb2())
289 return false;
290 AFI = Fn.getInfo<ARMFunctionInfo>();
291 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
292 TRI = STI.getRegisterInfo();
293 restrictIT = STI.restrictIT();
295 if (!AFI->isThumbFunction())
296 return false;
298 bool Modified = false;
299 for (auto &MBB : Fn )
300 Modified |= InsertITInstructions(MBB);
302 if (Modified)
303 AFI->setHasITBlocks(true);
305 return Modified;
308 /// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
309 /// insertion pass.
310 FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); }
312 #undef DEBUG_TYPE
313 #define DEBUG_TYPE "arm-mve-vpt"
315 namespace {
316 class MVEVPTBlock : public MachineFunctionPass {
317 public:
318 static char ID;
319 const Thumb2InstrInfo *TII;
320 const TargetRegisterInfo *TRI;
322 MVEVPTBlock() : MachineFunctionPass(ID) {}
324 bool runOnMachineFunction(MachineFunction &Fn) override;
326 MachineFunctionProperties getRequiredProperties() const override {
327 return MachineFunctionProperties().set(
328 MachineFunctionProperties::Property::NoVRegs);
331 StringRef getPassName() const override {
332 return "MVE VPT block insertion pass";
335 private:
336 bool InsertVPTBlocks(MachineBasicBlock &MBB);
339 char MVEVPTBlock::ID = 0;
341 } // end anonymous namespace
343 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
/// Mask immediates for the VPST instruction, named after the then/else
/// pattern of the instructions in the block. Enumerators are grouped by the
/// number of instructions the pattern covers.
enum VPTMaskValue {
  // One instruction.
  T = 0x8,    // 0b1000

  // Two instructions.
  TT = 0x4,   // 0b0100
  TE = 0xC,   // 0b1100

  // Three instructions.
  TTT = 0x2,  // 0b0010
  TTE = 0x6,  // 0b0110
  TEE = 0xA,  // 0b1010
  TET = 0xE,  // 0b1110

  // Four instructions.
  TTTT = 0x1, // 0b0001
  TTTE = 0x3, // 0b0011
  TTEE = 0x5, // 0b0101
  TTET = 0x7, // 0b0111
  TEEE = 0x9, // 0b1001
  TEET = 0xB, // 0b1011
  TETT = 0xD, // 0b1101
  TETE = 0xF  // 0b1111
};
363 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
364 bool Modified = false;
365 MachineBasicBlock::iterator MBIter = Block.begin();
366 MachineBasicBlock::iterator EndIter = Block.end();
368 while (MBIter != EndIter) {
369 MachineInstr *MI = &*MBIter;
370 unsigned PredReg = 0;
371 DebugLoc dl = MI->getDebugLoc();
373 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
375 // The idea of the predicate is that None, Then and Else are for use when
376 // handling assembly language: they correspond to the three possible
377 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
378 // from assembly source or disassembled from object code, you expect to see
379 // a mixture whenever there's a long VPT block. But in code generation, we
380 // hope we'll never generate an Else as input to this pass.
382 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
384 if (Pred == ARMVCC::None) {
385 ++MBIter;
386 continue;
389 MachineInstrBuilder MIBuilder =
390 BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST));
392 MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr();
393 int VPTInstCnt = 1;
394 ARMVCC::VPTCodes NextPred;
396 do {
397 ++MBIter;
398 NextPred = getVPTInstrPredicate(*MBIter, PredReg);
399 } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4);
401 switch (VPTInstCnt) {
402 case 1:
403 MIBuilder.addImm(VPTMaskValue::T);
404 break;
405 case 2:
406 MIBuilder.addImm(VPTMaskValue::TT);
407 break;
408 case 3:
409 MIBuilder.addImm(VPTMaskValue::TTT);
410 break;
411 case 4:
412 MIBuilder.addImm(VPTMaskValue::TTTT);
413 break;
414 default:
415 llvm_unreachable("Unexpected number of instruction in a VPT block");
418 MachineInstr *LastMI = &*MBIter;
419 finalizeBundle(Block, VPSTInsertPos.getInstrIterator(),
420 ++LastMI->getIterator());
422 Modified = true;
423 LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
425 ++MBIter;
427 return Modified;
430 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
431 const ARMSubtarget &STI =
432 static_cast<const ARMSubtarget &>(Fn.getSubtarget());
434 if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
435 return false;
437 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
438 TRI = STI.getRegisterInfo();
440 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
441 << "********** Function: " << Fn.getName() << '\n');
443 bool Modified = false;
444 for (MachineBasicBlock &MBB : Fn)
445 Modified |= InsertVPTBlocks(MBB);
447 LLVM_DEBUG(dbgs() << "**************************************\n");
448 return Modified;
451 /// createMVEVPTBlock - Returns an instance of the MVE VPT block
452 /// insertion pass.
453 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }