//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"

STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,  "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,   "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);
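
// The three 'limit' options above cap how many reductions of each kind the
// pass will perform (-1 means unlimited). They are hidden debugging knobs,
// handy for bisecting a problem down to a single narrowed instruction.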

namespace {

  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow ones.
  struct ReduceEntry {
    uint16_t WideOpc;      // Wide opcode
    uint16_t NarrowOpc1;   // Narrow opcode to transform to
    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                           // 1 - No cc field.
                           // 2 - Always set CPSR.
    unsigned PredCC2  : 2;
    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
    unsigned Special  : 1; // Needs to be dealt with specially
    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
  };

  static const ReduceEntry ReduceTable[] = {
    // Wide,           Narrow1,      Narrow2,      imm1,imm2, lo1, lo2, P/C,PF,S,AM
    { ARM::t2ADCrr,    0,            ARM::tADC,      0,   0,    0,   1,  0,0, 0,0,0 },
    { ARM::t2ADDri,    ARM::tADDi3,  ARM::tADDi8,    3,   8,    1,   1,  0,0, 0,1,0 },
    { ARM::t2ADDrr,    ARM::tADDrr,  ARM::tADDhirr,  0,   0,    1,   0,  0,1, 0,0,0 },
    { ARM::t2ADDSri,   ARM::tADDi3,  ARM::tADDi8,    3,   8,    1,   1,  2,2, 0,1,0 },
    { ARM::t2ADDSrr,   ARM::tADDrr,  0,              0,   0,    1,   0,  2,0, 0,1,0 },
    { ARM::t2ANDrr,    0,            ARM::tAND,      0,   0,    0,   1,  0,0, 1,0,0 },
    { ARM::t2ASRri,    ARM::tASRri,  0,              5,   0,    1,   0,  0,0, 1,0,1 },
    { ARM::t2ASRrr,    0,            ARM::tASRrr,    0,   0,    0,   1,  0,0, 1,0,1 },
    { ARM::t2BICrr,    0,            ARM::tBIC,      0,   0,    0,   1,  0,0, 1,0,0 },
    //FIXME: Disable CMN, as CCodes are backwards from compare expectations
    //{ ARM::t2CMNrr,  ARM::tCMN,    0,              0,   0,    1,   0,  2,0, 0,0,0 },
    { ARM::t2CMNzrr,   ARM::tCMNz,   0,              0,   0,    1,   0,  2,0, 0,0,0 },
    { ARM::t2CMPri,    ARM::tCMPi8,  0,              8,   0,    1,   0,  2,0, 0,0,0 },
    { ARM::t2CMPrr,    ARM::tCMPhir, 0,              0,   0,    0,   0,  2,0, 0,1,0 },
    { ARM::t2EORrr,    0,            ARM::tEOR,      0,   0,    0,   1,  0,0, 1,0,0 },
    // FIXME: adr.n immediate offset must be multiple of 4.
    //{ ARM::t2LEApcrelJT, ARM::tLEApcrelJT, 0,      0,   0,    1,   0,  1,0, 0,0,0 },
    { ARM::t2LSLri,    ARM::tLSLri,  0,              5,   0,    1,   0,  0,0, 1,0,1 },
    { ARM::t2LSLrr,    0,            ARM::tLSLrr,    0,   0,    0,   1,  0,0, 1,0,1 },
    { ARM::t2LSRri,    ARM::tLSRri,  0,              5,   0,    1,   0,  0,0, 1,0,1 },
    { ARM::t2LSRrr,    0,            ARM::tLSRrr,    0,   0,    0,   1,  0,0, 1,0,1 },
    { ARM::t2MOVi,     ARM::tMOVi8,  0,              8,   0,    1,   0,  0,0, 1,0,0 },
    { ARM::t2MOVi16,   ARM::tMOVi8,  0,              8,   0,    1,   0,  0,0, 1,1,0 },
    // FIXME: Do we need the 16-bit 'S' variant?
    { ARM::t2MOVr,     ARM::tMOVr,   0,              0,   0,    0,   0,  1,0, 0,0,0 },
    { ARM::t2MUL,      0,            ARM::tMUL,      0,   0,    0,   1,  0,0, 1,0,0 },
    { ARM::t2MVNr,     ARM::tMVN,    0,              0,   0,    1,   0,  0,0, 0,0,0 },
    { ARM::t2ORRrr,    0,            ARM::tORR,      0,   0,    0,   1,  0,0, 1,0,0 },
    { ARM::t2REV,      ARM::tREV,    0,              0,   0,    1,   0,  1,0, 0,0,0 },
    { ARM::t2REV16,    ARM::tREV16,  0,              0,   0,    1,   0,  1,0, 0,0,0 },
    { ARM::t2REVSH,    ARM::tREVSH,  0,              0,   0,    1,   0,  1,0, 0,0,0 },
    { ARM::t2RORrr,    0,            ARM::tROR,      0,   0,    0,   1,  0,0, 1,0,0 },
    { ARM::t2RSBri,    ARM::tRSB,    0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2RSBSri,   ARM::tRSB,    0,              0,   0,    1,   0,  2,0, 0,1,0 },
    { ARM::t2SBCrr,    0,            ARM::tSBC,      0,   0,    0,   1,  0,0, 0,0,0 },
    { ARM::t2SUBri,    ARM::tSUBi3,  ARM::tSUBi8,    3,   8,    1,   1,  0,0, 0,0,0 },
    { ARM::t2SUBrr,    ARM::tSUBrr,  0,              0,   0,    1,   0,  0,0, 0,0,0 },
    { ARM::t2SUBSri,   ARM::tSUBi3,  ARM::tSUBi8,    3,   8,    1,   1,  2,2, 0,0,0 },
    { ARM::t2SUBSrr,   ARM::tSUBrr,  0,              0,   0,    1,   0,  2,0, 0,0,0 },
    { ARM::t2SXTB,     ARM::tSXTB,   0,              0,   0,    1,   0,  1,0, 0,1,0 },
    { ARM::t2SXTH,     ARM::tSXTH,   0,              0,   0,    1,   0,  1,0, 0,1,0 },
    { ARM::t2TEQrr,    ARM::tEOR,    0,              0,   0,    1,   0,  2,0, 0,1,0 },
    { ARM::t2TSTrr,    ARM::tTST,    0,              0,   0,    1,   0,  2,0, 0,0,0 },
    { ARM::t2UXTB,     ARM::tUXTB,   0,              0,   0,    1,   0,  1,0, 0,1,0 },
    { ARM::t2UXTH,     ARM::tUXTH,   0,              0,   0,    1,   0,  1,0, 0,1,0 },

    // FIXME: Clean this up after splitting each Thumb load / store opcode
    // into multiple ones.
    { ARM::t2LDRi12,   ARM::tLDRi,   ARM::tLDRspi,   5,   8,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRs,     ARM::tLDRr,   0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRBi12,  ARM::tLDRBi,  0,              5,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRBs,    ARM::tLDRBr,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRHi12,  ARM::tLDRHi,  0,              5,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRHs,    ARM::tLDRHr,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRSBs,   ARM::tLDRSB,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDRSHs,   ARM::tLDRSH,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2LDR_POST, ARM::tLDMIA_UPD, 0,           0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRi12,   ARM::tSTRi,   ARM::tSTRspi,   5,   8,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRs,     ARM::tSTRr,   0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRBi12,  ARM::tSTRBi,  0,              5,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRBs,    ARM::tSTRBr,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRHi12,  ARM::tSTRHi,  0,              5,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STRHs,    ARM::tSTRHr,  0,              0,   0,    1,   0,  0,0, 0,1,0 },
    { ARM::t2STR_POST, ARM::tSTMIA_UPD, 0,           0,   0,    1,   0,  0,0, 0,1,0 },

    { ARM::t2LDMIA,    ARM::tLDMIA,  0,              0,   0,    1,   1,  1,1, 0,1,0 },
    { ARM::t2LDMIA_RET, 0,           ARM::tPOP_RET,  0,   0,    1,   1,  1,1, 0,1,0 },
    { ARM::t2LDMIA_UPD, ARM::tLDMIA_UPD, ARM::tPOP,  0,   0,    1,   1,  1,1, 0,1,0 },
    // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
    // tSTMIA_UPD is a change in semantics which can only be used if the base
    // register is killed. This difference is correctly handled elsewhere.
    { ARM::t2STMIA,    ARM::tSTMIA_UPD, 0,           0,   0,    1,   1,  1,1, 0,1,0 },
    { ARM::t2STMIA_UPD, ARM::tSTMIA_UPD, 0,          0,   0,    1,   1,  1,1, 0,1,0 },
    { ARM::t2STMDB_UPD, 0,           ARM::tPUSH,     0,   0,    1,   1,  1,1, 0,1,0 }
  };
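
  // As an illustration of how these entries are used: under the t2ADDri row
  // above, a wide "add.w r0, r1, #4" can be narrowed to the 16-bit
  // "adds r0, r1, #4" (tADDi3, 3-bit immediate) or, in two-address form, to
  // "adds r0, #4" (tADDi8, 8-bit immediate), provided only low registers are
  // involved and the implicit flag update is acceptable at that point.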

  class Thumb2SizeReduce : public MachineFunctionPass {
  public:
    static char ID;

    const Thumb2InstrInfo *TII;
    const ARMSubtarget *STI;

    Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);

    bool runOnMachineFunction(MachineFunction &MF) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override {
      return THUMB2_SIZE_REDUCE_NAME;
    }

  private:
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;

    bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
                         bool LiveCPSR, bool &HasCC, bool &CCDead);

    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
    /// instruction.
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
    /// non-two-address instruction.
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR,
                        bool IsSelfLoop);

    /// ReduceMI - Attempt to reduce MI, return true on success.
    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                  bool IsSelfLoop, bool SkipPrologueEpilogue);

    /// ReduceMBB - Reduce width of instructions in the specified basic block.
    bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);

    bool OptimizeSize;
    bool MinimizeSize;

    // Last instruction to define CPSR in the current block.
    MachineInstr *CPSRDef;
    // Was CPSR last defined by a high latency instruction?
    // When CPSRDef is null, this refers to CPSR defs in predecessors.
    bool HighLatencyCPSR;

    struct MBBInfo {
      // The flags leaving this block have high latency.
      bool HighLatencyCPSR = false;
      // Has this block been visited yet?
      bool Visited = false;

      MBBInfo() = default;
    };

    SmallVector<MBBInfo, 8> BlockInfo;

    std::function<bool(const Function &)> PredicateFtor;
  };

  char Thumb2SizeReduce::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
                false)

Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  return is_contained(MCID.implicit_defs(), ARM::CPSR);
}

// Check for a likely high-latency flag def.
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch(Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}

/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability
/// to do register renaming magic.
/// This function checks if there is a read-after-write dependency between the
/// last instruction that defines the CPSR and the current instruction. If there
/// is, then there is no harm done since the instruction cannot be retired
/// before the CPSR setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of compile
/// time. We're not looking for cases like:
/// r0 = muls ...
/// r1 = add.w r0, ...
/// ...
///    = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return HighLatencyCPSR || FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (const MachineOperand &MO : CPSRDef->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (const MachineOperand &MO : Use->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // If the current CPSR has high latency, try to avoid the false dependency.
  if (HighLatencyCPSR)
    return true;

  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;

  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If old opcode does not implicitly define CPSR, then it's not ok since
    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
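    // For example, a post-increment "ldr r0, [r1], #4" becomes "ldm r1!, {r0}"
    // and "str r0, [r1], #4" becomes "stm r1!, {r0}". Only a #4 increment can
    // be expressed this way, which is why the offset is checked below.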
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

    if (Offset != 4)
      return false;

    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    Register BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
      if (MO.getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA: {
    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do
    // this if the base register is killed, as then it doesn't matter what its
    // value is after the instruction.
    if (!MI->getOperand(0).isKill())
      return false;

    // If the base register is in the register list and isn't the lowest
    // numbered register (i.e. it's in operand 4 onwards) then with writeback
    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;

    break;
  }
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                            getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always" and the condition flags must not
    // be being set.
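    // For instance, "add.w r0, sp, #16" can become the 16-bit
    // "add r0, sp, #16": tADDrSPi encodes the offset as #imm/4, so the
    // immediate must be a multiple of 4 and no larger than 1020.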
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(),
                TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    Register PredReg;
    if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri:
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        [[fallthrough]];
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the main table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  case ARM::t2TEQrr: {
    Register PredReg;
    // Can only convert to eors if we're not in an IT block.
    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
      break;
    // TODO if Operand 0 is not killed but Operand 1 is, then we could write
    // to Op1 instead.
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  Register Reg0 = MI->getOperand(0).getReg();
  Register Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    // MULS can be slower than MUL
    if (!MinimizeSize && STI->avoidMULS())
      return false;
    Register Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    Register Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.operands()[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.operands()[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);

  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. So add the first operand as a def and then
  // again as a use.
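  // For example, "teq.w r0, r1" becomes "eors r0, r1"; the write to r0 is
  // marked dead because only the flags result of the original TEQ is wanted.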
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
                               MI->getFlag(MachineInstr::FrameDestroy)))
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
        LiveCPSR = false;
      MachineOperand *MO =
          BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;

  STI = &MF.getSubtarget<ARMSubtarget>();
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction().hasOptSize();
  MinimizeSize = STI->hasMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
                     MF.getFunction().needsUnwindTableEntry();
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}