//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, and other late
// optimizations. This pass should be run after register allocation but before
// the post-regalloc scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "arm-pseudo"

static cl::opt<bool>
    VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
                    cl::desc("Verify machine code after expanding ARM pseudos"));

#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"

namespace {
  class ARMExpandPseudo : public MachineFunctionPass {
  public:
    static char ID;
    ARMExpandPseudo() : MachineFunctionPass(ID) {}

    const ARMBaseInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override {
      return ARM_EXPAND_PSEUDO_NAME;
    }

  private:
    void TransferImpOps(MachineInstr &OldMI,
                        MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
    bool ExpandMI(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MBBI,
                  MachineBasicBlock::iterator &NextMBBI);
    bool ExpandMBB(MachineBasicBlock &MBB);
    void ExpandVLD(MachineBasicBlock::iterator &MBBI);
    void ExpandVST(MachineBasicBlock::iterator &MBBI);
    void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
    void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                    unsigned Opc, bool IsExt);
    void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
    void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
    void CMSEClearGPRegs(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                         const SmallVectorImpl<unsigned> &ClearRegs,
                         unsigned ClobberReg);
    MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI);
    MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
                                         const BitVector &ClearRegs);
    MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const BitVector &ClearRegs);
    void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                             const LivePhysRegs &LiveRegs,
                             SmallVectorImpl<unsigned> &AvailableRegs);
    void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                               const LivePhysRegs &LiveRegs,
                               SmallVectorImpl<unsigned> &ScratchRegs);
    void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                                const LivePhysRegs &LiveRegs);
    void CMSERestoreFPRegs(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                           SmallVectorImpl<unsigned> &AvailableRegs);
    void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                             SmallVectorImpl<unsigned> &AvailableRegs);
    void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                              SmallVectorImpl<unsigned> &AvailableRegs);
    bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
                        unsigned StrexOp, unsigned UxtOp,
                        MachineBasicBlock::iterator &NextMBBI);

    bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI);
  };
  char ARMExpandPseudo::ID = 0;
}

INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
                false)

/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
/// the instructions created from the expansion.
void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
                                     MachineInstrBuilder &UseMI,
                                     MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

namespace {
  // Constants for register spacing in NEON load/store instructions.
  // For quad-register load-lane and store-lane pseudo instructions, the
  // spacing is initially assumed to be EvenDblSpc, and that is changed to
  // OddDblSpc depending on the lane number operand.
  enum NEONRegSpacing {
    SingleSpc,
    SingleLowSpc,   // Single spacing, low registers, three and four vectors.
    SingleHighQSpc, // Single spacing, high registers, four vectors.
    SingleHighTSpc, // Single spacing, high registers, three vectors.
    EvenDblSpc,
    OddDblSpc
  };

  // Entries for NEON load/store information table.  The table is sorted by
  // PseudoOpc for fast binary-search lookups.
  struct NEONLdStTableEntry {
    uint16_t PseudoOpc;
    uint16_t RealOpc;
    bool IsLoad;
    bool isUpdating;
    bool hasWritebackOperand;
    uint8_t RegSpacing; // One of type NEONRegSpacing
    uint8_t NumRegs;    // D registers loaded or stored
    uint8_t RegElts;    // elements per D register; used for lane ops
    // FIXME: Temporary flag to denote whether the real instruction takes
    // a single register (like the encoding) or all of the registers in
    // the list (like the asm syntax and the isel DAG). When all definitions
    // are converted to take only the single encoded register, this will
    // go away.
    bool copyAllListRegs;

    // Comparison methods for binary search of the table.
    bool operator<(const NEONLdStTableEntry &TE) const {
      return PseudoOpc < TE.PseudoOpc;
    }
    friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
      return TE.PseudoOpc < PseudoOpc;
    }
    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
                                                const NEONLdStTableEntry &TE) {
      return PseudoOpc < TE.PseudoOpc;
    }
  };
}
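
// Each table entry maps one pseudo opcode to the real NEON instruction it
// expands to, together with the information the expansion routines below
// (ExpandVLD, ExpandVST, ExpandLaneOp) need to rebuild the operand list:
// register spacing, the number of D registers in the list, and the number of
// elements per D register for lane operations.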
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4, true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4, true},
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2, true},
{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2, true},
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8, true},
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8, true},

{ ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4, false},
{ ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4, false},
{ ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4, false},
{ ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4, false},
{ ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4, false},
{ ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4, false},

{ ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2, false},
{ ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2, false},
{ ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2, false},
{ ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2, false},
{ ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2, false},
{ ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2, false},

{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1, false},
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1, false},
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1, false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1, false},
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1, false},
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1, false},

{ ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8, false},
{ ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8, false},
{ ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8, false},
{ ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8, false},
{ ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8, false},
{ ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8, false},

{ ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4, false},
{ ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4, false},
{ ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4, false},
{ ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4, false},
{ ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4, false},
{ ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4, false},

{ ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2, false},
{ ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2, false},
{ ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2, false},
{ ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2, false},
{ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2, false},
{ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2, false},

{ ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1, false},
{ ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1, false},
{ ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1, false},
{ ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1, false},
{ ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1, false},
{ ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1, false},

{ ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8, false},
{ ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8, false},
{ ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8, false},
{ ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8, false},
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8, false},
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8, false},

{ ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4, false},
{ ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4, false},
{ ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4, false},
{ ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4, false},
{ ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2, false},
{ ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2, false},
{ ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2, false},
{ ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2, false},
{ ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8, false},
{ ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8, false},
{ ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8, false},
{ ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8, false},

{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4, true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4, true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2, true},
{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2, true},
{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8, true},
{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8, true},
{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4, true},
{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4, true},
{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2, true},
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2, true},

{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4, false},
{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4, false},
{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4, false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2, false},
{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2, false},
{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2, false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8, false},
{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8, false},
{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8, false},

{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4, true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4, true},
{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2, true},
{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2, true},
{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8, true},
{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8, true},
{ ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4, true},
{ ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4, true},
{ ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4, true},
{ ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2, true},
{ ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2, true},
{ ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2, true},
{ ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8, true},
{ ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8, true},
{ ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8, true},

{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4, true},
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4, true},
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2, true},
{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2, true},
{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8, true},
{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8, true},
{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4, true},
{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4, true},
{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2, true},
{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2, true},

{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4, true},
{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4, true},
{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2, true},
{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2, true},
{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8, true},
{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8, true},

{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4, true},
{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4, true},
{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4, true},
{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2, true},
{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2, true},
{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2, true},
{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8, true},
{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8, true},
{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8, true},

{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4, true},
{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4, true},
{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2, true},
{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2, true},
{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8, true},
{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8, true},
{ ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4, true},
{ ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4, true},
{ ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4, true},
{ ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2, true},
{ ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2, true},
{ ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2, true},
{ ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8, true},
{ ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8, true},
{ ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8, true},

{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4, true},
{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4, true},
{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2, true},
{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2, true},
{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8, true},
{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8, true},
{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4, true},
{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4, true},
{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2, true},
{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2, true},

{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4, true},
{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4, true},
{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2, true},
{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2, true},
{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8, true},
{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8, true},

{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4, true},
{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4, true},
{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4, true},
{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2, true},
{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2, true},
{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2, true},
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8, true},
{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8, true},
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8, true},

{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4, true},
{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4, true},
{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2, true},
{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2, true},
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8, true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8, true},

{ ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4, false},
{ ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4, false},
{ ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4, false},
{ ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4, false},
{ ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4, false},
{ ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4, false},

{ ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2, false},
{ ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2, false},
{ ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2, false},
{ ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2, false},
{ ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2, false},
{ ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2, false},

{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1, false},
{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1, false},
{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1, false},
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1, false},
{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1, false},
{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1, false},

{ ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8, false},
{ ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8, false},
{ ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8, false},
{ ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8, false},
{ ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8, false},
{ ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8, false},

{ ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4, false},
{ ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8, false},
{ ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4, false},
{ ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4, false},
{ ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4, false},
{ ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4, false},

{ ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2, false},
{ ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8, false},
{ ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2, false},
{ ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2, false},
{ ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2, false},
{ ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2, false},

{ ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1, false},
{ ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8, false},
{ ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1, false},
{ ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1, false},
{ ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1, false},
{ ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1, false},

{ ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8, false},
{ ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8, false},
{ ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8, false},
{ ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8, false},
{ ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8, false},
{ ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8, false},

{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4, true},
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4, true},
{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2, true},
{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2, true},
{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8, true},
{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8, true},
{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4, true},
{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4, true},
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2, true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2, true},

{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4, false},
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4, false},
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4, false},
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2, false},
{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2, false},
{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2, false},
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8, false},
{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8, false},
{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8, false},

{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4, true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4, true},
{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2, true},
{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2, true},
{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8, true},
{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8, true},
{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4, true},
{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4, true},
{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2, true},
{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2, true},

{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4, true},
{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4, true},
{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2, true},
{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2, true},
{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8, true},
{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8, true},

{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4, true},
{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4, true},
{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4, true},
{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2, true},
{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2, true},
{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2, true},
{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8, true},
{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8, true},
{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8, true},

{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4, true},
{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4, true},
{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2, true},
{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2, true},
{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8, true},
{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8, true},
{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4, true},
{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4, true},
{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2, true},
{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2, true},

{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4, true},
{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4, true},
{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2, true},
{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2, true},
{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8, true},
{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8, true},

{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4, true},
{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4, true},
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4, true},
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2, true},
{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2, true},
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2, true},
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8, true},
{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8, true},
{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8, true}
};

/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
#ifndef NDEBUG
  // Make sure the table is sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif

  auto I = llvm::lower_bound(NEONLdStTable, Opcode);
  if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
    return I;
  return nullptr;
}
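
// Note that the lookup relies on the strict PseudoOpc ordering checked above:
// a table entry added out of order trips the is_sorted assert in debug builds
// and would silently break the lower_bound search in release builds.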
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
/// corresponding to the specified register spacing.  Not all of the results
/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
                        const TargetRegisterInfo *TRI, unsigned &D0,
                        unsigned &D1, unsigned &D2, unsigned &D3) {
  if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
    D0 = TRI->getSubReg(Reg, ARM::dsub_0);
    D1 = TRI->getSubReg(Reg, ARM::dsub_1);
    D2 = TRI->getSubReg(Reg, ARM::dsub_2);
    D3 = TRI->getSubReg(Reg, ARM::dsub_3);
  } else if (RegSpc == SingleHighQSpc) {
    D0 = TRI->getSubReg(Reg, ARM::dsub_4);
    D1 = TRI->getSubReg(Reg, ARM::dsub_5);
    D2 = TRI->getSubReg(Reg, ARM::dsub_6);
    D3 = TRI->getSubReg(Reg, ARM::dsub_7);
  } else if (RegSpc == SingleHighTSpc) {
    D0 = TRI->getSubReg(Reg, ARM::dsub_3);
    D1 = TRI->getSubReg(Reg, ARM::dsub_4);
    D2 = TRI->getSubReg(Reg, ARM::dsub_5);
    D3 = TRI->getSubReg(Reg, ARM::dsub_6);
  } else if (RegSpc == EvenDblSpc) {
    D0 = TRI->getSubReg(Reg, ARM::dsub_0);
    D1 = TRI->getSubReg(Reg, ARM::dsub_2);
    D2 = TRI->getSubReg(Reg, ARM::dsub_4);
    D3 = TRI->getSubReg(Reg, ARM::dsub_6);
  } else {
    assert(RegSpc == OddDblSpc && "unknown register spacing");
    D0 = TRI->getSubReg(Reg, ARM::dsub_1);
    D1 = TRI->getSubReg(Reg, ARM::dsub_3);
    D2 = TRI->getSubReg(Reg, ARM::dsub_5);
    D3 = TRI->getSubReg(Reg, ARM::dsub_7);
  }
}
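
// For example, for a QQQQ super-register EvenDblSpc selects D subregisters
// 0, 2, 4 and 6 and OddDblSpc selects 1, 3, 5 and 7, i.e. the first or the
// second D register of each Q register in the tuple.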
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;

  bool DstIsDead = MI.getOperand(OpIdx).isDead();
  Register DstReg = MI.getOperand(OpIdx++).getReg();

  bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register;

  if (IsVLD2DUP) {
    unsigned SubRegIndex;
    if (RegSpc == EvenDblSpc) {
      SubRegIndex = ARM::dsub_0;
    } else {
      assert(RegSpc == OddDblSpc && "Unexpected spacing!");
      SubRegIndex = ARM::dsub_1;
    }
    Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
    unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
                                                   &ARM::DPairSpcRegClass);
    MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
  } else {
    unsigned D0, D1, D2, D3;
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1 && TableEntry->copyAllListRegs)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2 && TableEntry->copyAllListRegs)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3 && TableEntry->copyAllListRegs)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }
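
  // At this point the pseudo's tuple destination has been rebuilt as explicit
  // D-register defs (or a double-spaced pair for VLD2DUP); the remaining
  // pseudo operands (optional writeback def, addrmode6, optional am6offset
  // and the predicate) are forwarded to the real instruction below.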
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable of representing both
    // fixed and register forms. Some real instructions, however, do not rely
    // on am6offset and have separate definitions for such forms. When this is
    // the case, fixed forms do not take any offset nodes, so here we skip them
    // for such instructions. Once all real and pseudo writing-back
    // instructions are rewritten without use of am6offset nodes, this code
    // will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  // For an instruction writing double-spaced subregs, the pseudo instruction
  // has an extra operand that is a use of the super-register. Record the
  // operand index and skip over it.
  unsigned SrcOpIdx = 0;
  if (!IsVLD2DUP) {
    if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
        RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
        RegSpc == SingleHighTSpc)
      SrcOpIdx = OpIdx++;
  }

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source operand used for double-spaced subregs over
  // to the new instruction as an implicit operand.
  if (SrcOpIdx != 0) {
    MachineOperand MO = MI.getOperand(SrcOpIdx);
    MO.setImplicit(true);
    MIB.add(MO);
  }
  // Add an implicit def for the super-register.
  MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  TransferImpOps(MI, MIB, MIB);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable of representing both
    // fixed and register forms. Some real instructions, however, do not rely
    // on am6offset and have separate definitions for such forms. When this is
    // the case, fixed forms do not take any offset nodes, so here we skip them
    // for such instructions. Once all real and pseudo writing-back
    // instructions are rewritten without use of am6offset nodes, this code
    // will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  unsigned D0, D1, D2, D3;
  GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
  MIB.addReg(D0, getUndefRegState(SrcIsUndef));
  if (NumRegs > 1 && TableEntry->copyAllListRegs)
    MIB.addReg(D1, getUndefRegState(SrcIsUndef));
  if (NumRegs > 2 && TableEntry->copyAllListRegs)
    MIB.addReg(D2, getUndefRegState(SrcIsUndef));
  if (NumRegs > 3 && TableEntry->copyAllListRegs)
    MIB.addReg(D3, getUndefRegState(SrcIsUndef));

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
    MIB->addRegisterKilled(SrcReg, TRI, true);
  else if (!SrcIsUndef)
    MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
  TransferImpOps(MI, MIB, MIB);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;
  unsigned RegElts = TableEntry->RegElts;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;
  // The lane operand is always the 3rd from last operand, before the 2
  // predicate operands.
  unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();

  // Adjust the lane and spacing as needed for Q registers.
  assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
  if (RegSpc == EvenDblSpc && Lane >= RegElts) {
    RegSpc = OddDblSpc;
    Lane -= RegElts;
  }
  assert(Lane < RegElts && "out of range lane for VLD/VST-lane");

  unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
  unsigned DstReg = 0;
  bool DstIsDead = false;
  if (TableEntry->IsLoad) {
    DstIsDead = MI.getOperand(OpIdx).isDead();
    DstReg = MI.getOperand(OpIdx++).getReg();
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));
  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand)
    MIB.add(MI.getOperand(OpIdx++));

  // Grab the super-register source.
  MachineOperand MO = MI.getOperand(OpIdx++);
  if (!TableEntry->IsLoad)
    GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);

  // Add the subregs as sources of the new instruction.
  unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
                       getKillRegState(MO.isKill()));
  MIB.addReg(D0, SrcFlags);
  if (NumRegs > 1)
    MIB.addReg(D1, SrcFlags);
  if (NumRegs > 2)
    MIB.addReg(D2, SrcFlags);
  if (NumRegs > 3)
    MIB.addReg(D3, SrcFlags);

  // Add the lane number operand.
  MIB.addImm(Lane);
  OpIdx += 1;

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source to be an implicit source.
  MO.setImplicit(true);
  MIB.add(MO);
  if (TableEntry->IsLoad)
    // Add an implicit def for the super-register.
    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  TransferImpOps(MI, MIB, MIB);
  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                                 unsigned Opc, bool IsExt) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
  unsigned OpIdx = 0;

  // Transfer the destination register operand.
  MIB.add(MI.getOperand(OpIdx++));
  if (IsExt) {
    MachineOperand VdSrc(MI.getOperand(OpIdx++));
    MIB.add(VdSrc);
  }

  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  unsigned D0, D1, D2, D3;
  GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
  MIB.addReg(D0);

  // Copy the other source register operand.
  MachineOperand VmSrc(MI.getOperand(OpIdx++));
  MIB.add(VmSrc);

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Add an implicit kill and use for the super-reg.
  MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
  TransferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
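
// Expand the MQQPR/MQQQQPR load and store pseudos, which load or store a
// tuple of Q registers, into a single VLDMDIA/VSTMDIA listing the tuple's
// D subregisters explicitly.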
void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  unsigned NewOpc =
      MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
          ? ARM::VSTMDIA
          : ARM::VLDMDIA;
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));

  unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
                   getDefRegState(MI.getOperand(0).isDef());
  Register SrcReg = MI.getOperand(0).getReg();

  // Copy the destination register.
  MIB.add(MI.getOperand(1));
  MIB.add(predOps(ARMCC::AL));
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
  if (MI.getOpcode() == ARM::MQQQQPRStore ||
      MI.getOpcode() == ARM::MQQQQPRLoad) {
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
  }

  if (NewOpc == ARM::VSTMDIA)
    MIB.addReg(SrcReg, RegState::Implicit);

  TransferImpOps(MI, MIB, MIB);
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

static bool IsAnAddressOperand(const MachineOperand &MO) {
  // This check is overly conservative. Unless we are certain that the machine
  // operand is not a symbol reference, we return that it is a symbol reference.
  // This is important as the load pair may not be split up on Windows.
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
  case MachineOperand::MO_Immediate:
  case MachineOperand::MO_CImmediate:
  case MachineOperand::MO_FPImmediate:
  case MachineOperand::MO_ShuffleMask:
    return false;
  case MachineOperand::MO_MachineBasicBlock:
    return true;
  case MachineOperand::MO_FrameIndex:
    return false;
  case MachineOperand::MO_ConstantPoolIndex:
  case MachineOperand::MO_TargetIndex:
  case MachineOperand::MO_JumpTableIndex:
  case MachineOperand::MO_ExternalSymbol:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_BlockAddress:
    return true;
  case MachineOperand::MO_RegisterMask:
  case MachineOperand::MO_RegisterLiveOut:
    return false;
  case MachineOperand::MO_Metadata:
  case MachineOperand::MO_MCSymbol:
    return true;
  case MachineOperand::MO_CFIIndex:
    return false;
  case MachineOperand::MO_IntrinsicID:
  case MachineOperand::MO_Predicate:
    llvm_unreachable("should not exist post-isel");
  }
  llvm_unreachable("unhandled machine operand type");
}
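
// Return a copy of the given operand with the implicit flag set, so it can be
// appended to an expanded instruction without disturbing its explicit operand
// layout.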
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand NewMO = MO;
  NewMO.setImplicit();
  return NewMO;
}

void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
  bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
  MachineInstrBuilder LO16, HI16;
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  if (!STI->hasV6T2Ops() &&
      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    assert(MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    unsigned ImmVal = (unsigned)MO.getImm();
    unsigned SOImmValV1 = 0, SOImmValV2 = 0;
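
    // A 32-bit value such as 0x00AB00CD can be decomposed into two rotated
    // 8-bit immediates (0xAB0000 and 0xCD), so it is materialized as a MOV of
    // one part followed by an ORR of the other; values whose negation
    // decomposes this way use MVN followed by SUB instead.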
    if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    } else { // Expand into a mvn + sub.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
      SOImmValV1 = ~(-SOImmValV1);
    }

    unsigned MIFlags = MI.getFlags();
    LO16 = LO16.addImm(SOImmValV1);
    HI16 = HI16.addImm(SOImmValV2);
    LO16.cloneMemRefs(MI);
    HI16.cloneMemRefs(MI);
    LO16.setMIFlags(MIFlags);
    HI16.setMIFlags(MIFlags);
    LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    if (isCC)
      LO16.add(makeImplicit(MI.getOperand(1)));
    TransferImpOps(MI, LO16, HI16);
    MI.eraseFromParent();
    return;
  }
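
  // From here on we can assume movw/movt are available: build the constant
  // (or address) with a 16-bit MOV of the low half followed by a MOVT of the
  // high half, using the Thumb2 or ARM encodings as appropriate.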
  unsigned LO16Opc = 0;
  unsigned HI16Opc = 0;
  unsigned MIFlags = MI.getFlags();
  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    LO16Opc = ARM::t2MOVi16;
    HI16Opc = ARM::t2MOVTi16;
  } else {
    LO16Opc = ARM::MOVi16;
    HI16Opc = ARM::MOVTi16;
  }

  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
             .addReg(DstReg);

  LO16.setMIFlags(MIFlags);
  HI16.setMIFlags(MIFlags);

  switch (MO.getType()) {
  case MachineOperand::MO_Immediate: {
    unsigned Imm = MO.getImm();
    unsigned Lo16 = Imm & 0xffff;
    unsigned Hi16 = (Imm >> 16) & 0xffff;
    LO16 = LO16.addImm(Lo16);
    HI16 = HI16.addImm(Hi16);
    break;
  }
  case MachineOperand::MO_ExternalSymbol: {
    const char *ES = MO.getSymbolName();
    unsigned TF = MO.getTargetFlags();
    LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
    HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
    break;
  }
  default: {
    const GlobalValue *GV = MO.getGlobal();
    unsigned TF = MO.getTargetFlags();
    LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
    HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
    break;
  }
  }

  LO16.cloneMemRefs(MI);
  HI16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  HI16.addImm(Pred).addReg(PredReg);

  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  if (isCC)
    LO16.add(makeImplicit(MI.getOperand(1)));
  TransferImpOps(MI, LO16, HI16);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););
  LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}


// The size of the area accessed by the VLSTM/VLLDM lazy save/restore:
// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
static const int CMSE_FP_SAVE_SIZE = 136;

static void determineGPRegsToClear(const MachineInstr &MI,
                                   const std::initializer_list<unsigned> &Regs,
                                   SmallVectorImpl<unsigned> &ClearRegs) {
  SmallVector<unsigned, 4> OpRegs;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
      continue;
    OpRegs.push_back(Op.getReg());
  }
  llvm::sort(OpRegs);
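
  // std::set_difference requires both input ranges to be sorted; the callers
  // pass Regs in ascending register order and OpRegs is sorted above.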
  std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
                      std::back_inserter(ClearRegs));
}

void ARMExpandPseudo::CMSEClearGPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
    unsigned ClobberReg) {

  if (STI->hasV8_1MMainlineOps()) {
    // Clear the registers using the CLRM instruction.
    MachineInstrBuilder CLRM =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
    for (unsigned R : ClearRegs)
      CLRM.addReg(R, RegState::Define);
    CLRM.addReg(ARM::APSR, RegState::Define);
    CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
  } else {
    // Clear the registers and flags by copying ClobberReg into them.
    // (Baseline can't do a high register clear in one instruction).
    for (unsigned Reg : ClearRegs) {
      if (Reg == ClobberReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
          .addReg(ClobberReg)
          .add(predOps(ARMCC::AL));
    }
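
    // Clear the flags by writing ClobberReg to the APSR: SYSm encoding 0x800
    // selects APSR_nzcvq, and 0xc00 (APSR_nzcvqg) also covers the GE flags
    // when the DSP extension is present.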
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
        .addImm(STI->hasDSP() ? 0xc00 : 0x800)
        .addReg(ClobberReg)
        .add(predOps(ARMCC::AL));
  }
}

// Find which FP registers need to be cleared.  The parameter `ClearRegs` is
// initialised with all elements set to true, and this function resets the
// bits that correspond to register uses. Returns true if any floating point
// register is defined, false otherwise.
static bool determineFPRegsToClear(const MachineInstr &MI,
                                   BitVector &ClearRegs) {
  bool DefFP = false;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;

    unsigned Reg = Op.getReg();
    if (Op.isDef()) {
      if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
          (Reg >= ARM::D0 && Reg <= ARM::D15) ||
          (Reg >= ARM::S0 && Reg <= ARM::S31))
        DefFP = true;
      continue;
    }

    if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
      int R = Reg - ARM::Q0;
      ClearRegs.reset(R * 4, (R + 1) * 4);
    } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
      int R = Reg - ARM::D0;
      ClearRegs.reset(R * 2, (R + 1) * 2);
    } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
      ClearRegs[Reg - ARM::S0] = false;
    }
  }
  return DefFP;
}

MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI) {
  BitVector ClearRegs(16, true);
  (void)determineFPRegsToClear(*MBBI, ClearRegs);
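
  // Each bit of ClearRegs corresponds to one of S0-S15; the bits still set
  // after determineFPRegsToClear are the registers the return does not use
  // and which therefore must be wiped before returning to non-secure state.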

  if (STI->hasV8_1MMainlineOps())
    return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
  else
    return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
}
1182 // Clear the FP registers for v8.0-M, by copying over the content
1183 // of LR. Uses R12 as a scratch register.
1185 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock
&MBB
,
1186 MachineBasicBlock::iterator MBBI
,
1187 const BitVector
&ClearRegs
) {
1188 if (!STI
->hasFPRegs())
1192 const DebugLoc
&DL
= RetI
.getDebugLoc();
1194 // If optimising for minimum size, clear FP registers unconditionally.
1195 // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
1196 // don't clear them if they belong to the non-secure state.
1197 MachineBasicBlock
*ClearBB
, *DoneBB
;
1198 if (STI
->hasMinSize()) {
1199 ClearBB
= DoneBB
= &MBB
;
1201 MachineFunction
*MF
= MBB
.getParent();
1202 ClearBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1203 DoneBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1205 MF
->insert(++MBB
.getIterator(), ClearBB
);
1206 MF
->insert(++ClearBB
->getIterator(), DoneBB
);
1208 DoneBB
->splice(DoneBB
->end(), &MBB
, MBBI
, MBB
.end());
1209 DoneBB
->transferSuccessors(&MBB
);
1210 MBB
.addSuccessor(ClearBB
);
1211 MBB
.addSuccessor(DoneBB
);
1212 ClearBB
->addSuccessor(DoneBB
);
1214 // At the new basic blocks we need to have live-in the registers, used
1215 // for the return value as well as LR, used to clear registers.
1216 for (const MachineOperand
&Op
: RetI
.operands()) {
1219 Register Reg
= Op
.getReg();
1220 if (Reg
== ARM::NoRegister
|| Reg
== ARM::LR
)
1222 assert(Register::isPhysicalRegister(Reg
) && "Unallocated register");
1223 ClearBB
->addLiveIn(Reg
);
1224 DoneBB
->addLiveIn(Reg
);
1226 ClearBB
->addLiveIn(ARM::LR
);
1227 DoneBB
->addLiveIn(ARM::LR
);
1229 // Read the CONTROL register.
1230 BuildMI(MBB
, MBB
.end(), DL
, TII
->get(ARM::t2MRS_M
), ARM::R12
)
1232 .add(predOps(ARMCC::AL
));
1233 // Check bit 3 (SFPA).
1234 BuildMI(MBB
, MBB
.end(), DL
, TII
->get(ARM::t2TSTri
))
1237 .add(predOps(ARMCC::AL
));
1238 // If SFPA is clear, jump over ClearBB to DoneBB.
1239 BuildMI(MBB
, MBB
.end(), DL
, TII
->get(ARM::tBcc
))
1242 .addReg(ARM::CPSR
, RegState::Kill
);
1245 // Emit the clearing sequence
1246 for (unsigned D
= 0; D
< 8; D
++) {
1247 // Attempt to clear as double
1248 if (ClearRegs
[D
* 2 + 0] && ClearRegs
[D
* 2 + 1]) {
1249 unsigned Reg
= ARM::D0
+ D
;
1250 BuildMI(ClearBB
, DL
, TII
->get(ARM::VMOVDRR
), Reg
)
1253 .add(predOps(ARMCC::AL
));
1255 // Clear first part as single
1256 if (ClearRegs
[D
* 2 + 0]) {
1257 unsigned Reg
= ARM::S0
+ D
* 2;
1258 BuildMI(ClearBB
, DL
, TII
->get(ARM::VMOVSR
), Reg
)
1260 .add(predOps(ARMCC::AL
));
1262 // Clear second part as single
1263 if (ClearRegs
[D
* 2 + 1]) {
1264 unsigned Reg
= ARM::S0
+ D
* 2 + 1;
1265 BuildMI(ClearBB
, DL
, TII
->get(ARM::VMOVSR
), Reg
)
1267 .add(predOps(ARMCC::AL
));
1272 // Clear FPSCR bits 0-4, 7, 28-31
1273 // The other bits are program global according to the AAPCS
1274 BuildMI(ClearBB
, DL
, TII
->get(ARM::VMRS
), ARM::R12
)
1275 .add(predOps(ARMCC::AL
));
1276 BuildMI(ClearBB
, DL
, TII
->get(ARM::t2BICri
), ARM::R12
)
1279 .add(predOps(ARMCC::AL
))
1281 BuildMI(ClearBB
, DL
, TII
->get(ARM::t2BICri
), ARM::R12
)
1284 .add(predOps(ARMCC::AL
))
1286 BuildMI(ClearBB
, DL
, TII
->get(ARM::VMSR
))
1288 .add(predOps(ARMCC::AL
));

MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const BitVector &ClearRegs) {
  auto &RetI = *MBBI;

  // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
  // each contiguous sequence of S-registers.
  int Start = -1, End = -1;
  for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
    if (ClearRegs[S] && S == End + 1) {
      End = S; // extend range
      continue;
    }
    // Emit current range.
    if (Start < End) {
      MachineInstrBuilder VSCCLRM =
          BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
              .add(predOps(ARMCC::AL));
      while (++Start <= End)
        VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
      VSCCLRM.addReg(ARM::VPR, RegState::Define);
    }
    Start = End = S;
  }
  // Emit the last range.
  if (Start < End) {
    MachineInstrBuilder VSCCLRM =
        BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
            .add(predOps(ARMCC::AL));
    while (++Start <= End)
      VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
    VSCCLRM.addReg(ARM::VPR, RegState::Define);
  }

  return MBB;
}

void ARMExpandPseudo::CMSESaveClearFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
  else
    CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}
1340 // Save and clear FP registers if present
1341 void ARMExpandPseudo::CMSESaveClearFPRegsV8(
1342 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
, DebugLoc
&DL
,
1343 const LivePhysRegs
&LiveRegs
, SmallVectorImpl
<unsigned> &ScratchRegs
) {
1344 if (!STI
->hasFPRegs())
1347 // Store an available register for FPSCR clearing
1348 assert(!ScratchRegs
.empty());
1349 unsigned SpareReg
= ScratchRegs
.front();
1351 // save space on stack for VLSTM
1352 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tSUBspi
), ARM::SP
)
1354 .addImm(CMSE_FP_SAVE_SIZE
>> 2)
1355 .add(predOps(ARMCC::AL
));
1357 // Use ScratchRegs to store the fp regs
1358 std::vector
<std::tuple
<unsigned, unsigned, unsigned>> ClearedFPRegs
;
1359 std::vector
<unsigned> NonclearedFPRegs
;
1360 for (const MachineOperand
&Op
: MBBI
->operands()) {
1361 if (Op
.isReg() && Op
.isUse()) {
1362 unsigned Reg
= Op
.getReg();
1363 assert(!ARM::DPRRegClass
.contains(Reg
) ||
1364 ARM::DPR_VFP2RegClass
.contains(Reg
));
1365 assert(!ARM::QPRRegClass
.contains(Reg
));
1366 if (ARM::DPR_VFP2RegClass
.contains(Reg
)) {
1367 if (ScratchRegs
.size() >= 2) {
1368 unsigned SaveReg2
= ScratchRegs
.pop_back_val();
1369 unsigned SaveReg1
= ScratchRegs
.pop_back_val();
1370 ClearedFPRegs
.emplace_back(Reg
, SaveReg1
, SaveReg2
);
1372 // Save the fp register to the normal registers
1373 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVRRD
))
1374 .addReg(SaveReg1
, RegState::Define
)
1375 .addReg(SaveReg2
, RegState::Define
)
1377 .add(predOps(ARMCC::AL
));
1379 NonclearedFPRegs
.push_back(Reg
);
1381 } else if (ARM::SPRRegClass
.contains(Reg
)) {
1382 if (ScratchRegs
.size() >= 1) {
1383 unsigned SaveReg
= ScratchRegs
.pop_back_val();
1384 ClearedFPRegs
.emplace_back(Reg
, SaveReg
, 0);
1386 // Save the fp register to the normal registers
1387 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVRS
), SaveReg
)
1389 .add(predOps(ARMCC::AL
));
1391 NonclearedFPRegs
.push_back(Reg
);
1397 bool passesFPReg
= (!NonclearedFPRegs
.empty() || !ClearedFPRegs
.empty());
1399 // Lazy store all fp registers to the stack
1400 MachineInstrBuilder VLSTM
= BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLSTM
))
1402 .add(predOps(ARMCC::AL
));
1403 for (auto R
: {ARM::VPR
, ARM::FPSCR
, ARM::FPSCR_NZCV
, ARM::Q0
, ARM::Q1
,
1404 ARM::Q2
, ARM::Q3
, ARM::Q4
, ARM::Q5
, ARM::Q6
, ARM::Q7
})
1405 VLSTM
.addReg(R
, RegState::Implicit
|
1406 (LiveRegs
.contains(R
) ? 0 : RegState::Undef
));
1408 // Restore all arguments
1409 for (const auto &Regs
: ClearedFPRegs
) {
1410 unsigned Reg
, SaveReg1
, SaveReg2
;
1411 std::tie(Reg
, SaveReg1
, SaveReg2
) = Regs
;
1412 if (ARM::DPR_VFP2RegClass
.contains(Reg
))
1413 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVDRR
), Reg
)
1416 .add(predOps(ARMCC::AL
));
1417 else if (ARM::SPRRegClass
.contains(Reg
))
1418 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVSR
), Reg
)
1420 .add(predOps(ARMCC::AL
));
1423 for (unsigned Reg
: NonclearedFPRegs
) {
1424 if (ARM::DPR_VFP2RegClass
.contains(Reg
)) {
1425 if (STI
->isLittle()) {
1426 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLDRD
), Reg
)
1428 .addImm((Reg
- ARM::D0
) * 2)
1429 .add(predOps(ARMCC::AL
));
1431 // For big-endian targets we need to load the two subregisters of Reg
1432 // manually because VLDRD would load them in wrong order
1433 unsigned SReg0
= TRI
->getSubReg(Reg
, ARM::ssub_0
);
1434 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLDRS
), SReg0
)
1436 .addImm((Reg
- ARM::D0
) * 2)
1437 .add(predOps(ARMCC::AL
));
1438 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLDRS
), SReg0
+ 1)
1440 .addImm((Reg
- ARM::D0
) * 2 + 1)
1441 .add(predOps(ARMCC::AL
));
1443 } else if (ARM::SPRRegClass
.contains(Reg
)) {
1444 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLDRS
), Reg
)
1446 .addImm(Reg
- ARM::S0
)
1447 .add(predOps(ARMCC::AL
));
1450 // restore FPSCR from stack and clear bits 0-4, 7, 28-31
1451 // The other bits are program global according to the AAPCS
1453 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::t2LDRi8
), SpareReg
)
1456 .add(predOps(ARMCC::AL
));
1457 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::t2BICri
), SpareReg
)
1460 .add(predOps(ARMCC::AL
))
1462 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::t2BICri
), SpareReg
)
1465 .add(predOps(ARMCC::AL
))
1467 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMSR
))
1469 .add(predOps(ARMCC::AL
));
  // The ldr must happen after a floating point instruction. To prevent the
  // post-ra scheduler from messing with the order, we create a bundle.
  finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
}
1476 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock
&MBB
,
1477 MachineBasicBlock::iterator MBBI
,
1479 const LivePhysRegs
&LiveRegs
) {
1480 BitVector
ClearRegs(32, true);
1481 bool DefFP
= determineFPRegsToClear(*MBBI
, ClearRegs
);
1483 // If the instruction does not write to a FP register and no elements were
1484 // removed from the set, then no FP registers were used to pass
1485 // arguments/returns.
1486 if (!DefFP
&& ClearRegs
.count() == ClearRegs
.size()) {
1487 // save space on stack for VLSTM
1488 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tSUBspi
), ARM::SP
)
1490 .addImm(CMSE_FP_SAVE_SIZE
>> 2)
1491 .add(predOps(ARMCC::AL
));
1493 // Lazy store all FP registers to the stack
1494 MachineInstrBuilder VLSTM
= BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLSTM
))
1496 .add(predOps(ARMCC::AL
));
1497 for (auto R
: {ARM::VPR
, ARM::FPSCR
, ARM::FPSCR_NZCV
, ARM::Q0
, ARM::Q1
,
1498 ARM::Q2
, ARM::Q3
, ARM::Q4
, ARM::Q5
, ARM::Q6
, ARM::Q7
})
1499 VLSTM
.addReg(R
, RegState::Implicit
|
1500 (LiveRegs
.contains(R
) ? 0 : RegState::Undef
));
1502 // Push all the callee-saved registers (s16-s31).
1503 MachineInstrBuilder VPUSH
=
1504 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VSTMSDB_UPD
), ARM::SP
)
1506 .add(predOps(ARMCC::AL
));
1507 for (int Reg
= ARM::S16
; Reg
<= ARM::S31
; ++Reg
)
1510 // Clear FP registers with a VSCCLRM.
1511 (void)CMSEClearFPRegsV81(MBB
, MBBI
, ClearRegs
);
1513 // Save floating-point context.
1514 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VSTR_FPCXTS_pre
), ARM::SP
)
1517 .add(predOps(ARMCC::AL
));

// Restore FP registers if present
void ARMExpandPseudo::CMSERestoreFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
  else
    CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
}
1531 void ARMExpandPseudo::CMSERestoreFPRegsV8(
1532 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
, DebugLoc
&DL
,
1533 SmallVectorImpl
<unsigned> &AvailableRegs
) {
1534 if (!STI
->hasFPRegs())
1537 // Use AvailableRegs to store the fp regs
1538 std::vector
<std::tuple
<unsigned, unsigned, unsigned>> ClearedFPRegs
;
1539 std::vector
<unsigned> NonclearedFPRegs
;
1540 for (const MachineOperand
&Op
: MBBI
->operands()) {
1541 if (Op
.isReg() && Op
.isDef()) {
1542 unsigned Reg
= Op
.getReg();
1543 assert(!ARM::DPRRegClass
.contains(Reg
) ||
1544 ARM::DPR_VFP2RegClass
.contains(Reg
));
1545 assert(!ARM::QPRRegClass
.contains(Reg
));
1546 if (ARM::DPR_VFP2RegClass
.contains(Reg
)) {
1547 if (AvailableRegs
.size() >= 2) {
1548 unsigned SaveReg2
= AvailableRegs
.pop_back_val();
1549 unsigned SaveReg1
= AvailableRegs
.pop_back_val();
1550 ClearedFPRegs
.emplace_back(Reg
, SaveReg1
, SaveReg2
);
1552 // Save the fp register to the normal registers
1553 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVRRD
))
1554 .addReg(SaveReg1
, RegState::Define
)
1555 .addReg(SaveReg2
, RegState::Define
)
1557 .add(predOps(ARMCC::AL
));
1559 NonclearedFPRegs
.push_back(Reg
);
1561 } else if (ARM::SPRRegClass
.contains(Reg
)) {
1562 if (AvailableRegs
.size() >= 1) {
1563 unsigned SaveReg
= AvailableRegs
.pop_back_val();
1564 ClearedFPRegs
.emplace_back(Reg
, SaveReg
, 0);
1566 // Save the fp register to the normal registers
1567 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVRS
), SaveReg
)
1569 .add(predOps(ARMCC::AL
));
1571 NonclearedFPRegs
.push_back(Reg
);
1577 // Push FP regs that cannot be restored via normal registers on the stack
1578 for (unsigned Reg
: NonclearedFPRegs
) {
1579 if (ARM::DPR_VFP2RegClass
.contains(Reg
))
1580 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VSTRD
), Reg
)
1582 .addImm((Reg
- ARM::D0
) * 2)
1583 .add(predOps(ARMCC::AL
));
1584 else if (ARM::SPRRegClass
.contains(Reg
))
1585 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VSTRS
), Reg
)
1587 .addImm(Reg
- ARM::S0
)
1588 .add(predOps(ARMCC::AL
));
1591 // Lazy load fp regs from stack
1592 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLLDM
))
1594 .add(predOps(ARMCC::AL
));
1596 // Restore all FP registers via normal registers
1597 for (const auto &Regs
: ClearedFPRegs
) {
1598 unsigned Reg
, SaveReg1
, SaveReg2
;
1599 std::tie(Reg
, SaveReg1
, SaveReg2
) = Regs
;
1600 if (ARM::DPR_VFP2RegClass
.contains(Reg
))
1601 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVDRR
), Reg
)
1604 .add(predOps(ARMCC::AL
));
1605 else if (ARM::SPRRegClass
.contains(Reg
))
1606 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VMOVSR
), Reg
)
1608 .add(predOps(ARMCC::AL
));
1611 // Pop the stack space
1612 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tADDspi
), ARM::SP
)
1614 .addImm(CMSE_FP_SAVE_SIZE
>> 2)
1615 .add(predOps(ARMCC::AL
));

static bool definesOrUsesFPReg(const MachineInstr &MI) {
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;
    unsigned Reg = Op.getReg();
    if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
        (Reg >= ARM::D0 && Reg <= ARM::D15) ||
        (Reg >= ARM::S0 && Reg <= ARM::S31))
      return true;
  }
  return false;
}
1631 void ARMExpandPseudo::CMSERestoreFPRegsV81(
1632 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
, DebugLoc
&DL
,
1633 SmallVectorImpl
<unsigned> &AvailableRegs
) {
1634 if (!definesOrUsesFPReg(*MBBI
)) {
1635 // Load FP registers from stack.
1636 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLLDM
))
1638 .add(predOps(ARMCC::AL
));
1640 // Pop the stack space
1641 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tADDspi
), ARM::SP
)
1643 .addImm(CMSE_FP_SAVE_SIZE
>> 2)
1644 .add(predOps(ARMCC::AL
));
1646 // Restore the floating point context.
1647 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
->get(ARM::VLDR_FPCXTS_post
),
1651 .add(predOps(ARMCC::AL
));
1653 // Pop all the callee-saved registers (s16-s31).
1654 MachineInstrBuilder VPOP
=
1655 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::VLDMSIA_UPD
), ARM::SP
)
1657 .add(predOps(ARMCC::AL
));
1658 for (int Reg
= ARM::S16
; Reg
<= ARM::S31
; ++Reg
)
1659 VPOP
.addReg(Reg
, RegState::Define
);
1663 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
1664 /// possible. This only gets used at -O0 so we don't care about efficiency of
1665 /// the generated code.
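/// The expansion produces, in outline, the following loop:
///
///   LoadCmpBB:
///     ldrex   rDest, [rAddr]
///     cmp     rDest, rDesired
///     bne     DoneBB
///   StoreBB:
///     strex   rTempReg, rNew, [rAddr]
///     cmp     rTempReg, #0
///     bne     LoadCmpBB
///   DoneBB: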
1666 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock
&MBB
,
1667 MachineBasicBlock::iterator MBBI
,
1668 unsigned LdrexOp
, unsigned StrexOp
,
1670 MachineBasicBlock::iterator
&NextMBBI
) {
1671 bool IsThumb
= STI
->isThumb();
1672 MachineInstr
&MI
= *MBBI
;
1673 DebugLoc DL
= MI
.getDebugLoc();
1674 const MachineOperand
&Dest
= MI
.getOperand(0);
1675 Register TempReg
= MI
.getOperand(1).getReg();
1676 // Duplicating undef operands into 2 instructions does not guarantee the same
1677 // value on both; However undef should be replaced by xzr anyway.
1678 assert(!MI
.getOperand(2).isUndef() && "cannot handle undef");
1679 Register AddrReg
= MI
.getOperand(2).getReg();
1680 Register DesiredReg
= MI
.getOperand(3).getReg();
1681 Register NewReg
= MI
.getOperand(4).getReg();
1684 assert(STI
->hasV8MBaselineOps() &&
1685 "CMP_SWAP not expected to be custom expanded for Thumb1");
1686 assert((UxtOp
== 0 || UxtOp
== ARM::tUXTB
|| UxtOp
== ARM::tUXTH
) &&
1687 "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
1688 assert((UxtOp
== 0 || ARM::tGPRRegClass
.contains(DesiredReg
)) &&
1689 "DesiredReg used for UXT op must be tGPR");
1692 MachineFunction
*MF
= MBB
.getParent();
1693 auto LoadCmpBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1694 auto StoreBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1695 auto DoneBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1697 MF
->insert(++MBB
.getIterator(), LoadCmpBB
);
1698 MF
->insert(++LoadCmpBB
->getIterator(), StoreBB
);
1699 MF
->insert(++StoreBB
->getIterator(), DoneBB
);
1702 MachineInstrBuilder MIB
=
1703 BuildMI(MBB
, MBBI
, DL
, TII
->get(UxtOp
), DesiredReg
)
1704 .addReg(DesiredReg
, RegState::Kill
);
1707 MIB
.add(predOps(ARMCC::AL
));
1711 // ldrex rDest, [rAddr]
1712 // cmp rDest, rDesired
1715 MachineInstrBuilder MIB
;
1716 MIB
= BuildMI(LoadCmpBB
, DL
, TII
->get(LdrexOp
), Dest
.getReg());
1717 MIB
.addReg(AddrReg
);
1718 if (LdrexOp
== ARM::t2LDREX
)
1719 MIB
.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
1720 MIB
.add(predOps(ARMCC::AL
));
1722 unsigned CMPrr
= IsThumb
? ARM::tCMPhir
: ARM::CMPrr
;
1723 BuildMI(LoadCmpBB
, DL
, TII
->get(CMPrr
))
1724 .addReg(Dest
.getReg(), getKillRegState(Dest
.isDead()))
1726 .add(predOps(ARMCC::AL
));
1727 unsigned Bcc
= IsThumb
? ARM::tBcc
: ARM::Bcc
;
1728 BuildMI(LoadCmpBB
, DL
, TII
->get(Bcc
))
1731 .addReg(ARM::CPSR
, RegState::Kill
);
1732 LoadCmpBB
->addSuccessor(DoneBB
);
1733 LoadCmpBB
->addSuccessor(StoreBB
);
1736 // strex rTempReg, rNew, [rAddr]
1739 MIB
= BuildMI(StoreBB
, DL
, TII
->get(StrexOp
), TempReg
)
1742 if (StrexOp
== ARM::t2STREX
)
1743 MIB
.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
1744 MIB
.add(predOps(ARMCC::AL
));
1746 unsigned CMPri
= IsThumb
? ARM::t2CMPri
: ARM::CMPri
;
1747 BuildMI(StoreBB
, DL
, TII
->get(CMPri
))
1748 .addReg(TempReg
, RegState::Kill
)
1750 .add(predOps(ARMCC::AL
));
1751 BuildMI(StoreBB
, DL
, TII
->get(Bcc
))
1754 .addReg(ARM::CPSR
, RegState::Kill
);
1755 StoreBB
->addSuccessor(LoadCmpBB
);
1756 StoreBB
->addSuccessor(DoneBB
);
1758 DoneBB
->splice(DoneBB
->end(), &MBB
, MI
, MBB
.end());
1759 DoneBB
->transferSuccessors(&MBB
);
1761 MBB
.addSuccessor(LoadCmpBB
);
1763 NextMBBI
= MBB
.end();
1764 MI
.eraseFromParent();
1766 // Recompute livein lists.
1767 LivePhysRegs LiveRegs
;
1768 computeAndAddLiveIns(LiveRegs
, *DoneBB
);
1769 computeAndAddLiveIns(LiveRegs
, *StoreBB
);
1770 computeAndAddLiveIns(LiveRegs
, *LoadCmpBB
);
1771 // Do an extra pass around the loop to get loop carried registers right.
1772 StoreBB
->clearLiveIns();
1773 computeAndAddLiveIns(LiveRegs
, *StoreBB
);
1774 LoadCmpBB
->clearLiveIns();
1775 computeAndAddLiveIns(LiveRegs
, *LoadCmpBB
);

/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
/// single GPRPair register), Thumb's take two separate registers so we need to
/// extract the subregs from the pair.
static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
                                unsigned Flags, bool IsThumb,
                                const TargetRegisterInfo *TRI) {
  if (IsThumb) {
    Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
    Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
    MIB.addReg(RegLo, Flags);
    MIB.addReg(RegHi, Flags);
  } else
    MIB.addReg(Reg.getReg(), Flags);
}
1795 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
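/// Unlike the 32-bit case, the comparison takes two steps: the low words are
/// compared first, then the high words are compared with a CMP predicated on
/// EQ, so the second compare only updates the flags when the low words match.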
1796 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock
&MBB
,
1797 MachineBasicBlock::iterator MBBI
,
1798 MachineBasicBlock::iterator
&NextMBBI
) {
1799 bool IsThumb
= STI
->isThumb();
1800 MachineInstr
&MI
= *MBBI
;
1801 DebugLoc DL
= MI
.getDebugLoc();
1802 MachineOperand
&Dest
= MI
.getOperand(0);
1803 Register TempReg
= MI
.getOperand(1).getReg();
1804 // Duplicating undef operands into 2 instructions does not guarantee the same
1805 // value on both; However undef should be replaced by xzr anyway.
1806 assert(!MI
.getOperand(2).isUndef() && "cannot handle undef");
1807 Register AddrReg
= MI
.getOperand(2).getReg();
1808 Register DesiredReg
= MI
.getOperand(3).getReg();
1809 MachineOperand New
= MI
.getOperand(4);
1810 New
.setIsKill(false);
1812 Register DestLo
= TRI
->getSubReg(Dest
.getReg(), ARM::gsub_0
);
1813 Register DestHi
= TRI
->getSubReg(Dest
.getReg(), ARM::gsub_1
);
1814 Register DesiredLo
= TRI
->getSubReg(DesiredReg
, ARM::gsub_0
);
1815 Register DesiredHi
= TRI
->getSubReg(DesiredReg
, ARM::gsub_1
);
1817 MachineFunction
*MF
= MBB
.getParent();
1818 auto LoadCmpBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1819 auto StoreBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1820 auto DoneBB
= MF
->CreateMachineBasicBlock(MBB
.getBasicBlock());
1822 MF
->insert(++MBB
.getIterator(), LoadCmpBB
);
1823 MF
->insert(++LoadCmpBB
->getIterator(), StoreBB
);
1824 MF
->insert(++StoreBB
->getIterator(), DoneBB
);
1827 // ldrexd rDestLo, rDestHi, [rAddr]
1828 // cmp rDestLo, rDesiredLo
1829 // sbcs dead rTempReg, rDestHi, rDesiredHi
1831 unsigned LDREXD
= IsThumb
? ARM::t2LDREXD
: ARM::LDREXD
;
1832 MachineInstrBuilder MIB
;
1833 MIB
= BuildMI(LoadCmpBB
, DL
, TII
->get(LDREXD
));
1834 addExclusiveRegPair(MIB
, Dest
, RegState::Define
, IsThumb
, TRI
);
1835 MIB
.addReg(AddrReg
).add(predOps(ARMCC::AL
));
1837 unsigned CMPrr
= IsThumb
? ARM::tCMPhir
: ARM::CMPrr
;
1838 BuildMI(LoadCmpBB
, DL
, TII
->get(CMPrr
))
1839 .addReg(DestLo
, getKillRegState(Dest
.isDead()))
1841 .add(predOps(ARMCC::AL
));
1843 BuildMI(LoadCmpBB
, DL
, TII
->get(CMPrr
))
1844 .addReg(DestHi
, getKillRegState(Dest
.isDead()))
1846 .addImm(ARMCC::EQ
).addReg(ARM::CPSR
, RegState::Kill
);
1848 unsigned Bcc
= IsThumb
? ARM::tBcc
: ARM::Bcc
;
1849 BuildMI(LoadCmpBB
, DL
, TII
->get(Bcc
))
1852 .addReg(ARM::CPSR
, RegState::Kill
);
1853 LoadCmpBB
->addSuccessor(DoneBB
);
1854 LoadCmpBB
->addSuccessor(StoreBB
);
1857 // strexd rTempReg, rNewLo, rNewHi, [rAddr]
1860 unsigned STREXD
= IsThumb
? ARM::t2STREXD
: ARM::STREXD
;
1861 MIB
= BuildMI(StoreBB
, DL
, TII
->get(STREXD
), TempReg
);
1862 unsigned Flags
= getKillRegState(New
.isDead());
1863 addExclusiveRegPair(MIB
, New
, Flags
, IsThumb
, TRI
);
1864 MIB
.addReg(AddrReg
).add(predOps(ARMCC::AL
));
1866 unsigned CMPri
= IsThumb
? ARM::t2CMPri
: ARM::CMPri
;
1867 BuildMI(StoreBB
, DL
, TII
->get(CMPri
))
1868 .addReg(TempReg
, RegState::Kill
)
1870 .add(predOps(ARMCC::AL
));
1871 BuildMI(StoreBB
, DL
, TII
->get(Bcc
))
1874 .addReg(ARM::CPSR
, RegState::Kill
);
1875 StoreBB
->addSuccessor(LoadCmpBB
);
1876 StoreBB
->addSuccessor(DoneBB
);
1878 DoneBB
->splice(DoneBB
->end(), &MBB
, MI
, MBB
.end());
1879 DoneBB
->transferSuccessors(&MBB
);
1881 MBB
.addSuccessor(LoadCmpBB
);
1883 NextMBBI
= MBB
.end();
1884 MI
.eraseFromParent();
1886 // Recompute livein lists.
1887 LivePhysRegs LiveRegs
;
1888 computeAndAddLiveIns(LiveRegs
, *DoneBB
);
1889 computeAndAddLiveIns(LiveRegs
, *StoreBB
);
1890 computeAndAddLiveIns(LiveRegs
, *LoadCmpBB
);
1891 // Do an extra pass around the loop to get loop carried registers right.
1892 StoreBB
->clearLiveIns();
1893 computeAndAddLiveIns(LiveRegs
, *StoreBB
);
1894 LoadCmpBB
->clearLiveIns();
1895 computeAndAddLiveIns(LiveRegs
, *LoadCmpBB
);
1900 static void CMSEPushCalleeSaves(const TargetInstrInfo
&TII
,
1901 MachineBasicBlock
&MBB
,
1902 MachineBasicBlock::iterator MBBI
, int JumpReg
,
1903 const LivePhysRegs
&LiveRegs
, bool Thumb1Only
) {
1904 const DebugLoc
&DL
= MBBI
->getDebugLoc();
1905 if (Thumb1Only
) { // push Lo and Hi regs separately
1906 MachineInstrBuilder PushMIB
=
1907 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tPUSH
)).add(predOps(ARMCC::AL
));
1908 for (int Reg
= ARM::R4
; Reg
< ARM::R8
; ++Reg
) {
1910 Reg
, Reg
== JumpReg
|| LiveRegs
.contains(Reg
) ? 0 : RegState::Undef
);
1913 // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
1914 // regs that we just saved and push the low regs again, taking care to
1915 // not clobber JumpReg. If JumpReg is one of the low registers, push first
1916 // the values of r9-r11, and then r8. That would leave them ordered in
1917 // memory, and allow us to later pop them with a single instructions.
1918 // FIXME: Could also use any of r0-r3 that are free (including in the
1919 // first PUSH above).
1920 for (int LoReg
= ARM::R7
, HiReg
= ARM::R11
; LoReg
>= ARM::R4
; --LoReg
) {
1921 if (JumpReg
== LoReg
)
1923 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tMOVr
), LoReg
)
1924 .addReg(HiReg
, LiveRegs
.contains(HiReg
) ? 0 : RegState::Undef
)
1925 .add(predOps(ARMCC::AL
));
1928 MachineInstrBuilder PushMIB2
=
1929 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tPUSH
)).add(predOps(ARMCC::AL
));
1930 for (int Reg
= ARM::R4
; Reg
< ARM::R8
; ++Reg
) {
1933 PushMIB2
.addReg(Reg
, RegState::Kill
);
1936 // If we couldn't use a low register for temporary storage (because it was
1937 // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
1939 if (JumpReg
>= ARM::R4
&& JumpReg
<= ARM::R7
) {
1940 int LoReg
= JumpReg
== ARM::R4
? ARM::R5
: ARM::R4
;
1941 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tMOVr
), LoReg
)
1942 .addReg(ARM::R8
, LiveRegs
.contains(ARM::R8
) ? 0 : RegState::Undef
)
1943 .add(predOps(ARMCC::AL
));
1944 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tPUSH
))
1945 .add(predOps(ARMCC::AL
))
1946 .addReg(LoReg
, RegState::Kill
);
1948 } else { // push Lo and Hi registers with a single instruction
1949 MachineInstrBuilder PushMIB
=
1950 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::t2STMDB_UPD
), ARM::SP
)
1952 .add(predOps(ARMCC::AL
));
1953 for (int Reg
= ARM::R4
; Reg
< ARM::R12
; ++Reg
) {
1955 Reg
, Reg
== JumpReg
|| LiveRegs
.contains(Reg
) ? 0 : RegState::Undef
);
1960 static void CMSEPopCalleeSaves(const TargetInstrInfo
&TII
,
1961 MachineBasicBlock
&MBB
,
1962 MachineBasicBlock::iterator MBBI
, int JumpReg
,
1964 const DebugLoc
&DL
= MBBI
->getDebugLoc();
1966 MachineInstrBuilder PopMIB
=
1967 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1968 for (int R
= 0; R
< 4; ++R
) {
1969 PopMIB
.addReg(ARM::R4
+ R
, RegState::Define
);
1970 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tMOVr
), ARM::R8
+ R
)
1971 .addReg(ARM::R4
+ R
, RegState::Kill
)
1972 .add(predOps(ARMCC::AL
));
1974 MachineInstrBuilder PopMIB2
=
1975 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1976 for (int R
= 0; R
< 4; ++R
)
1977 PopMIB2
.addReg(ARM::R4
+ R
, RegState::Define
);
1978 } else { // pop Lo and Hi registers with a single instruction
1979 MachineInstrBuilder PopMIB
=
1980 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::t2LDMIA_UPD
), ARM::SP
)
1982 .add(predOps(ARMCC::AL
));
1983 for (int Reg
= ARM::R4
; Reg
< ARM::R12
; ++Reg
)
1984 PopMIB
.addReg(Reg
, RegState::Define
);
1988 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock
&MBB
,
1989 MachineBasicBlock::iterator MBBI
,
1990 MachineBasicBlock::iterator
&NextMBBI
) {
1991 MachineInstr
&MI
= *MBBI
;
1992 unsigned Opcode
= MI
.getOpcode();
1999 Register DstReg
= MI
.getOperand(0).getReg();
2000 if (DstReg
== MI
.getOperand(3).getReg()) {
2002 unsigned NewOpc
= Opcode
== ARM::VBSPd
? ARM::VBITd
: ARM::VBITq
;
2003 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
))
2004 .add(MI
.getOperand(0))
2005 .add(MI
.getOperand(3))
2006 .add(MI
.getOperand(2))
2007 .add(MI
.getOperand(1))
2008 .addImm(MI
.getOperand(4).getImm())
2009 .add(MI
.getOperand(5));
2010 } else if (DstReg
== MI
.getOperand(2).getReg()) {
2012 unsigned NewOpc
= Opcode
== ARM::VBSPd
? ARM::VBIFd
: ARM::VBIFq
;
2013 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
))
2014 .add(MI
.getOperand(0))
2015 .add(MI
.getOperand(2))
2016 .add(MI
.getOperand(3))
2017 .add(MI
.getOperand(1))
2018 .addImm(MI
.getOperand(4).getImm())
2019 .add(MI
.getOperand(5));
2022 unsigned NewOpc
= Opcode
== ARM::VBSPd
? ARM::VBSLd
: ARM::VBSLq
;
2023 if (DstReg
== MI
.getOperand(1).getReg()) {
2024 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
))
2025 .add(MI
.getOperand(0))
2026 .add(MI
.getOperand(1))
2027 .add(MI
.getOperand(2))
2028 .add(MI
.getOperand(3))
2029 .addImm(MI
.getOperand(4).getImm())
2030 .add(MI
.getOperand(5));
2032 // Use move to satisfy constraints
2033 unsigned MoveOpc
= Opcode
== ARM::VBSPd
? ARM::VORRd
: ARM::VORRq
;
2034 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(MoveOpc
))
2037 getRenamableRegState(MI
.getOperand(0).isRenamable()))
2038 .add(MI
.getOperand(1))
2039 .add(MI
.getOperand(1))
2040 .addImm(MI
.getOperand(4).getImm())
2041 .add(MI
.getOperand(5));
2042 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
))
2043 .add(MI
.getOperand(0))
2046 getRenamableRegState(MI
.getOperand(0).isRenamable()))
2047 .add(MI
.getOperand(2))
2048 .add(MI
.getOperand(3))
2049 .addImm(MI
.getOperand(4).getImm())
2050 .add(MI
.getOperand(5));
2053 MI
.eraseFromParent();
2057 case ARM::TCRETURNdi
:
2058 case ARM::TCRETURNri
: {
2059 MachineBasicBlock::iterator MBBI
= MBB
.getLastNonDebugInstr();
2060 assert(MBBI
->isReturn() &&
2061 "Can only insert epilog into returning blocks");
2062 unsigned RetOpcode
= MBBI
->getOpcode();
2063 DebugLoc dl
= MBBI
->getDebugLoc();
2064 const ARMBaseInstrInfo
&TII
= *static_cast<const ARMBaseInstrInfo
*>(
2065 MBB
.getParent()->getSubtarget().getInstrInfo());
2067 // Tail call return: adjust the stack pointer and jump to callee.
2068 MBBI
= MBB
.getLastNonDebugInstr();
2069 MachineOperand
&JumpTarget
= MBBI
->getOperand(0);
2071 // Jump to label or value in register.
2072 if (RetOpcode
== ARM::TCRETURNdi
) {
2075 ? (STI
->isTargetMachO() ? ARM::tTAILJMPd
: ARM::tTAILJMPdND
)
2077 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, dl
, TII
.get(TCOpcode
));
2078 if (JumpTarget
.isGlobal())
2079 MIB
.addGlobalAddress(JumpTarget
.getGlobal(), JumpTarget
.getOffset(),
2080 JumpTarget
.getTargetFlags());
2082 assert(JumpTarget
.isSymbol());
2083 MIB
.addExternalSymbol(JumpTarget
.getSymbolName(),
2084 JumpTarget
.getTargetFlags());
2087 // Add the default predicate in Thumb mode.
2089 MIB
.add(predOps(ARMCC::AL
));
2090 } else if (RetOpcode
== ARM::TCRETURNri
) {
2092 STI
->isThumb() ? ARM::tTAILJMPr
2093 : (STI
->hasV4TOps() ? ARM::TAILJMPr
: ARM::TAILJMPr4
);
2094 BuildMI(MBB
, MBBI
, dl
,
2096 .addReg(JumpTarget
.getReg(), RegState::Kill
);
2099 auto NewMI
= std::prev(MBBI
);
2100 for (unsigned i
= 2, e
= MBBI
->getNumOperands(); i
!= e
; ++i
)
2101 NewMI
->addOperand(MBBI
->getOperand(i
));
2104 // Update call site info and delete the pseudo instruction TCRETURN.
2105 if (MI
.isCandidateForCallSiteEntry())
2106 MI
.getMF()->moveCallSiteInfo(&MI
, &*NewMI
);
2112 case ARM::tBXNS_RET
: {
2113 MachineBasicBlock
&AfterBB
= CMSEClearFPRegs(MBB
, MBBI
);
2115 if (STI
->hasV8_1MMainlineOps()) {
2116 // Restore the non-secure floating point context.
2117 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(),
2118 TII
->get(ARM::VLDR_FPCXTNS_post
), ARM::SP
)
2121 .add(predOps(ARMCC::AL
));
2124 // Clear all GPR that are not a use of the return instruction.
2125 assert(llvm::all_of(MBBI
->operands(), [](const MachineOperand
&Op
) {
2126 return !Op
.isReg() || Op
.getReg() != ARM::R12
;
2128 SmallVector
<unsigned, 5> ClearRegs
;
2129 determineGPRegsToClear(
2130 *MBBI
, {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
, ARM::R12
}, ClearRegs
);
2131 CMSEClearGPRegs(AfterBB
, AfterBB
.end(), MBBI
->getDebugLoc(), ClearRegs
,
2134 MachineInstrBuilder NewMI
=
2135 BuildMI(AfterBB
, AfterBB
.end(), MBBI
->getDebugLoc(),
2136 TII
->get(ARM::tBXNS
))
2138 .add(predOps(ARMCC::AL
));
2139 for (const MachineOperand
&Op
: MI
.operands())
2140 NewMI
->addOperand(Op
);
2141 MI
.eraseFromParent();
2144 case ARM::tBLXNS_CALL
: {
2145 DebugLoc DL
= MBBI
->getDebugLoc();
2146 unsigned JumpReg
= MBBI
->getOperand(0).getReg();
2148 // Figure out which registers are live at the point immediately before the
2149 // call. When we indiscriminately push a set of registers, the live
2150 // registers are added as ordinary use operands, whereas dead registers
2152 LivePhysRegs
LiveRegs(*TRI
);
2153 LiveRegs
.addLiveOuts(MBB
);
2154 for (const MachineInstr
&MI
: make_range(MBB
.rbegin(), MBBI
.getReverse()))
2155 LiveRegs
.stepBackward(MI
);
2156 LiveRegs
.stepBackward(*MBBI
);
2158 CMSEPushCalleeSaves(*TII
, MBB
, MBBI
, JumpReg
, LiveRegs
,
2159 AFI
->isThumb1OnlyFunction());
2161 SmallVector
<unsigned, 16> ClearRegs
;
2162 determineGPRegsToClear(*MBBI
,
2163 {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
, ARM::R4
,
2164 ARM::R5
, ARM::R6
, ARM::R7
, ARM::R8
, ARM::R9
,
2165 ARM::R10
, ARM::R11
, ARM::R12
},
2167 auto OriginalClearRegs
= ClearRegs
;
2169 // Get the first cleared register as a scratch (to use later with tBIC).
2170 // We need to use the first so we can ensure it is a low register.
2171 unsigned ScratchReg
= ClearRegs
.front();
2173 // Clear LSB of JumpReg
2174 if (AFI
->isThumb2Function()) {
2175 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::t2BICri
), JumpReg
)
2178 .add(predOps(ARMCC::AL
))
2181 // We need to use an extra register to cope with 8M Baseline,
2182 // since we have saved all of the registers we are ok to trash a non
2183 // argument register here.
2184 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tMOVi8
), ScratchReg
)
2187 .add(predOps(ARMCC::AL
));
2188 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tBIC
), JumpReg
)
2189 .addReg(ARM::CPSR
, RegState::Define
)
2192 .add(predOps(ARMCC::AL
));
2195 CMSESaveClearFPRegs(MBB
, MBBI
, DL
, LiveRegs
,
2196 ClearRegs
); // save+clear FP regs with ClearRegs
2197 CMSEClearGPRegs(MBB
, MBBI
, DL
, ClearRegs
, JumpReg
);
2199 const MachineInstrBuilder NewCall
=
2200 BuildMI(MBB
, MBBI
, DL
, TII
->get(ARM::tBLXNSr
))
2201 .add(predOps(ARMCC::AL
))
2202 .addReg(JumpReg
, RegState::Kill
);
2204 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
2205 NewCall
->addOperand(MI
.getOperand(I
));
2206 if (MI
.isCandidateForCallSiteEntry())
2207 MI
.getMF()->moveCallSiteInfo(&MI
, NewCall
.getInstr());
2209 CMSERestoreFPRegs(MBB
, MBBI
, DL
, OriginalClearRegs
); // restore FP registers
2211 CMSEPopCalleeSaves(*TII
, MBB
, MBBI
, JumpReg
, AFI
->isThumb1OnlyFunction());
2213 MI
.eraseFromParent();
2218 case ARM::VMOVDcc
: {
2219 unsigned newOpc
= Opcode
!= ARM::VMOVDcc
? ARM::VMOVS
: ARM::VMOVD
;
2220 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(newOpc
),
2221 MI
.getOperand(1).getReg())
2222 .add(MI
.getOperand(2))
2223 .addImm(MI
.getOperand(3).getImm()) // 'pred'
2224 .add(MI
.getOperand(4))
2225 .add(makeImplicit(MI
.getOperand(1)));
2227 MI
.eraseFromParent();
2232 unsigned Opc
= AFI
->isThumbFunction() ? ARM::t2MOVr
: ARM::MOVr
;
2233 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(Opc
),
2234 MI
.getOperand(1).getReg())
2235 .add(MI
.getOperand(2))
2236 .addImm(MI
.getOperand(3).getImm()) // 'pred'
2237 .add(MI
.getOperand(4))
2238 .add(condCodeOp()) // 's' bit
2239 .add(makeImplicit(MI
.getOperand(1)));
2241 MI
.eraseFromParent();
2244 case ARM::MOVCCsi
: {
2245 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::MOVsi
),
2246 (MI
.getOperand(1).getReg()))
2247 .add(MI
.getOperand(2))
2248 .addImm(MI
.getOperand(3).getImm())
2249 .addImm(MI
.getOperand(4).getImm()) // 'pred'
2250 .add(MI
.getOperand(5))
2251 .add(condCodeOp()) // 's' bit
2252 .add(makeImplicit(MI
.getOperand(1)));
2254 MI
.eraseFromParent();
2257 case ARM::MOVCCsr
: {
2258 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::MOVsr
),
2259 (MI
.getOperand(1).getReg()))
2260 .add(MI
.getOperand(2))
2261 .add(MI
.getOperand(3))
2262 .addImm(MI
.getOperand(4).getImm())
2263 .addImm(MI
.getOperand(5).getImm()) // 'pred'
2264 .add(MI
.getOperand(6))
2265 .add(condCodeOp()) // 's' bit
2266 .add(makeImplicit(MI
.getOperand(1)));
2268 MI
.eraseFromParent();
2271 case ARM::t2MOVCCi16
:
2272 case ARM::MOVCCi16
: {
2273 unsigned NewOpc
= AFI
->isThumbFunction() ? ARM::t2MOVi16
: ARM::MOVi16
;
2274 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
),
2275 MI
.getOperand(1).getReg())
2276 .addImm(MI
.getOperand(2).getImm())
2277 .addImm(MI
.getOperand(3).getImm()) // 'pred'
2278 .add(MI
.getOperand(4))
2279 .add(makeImplicit(MI
.getOperand(1)));
2280 MI
.eraseFromParent();
2285 unsigned Opc
= AFI
->isThumbFunction() ? ARM::t2MOVi
: ARM::MOVi
;
2286 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(Opc
),
2287 MI
.getOperand(1).getReg())
2288 .addImm(MI
.getOperand(2).getImm())
2289 .addImm(MI
.getOperand(3).getImm()) // 'pred'
2290 .add(MI
.getOperand(4))
2291 .add(condCodeOp()) // 's' bit
2292 .add(makeImplicit(MI
.getOperand(1)));
2294 MI
.eraseFromParent();
2299 unsigned Opc
= AFI
->isThumbFunction() ? ARM::t2MVNi
: ARM::MVNi
;
2300 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(Opc
),
2301 MI
.getOperand(1).getReg())
2302 .addImm(MI
.getOperand(2).getImm())
2303 .addImm(MI
.getOperand(3).getImm()) // 'pred'
2304 .add(MI
.getOperand(4))
2305 .add(condCodeOp()) // 's' bit
2306 .add(makeImplicit(MI
.getOperand(1)));
2308 MI
.eraseFromParent();
2311 case ARM::t2MOVCClsl
:
2312 case ARM::t2MOVCClsr
:
2313 case ARM::t2MOVCCasr
:
2314 case ARM::t2MOVCCror
: {
    unsigned NewOpc;
    switch (Opcode) {
    case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
    case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
    case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
    case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
    default: llvm_unreachable("unexpected conditional move");
    }
2323 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewOpc
),
2324 MI
.getOperand(1).getReg())
2325 .add(MI
.getOperand(2))
2326 .addImm(MI
.getOperand(3).getImm())
2327 .addImm(MI
.getOperand(4).getImm()) // 'pred'
2328 .add(MI
.getOperand(5))
2329 .add(condCodeOp()) // 's' bit
2330 .add(makeImplicit(MI
.getOperand(1)));
2331 MI
.eraseFromParent();
2334 case ARM::Int_eh_sjlj_dispatchsetup
: {
2335 MachineFunction
&MF
= *MI
.getParent()->getParent();
2336 const ARMBaseInstrInfo
*AII
=
2337 static_cast<const ARMBaseInstrInfo
*>(TII
);
2338 const ARMBaseRegisterInfo
&RI
= AII
->getRegisterInfo();
2339 // For functions using a base pointer, we rematerialize it (via the frame
2340 // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
2341 // for us. Otherwise, expand to nothing.
2342 if (RI
.hasBasePointer(MF
)) {
2343 int32_t NumBytes
= AFI
->getFramePtrSpillOffset();
2344 Register FramePtr
= RI
.getFrameRegister(MF
);
2345 assert(MF
.getSubtarget().getFrameLowering()->hasFP(MF
) &&
2346 "base pointer without frame pointer?");
2348 if (AFI
->isThumb2Function()) {
2349 emitT2RegPlusImmediate(MBB
, MBBI
, MI
.getDebugLoc(), ARM::R6
,
2350 FramePtr
, -NumBytes
, ARMCC::AL
, 0, *TII
);
2351 } else if (AFI
->isThumbFunction()) {
2352 emitThumbRegPlusImmediate(MBB
, MBBI
, MI
.getDebugLoc(), ARM::R6
,
2353 FramePtr
, -NumBytes
, *TII
, RI
);
2355 emitARMRegPlusImmediate(MBB
, MBBI
, MI
.getDebugLoc(), ARM::R6
,
2356 FramePtr
, -NumBytes
, ARMCC::AL
, 0,
2359 // If there's dynamic realignment, adjust for it.
2360 if (RI
.hasStackRealignment(MF
)) {
2361 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2362 Align MaxAlign
= MFI
.getMaxAlign();
2363 assert (!AFI
->isThumb1OnlyFunction());
2364 // Emit bic r6, r6, MaxAlign
2365 assert(MaxAlign
<= Align(256) &&
2366 "The BIC instruction cannot encode "
2367 "immediates larger than 256 with all lower "
2369 unsigned bicOpc
= AFI
->isThumbFunction() ?
2370 ARM::t2BICri
: ARM::BICri
;
2371 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(bicOpc
), ARM::R6
)
2372 .addReg(ARM::R6
, RegState::Kill
)
2373 .addImm(MaxAlign
.value() - 1)
2374 .add(predOps(ARMCC::AL
))
2378 MI
.eraseFromParent();
2382 case ARM::MOVsrl_flag
:
2383 case ARM::MOVsra_flag
: {
2384 // These are just fancy MOVs instructions.
2385 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::MOVsi
),
2386 MI
.getOperand(0).getReg())
2387 .add(MI
.getOperand(1))
2388 .addImm(ARM_AM::getSORegOpc(
2389 (Opcode
== ARM::MOVsrl_flag
? ARM_AM::lsr
: ARM_AM::asr
), 1))
2390 .add(predOps(ARMCC::AL
))
2391 .addReg(ARM::CPSR
, RegState::Define
);
2392 MI
.eraseFromParent();
    // This encodes as "MOVs Rd, Rm, rrx".
2397 MachineInstrBuilder MIB
=
2398 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::MOVsi
),
2399 MI
.getOperand(0).getReg())
2400 .add(MI
.getOperand(1))
2401 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx
, 0))
2402 .add(predOps(ARMCC::AL
))
2404 TransferImpOps(MI
, MIB
, MIB
);
2405 MI
.eraseFromParent();
2410 const bool Thumb
= Opcode
== ARM::tTPsoft
;
2412 MachineInstrBuilder MIB
;
2413 MachineFunction
*MF
= MBB
.getParent();
2414 if (STI
->genLongCalls()) {
2415 MachineConstantPool
*MCP
= MF
->getConstantPool();
2416 unsigned PCLabelID
= AFI
->createPICLabelUId();
2417 MachineConstantPoolValue
*CPV
=
2418 ARMConstantPoolSymbol::Create(MF
->getFunction().getContext(),
2419 "__aeabi_read_tp", PCLabelID
, 0);
2420 Register Reg
= MI
.getOperand(0).getReg();
2422 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
2423 TII
->get(Thumb
? ARM::tLDRpci
: ARM::LDRi12
), Reg
)
2424 .addConstantPoolIndex(MCP
->getConstantPoolIndex(CPV
, Align(4)));
2427 MIB
.add(predOps(ARMCC::AL
));
2430 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
2431 TII
->get(Thumb
? gettBLXrOpcode(*MF
) : getBLXOpcode(*MF
)));
2433 MIB
.add(predOps(ARMCC::AL
));
2434 MIB
.addReg(Reg
, RegState::Kill
);
2436 MIB
= BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
2437 TII
->get(Thumb
? ARM::tBL
: ARM::BL
));
2439 MIB
.add(predOps(ARMCC::AL
));
2440 MIB
.addExternalSymbol("__aeabi_read_tp", 0);
2443 MIB
.cloneMemRefs(MI
);
2444 TransferImpOps(MI
, MIB
, MIB
);
2445 // Update the call site info.
2446 if (MI
.isCandidateForCallSiteEntry())
2447 MF
->moveCallSiteInfo(&MI
, &*MIB
);
2448 MI
.eraseFromParent();
2451 case ARM::tLDRpci_pic
:
2452 case ARM::t2LDRpci_pic
: {
2453 unsigned NewLdOpc
= (Opcode
== ARM::tLDRpci_pic
)
2454 ? ARM::tLDRpci
: ARM::t2LDRpci
;
2455 Register DstReg
= MI
.getOperand(0).getReg();
2456 bool DstIsDead
= MI
.getOperand(0).isDead();
2457 MachineInstrBuilder MIB1
=
2458 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(NewLdOpc
), DstReg
)
2459 .add(MI
.getOperand(1))
2460 .add(predOps(ARMCC::AL
));
2461 MIB1
.cloneMemRefs(MI
);
2462 MachineInstrBuilder MIB2
=
2463 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::tPICADD
))
2464 .addReg(DstReg
, RegState::Define
| getDeadRegState(DstIsDead
))
2466 .add(MI
.getOperand(2));
2467 TransferImpOps(MI
, MIB1
, MIB2
);
2468 MI
.eraseFromParent();
2472 case ARM::LDRLIT_ga_abs
:
2473 case ARM::LDRLIT_ga_pcrel
:
2474 case ARM::LDRLIT_ga_pcrel_ldr
:
2475 case ARM::tLDRLIT_ga_abs
:
2476 case ARM::tLDRLIT_ga_pcrel
: {
2477 Register DstReg
= MI
.getOperand(0).getReg();
2478 bool DstIsDead
= MI
.getOperand(0).isDead();
2479 const MachineOperand
&MO1
= MI
.getOperand(1);
2480 auto Flags
= MO1
.getTargetFlags();
2481 const GlobalValue
*GV
= MO1
.getGlobal();
2483 Opcode
!= ARM::tLDRLIT_ga_pcrel
&& Opcode
!= ARM::tLDRLIT_ga_abs
;
2485 Opcode
!= ARM::LDRLIT_ga_abs
&& Opcode
!= ARM::tLDRLIT_ga_abs
;
2486 unsigned LDRLITOpc
= IsARM
? ARM::LDRi12
: ARM::tLDRpci
;
2487 unsigned PICAddOpc
=
2489 ? (Opcode
== ARM::LDRLIT_ga_pcrel_ldr
? ARM::PICLDR
: ARM::PICADD
)
2492 // We need a new const-pool entry to load from.
2493 MachineConstantPool
*MCP
= MBB
.getParent()->getConstantPool();
2494 unsigned ARMPCLabelIndex
= 0;
2495 MachineConstantPoolValue
*CPV
;
2498 unsigned PCAdj
= IsARM
? 8 : 4;
2499 auto Modifier
= (Flags
& ARMII::MO_GOT
)
2501 : ARMCP::no_modifier
;
2502 ARMPCLabelIndex
= AFI
->createPICLabelUId();
2503 CPV
= ARMConstantPoolConstant::Create(
2504 GV
, ARMPCLabelIndex
, ARMCP::CPValue
, PCAdj
, Modifier
,
2505 /*AddCurrentAddr*/ Modifier
== ARMCP::GOT_PREL
);
2507 CPV
= ARMConstantPoolConstant::Create(GV
, ARMCP::no_modifier
);
2509 MachineInstrBuilder MIB
=
2510 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(LDRLITOpc
), DstReg
)
2511 .addConstantPoolIndex(MCP
->getConstantPoolIndex(CPV
, Align(4)));
2514 MIB
.add(predOps(ARMCC::AL
));
2517 MachineInstrBuilder MIB
=
2518 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(PICAddOpc
))
2519 .addReg(DstReg
, RegState::Define
| getDeadRegState(DstIsDead
))
2521 .addImm(ARMPCLabelIndex
);
2524 MIB
.add(predOps(ARMCC::AL
));
2527 MI
.eraseFromParent();
2530 case ARM::MOV_ga_pcrel
:
2531 case ARM::MOV_ga_pcrel_ldr
:
2532 case ARM::t2MOV_ga_pcrel
: {
    // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
2534 unsigned LabelId
= AFI
->createPICLabelUId();
2535 Register DstReg
= MI
.getOperand(0).getReg();
2536 bool DstIsDead
= MI
.getOperand(0).isDead();
2537 const MachineOperand
&MO1
= MI
.getOperand(1);
2538 const GlobalValue
*GV
= MO1
.getGlobal();
2539 unsigned TF
= MO1
.getTargetFlags();
2540 bool isARM
= Opcode
!= ARM::t2MOV_ga_pcrel
;
2541 unsigned LO16Opc
= isARM
? ARM::MOVi16_ga_pcrel
: ARM::t2MOVi16_ga_pcrel
;
2542 unsigned HI16Opc
= isARM
? ARM::MOVTi16_ga_pcrel
:ARM::t2MOVTi16_ga_pcrel
;
2543 unsigned LO16TF
= TF
| ARMII::MO_LO16
;
2544 unsigned HI16TF
= TF
| ARMII::MO_HI16
;
2545 unsigned PICAddOpc
= isARM
2546 ? (Opcode
== ARM::MOV_ga_pcrel_ldr
? ARM::PICLDR
: ARM::PICADD
)
2548 MachineInstrBuilder MIB1
= BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
2549 TII
->get(LO16Opc
), DstReg
)
2550 .addGlobalAddress(GV
, MO1
.getOffset(), TF
| LO16TF
)
2553 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(HI16Opc
), DstReg
)
2555 .addGlobalAddress(GV
, MO1
.getOffset(), TF
| HI16TF
)
2558 MachineInstrBuilder MIB3
= BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
2559 TII
->get(PICAddOpc
))
2560 .addReg(DstReg
, RegState::Define
| getDeadRegState(DstIsDead
))
2561 .addReg(DstReg
).addImm(LabelId
);
2563 MIB3
.add(predOps(ARMCC::AL
));
2564 if (Opcode
== ARM::MOV_ga_pcrel_ldr
)
2565 MIB3
.cloneMemRefs(MI
);
2567 TransferImpOps(MI
, MIB1
, MIB3
);
2568 MI
.eraseFromParent();
2572 case ARM::MOVi32imm
:
2573 case ARM::MOVCCi32imm
:
2574 case ARM::t2MOVi32imm
:
2575 case ARM::t2MOVCCi32imm
:
2576 ExpandMOV32BitImm(MBB
, MBBI
);
2579 case ARM::SUBS_PC_LR
: {
2580 MachineInstrBuilder MIB
=
2581 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(), TII
->get(ARM::SUBri
), ARM::PC
)
2583 .add(MI
.getOperand(0))
2584 .add(MI
.getOperand(1))
2585 .add(MI
.getOperand(2))
2586 .addReg(ARM::CPSR
, RegState::Undef
);
2587 TransferImpOps(MI
, MIB
, MIB
);
2588 MI
.eraseFromParent();
    return true;
  }

  case ARM::VLDMQIA: {
    unsigned NewOpc = ARM::VLDMDIA;
    MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
    unsigned OpIdx = 0;

    // Grab the Q register destination.
    bool DstIsDead = MI.getOperand(OpIdx).isDead();
    Register DstReg = MI.getOperand(OpIdx++).getReg();

    // Copy the source register.
    MIB.add(MI.getOperand(OpIdx++));

    // Copy the predicate operands.
    MIB.add(MI.getOperand(OpIdx++));
    MIB.add(MI.getOperand(OpIdx++));

    // Add the destination operands (D subregs).
    Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
    Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));

    // Add an implicit def for the super-register.
    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
    TransferImpOps(MI, MIB, MIB);
    MIB.cloneMemRefs(MI);
    MI.eraseFromParent();
    return true;
  }
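
  // A Q register overlays two consecutive D registers (dsub_0/dsub_1, e.g.
  // Q0 covers D0 and D1), which is why the QPR load/store multiple pseudos
  // expand to the corresponding D-register VLDM/VSTM forms.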
  case ARM::VSTMQIA: {
    unsigned NewOpc = ARM::VSTMDIA;
    MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
    unsigned OpIdx = 0;

    // Grab the Q register source.
    bool SrcIsKill = MI.getOperand(OpIdx).isKill();
    Register SrcReg = MI.getOperand(OpIdx++).getReg();

    // Copy the destination register.
    MachineOperand Dst(MI.getOperand(OpIdx++));
    MIB.add(Dst);

    // Copy the predicate operands.
    MIB.add(MI.getOperand(OpIdx++));
    MIB.add(MI.getOperand(OpIdx++));

    // Add the source operands (D subregs).
    Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
    Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
    MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
       .addReg(D1, SrcIsKill ? RegState::Kill : 0);

    if (SrcIsKill) // Add an implicit kill for the Q register.
      MIB->addRegisterKilled(SrcReg, TRI, true);

    TransferImpOps(MI, MIB, MIB);
    MIB.cloneMemRefs(MI);
    MI.eraseFromParent();
    return true;
  }
2655 case ARM::VLD2q8Pseudo
:
2656 case ARM::VLD2q16Pseudo
:
2657 case ARM::VLD2q32Pseudo
:
2658 case ARM::VLD2q8PseudoWB_fixed
:
2659 case ARM::VLD2q16PseudoWB_fixed
:
2660 case ARM::VLD2q32PseudoWB_fixed
:
2661 case ARM::VLD2q8PseudoWB_register
:
2662 case ARM::VLD2q16PseudoWB_register
:
2663 case ARM::VLD2q32PseudoWB_register
:
2664 case ARM::VLD3d8Pseudo
:
2665 case ARM::VLD3d16Pseudo
:
2666 case ARM::VLD3d32Pseudo
:
2667 case ARM::VLD1d8TPseudo
:
2668 case ARM::VLD1d8TPseudoWB_fixed
:
2669 case ARM::VLD1d8TPseudoWB_register
:
2670 case ARM::VLD1d16TPseudo
:
2671 case ARM::VLD1d16TPseudoWB_fixed
:
2672 case ARM::VLD1d16TPseudoWB_register
:
2673 case ARM::VLD1d32TPseudo
:
2674 case ARM::VLD1d32TPseudoWB_fixed
:
2675 case ARM::VLD1d32TPseudoWB_register
:
2676 case ARM::VLD1d64TPseudo
:
2677 case ARM::VLD1d64TPseudoWB_fixed
:
2678 case ARM::VLD1d64TPseudoWB_register
:
2679 case ARM::VLD3d8Pseudo_UPD
:
2680 case ARM::VLD3d16Pseudo_UPD
:
2681 case ARM::VLD3d32Pseudo_UPD
:
2682 case ARM::VLD3q8Pseudo_UPD
:
2683 case ARM::VLD3q16Pseudo_UPD
:
2684 case ARM::VLD3q32Pseudo_UPD
:
2685 case ARM::VLD3q8oddPseudo
:
2686 case ARM::VLD3q16oddPseudo
:
2687 case ARM::VLD3q32oddPseudo
:
2688 case ARM::VLD3q8oddPseudo_UPD
:
2689 case ARM::VLD3q16oddPseudo_UPD
:
2690 case ARM::VLD3q32oddPseudo_UPD
:
2691 case ARM::VLD4d8Pseudo
:
2692 case ARM::VLD4d16Pseudo
:
2693 case ARM::VLD4d32Pseudo
:
2694 case ARM::VLD1d8QPseudo
:
2695 case ARM::VLD1d8QPseudoWB_fixed
:
2696 case ARM::VLD1d8QPseudoWB_register
:
2697 case ARM::VLD1d16QPseudo
:
2698 case ARM::VLD1d16QPseudoWB_fixed
:
2699 case ARM::VLD1d16QPseudoWB_register
:
2700 case ARM::VLD1d32QPseudo
:
2701 case ARM::VLD1d32QPseudoWB_fixed
:
2702 case ARM::VLD1d32QPseudoWB_register
:
2703 case ARM::VLD1d64QPseudo
:
2704 case ARM::VLD1d64QPseudoWB_fixed
:
2705 case ARM::VLD1d64QPseudoWB_register
:
2706 case ARM::VLD1q8HighQPseudo
:
2707 case ARM::VLD1q8HighQPseudo_UPD
:
2708 case ARM::VLD1q8LowQPseudo_UPD
:
2709 case ARM::VLD1q8HighTPseudo
:
2710 case ARM::VLD1q8HighTPseudo_UPD
:
2711 case ARM::VLD1q8LowTPseudo_UPD
:
2712 case ARM::VLD1q16HighQPseudo
:
2713 case ARM::VLD1q16HighQPseudo_UPD
:
2714 case ARM::VLD1q16LowQPseudo_UPD
:
2715 case ARM::VLD1q16HighTPseudo
:
2716 case ARM::VLD1q16HighTPseudo_UPD
:
2717 case ARM::VLD1q16LowTPseudo_UPD
:
2718 case ARM::VLD1q32HighQPseudo
:
2719 case ARM::VLD1q32HighQPseudo_UPD
:
2720 case ARM::VLD1q32LowQPseudo_UPD
:
2721 case ARM::VLD1q32HighTPseudo
:
2722 case ARM::VLD1q32HighTPseudo_UPD
:
2723 case ARM::VLD1q32LowTPseudo_UPD
:
2724 case ARM::VLD1q64HighQPseudo
:
2725 case ARM::VLD1q64HighQPseudo_UPD
:
2726 case ARM::VLD1q64LowQPseudo_UPD
:
2727 case ARM::VLD1q64HighTPseudo
:
2728 case ARM::VLD1q64HighTPseudo_UPD
:
2729 case ARM::VLD1q64LowTPseudo_UPD
:
2730 case ARM::VLD4d8Pseudo_UPD
:
2731 case ARM::VLD4d16Pseudo_UPD
:
2732 case ARM::VLD4d32Pseudo_UPD
:
2733 case ARM::VLD4q8Pseudo_UPD
:
2734 case ARM::VLD4q16Pseudo_UPD
:
2735 case ARM::VLD4q32Pseudo_UPD
:
2736 case ARM::VLD4q8oddPseudo
:
2737 case ARM::VLD4q16oddPseudo
:
2738 case ARM::VLD4q32oddPseudo
:
2739 case ARM::VLD4q8oddPseudo_UPD
:
2740 case ARM::VLD4q16oddPseudo_UPD
:
2741 case ARM::VLD4q32oddPseudo_UPD
:
2742 case ARM::VLD3DUPd8Pseudo
:
2743 case ARM::VLD3DUPd16Pseudo
:
2744 case ARM::VLD3DUPd32Pseudo
:
2745 case ARM::VLD3DUPd8Pseudo_UPD
:
2746 case ARM::VLD3DUPd16Pseudo_UPD
:
2747 case ARM::VLD3DUPd32Pseudo_UPD
:
2748 case ARM::VLD4DUPd8Pseudo
:
2749 case ARM::VLD4DUPd16Pseudo
:
2750 case ARM::VLD4DUPd32Pseudo
:
2751 case ARM::VLD4DUPd8Pseudo_UPD
:
2752 case ARM::VLD4DUPd16Pseudo_UPD
:
2753 case ARM::VLD4DUPd32Pseudo_UPD
:
2754 case ARM::VLD2DUPq8EvenPseudo
:
2755 case ARM::VLD2DUPq8OddPseudo
:
2756 case ARM::VLD2DUPq16EvenPseudo
:
2757 case ARM::VLD2DUPq16OddPseudo
:
2758 case ARM::VLD2DUPq32EvenPseudo
:
2759 case ARM::VLD2DUPq32OddPseudo
:
2760 case ARM::VLD2DUPq8OddPseudoWB_fixed
:
2761 case ARM::VLD2DUPq8OddPseudoWB_register
:
2762 case ARM::VLD2DUPq16OddPseudoWB_fixed
:
2763 case ARM::VLD2DUPq16OddPseudoWB_register
:
2764 case ARM::VLD2DUPq32OddPseudoWB_fixed
:
2765 case ARM::VLD2DUPq32OddPseudoWB_register
:
2766 case ARM::VLD3DUPq8EvenPseudo
:
2767 case ARM::VLD3DUPq8OddPseudo
:
2768 case ARM::VLD3DUPq16EvenPseudo
:
2769 case ARM::VLD3DUPq16OddPseudo
:
2770 case ARM::VLD3DUPq32EvenPseudo
:
2771 case ARM::VLD3DUPq32OddPseudo
:
2772 case ARM::VLD3DUPq8OddPseudo_UPD
:
2773 case ARM::VLD3DUPq16OddPseudo_UPD
:
2774 case ARM::VLD3DUPq32OddPseudo_UPD
:
2775 case ARM::VLD4DUPq8EvenPseudo
:
2776 case ARM::VLD4DUPq8OddPseudo
:
2777 case ARM::VLD4DUPq16EvenPseudo
:
2778 case ARM::VLD4DUPq16OddPseudo
:
2779 case ARM::VLD4DUPq32EvenPseudo
:
2780 case ARM::VLD4DUPq32OddPseudo
:
2781 case ARM::VLD4DUPq8OddPseudo_UPD
:
2782 case ARM::VLD4DUPq16OddPseudo_UPD
:
2783 case ARM::VLD4DUPq32OddPseudo_UPD
:
  case ARM::VST2q8Pseudo:
  case ARM::VST2q16Pseudo:
  case ARM::VST2q32Pseudo:
  case ARM::VST2q8PseudoWB_fixed:
  case ARM::VST2q16PseudoWB_fixed:
  case ARM::VST2q32PseudoWB_fixed:
  case ARM::VST2q8PseudoWB_register:
  case ARM::VST2q16PseudoWB_register:
  case ARM::VST2q32PseudoWB_register:
  case ARM::VST3d8Pseudo:
  case ARM::VST3d16Pseudo:
  case ARM::VST3d32Pseudo:
  case ARM::VST1d8TPseudo:
  case ARM::VST1d8TPseudoWB_fixed:
  case ARM::VST1d8TPseudoWB_register:
  case ARM::VST1d16TPseudo:
  case ARM::VST1d16TPseudoWB_fixed:
  case ARM::VST1d16TPseudoWB_register:
  case ARM::VST1d32TPseudo:
  case ARM::VST1d32TPseudoWB_fixed:
  case ARM::VST1d32TPseudoWB_register:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64TPseudoWB_fixed:
  case ARM::VST1d64TPseudoWB_register:
  case ARM::VST3d8Pseudo_UPD:
  case ARM::VST3d16Pseudo_UPD:
  case ARM::VST3d32Pseudo_UPD:
  case ARM::VST3q8Pseudo_UPD:
  case ARM::VST3q16Pseudo_UPD:
  case ARM::VST3q32Pseudo_UPD:
  case ARM::VST3q8oddPseudo:
  case ARM::VST3q16oddPseudo:
  case ARM::VST3q32oddPseudo:
  case ARM::VST3q8oddPseudo_UPD:
  case ARM::VST3q16oddPseudo_UPD:
  case ARM::VST3q32oddPseudo_UPD:
  case ARM::VST4d8Pseudo:
  case ARM::VST4d16Pseudo:
  case ARM::VST4d32Pseudo:
  case ARM::VST1d8QPseudo:
  case ARM::VST1d8QPseudoWB_fixed:
  case ARM::VST1d8QPseudoWB_register:
  case ARM::VST1d16QPseudo:
  case ARM::VST1d16QPseudoWB_fixed:
  case ARM::VST1d16QPseudoWB_register:
  case ARM::VST1d32QPseudo:
  case ARM::VST1d32QPseudoWB_fixed:
  case ARM::VST1d32QPseudoWB_register:
  case ARM::VST1d64QPseudo:
  case ARM::VST1d64QPseudoWB_fixed:
  case ARM::VST1d64QPseudoWB_register:
  case ARM::VST4d8Pseudo_UPD:
  case ARM::VST4d16Pseudo_UPD:
  case ARM::VST4d32Pseudo_UPD:
  case ARM::VST1q8HighQPseudo:
  case ARM::VST1q8LowQPseudo_UPD:
  case ARM::VST1q8HighTPseudo:
  case ARM::VST1q8LowTPseudo_UPD:
  case ARM::VST1q16HighQPseudo:
  case ARM::VST1q16LowQPseudo_UPD:
  case ARM::VST1q16HighTPseudo:
  case ARM::VST1q16LowTPseudo_UPD:
  case ARM::VST1q32HighQPseudo:
  case ARM::VST1q32LowQPseudo_UPD:
  case ARM::VST1q32HighTPseudo:
  case ARM::VST1q32LowTPseudo_UPD:
  case ARM::VST1q64HighQPseudo:
  case ARM::VST1q64LowQPseudo_UPD:
  case ARM::VST1q64HighTPseudo:
  case ARM::VST1q64LowTPseudo_UPD:
  case ARM::VST1q8HighTPseudo_UPD:
  case ARM::VST1q16HighTPseudo_UPD:
  case ARM::VST1q32HighTPseudo_UPD:
  case ARM::VST1q64HighTPseudo_UPD:
  case ARM::VST1q8HighQPseudo_UPD:
  case ARM::VST1q16HighQPseudo_UPD:
  case ARM::VST1q32HighQPseudo_UPD:
  case ARM::VST1q64HighQPseudo_UPD:
  case ARM::VST4q8Pseudo_UPD:
  case ARM::VST4q16Pseudo_UPD:
  case ARM::VST4q32Pseudo_UPD:
  case ARM::VST4q8oddPseudo:
  case ARM::VST4q16oddPseudo:
  case ARM::VST4q32oddPseudo:
  case ARM::VST4q8oddPseudo_UPD:
  case ARM::VST4q16oddPseudo_UPD:
  case ARM::VST4q32oddPseudo_UPD:
    ExpandVST(MBBI);
    return true;

  case ARM::VLD1LNq8Pseudo:
  case ARM::VLD1LNq16Pseudo:
  case ARM::VLD1LNq32Pseudo:
  case ARM::VLD1LNq8Pseudo_UPD:
  case ARM::VLD1LNq16Pseudo_UPD:
  case ARM::VLD1LNq32Pseudo_UPD:
  case ARM::VLD2LNd8Pseudo:
  case ARM::VLD2LNd16Pseudo:
  case ARM::VLD2LNd32Pseudo:
  case ARM::VLD2LNq16Pseudo:
  case ARM::VLD2LNq32Pseudo:
  case ARM::VLD2LNd8Pseudo_UPD:
  case ARM::VLD2LNd16Pseudo_UPD:
  case ARM::VLD2LNd32Pseudo_UPD:
  case ARM::VLD2LNq16Pseudo_UPD:
  case ARM::VLD2LNq32Pseudo_UPD:
  case ARM::VLD3LNd8Pseudo:
  case ARM::VLD3LNd16Pseudo:
  case ARM::VLD3LNd32Pseudo:
  case ARM::VLD3LNq16Pseudo:
  case ARM::VLD3LNq32Pseudo:
  case ARM::VLD3LNd8Pseudo_UPD:
  case ARM::VLD3LNd16Pseudo_UPD:
  case ARM::VLD3LNd32Pseudo_UPD:
  case ARM::VLD3LNq16Pseudo_UPD:
  case ARM::VLD3LNq32Pseudo_UPD:
  case ARM::VLD4LNd8Pseudo:
  case ARM::VLD4LNd16Pseudo:
  case ARM::VLD4LNd32Pseudo:
  case ARM::VLD4LNq16Pseudo:
  case ARM::VLD4LNq32Pseudo:
  case ARM::VLD4LNd8Pseudo_UPD:
  case ARM::VLD4LNd16Pseudo_UPD:
  case ARM::VLD4LNd32Pseudo_UPD:
  case ARM::VLD4LNq16Pseudo_UPD:
  case ARM::VLD4LNq32Pseudo_UPD:
  case ARM::VST1LNq8Pseudo:
  case ARM::VST1LNq16Pseudo:
  case ARM::VST1LNq32Pseudo:
  case ARM::VST1LNq8Pseudo_UPD:
  case ARM::VST1LNq16Pseudo_UPD:
  case ARM::VST1LNq32Pseudo_UPD:
  case ARM::VST2LNd8Pseudo:
  case ARM::VST2LNd16Pseudo:
  case ARM::VST2LNd32Pseudo:
  case ARM::VST2LNq16Pseudo:
  case ARM::VST2LNq32Pseudo:
  case ARM::VST2LNd8Pseudo_UPD:
  case ARM::VST2LNd16Pseudo_UPD:
  case ARM::VST2LNd32Pseudo_UPD:
  case ARM::VST2LNq16Pseudo_UPD:
  case ARM::VST2LNq32Pseudo_UPD:
  case ARM::VST3LNd8Pseudo:
  case ARM::VST3LNd16Pseudo:
  case ARM::VST3LNd32Pseudo:
  case ARM::VST3LNq16Pseudo:
  case ARM::VST3LNq32Pseudo:
  case ARM::VST3LNd8Pseudo_UPD:
  case ARM::VST3LNd16Pseudo_UPD:
  case ARM::VST3LNd32Pseudo_UPD:
  case ARM::VST3LNq16Pseudo_UPD:
  case ARM::VST3LNq32Pseudo_UPD:
  case ARM::VST4LNd8Pseudo:
  case ARM::VST4LNd16Pseudo:
  case ARM::VST4LNd32Pseudo:
  case ARM::VST4LNq16Pseudo:
  case ARM::VST4LNq32Pseudo:
  case ARM::VST4LNd8Pseudo_UPD:
  case ARM::VST4LNd16Pseudo_UPD:
  case ARM::VST4LNd32Pseudo_UPD:
  case ARM::VST4LNq16Pseudo_UPD:
  case ARM::VST4LNq32Pseudo_UPD:
    ExpandLaneOp(MBBI);
    return true;

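  // Table-lookup pseudos: ExpandVTBL rewrites each one into the corresponding
  // real VTBL/VTBX instruction over the underlying D-register list; the final
  // argument selects the VTBX ("extension") form, which also reads the
  // destination register.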
  case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
  case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
  case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
  case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;

  case ARM::MQQPRLoad:
  case ARM::MQQPRStore:
  case ARM::MQQQQPRLoad:
  case ARM::MQQQQPRStore:
    ExpandMQQPRLoadStore(MBBI);
    return true;

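  // Atomic compare-and-swap pseudos. These are kept as single instructions
  // through register allocation and are only now expanded into
  // load-exclusive/store-exclusive loops.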
  case ARM::tCMP_SWAP_8:
    assert(STI->isThumb());
    return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
                          NextMBBI);
  case ARM::tCMP_SWAP_16:
    assert(STI->isThumb());
    return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
                          NextMBBI);
  case ARM::CMP_SWAP_8:
    assert(!STI->isThumb());
    return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
                          NextMBBI);
  case ARM::CMP_SWAP_16:
    assert(!STI->isThumb());
    return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
                          NextMBBI);
  case ARM::CMP_SWAP_32:
    if (STI->isThumb())
      return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
                            NextMBBI);
    else
      return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);

  case ARM::CMP_SWAP_64:
    return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);

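  // Profiling call pseudos: save LR, then emit the call to __gnu_mcount_nc.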
  case ARM::tBL_PUSHLR:
  case ARM::BL_PUSHLR: {
    const bool Thumb = Opcode == ARM::tBL_PUSHLR;
    Register Reg = MI.getOperand(0).getReg();
    assert(Reg == ARM::LR && "expect LR register!");
    MachineInstrBuilder MIB;
    if (Thumb) {
      // push {lr}
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
          .add(predOps(ARMCC::AL))
          .addReg(Reg);

      // bl __gnu_mcount_nc
      MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
    } else {
      // stmdb sp!, {lr}
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .addReg(Reg);

      // bl __gnu_mcount_nc
      MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
    }

    MIB.cloneMemRefs(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
    MI.eraseFromParent();
    return true;
  }
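  // Paired load/store pseudos become a single LDRD/STRD over the two GPR
  // halves (gsub_0/gsub_1) of the register pair.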
  case ARM::LOADDUAL:
  case ARM::STOREDUAL: {
    Register PairReg = MI.getOperand(0).getReg();

    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
            .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
                    Opcode == ARM::LOADDUAL ? RegState::Define : 0)
            .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
                    Opcode == ARM::LOADDUAL ? RegState::Define : 0);
    for (unsigned i = 1; i < MI.getNumOperands(); i++)
      MIB.add(MI.getOperand(i));
    MIB.add(predOps(ARMCC::AL));
    MIB.cloneMemRefs(MI);
    MI.eraseFromParent();
    return true;
  }
  }
}

bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Capture the next iterator up front: ExpandMI may erase the instruction
    // at MBBI or insert new instructions around it.
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  AFI = MF.getInfo<ARMFunctionInfo>();

  LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
                    << "********** Function: " << MF.getName() << '\n');

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  if (VerifyARMPseudo)
    MF.verify(this, "After expanding ARM pseudo instructions.");

  LLVM_DEBUG(dbgs() << "***************************************************\n");
  return Modified;
}

/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
/// expansion pass.
FunctionPass *llvm::createARMExpandPseudoPass() {
  return new ARMExpandPseudo();
}
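
// Illustrative sketch only (not part of this file): the factory above is
// invoked from the ARM target's pass pipeline setup, roughly along these
// lines. The exact hook lives in ARMTargetMachine.cpp and its name and
// placement may differ between LLVM versions.
//
//   void ARMPassConfig::addPreSched2() {
//     // Expand pseudos after register allocation (the pass requires NoVRegs)
//     // and before the post-RA scheduler runs.
//     addPass(createARMExpandPseudoPass());
//   }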