//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));
namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}
// The APCS parameter registers.
static const unsigned GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
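// Both APCS and AAPCS pass the first four words of integer arguments in
// r0-r3, so this one table covers either ABI.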
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction(VT.getSimpleVT(),
                          (MVT::SimpleValueType)InnerVT, Expand);
  }
  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);
  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);

    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
  }
  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);
    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);
  }
  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }
  // i64 operation support.
  setOperationAction(ISD::MUL,   MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,    MVT::i32, Expand);
  setOperationAction(ISD::UREM,    MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,      MVT::Other, Custom);
  setOperationAction(ISD::VAARG,        MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,       MVT::Other, Expand);
  setOperationAction(ISD::VAEND,        MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,  MVT::i32,   Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32,  Expand);
  setExceptionPointerRegister(ARM::R0);
  setExceptionSelectorRegister(ARM::R1);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }
  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,  MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,  MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);
  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const{
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP:        return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:       return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_DISPATCHSETUP: return "ARMISD::EH_SJLJ_DISPATCHSETUP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:     return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  case ARMISD::BFI:           return "ARMISD::BFI";
  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
  case ARMISD::VBSL:          return "ARMISD::VBSL";
  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
  }
}
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}
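// The Thumb1 limit is small because Thumb1 load/store encodings only accept
// short immediate offsets; ARM and Thumb2 can fold a 12-bit offset (up to
// 4095 bytes) directly into the addressing mode.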
Sched::Preference
ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::Latency;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if their instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::Latency;

  return Sched::RegPressure;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
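// Note that FPCCToARMCC maps SETONE and SETUEQ to a pair of ARM condition
// codes; callers must also test CondCode2 (ARMCC::AL means no second check
// is needed).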
//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             FloatABIType == FloatABI::Hard && !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA, Flags));
  }
}
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();
    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
      if (CCInfo.isFirstByValRegValid()) {
        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned int i, j;
        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }
        offset = ARM::R4 - CCInfo.getFirstByValReg();
        CCInfo.clearFirstByValReg();
      }

      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                StkPtrOff);
      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                         MVT::i32);
      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                          Flags.getByValAlign(),
                                          /*isVolatile=*/false,
                                          /*AlwaysInline=*/false,
                                          MachinePointerInfo(0),
                                          MachinePointerInfo(0)));
1346 } else if (!IsSibCall
) {
1347 assert(VA
.isMemLoc());
1349 MemOpChains
.push_back(LowerMemOpCallTo(Chain
, StackPtr
, Arg
,
1350 dl
, DAG
, VA
, Flags
));
1354 if (!MemOpChains
.empty())
1355 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1356 &MemOpChains
[0], MemOpChains
.size());
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }
  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.  Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
  assert((State->getCallOrPrologue() == Prologue ||
          State->getCallOrPrologue() == Call) &&
         "unhandled ParmContext");
  if ((!State->isFirstByValRegValid()) &&
      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
    State->setFirstByValReg(reg);
    // At a call site, a byval parameter that is split between
    // registers and memory needs its size truncated here.  In a
    // function prologue, such byval parameters are reassembled in
    // memory, and are not truncated.
    if (State->getCallOrPrologue() == Call) {
      unsigned excess = 4 * (ARM::R4 - reg);
      assert(size >= excess && "expected larger existing stack allocation");
      size -= excess;
    }
  }
  // Confiscate any remaining parameter registers to preclude their
  // assignment to subsequent parameters.
  while (State->AllocateReg(GPRArgRegs, 4))
    ;
}
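
// Worked example (illustrative only, not part of the original logic): for a
// 16-byte byval argument whose first assigned register is r2, registers
// r2-r3 carry 8 bytes and excess = 4 * (R4 - r2) = 8, so at a call site the
// stack portion of the argument shrinks from 16 to 8 bytes, while in a
// prologue the full 16 bytes are reassembled in memory instead.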
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR.  This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used.  Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters.  We don't currently do this
  // case.
  if (Subtarget->isThumb1Only())
    return false;

  // If the calling conventions do not match, then we'd better make sure the
  // results are returned in the same way as what the caller expects.
  if (!CCMatch) {
    SmallVector<CCValAssign, 16> RVLocs1;
    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

    SmallVector<CCValAssign, 16> RVLocs2;
    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

    if (RVLocs1.size() != RVLocs2.size())
      return false;
    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
        return false;
      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
        return false;
      if (RVLocs1[i].isRegLoc()) {
        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
          return false;
      } else {
        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
          return false;
      }
    }
  }

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
    CCInfo.AnalyzeCallOperands(Outs,
                               CCAssignFnForNode(CalleeCC, false, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      MachineFunction &MF = DAG.getMachineFunction();

      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo *MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const ARMInstrInfo *TII =
        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom()) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations.  The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }
  }

  return true;
}
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  unsigned NumCopies = 0;
  SDNode* Copies[2];
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg) {
    Copies[NumCopies++] = Use;
  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
    // f64 returned in a pair of GPRs.
    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() != ISD::CopyToReg)
        return false;
      Copies[UI.getUse().getResNo()] = *UI;
      ++NumCopies;
    }
  } else if (Use->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR.
    if (!Use->hasNUsesOfValue(1, 0))
      return false;
    Use = *Use->use_begin();
    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
      return false;
    Copies[NumCopies++] = Use;
  } else {
    return false;
  }

  if (NumCopies != 1 && NumCopies != 2)
    return false;

  bool HasRet = false;
  for (unsigned i = 0; i < NumCopies; ++i) {
    SDNode *Copy = Copies[i];
    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() == ISD::CopyToReg) {
        SDNode *Use = *UI;
        if (Use == Copies[0] || Use == Copies[1])
          continue;
        return false;
      }
      if (UI->getOpcode() != ARMISD::RET_FLAG)
        return false;
      HasRet = true;
    }
  }

  return HasRet;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!EnableARMTailCalls)
    return false;

  if (!CI->isTailCall())
    return false;

  return !Subtarget->isThumb1Only();
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}
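
// Sketch of the resulting sequence (illustrative only; exact relocations and
// scheduling are decided later by the assembler and linker):
//   r0 = pc-relative address of the x(TLSGD) constant-pool entry
//   bl  __tls_get_addr        ; returns the address of x in r0
// i.e. the PIC_ADD above forms the GOT-relative argument that the runtime
// helper resolves at execution time.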
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                 MachinePointerInfo::getConstantPool(),
                                 false, false, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           MachinePointerInfo::getGOT(), false, false, 0);
    return Result;
  }

  // If we have T2 ops, we can materialize the address directly via movt/movw
  // pair. This is always cheaper.
  if (Subtarget->useMovt()) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                       MachinePointerInfo::getConstantPool(),
                       false, false, 0);
  }
}
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // FIXME: Enable this for static codegen when tool issues are fixed.
  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    if (RelocM == Reloc::Static)
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));

    unsigned Wrapper = (RelocM == Reloc::PIC_)
      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                           MachinePointerInfo::getGOT(), false, false, 0);
    return Result;
  }

  unsigned ARMPCLabelIndex = 0;
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  } else {
    ARMPCLabelIndex = AFI->createPICLabelUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
                         false, false, 0);

  return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       "_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
  const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  SDValue Val = DAG.getConstant(0, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
                     Op.getOperand(1), Val);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                           const ARMSubtarget *Subtarget)
                                             const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
                               ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                  MachinePointerInfo::getConstantPool(),
                  false, false, 0);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
      ? ARMISD::VMULLs : ARMISD::VMULLu;
    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  }
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *Subtarget) {
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }

  SDValue Op5 = Op.getOperand(5);
  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);

  ARM_MB::MemBOpt DMBOpt;
  if (isDeviceBarrier)
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
  else
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(DMBOpt, MVT::i32));
}
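
// Summary of the mapping above (for reference, restating the logic of this
// function rather than adding behavior): device barriers become DMB SY, or
// DMB ST when only stores need ordering; normal barriers use the
// inner-shareable forms DMB ISH / DMB ISHST. Cores without DMB fall back to
// the ARMv6 CP15 barrier (typically mcr p15, 0, rX, c7, c10, 5) via
// ARMISD::MEMBARRIER_MCR.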
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre v5TE and Thumb1 do not have preload instructions.
  if (!(Subtarget->isThumb2() ||
        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
    // Just preserve the chain.
    return Op.getOperand(0);

  DebugLoc dl = Op.getDebugLoc();
  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
  if (!isRead &&
      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
    // ARMv7 with MP extension has PLDW.
    return Op.getOperand(0);

  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (Subtarget->isThumb()) {
    // Invert the bits.
    isRead = ~isRead & 1;
    isData = ~isData & 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
                     DAG.getConstant(isData, MVT::i32));
}
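
// Example (illustrative): a write prefetch such as __builtin_prefetch(p, 1)
// yields isRead = 0; on an ARMv7 core with the MP extension it becomes an
// ARMISD::PRELOAD that is later selected to PLDW [p], while without the MP
// extension the node is dropped above and only the chain is preserved.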
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  DebugLoc dl = Op.getDebugLoc();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                      MachinePointerInfo(SV), false, false, 0);
}
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                        SDValue &Root, SelectionDAG &DAG,
                                        DebugLoc dl) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  TargetRegisterClass *RC;
  if (AFI->isThumb1OnlyFunction())
    RC = ARM::tGPRRegisterClass;
  else
    RC = ARM::GPRRegisterClass;

  // Transform the arguments stored in physical registers into virtual ones.
  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

  SDValue ArgValue2;
  if (NextVA.isMemLoc()) {
    MachineFrameInfo *MFI = MF.getFrameInfo();
    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);

    // Create load node to retrieve arguments from the stack.
    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
                            MachinePointerInfo::getFixedStack(FI),
                            false, false, 0);
  } else {
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
  }

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
                                  unsigned &VARegSize, unsigned &VARegSaveSize)
  const {
  unsigned NumGPRs;
  if (CCInfo.isFirstByValRegValid())
    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
  else {
    unsigned int firstUnalloced;
    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
                                                sizeof(GPRArgRegs) /
                                                sizeof(GPRArgRegs[0]));
    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
  }

  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
  VARegSize = NumGPRs * 4;
  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
}
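
// Example (illustrative): if r0 and r1 were consumed by fixed arguments,
// firstUnalloced is 2, so NumGPRs = 2 and VARegSize = 8; with an 8-byte
// stack alignment VARegSaveSize is also 8, and r2-r3 get spilled next to
// the caller-provided argument area by VarArgStyleRegisters below.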
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval).  Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                        DebugLoc dl, SDValue &Chain,
                                        unsigned ArgOffset) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned firstRegToSaveIndex;
  if (CCInfo.isFirstByValRegValid())
    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
  else {
    firstRegToSaveIndex = CCInfo.getFirstUnallocated
      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
  }

  unsigned VARegSize, VARegSaveSize;
  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
  if (VARegSaveSize) {
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    AFI->setVarArgsRegSaveSize(VARegSaveSize);
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
                                                     ArgOffset + VARegSaveSize
                                                       - VARegSize,
                                                     false));
    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                    getPointerTy());

    SmallVector<SDValue, 4> MemOps;
    for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
      TargetRegisterClass *RC;
      if (AFI->isThumb1OnlyFunction())
        RC = ARM::tGPRRegisterClass;
      else
        RC = ARM::GPRRegisterClass;

      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
      SDValue Store =
        DAG.getStore(Val.getValue(1), dl, Val, FIN,
                     MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
                     false, false, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                        DAG.getConstant(4, getPointerTy()));
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  } else
    // This will point to the next argument passed via stack.
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  SmallVector<SDValue, 16> ArgValues;
  int lastInsIndex = -1;

  SDValue ArgValue;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        if (VA.getLocVT() == MVT::v2f64) {
          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          VA = ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA.isMemLoc()) {
            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0);
          } else {
            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
        } else
          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        TargetRegisterClass *RC;

        if (RegVT == MVT::f32)
          RC = ARM::SPRRegisterClass;
        else if (RegVT == MVT::f64)
          RC = ARM::DPRRegisterClass;
        else if (RegVT == MVT::v2f64)
          RC = ARM::QPRRegisterClass;
        else if (RegVT == MVT::i32)
          RC = (AFI->isThumb1OnlyFunction() ?
                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits.  Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // VA.isRegLoc()

      // sanity check
      assert(VA.isMemLoc());
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = ArgLocs[i].getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex) {
        ISD::ArgFlagsTy Flags = Ins[index].Flags;
        // FIXME: For now, all byval parameter objects are marked mutable.
        // This can be changed with more analysis.
        // In case of tail call optimization mark all arguments mutable.
        // Since they could be overwritten by lowering of arguments in case of
        // a tail call.
        if (Flags.isByVal()) {
          unsigned VARegSize, VARegSaveSize;
          computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
          VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
          unsigned Bytes = Flags.getByValSize() - VARegSize;
          if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
          int FI = MFI->CreateFixedObject(Bytes,
                                          VA.getLocMemOffset(), false);
          InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
        } else {
          int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                          VA.getLocMemOffset(), true);

          // Create load nodes to retrieve arguments from the stack.
          SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
          InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, 0));
        }
        lastInsIndex = index;
      }
    }
  }

  // varargs
  if (isVarArg)
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());

  return Chain;
}
/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }

  return false;
}
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      }
    }
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
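
// Example of the constant adjustment above (illustrative): for "x < 0x101"
// the constant 0x101 is not a legal cmp immediate, but 0x100 is, so the
// comparison is rewritten as "x <= 0x100" (SETLT becomes SETLE with C-1)
// before the ARM condition code is chosen.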
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                             DebugLoc dl) const {
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}

/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
  unsigned Opc = Cmp.getOpcode();
  DebugLoc DL = Cmp.getDebugLoc();
  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
  Cmp = Cmp.getOperand(0);
  Opc = Cmp.getOpcode();
  if (Opc == ARMISD::CMPFP)
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
  else {
    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
  }
  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
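
// Note (explanatory, restating the doc comment above): because MVT::Glue
// results are single-use, a caller such as LowerSELECT that folds a CMOV
// condition cannot reuse the CMOV's existing compare and instead rebuilds an
// identical CMP/CMPZ or CMPFP(+FMSTAT) chain through this helper.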
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
      }
    }
  }

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                               ARMcc, CCR, Cmp);
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
}
/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
                           const ARMSubtarget *Subtarget) {
  SDNode *N = Op.getNode();
  if (!N->hasOneUse())
    // Otherwise it requires moving the value from fp to integer registers.
    return false;
  if (!N->getNumValues())
    return false;
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
    // vmrs are very slow, e.g. cortex-a8.
    return false;

  if (isFloatingPointZero(Op)) {
    SeenZero = true;
    return true;
  }
  return ISD::isNormalLoad(N);
}

static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
  if (isFloatingPointZero(Op))
    return DAG.getConstant(0, MVT::i32);

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                       Ld->isVolatile(), Ld->isNonTemporal(),
                       Ld->getAlignment());

  llvm_unreachable("Unknown VFP cmp argument!");
}

static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                           SDValue &RetVal1, SDValue &RetVal2) {
  if (isFloatingPointZero(Op)) {
    RetVal1 = DAG.getConstant(0, MVT::i32);
    RetVal2 = DAG.getConstant(0, MVT::i32);
    return;
  }

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
    SDValue Ptr = Ld->getBasePtr();
    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), Ptr,
                          Ld->getPointerInfo(),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->getAlignment());

    EVT PtrType = Ptr.getValueType();
    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), NewPtr,
                          Ld->getPointerInfo().getWithOffset(4),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          NewAlign);
    return;
  }

  llvm_unreachable("Unknown VFP cmp argument!");
}
/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  bool SeenZero = false;
  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
      canChangeToInt(RHS, SeenZero, Subtarget) &&
      // If one of the operands is zero, it's safe to ignore the NaN case since
      // we only care about equality comparisons.
      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = bitcastf32Toi32(LHS, DAG);
      RHS = bitcastf32Toi32(RHS, DAG);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
  }

  return SDValue();
}

SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  if (UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    SDValue Result = OptimizeVFPBrcond(Op, DAG);
    if (Result.getNode())
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
}

SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  EVT PTy = getPointerTy();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
  if (Subtarget->isThumb2()) {
    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later.
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI, UId);
  }
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, 0);
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  } else {
    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(), false, false, 0);
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  }
}
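
// Illustrative: the address computed above is Table + 4 * Index. In PIC mode
// the loaded jump-table entry is itself an offset, so it is added back to the
// table base before the indirect branch.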

static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::FP_TO_SINT:
    Opc = ARMISD::FTOSI;
    break;
  case ISD::FP_TO_UINT:
    Opc = ARMISD::FTOUI;
    break;
  }
  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
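
// Note (informal): FTOSI / FTOUI leave the converted integer sitting in a VFP
// S register, which is why the node above is created with type f32 and then
// bitcast to i32 rather than producing an i32 directly.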

static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  EVT OperandVT = Op.getOperand(0).getValueType();
  assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
  if (VT != MVT::v4f32)
    return DAG.UnrollVectorOp(Op.getNode());

  unsigned CastOpc;
  unsigned Opc;
  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::SINT_TO_FP:
    CastOpc = ISD::SIGN_EXTEND;
    Opc = ISD::SINT_TO_FP;
    break;
  case ISD::UINT_TO_FP:
    CastOpc = ISD::ZERO_EXTEND;
    Opc = ISD::UINT_TO_FP;
    break;
  }

  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::SINT_TO_FP:
    Opc = ARMISD::SITOF;
    break;
  case ISD::UINT_TO_FP:
    Opc = ARMISD::UITOF;
    break;
  }

  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDValue Tmp0 = Op.getOperand(0);
  SDValue Tmp1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
    Tmp0.getOpcode() == ARMISD::VMOVDRR;
  bool UseNEON = !InGPR && Subtarget->hasNEON();

  if (UseNEON) {
    // Use VBSL to copy the sign bit.
    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
                               DAG.getTargetConstant(EncodedVal, MVT::i32));
    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
    if (VT == MVT::f64)
      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                         DAG.getConstant(32, MVT::i32));
    else /*if (VT == MVT::f32)*/
      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
    if (SrcVT == MVT::f32) {
      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
      if (VT == MVT::f64)
        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                           DAG.getConstant(32, MVT::i32));
    } else if (VT == MVT::f32)
      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                         DAG.getConstant(32, MVT::i32));
    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);

    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
                                            MVT::i32);
    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));

    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
    if (VT == MVT::f32) {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                        DAG.getConstant(0, MVT::i32));
    } else {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
    }
    return Res;
  }

  // Bitcast operand 1 to i32.
  if (SrcVT == MVT::f64)
    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                       &Tmp1, 1).getValue(1);
  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);

  // Or in the signbit with integer operations.
  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
  if (VT == MVT::f32) {
    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
  }

  // f64: Or the high part with signbit and then combine two parts.
  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                     &Tmp0, 1);
  SDValue Lo = Tmp0.getValue(0);
  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
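
// Sketch of the f32 GPR path above, for illustration:
//   result = bitcast_f32((bitcast_i32(Tmp0) & 0x7fffffff) |
//                        (bitcast_i32(Tmp1) & 0x80000000))
// i.e. the magnitude of operand 0 combined with the sign bit of operand 1.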

SDValue
ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, 0);
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue
ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
    ? ARM::R7 : ARM::R11;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(),
                            false, false, 0);
  return FrameAddr;
}

/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  DebugLoc dl = N->getDebugLoc();
  SDValue Op = N->getOperand(0);

  // This function is only supposed to be called for i64 types, either as the
  // source or destination of the bit convert.
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);
  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
         "ExpandBITCAST called for non-i64 type");

  // Turn i64->f64 into VMOVDRR.
  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(0, MVT::i32));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(1, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
  }

  // Turn f64->i64 into VMOVRRD.
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}
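
// Illustrative: VMOVDRR assembles an f64 from two i32 GPR halves and VMOVRRD
// splits one back out, so an i64 <-> f64 bitcast never has to go through
// memory on targets with VFP.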

/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction.  However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed.  Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // The canonical modified immediate encoding of a zero vector is....0!
  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}

/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
  SDValue ARMcc;

  assert(Op.getOpcode() == ISD::SHL_PARTS);
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
  // so that the shift + and get folded into a bitfield extract.
  DebugLoc dl = Op.getDebugLoc();
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                              DAG.getConstant(Intrinsic::arm_get_fpscr,
                                              MVT::i32));
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                  DAG.getConstant(1U << 22, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, MVT::i32));
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                     DAG.getConstant(3, MVT::i32));
}
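
// Worked example (illustrative): if the FPSCR RMode field is 0b10 (round
// toward minus infinity), the formula gives ((2 + 1) & 3) = 3, which is the
// FLT_ROUNDS encoding for rounding toward negative infinity.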

static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                         const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  if (!ST->hasV6T2Ops())
    return SDValue();

  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}

static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  if (!VT.isVector())
    return SDValue();

  // Lower vector shifts on NEON to use VSHL.
  assert(ST->hasNEON() && "unexpected vector shift");

  // Left shifts translate directly to the vshiftu intrinsic.
  if (N->getOpcode() == ISD::SHL)
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
                       N->getOperand(0), N->getOperand(1));

  assert((N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

  // NEON uses the same intrinsics for both left and right shifts.  For
  // right shifts, the shift amounts are negative, so negate the vector of
  // shift amounts.
  EVT ShiftVT = N->getOperand(1).getValueType();
  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                     getZeroVector(ShiftVT, DAG, dl),
                                     N->getOperand(1));
  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
                             Intrinsic::arm_neon_vshifts :
                             Intrinsic::arm_neon_vshiftu);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(vshiftInt, MVT::i32),
                     N->getOperand(0), NegatedCount);
}
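
// Illustrative: a vector SRL by <4, 4, 4, 4> becomes a call to
// arm.neon.vshiftu with the negated count <-4, -4, -4, -4>, matching the NEON
// convention that negative shift amounts shift right.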

static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // We can get here for a node like i32 = ISD::SHL i32, i64
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG : ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
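
// Illustrative: for an i64 logical shift right by 1, the high word is shifted
// with SRL_FLAG so bit 32 of the original value lands in the carry, and RRX
// then rotates that carry into bit 31 of the low word.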

static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDValue TmpOp0, TmpOp1;
  bool Invert = false;
  bool Swap = false;
  unsigned Opc = 0;

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  EVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal FP comparison"); break;
    case ISD::SETUNE:
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETOEQ:
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETOLT:
    case ISD::SETLT: Swap = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETOLE:
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: Opc = ARMISD::VCGE; break;
    case ISD::SETUGE: Swap = true; // Fallthrough
    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
    case ISD::SETUGT: Swap = true; // Fallthrough
    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
    case ISD::SETUEQ: Invert = true; // Fallthrough
    case ISD::SETONE:
      // Expand this to (OLT | OGT).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
      break;
    case ISD::SETUO: Invert = true; // Fallthrough
    case ISD::SETO:
      // Expand this to (OLT | OGE).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
      break;
    }
  } else {
    // Integer comparisons.
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal integer comparison"); break;
    case ISD::SETNE:  Invert = true;
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETLT:  Swap = true;
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETLE:  Swap = true;
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETULT: Swap = true;
    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
    case ISD::SETULE: Swap = true;
    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
    }

    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
    if (Opc == ARMISD::VCEQ) {

      SDValue AndOp;
      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
        AndOp = Op0;
      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
        AndOp = Op1;

      // Ignore bitconvert.
      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
        AndOp = AndOp.getOperand(0);

      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
        Opc = ARMISD::VTST;
        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
        Invert = !Invert;
      }
    }
  }

  if (Swap)
    std::swap(Op0, Op1);

  // If one of the operands is a constant vector zero, attempt to fold the
  // comparison to a specialized compare-against-zero form.
  SDValue SingleOp;
  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
    SingleOp = Op0;
  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    if (Opc == ARMISD::VCGE)
      Opc = ARMISD::VCLEZ;
    else if (Opc == ARMISD::VCGT)
      Opc = ARMISD::VCLTZ;
    SingleOp = Op1;
  }

  SDValue Result;
  if (SingleOp.getNode()) {
    switch (Opc) {
    case ARMISD::VCEQ:
      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
    case ARMISD::VCGE:
      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
    case ARMISD::VCLEZ:
      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
    case ARMISD::VCGT:
      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
    case ARMISD::VCLTZ:
      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
    default:
      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
    }
  } else {
    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
  }

  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}

/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV).  If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                 unsigned SplatBitSize, SelectionDAG &DAG,
                                 EVT &VT, bool is128Bits, NEONModImmType type) {
  unsigned OpCmode, Imm;

  // SplatBitSize is set to the smallest size that splats the vector, so a
  // zero vector will always have SplatBitSize == 8.  However, NEON modified
  // immediate instructions others than VMOV do not support the 8-bit encoding
  // of a zero vector, and the default encoding of zero is supposed to be the
  // 32-bit version.
  if (SplatBits == 0)
    SplatBitSize = 32;

  switch (SplatBitSize) {
  case 8:
    if (type != VMOVModImm)
      return SDValue();
    // Any 1-byte value is OK.  Op=0, Cmode=1110.
    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
    OpCmode = 0xe;
    Imm = SplatBits;
    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
    break;

  case 16:
    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
    if ((SplatBits & ~0xff) == 0) {
      // Value = 0x00nn: Op=x, Cmode=100x.
      OpCmode = 0x8;
      Imm = SplatBits;
      break;
    }
    if ((SplatBits & ~0xff00) == 0) {
      // Value = 0xnn00: Op=x, Cmode=101x.
      OpCmode = 0xa;
      Imm = SplatBits >> 8;
      break;
    }
    return SDValue();

  case 32:
    // NEON's 32-bit VMOV supports splat values where:
    // * only one byte is nonzero, or
    // * the least significant byte is 0xff and the second byte is nonzero, or
    // * the least significant 2 bytes are 0xff and the third is nonzero.
    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
    if ((SplatBits & ~0xff) == 0) {
      // Value = 0x000000nn: Op=x, Cmode=000x.
      OpCmode = 0;
      Imm = SplatBits;
      break;
    }
    if ((SplatBits & ~0xff00) == 0) {
      // Value = 0x0000nn00: Op=x, Cmode=001x.
      OpCmode = 0x2;
      Imm = SplatBits >> 8;
      break;
    }
    if ((SplatBits & ~0xff0000) == 0) {
      // Value = 0x00nn0000: Op=x, Cmode=010x.
      OpCmode = 0x4;
      Imm = SplatBits >> 16;
      break;
    }
    if ((SplatBits & ~0xff000000) == 0) {
      // Value = 0xnn000000: Op=x, Cmode=011x.
      OpCmode = 0x6;
      Imm = SplatBits >> 24;
      break;
    }

    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
    if (type == OtherModImm) return SDValue();

    if ((SplatBits & ~0xffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
      // Value = 0x0000nnff: Op=x, Cmode=1100.
      OpCmode = 0xc;
      Imm = SplatBits >> 8;
      SplatBits |= 0xff;
      break;
    }

    if ((SplatBits & ~0xffffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
      // Value = 0x00nnffff: Op=x, Cmode=1101.
      OpCmode = 0xd;
      Imm = SplatBits >> 16;
      SplatBits |= 0xffff;
      break;
    }

    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
    // VMOV.I32.  A (very) minor optimization would be to replicate the value
    // and fall through here to test for a valid 64-bit splat.  But, then the
    // caller would also need to check and handle the change in size.
    return SDValue();

  case 64: {
    if (type != VMOVModImm)
      return SDValue();
    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
    uint64_t BitMask = 0xff;
    uint64_t Val = 0;
    unsigned ImmMask = 1;
    Imm = 0;
    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
        Val |= BitMask;
        Imm |= ImmMask;
      } else if ((SplatBits & BitMask) != 0) {
        return SDValue();
      }
      BitMask <<= 8;
      ImmMask <<= 1;
    }
    // Op=1, Cmode=1110.
    OpCmode = 0x1e;
    SplatBits = Val;
    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
    break;
  }

  default:
    llvm_unreachable("unexpected size for isNEONModifiedImm");
    return SDValue();
  }

  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
  return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
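
// Worked example (illustrative): a v2i32 splat of 0x00ab0000 matches the
// "Value = 0x00nn0000" case above, so the 8-bit immediate becomes 0xab with
// the corresponding cmode encoding.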

static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
                       bool &ReverseVEXT, unsigned &Imm) {
  unsigned NumElts = VT.getVectorNumElements();
  ReverseVEXT = false;

  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
  if (M[0] < 0)
    return false;

  Imm = M[0];

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element.  The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    // Increment the expected index.  If it wraps around, it may still be
    // a VEXT but the source vectors must be swapped.
    ExpectedElt += 1;
    if (ExpectedElt == NumElts * 2) {
      ExpectedElt = 0;
      ReverseVEXT = true;
    }

    if (M[i] < 0) continue; // ignore UNDEF indices
    if (ExpectedElt != static_cast<unsigned>(M[i]))
      return false;
  }

  // Adjust the index value if the source operands will be swapped.
  if (ReverseVEXT)
    Imm -= NumElts;

  return true;
}

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize.  (The order of the elements
/// within each block of the vector is reversed.)
static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
                       unsigned BlockSize) {
  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
      return false;
  }

  return true;
}
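
// Illustrative VREV32 example: for v8i16, BlockElts = 32 / 16 = 2, so the
// mask <1, 0, 3, 2, 5, 4, 7, 6> (each pair of lanes swapped) passes the
// check above.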

static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
  // range, then 0 is placed into the resulting vector. So pretty much any mask
  // of 8 elements can work here.
  return VT == MVT::v8i8 && M.size() == 8;
}

static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
                       unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
      return false;
  }
  return true;
}
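
// Illustrative VTRN example: for v4i32, WhichResult 0 accepts the mask
// <0, 4, 2, 6> and WhichResult 1 accepts <1, 5, 3, 7>, i.e. the even or odd
// interleaved result of the transpose.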

/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
                                unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
      return false;
  }
  return true;
}

static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
                       unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != 2 * i + WhichResult)
      return false;
  }

  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
                                unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned Half = VT.getVectorNumElements() / 2;
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned j = 0; j != 2; ++j) {
    unsigned Idx = WhichResult;
    for (unsigned i = 0; i != Half; ++i) {
      int MIdx = M[i + j * Half];
      if (MIdx >= 0 && (unsigned) MIdx != Idx)
        return false;
      Idx += 2;
    }
  }

  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
                       unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
      return false;
    Idx += 1;
  }

  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
                                unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
      return false;
    Idx += 1;
  }

  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction).  Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
                                     const ARMSubtarget *ST, DebugLoc dl) {
  uint64_t Val;
  if (!isa<ConstantSDNode>(N))
    return SDValue();
  Val = cast<ConstantSDNode>(N)->getZExtValue();

  if (ST->isThumb1Only()) {
    if (Val <= 255 || ~Val <= 255)
      return DAG.getConstant(Val, MVT::i32);
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
      return DAG.getConstant(Val, MVT::i32);
  }
  return SDValue();
}
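
// Illustrative: in ARM mode 0x0000ff00 has a rotated 8-bit (SO immediate)
// encoding and is returned as a single-MOV constant, while a value such as
// 0x00ff00ff has no such encoding (nor does its complement), so a null
// SDValue is returned and the caller falls back to its default expansion.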
3883 // If this is a case we can't handle, return null and let the default
3884 // expansion code take care of it.
3885 SDValue
ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
,
3886 const ARMSubtarget
*ST
) const {
3887 BuildVectorSDNode
*BVN
= cast
<BuildVectorSDNode
>(Op
.getNode());
3888 DebugLoc dl
= Op
.getDebugLoc();
3889 EVT VT
= Op
.getValueType();
3891 APInt SplatBits
, SplatUndef
;
3892 unsigned SplatBitSize
;
3894 if (BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
)) {
3895 if (SplatBitSize
<= 64) {
3896 // Check if an immediate VMOV works.
3898 SDValue Val
= isNEONModifiedImm(SplatBits
.getZExtValue(),
3899 SplatUndef
.getZExtValue(), SplatBitSize
,
3900 DAG
, VmovVT
, VT
.is128BitVector(),
3902 if (Val
.getNode()) {
3903 SDValue Vmov
= DAG
.getNode(ARMISD::VMOVIMM
, dl
, VmovVT
, Val
);
3904 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, Vmov
);
3907 // Try an immediate VMVN.
3908 uint64_t NegatedImm
= (SplatBits
.getZExtValue() ^
3909 ((1LL << SplatBitSize
) - 1));
3910 Val
= isNEONModifiedImm(NegatedImm
,
3911 SplatUndef
.getZExtValue(), SplatBitSize
,
3912 DAG
, VmovVT
, VT
.is128BitVector(),
3914 if (Val
.getNode()) {
3915 SDValue Vmov
= DAG
.getNode(ARMISD::VMVNIMM
, dl
, VmovVT
, Val
);
3916 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, Vmov
);
3921 // Scan through the operands to see if only one value is used.
3922 unsigned NumElts
= VT
.getVectorNumElements();
3923 bool isOnlyLowElement
= true;
3924 bool usesOnlyOneValue
= true;
3925 bool isConstant
= true;
3927 for (unsigned i
= 0; i
< NumElts
; ++i
) {
3928 SDValue V
= Op
.getOperand(i
);
3929 if (V
.getOpcode() == ISD::UNDEF
)
3932 isOnlyLowElement
= false;
3933 if (!isa
<ConstantFPSDNode
>(V
) && !isa
<ConstantSDNode
>(V
))
3936 if (!Value
.getNode())
3938 else if (V
!= Value
)
3939 usesOnlyOneValue
= false;
3942 if (!Value
.getNode())
3943 return DAG
.getUNDEF(VT
);
3945 if (isOnlyLowElement
)
3946 return DAG
.getNode(ISD::SCALAR_TO_VECTOR
, dl
, VT
, Value
);
3948 unsigned EltSize
= VT
.getVectorElementType().getSizeInBits();
3950 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
3951 // i32 and try again.
3952 if (usesOnlyOneValue
&& EltSize
<= 32) {
3954 return DAG
.getNode(ARMISD::VDUP
, dl
, VT
, Value
);
3955 if (VT
.getVectorElementType().isFloatingPoint()) {
3956 SmallVector
<SDValue
, 8> Ops
;
3957 for (unsigned i
= 0; i
< NumElts
; ++i
)
3958 Ops
.push_back(DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
,
3960 EVT VecVT
= EVT::getVectorVT(*DAG
.getContext(), MVT::i32
, NumElts
);
3961 SDValue Val
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, VecVT
, &Ops
[0], NumElts
);
3962 Val
= LowerBUILD_VECTOR(Val
, DAG
, ST
);
3964 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, Val
);
3966 SDValue Val
= IsSingleInstrConstant(Value
, DAG
, ST
, dl
);
3968 return DAG
.getNode(ARMISD::VDUP
, dl
, VT
, Val
);
3971 // If all elements are constants and the case above didn't get hit, fall back
3972 // to the default expansion, which will generate a load from the constant
3977 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
3979 SDValue shuffle
= ReconstructShuffle(Op
, DAG
);
3980 if (shuffle
!= SDValue())
3984 // Vectors with 32- or 64-bit elements can be built by directly assigning
3985 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
3986 // will be legalized.
3987 if (EltSize
>= 32) {
3988 // Do the expansion with floating-point types, since that is what the VFP
3989 // registers are defined to use, and since i64 is not legal.
3990 EVT EltVT
= EVT::getFloatingPointVT(EltSize
);
3991 EVT VecVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, NumElts
);
3992 SmallVector
<SDValue
, 8> Ops
;
3993 for (unsigned i
= 0; i
< NumElts
; ++i
)
3994 Ops
.push_back(DAG
.getNode(ISD::BITCAST
, dl
, EltVT
, Op
.getOperand(i
)));
3995 SDValue Val
= DAG
.getNode(ARMISD::BUILD_VECTOR
, dl
, VecVT
, &Ops
[0],NumElts
);
3996 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, Val
);
4002 // Gather data to see if the operation can be modelled as a
4003 // shuffle in combination with VEXTs.
4004 SDValue
ARMTargetLowering::ReconstructShuffle(SDValue Op
,
4005 SelectionDAG
&DAG
) const {
4006 DebugLoc dl
= Op
.getDebugLoc();
4007 EVT VT
= Op
.getValueType();
4008 unsigned NumElts
= VT
.getVectorNumElements();
4010 SmallVector
<SDValue
, 2> SourceVecs
;
4011 SmallVector
<unsigned, 2> MinElts
;
4012 SmallVector
<unsigned, 2> MaxElts
;
4014 for (unsigned i
= 0; i
< NumElts
; ++i
) {
4015 SDValue V
= Op
.getOperand(i
);
4016 if (V
.getOpcode() == ISD::UNDEF
)
4018 else if (V
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
) {
4019 // A shuffle can only come from building a vector from various
4020 // elements of other vectors.
4024 // Record this extraction against the appropriate vector if possible...
4025 SDValue SourceVec
= V
.getOperand(0);
4026 unsigned EltNo
= cast
<ConstantSDNode
>(V
.getOperand(1))->getZExtValue();
4027 bool FoundSource
= false;
4028 for (unsigned j
= 0; j
< SourceVecs
.size(); ++j
) {
4029 if (SourceVecs
[j
] == SourceVec
) {
4030 if (MinElts
[j
] > EltNo
)
4032 if (MaxElts
[j
] < EltNo
)
4039 // Or record a new source if not...
4041 SourceVecs
.push_back(SourceVec
);
4042 MinElts
.push_back(EltNo
);
4043 MaxElts
.push_back(EltNo
);
4047 // Currently only do something sane when at most two source vectors
4049 if (SourceVecs
.size() > 2)
4052 SDValue ShuffleSrcs
[2] = {DAG
.getUNDEF(VT
), DAG
.getUNDEF(VT
) };
4053 int VEXTOffsets
[2] = {0, 0};
4055 // This loop extracts the usage patterns of the source vectors
4056 // and prepares appropriate SDValues for a shuffle if possible.
4057 for (unsigned i
= 0; i
< SourceVecs
.size(); ++i
) {
4058 if (SourceVecs
[i
].getValueType() == VT
) {
4059 // No VEXT necessary
4060 ShuffleSrcs
[i
] = SourceVecs
[i
];
4063 } else if (SourceVecs
[i
].getValueType().getVectorNumElements() < NumElts
) {
4064 // It probably isn't worth padding out a smaller vector just to
4065 // break it down again in a shuffle.
4069 // Since only 64-bit and 128-bit vectors are legal on ARM and
4070 // we've eliminated the other cases...
4071 assert(SourceVecs
[i
].getValueType().getVectorNumElements() == 2*NumElts
&&
4072 "unexpected vector sizes in ReconstructShuffle");
4074 if (MaxElts
[i
] - MinElts
[i
] >= NumElts
) {
4075 // Span too large for a VEXT to cope
4079 if (MinElts
[i
] >= NumElts
) {
4080 // The extraction can just take the second half
4081 VEXTOffsets
[i
] = NumElts
;
4082 ShuffleSrcs
[i
] = DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, VT
,
4084 DAG
.getIntPtrConstant(NumElts
));
4085 } else if (MaxElts
[i
] < NumElts
) {
4086 // The extraction can just take the first half
4088 ShuffleSrcs
[i
] = DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, VT
,
4090 DAG
.getIntPtrConstant(0));
4092 // An actual VEXT is needed
4093 VEXTOffsets
[i
] = MinElts
[i
];
4094 SDValue VEXTSrc1
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, VT
,
4096 DAG
.getIntPtrConstant(0));
4097 SDValue VEXTSrc2
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, VT
,
4099 DAG
.getIntPtrConstant(NumElts
));
4100 ShuffleSrcs
[i
] = DAG
.getNode(ARMISD::VEXT
, dl
, VT
, VEXTSrc1
, VEXTSrc2
,
4101 DAG
.getConstant(VEXTOffsets
[i
], MVT::i32
));
4105 SmallVector
<int, 8> Mask
;
4107 for (unsigned i
= 0; i
< NumElts
; ++i
) {
4108 SDValue Entry
= Op
.getOperand(i
);
4109 if (Entry
.getOpcode() == ISD::UNDEF
) {
4114 SDValue ExtractVec
= Entry
.getOperand(0);
4115 int ExtractElt
= cast
<ConstantSDNode
>(Op
.getOperand(i
)
4116 .getOperand(1))->getSExtValue();
4117 if (ExtractVec
== SourceVecs
[0]) {
4118 Mask
.push_back(ExtractElt
- VEXTOffsets
[0]);
4120 Mask
.push_back(ExtractElt
+ NumElts
- VEXTOffsets
[1]);
4124 // Final check before we try to produce nonsense...
4125 if (isShuffleMaskLegal(Mask
, VT
))
4126 return DAG
.getVectorShuffle(VT
, dl
, ShuffleSrcs
[0], ShuffleSrcs
[1],
4132 /// isShuffleMaskLegal - Targets can use this to indicate that they only
4133 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4134 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4135 /// are assumed to be legal.
4137 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl
<int> &M
,
4139 if (VT
.getVectorNumElements() == 4 &&
4140 (VT
.is128BitVector() || VT
.is64BitVector())) {
4141 unsigned PFIndexes
[4];
4142 for (unsigned i
= 0; i
!= 4; ++i
) {
4146 PFIndexes
[i
] = M
[i
];
4149 // Compute the index in the perfect shuffle table.
4150 unsigned PFTableIndex
=
4151 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
4152 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
4153 unsigned Cost
= (PFEntry
>> 30);
4160 unsigned Imm
, WhichResult
;
4162 unsigned EltSize
= VT
.getVectorElementType().getSizeInBits();
4163 return (EltSize
>= 32 ||
4164 ShuffleVectorSDNode::isSplatMask(&M
[0], VT
) ||
4165 isVREVMask(M
, VT
, 64) ||
4166 isVREVMask(M
, VT
, 32) ||
4167 isVREVMask(M
, VT
, 16) ||
4168 isVEXTMask(M
, VT
, ReverseVEXT
, Imm
) ||
4169 isVTBLMask(M
, VT
) ||
4170 isVTRNMask(M
, VT
, WhichResult
) ||
4171 isVUZPMask(M
, VT
, WhichResult
) ||
4172 isVZIPMask(M
, VT
, WhichResult
) ||
4173 isVTRN_v_undef_Mask(M
, VT
, WhichResult
) ||
4174 isVUZP_v_undef_Mask(M
, VT
, WhichResult
) ||
4175 isVZIP_v_undef_Mask(M
, VT
, WhichResult
));
4178 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4179 /// the specified operations to build the shuffle.
4180 static SDValue
GeneratePerfectShuffle(unsigned PFEntry
, SDValue LHS
,
4181 SDValue RHS
, SelectionDAG
&DAG
,
4183 unsigned OpNum
= (PFEntry
>> 26) & 0x0F;
4184 unsigned LHSID
= (PFEntry
>> 13) & ((1 << 13)-1);
4185 unsigned RHSID
= (PFEntry
>> 0) & ((1 << 13)-1);
4188 OP_COPY
= 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
4197 OP_VUZPL
, // VUZP, left result
4198 OP_VUZPR
, // VUZP, right result
4199 OP_VZIPL
, // VZIP, left result
4200 OP_VZIPR
, // VZIP, right result
4201 OP_VTRNL
, // VTRN, left result
4202 OP_VTRNR
// VTRN, right result
4205 if (OpNum
== OP_COPY
) {
4206 if (LHSID
== (1*9+2)*9+3) return LHS
;
4207 assert(LHSID
== ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
4211 SDValue OpLHS
, OpRHS
;
4212 OpLHS
= GeneratePerfectShuffle(PerfectShuffleTable
[LHSID
], LHS
, RHS
, DAG
, dl
);
4213 OpRHS
= GeneratePerfectShuffle(PerfectShuffleTable
[RHSID
], LHS
, RHS
, DAG
, dl
);
4214 EVT VT
= OpLHS
.getValueType();
4217 default: llvm_unreachable("Unknown shuffle opcode!");
4219 // VREV divides the vector in half and swaps within the half.
4220 if (VT
.getVectorElementType() == MVT::i32
||
4221 VT
.getVectorElementType() == MVT::f32
)
4222 return DAG
.getNode(ARMISD::VREV64
, dl
, VT
, OpLHS
);
4223 // vrev <4 x i16> -> VREV32
4224 if (VT
.getVectorElementType() == MVT::i16
)
4225 return DAG
.getNode(ARMISD::VREV32
, dl
, VT
, OpLHS
);
4226 // vrev <4 x i8> -> VREV16
4227 assert(VT
.getVectorElementType() == MVT::i8
);
4228 return DAG
.getNode(ARMISD::VREV16
, dl
, VT
, OpLHS
);
4233 return DAG
.getNode(ARMISD::VDUPLANE
, dl
, VT
,
4234 OpLHS
, DAG
.getConstant(OpNum
-OP_VDUP0
, MVT::i32
));
4238 return DAG
.getNode(ARMISD::VEXT
, dl
, VT
,
4240 DAG
.getConstant(OpNum
-OP_VEXT1
+1, MVT::i32
));
4243 return DAG
.getNode(ARMISD::VUZP
, dl
, DAG
.getVTList(VT
, VT
),
4244 OpLHS
, OpRHS
).getValue(OpNum
-OP_VUZPL
);
4247 return DAG
.getNode(ARMISD::VZIP
, dl
, DAG
.getVTList(VT
, VT
),
4248 OpLHS
, OpRHS
).getValue(OpNum
-OP_VZIPL
);
4251 return DAG
.getNode(ARMISD::VTRN
, dl
, DAG
.getVTList(VT
, VT
),
4252 OpLHS
, OpRHS
).getValue(OpNum
-OP_VTRNL
);
4256 static SDValue
LowerVECTOR_SHUFFLEv8i8(SDValue Op
,
4257 SmallVectorImpl
<int> &ShuffleMask
,
4258 SelectionDAG
&DAG
) {
4259 // Check to see if we can use the VTBL instruction.
4260 SDValue V1
= Op
.getOperand(0);
4261 SDValue V2
= Op
.getOperand(1);
4262 DebugLoc DL
= Op
.getDebugLoc();
4264 SmallVector
<SDValue
, 8> VTBLMask
;
4265 for (SmallVectorImpl
<int>::iterator
4266 I
= ShuffleMask
.begin(), E
= ShuffleMask
.end(); I
!= E
; ++I
)
4267 VTBLMask
.push_back(DAG
.getConstant(*I
, MVT::i32
));
4269 if (V2
.getNode()->getOpcode() == ISD::UNDEF
)
4270 return DAG
.getNode(ARMISD::VTBL1
, DL
, MVT::v8i8
, V1
,
4271 DAG
.getNode(ISD::BUILD_VECTOR
, DL
, MVT::v8i8
,
4274 return DAG
.getNode(ARMISD::VTBL2
, DL
, MVT::v8i8
, V1
, V2
,
4275 DAG
.getNode(ISD::BUILD_VECTOR
, DL
, MVT::v8i8
,
4279 static SDValue
LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
) {
4280 SDValue V1
= Op
.getOperand(0);
4281 SDValue V2
= Op
.getOperand(1);
4282 DebugLoc dl
= Op
.getDebugLoc();
4283 EVT VT
= Op
.getValueType();
4284 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(Op
.getNode());
4285 SmallVector
<int, 8> ShuffleMask
;
4287 // Convert shuffles that are directly supported on NEON to target-specific
4288 // DAG nodes, instead of keeping them as shuffles and matching them again
4289 // during code selection. This is more efficient and avoids the possibility
4290 // of inconsistencies between legalization and selection.
4291 // FIXME: floating-point vectors should be canonicalized to integer vectors
4292 // of the same time so that they get CSEd properly.
4293 SVN
->getMask(ShuffleMask
);
4295 unsigned EltSize
= VT
.getVectorElementType().getSizeInBits();
4296 if (EltSize
<= 32) {
4297 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask
[0], VT
)) {
4298 int Lane
= SVN
->getSplatIndex();
4299 // If this is undef splat, generate it via "just" vdup, if possible.
4300 if (Lane
== -1) Lane
= 0;
4302 if (Lane
== 0 && V1
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
4303 return DAG
.getNode(ARMISD::VDUP
, dl
, VT
, V1
.getOperand(0));
4305 return DAG
.getNode(ARMISD::VDUPLANE
, dl
, VT
, V1
,
4306 DAG
.getConstant(Lane
, MVT::i32
));
4311 if (isVEXTMask(ShuffleMask
, VT
, ReverseVEXT
, Imm
)) {
4314 return DAG
.getNode(ARMISD::VEXT
, dl
, VT
, V1
, V2
,
4315 DAG
.getConstant(Imm
, MVT::i32
));
4318 if (isVREVMask(ShuffleMask
, VT
, 64))
4319 return DAG
.getNode(ARMISD::VREV64
, dl
, VT
, V1
);
4320 if (isVREVMask(ShuffleMask
, VT
, 32))
4321 return DAG
.getNode(ARMISD::VREV32
, dl
, VT
, V1
);
4322 if (isVREVMask(ShuffleMask
, VT
, 16))
4323 return DAG
.getNode(ARMISD::VREV16
, dl
, VT
, V1
);
4325 // Check for Neon shuffles that modify both input vectors in place.
4326 // If both results are used, i.e., if there are two shuffles with the same
4327 // source operands and with masks corresponding to both results of one of
4328 // these operations, DAG memoization will ensure that a single node is
4329 // used for both shuffles.
4330 unsigned WhichResult
;
4331 if (isVTRNMask(ShuffleMask
, VT
, WhichResult
))
4332 return DAG
.getNode(ARMISD::VTRN
, dl
, DAG
.getVTList(VT
, VT
),
4333 V1
, V2
).getValue(WhichResult
);
4334 if (isVUZPMask(ShuffleMask
, VT
, WhichResult
))
4335 return DAG
.getNode(ARMISD::VUZP
, dl
, DAG
.getVTList(VT
, VT
),
4336 V1
, V2
).getValue(WhichResult
);
4337 if (isVZIPMask(ShuffleMask
, VT
, WhichResult
))
4338 return DAG
.getNode(ARMISD::VZIP
, dl
, DAG
.getVTList(VT
, VT
),
4339 V1
, V2
).getValue(WhichResult
);
4341 if (isVTRN_v_undef_Mask(ShuffleMask
, VT
, WhichResult
))
4342 return DAG
.getNode(ARMISD::VTRN
, dl
, DAG
.getVTList(VT
, VT
),
4343 V1
, V1
).getValue(WhichResult
);
4344 if (isVUZP_v_undef_Mask(ShuffleMask
, VT
, WhichResult
))
4345 return DAG
.getNode(ARMISD::VUZP
, dl
, DAG
.getVTList(VT
, VT
),
4346 V1
, V1
).getValue(WhichResult
);
4347 if (isVZIP_v_undef_Mask(ShuffleMask
, VT
, WhichResult
))
4348 return DAG
.getNode(ARMISD::VZIP
, dl
, DAG
.getVTList(VT
, VT
),
4349 V1
, V1
).getValue(WhichResult
);
4352 // If the shuffle is not directly supported and it has 4 elements, use
4353 // the PerfectShuffle-generated table to synthesize it from other shuffles.
4354 unsigned NumElts
= VT
.getVectorNumElements();
4356 unsigned PFIndexes
[4];
4357 for (unsigned i
= 0; i
!= 4; ++i
) {
4358 if (ShuffleMask
[i
] < 0)
4361 PFIndexes
[i
] = ShuffleMask
[i
];
4364 // Compute the index in the perfect shuffle table.
4365 unsigned PFTableIndex
=
4366 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
4367 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
4368 unsigned Cost
= (PFEntry
>> 30);
4371 return GeneratePerfectShuffle(PFEntry
, V1
, V2
, DAG
, dl
);
4374 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
4375 if (EltSize
>= 32) {
4376 // Do the expansion with floating-point types, since that is what the VFP
4377 // registers are defined to use, and since i64 is not legal.
4378 EVT EltVT
= EVT::getFloatingPointVT(EltSize
);
4379 EVT VecVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, NumElts
);
4380 V1
= DAG
.getNode(ISD::BITCAST
, dl
, VecVT
, V1
);
4381 V2
= DAG
.getNode(ISD::BITCAST
, dl
, VecVT
, V2
);
4382 SmallVector
<SDValue
, 8> Ops
;
4383 for (unsigned i
= 0; i
< NumElts
; ++i
) {
4384 if (ShuffleMask
[i
] < 0)
4385 Ops
.push_back(DAG
.getUNDEF(EltVT
));
4387 Ops
.push_back(DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, EltVT
,
4388 ShuffleMask
[i
] < (int)NumElts
? V1
: V2
,
4389 DAG
.getConstant(ShuffleMask
[i
] & (NumElts
-1),
4392 SDValue Val
= DAG
.getNode(ARMISD::BUILD_VECTOR
, dl
, VecVT
, &Ops
[0],NumElts
);
4393 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, Val
);
4396 if (VT
== MVT::v8i8
) {
4397 SDValue NewOp
= LowerVECTOR_SHUFFLEv8i8(Op
, ShuffleMask
, DAG
);
4398 if (NewOp
.getNode())
4405 static SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) {
4406 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
4407 SDValue Lane
= Op
.getOperand(1);
4408 if (!isa
<ConstantSDNode
>(Lane
))
4411 SDValue Vec
= Op
.getOperand(0);
4412 if (Op
.getValueType() == MVT::i32
&&
4413 Vec
.getValueType().getVectorElementType().getSizeInBits() < 32) {
4414 DebugLoc dl
= Op
.getDebugLoc();
4415 return DAG
.getNode(ARMISD::VGETLANEu
, dl
, MVT::i32
, Vec
, Lane
);
4421 static SDValue
LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) {
4422 // The only time a CONCAT_VECTORS operation can have legal types is when
4423 // two 64-bit vectors are concatenated to a 128-bit vector.
4424 assert(Op
.getValueType().is128BitVector() && Op
.getNumOperands() == 2 &&
4425 "unexpected CONCAT_VECTORS");
4426 DebugLoc dl
= Op
.getDebugLoc();
4427 SDValue Val
= DAG
.getUNDEF(MVT::v2f64
);
4428 SDValue Op0
= Op
.getOperand(0);
4429 SDValue Op1
= Op
.getOperand(1);
4430 if (Op0
.getOpcode() != ISD::UNDEF
)
4431 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
, Val
,
4432 DAG
.getNode(ISD::BITCAST
, dl
, MVT::f64
, Op0
),
4433 DAG
.getIntPtrConstant(0));
4434 if (Op1
.getOpcode() != ISD::UNDEF
)
4435 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
, Val
,
4436 DAG
.getNode(ISD::BITCAST
, dl
, MVT::f64
, Op1
),
4437 DAG
.getIntPtrConstant(1));
4438 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Val
);
4441 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
4442 /// element has been zero/sign-extended, depending on the isSigned parameter,
4443 /// from an integer type half its size.
4444 static bool isExtendedBUILD_VECTOR(SDNode
*N
, SelectionDAG
&DAG
,
4446 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
4447 EVT VT
= N
->getValueType(0);
4448 if (VT
== MVT::v2i64
&& N
->getOpcode() == ISD::BITCAST
) {
4449 SDNode
*BVN
= N
->getOperand(0).getNode();
4450 if (BVN
->getValueType(0) != MVT::v4i32
||
4451 BVN
->getOpcode() != ISD::BUILD_VECTOR
)
4453 unsigned LoElt
= DAG
.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4454 unsigned HiElt
= 1 - LoElt
;
4455 ConstantSDNode
*Lo0
= dyn_cast
<ConstantSDNode
>(BVN
->getOperand(LoElt
));
4456 ConstantSDNode
*Hi0
= dyn_cast
<ConstantSDNode
>(BVN
->getOperand(HiElt
));
4457 ConstantSDNode
*Lo1
= dyn_cast
<ConstantSDNode
>(BVN
->getOperand(LoElt
+2));
4458 ConstantSDNode
*Hi1
= dyn_cast
<ConstantSDNode
>(BVN
->getOperand(HiElt
+2));
4459 if (!Lo0
|| !Hi0
|| !Lo1
|| !Hi1
)
4462 if (Hi0
->getSExtValue() == Lo0
->getSExtValue() >> 32 &&
4463 Hi1
->getSExtValue() == Lo1
->getSExtValue() >> 32)
4466 if (Hi0
->isNullValue() && Hi1
->isNullValue())
4472 if (N
->getOpcode() != ISD::BUILD_VECTOR
)
4475 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
4476 SDNode
*Elt
= N
->getOperand(i
).getNode();
4477 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Elt
)) {
4478 unsigned EltSize
= VT
.getVectorElementType().getSizeInBits();
4479 unsigned HalfSize
= EltSize
/ 2;
4481 int64_t SExtVal
= C
->getSExtValue();
4482 if ((SExtVal
>> HalfSize
) != (SExtVal
>> EltSize
))
4485 if ((C
->getZExtValue() >> HalfSize
) != 0)
4496 /// isSignExtended - Check if a node is a vector value that is sign-extended
4497 /// or a constant BUILD_VECTOR with sign-extended elements.
4498 static bool isSignExtended(SDNode
*N
, SelectionDAG
&DAG
) {
4499 if (N
->getOpcode() == ISD::SIGN_EXTEND
|| ISD::isSEXTLoad(N
))
4501 if (isExtendedBUILD_VECTOR(N
, DAG
, true))
4506 /// isZeroExtended - Check if a node is a vector value that is zero-extended
4507 /// or a constant BUILD_VECTOR with zero-extended elements.
4508 static bool isZeroExtended(SDNode
*N
, SelectionDAG
&DAG
) {
4509 if (N
->getOpcode() == ISD::ZERO_EXTEND
|| ISD::isZEXTLoad(N
))
4511 if (isExtendedBUILD_VECTOR(N
, DAG
, false))
/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
/// load, or BUILD_VECTOR with extended elements, return the unextended value.
static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
    return N->getOperand(0);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
                       LD->isNonTemporal(), LD->getAlignment());
  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
  // have been legalized as a BITCAST from v4i32.
  if (N->getOpcode() == ISD::BITCAST) {
    SDNode *BVN = N->getOperand(0).getNode();
    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
  }
  // Construct a new BUILD_VECTOR with elements truncated to half the size.
  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
  EVT VT = N->getValueType(0);
  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
  unsigned NumElts = VT.getVectorNumElements();
  MVT TruncVT = MVT::getIntegerVT(EltSize);
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i != NumElts; ++i) {
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
    const APInt &CInt = C->getAPIntValue();
    Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
  }
  return false;
}

static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
  }
  return false;
}
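// A 128-bit multiply whose operands are both sign-extended (or both
// zero-extended) from 64-bit vectors can be selected as a single widening
// multiply.  For example, (mul (sext v8i8 a), (sext v8i8 b)) producing v8i16
// maps onto VMULL.S8, and the zero-extended form maps onto VMULL.U8.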
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  // Multiplications are only custom-lowered for 128-bit vectors so that
  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
  EVT VT = Op.getValueType();
  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;
  bool isMLA = false;
  bool isN0SExt = isSignExtended(N0, DAG);
  bool isN1SExt = isSignExtended(N1, DAG);
  if (isN0SExt && isN1SExt)
    NewOpc = ARMISD::VMULLs;
  else {
    bool isN0ZExt = isZeroExtended(N0, DAG);
    bool isN1ZExt = isZeroExtended(N1, DAG);
    if (isN0ZExt && isN1ZExt)
      NewOpc = ARMISD::VMULLu;
    else if (isN1SExt || isN1ZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLs;
        isMLA = true;
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
        std::swap(N0, N1);
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      if (VT == MVT::v2i64)
        // Fall through to expand this.  It is not legal.
        return SDValue();
      // Other vector multiplications are legal.
      return Op;
    }
  }

  // Legalize to a VMULL instruction.
  DebugLoc DL = Op.getDebugLoc();
  SDValue Op0;
  SDValue Op1 = SkipExtension(N1, DAG);
  if (!isMLA) {
    Op0 = SkipExtension(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
  // isel lowering to take advantage of no-stall back to back vmul + vmla.
  //   vmull q0, d4, d6
  //   vmlal q0, d5, d6
  // is faster than
  //   vaddl q0, d4, d5
  //   vmovl q1, d6
  //   vmul  q0, q0, q1
  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00),
                                 Op1),
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01),
                                 Op1));
}
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
  // Convert to float.
  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
  // Get reciprocal estimate.
  // float4 recip = vrecpeq_f32(yf);
  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                  DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
  // Because char has a smaller range than uchar, we can actually get away
  // without any newton steps.  This requires that we use a weird bias
  // of 0xb000, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
  Y = DAG.getConstant(0xb000, MVT::i32);
  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
  // Convert back to short.
  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
  return X;
}
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
  SDValue N2;
  // Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and one refinement step.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   N1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Because short has a smaller range than ushort, we can actually get away
  // with only a single newton step.  This requires that we use a weird bias
  // of 89, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(0x89, MVT::i32);
  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
  // Convert back to integer and return.
  // return vmovn_s32(vcvt_s32_f32(result));
  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
  return N0;
}
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::SDIV");

  DebugLoc dl = Op.getDebugLoc();
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2, N3;

  if (VT == MVT::v8i8) {
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);

    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(4));
    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(4));
    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(0));
    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(0));

    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16

    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
    N0 = LowerCONCAT_VECTORS(N0, DAG);

    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
    return N0;
  }
  return LowerSDIV_v4i16(N0, N1, dl, DAG);
}
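// Unsigned division reuses the signed helpers: v8i8 operands are zero-extended
// to v8i16, divided as two v4i16 halves with LowerSDIV_v4i16, and narrowed
// back with the vqmovnsu saturating-narrow intrinsic.  The direct v4i16 path
// below uses two reciprocal refinement steps rather than one because the
// unsigned 16-bit range is wider than the signed range handled above.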
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::UDIV");

  DebugLoc dl = Op.getDebugLoc();
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2, N3;

  if (VT == MVT::v8i8) {
    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);

    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(4));
    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(4));
    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(0));
    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(0));

    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16

    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
    N0 = LowerCONCAT_VECTORS(N0, DAG);

    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
                     N0);
    return N0;
  }

  // v4i16 sdiv ... Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and two refinement steps.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Simply multiplying by the reciprocal estimate can leave us a few ulps
  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
  // and that it will never cause us to return an answer too large).
  // float4 result = as_float4(as_int4(xf*recip) + 2);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(2, MVT::i32);
  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
  // Convert back to integer and return.
  // return vmovn_u32(vcvt_s32_f32(result));
  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
  return N0;
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Don't know how to custom lower this!");
  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:
    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
      LowerGlobalAddressELF(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:        return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
  case ISD::VASTART:       return LowerVASTART(Op, DAG);
  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                               Subtarget);
  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::MUL:           return LowerMUL(Op, DAG);
  case ISD::SDIV:          return LowerSDIV(Op, DAG);
  case ISD::UDIV:          return LowerUDIV(Op, DAG);
  }
  return SDValue();
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDValue Res;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this!");
    break;
  case ISD::BITCAST:
    Res = ExpandBITCAST(N, DAG);
    break;
  case ISD::SRL:
  case ISD::SRA:
    Res = Expand64BitShift(N, DAG, Subtarget);
    break;
  }
  if (Res.getNode())
    Results.push_back(Res);
}
//===----------------------------------------------------------------------===//
//                           ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
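// Atomic operations are expanded here into explicit load-exclusive /
// store-exclusive (LDREX/STREX) loops.  EmitAtomicCmpSwap builds the two-block
// compare-and-swap loop, EmitAtomicBinary handles the generic read-modify-write
// operations (and plain swap when BinOpcode is 0), and EmitAtomicBinaryMinMax
// handles min/max via a compare and a conditional move.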
MachineBasicBlock *
ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
                                     MachineBasicBlock *BB,
                                     unsigned Size) const {
  unsigned dest    = MI->getOperand(0).getReg();
  unsigned ptr     = MI->getOperand(1).getReg();
  unsigned oldval  = MI->getOperand(2).getReg();
  unsigned newval  = MI->getOperand(3).getReg();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  unsigned scratch =
    MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
                                       : ARM::GPRRegisterClass);

  if (isThumb2) {
    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
    MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
    MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
  }

  unsigned ldrOpc, strOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    break;
  }

  MachineFunction *MF = BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It; // insert the new blocks after the current block

  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  //  thisMBB:
  //   ...
  //   fallthrough --> loop1MBB
  BB->addSuccessor(loop1MBB);

  // loop1MBB:
  //   ldrex dest, [ptr]
  //   cmp dest, oldval
  //   bne exitMBB
  BB = loop1MBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                 .addReg(dest).addReg(oldval));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
  BB->addSuccessor(loop2MBB);
  BB->addSuccessor(exitMBB);

  // loop2MBB:
  //   strex scratch, newval, [ptr]
  //   cmp scratch, #0
  //   bne loop1MBB
  BB = loop2MBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
                 .addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
  BB->addSuccessor(loop1MBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
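// Note that ATOMIC_LOAD_NAND is lowered to a BIC in the loop below; because
// BIC computes Rn & ~Rm, the operands are swapped (incr first, dest second)
// so that it is the loaded value that gets complemented.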
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    unsigned Size, unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned incr = MI->getOperand(2).getReg();
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  if (isThumb2) {
    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
  }

  unsigned ldrOpc, strOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    break;
  }

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  TargetRegisterClass *TRC =
    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
  unsigned scratch = MRI.createVirtualRegister(TRC);
  unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   ldrex dest, ptr
  //   <binop> scratch2, dest, incr
  //   strex scratch, scratch2, ptr
  //   cmp scratch, #0
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
  if (BinOpcode) {
    // operand order needs to go the other way for NAND
    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
                     addReg(incr).addReg(dest)).addReg(0);
    else
      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
                     addReg(dest).addReg(incr)).addReg(0);
  }

  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
                 .addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
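// Min/max atomics cannot be expressed with a single data-processing opcode, so
// the loop body below compares the loaded value (sign-extended first for the
// 8- and 16-bit signed variants) against the operand and selects the value to
// store back with a predicated MOVCC using the caller-supplied condition code.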
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned Size,
                                          bool signExtend,
                                          ARMCC::CondCodes Cond) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned incr = MI->getOperand(2).getReg();
  unsigned oldval = dest;
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  if (isThumb2) {
    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
  }

  unsigned ldrOpc, strOpc, extendOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    extendOpc = 0;
    break;
  }

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  TargetRegisterClass *TRC =
    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
  unsigned scratch = MRI.createVirtualRegister(TRC);
  unsigned scratch2 = MRI.createVirtualRegister(TRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   ldrex dest, ptr
  //   (sign extend dest, if required)
  //   cmp dest, incr
  //   cmov.cond scratch2, dest, incr
  //   strex scratch, scratch2, ptr
  //   cmp scratch, #0
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));

  // Sign extend the value, if necessary.
  if (signExtend && extendOpc) {
    oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
  }

  // Build compare and cmov instructions.
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                 .addReg(oldval).addReg(incr));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
    .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);

  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
                 .addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
       E = MBB->succ_end(); I != E; ++I)
    if (*I != Succ)
      return *I;
  llvm_unreachable("Expecting a BB with two successors!");
}

// FIXME: This opcode table should obviously be expressed in the target
// description. We probably just need a "machine opcode" value in the pseudo
// instruction. But the ideal solution may be to simply remove the "S" version
// of the opcode altogether.
struct AddSubFlagsOpcodePair {
  unsigned PseudoOpc;
  unsigned MachineOpc;
};

static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  {ARM::ADCSri, ARM::ADCri},
  {ARM::ADCSrr, ARM::ADCrr},
  {ARM::ADCSrs, ARM::ADCrs},
  {ARM::SBCSri, ARM::SBCri},
  {ARM::SBCSrr, ARM::SBCrr},
  {ARM::SBCSrs, ARM::SBCrs},
  {ARM::RSBSri, ARM::RSBri},
  {ARM::RSBSrr, ARM::RSBrr},
  {ARM::RSBSrs, ARM::RSBrs},
  {ARM::RSCSri, ARM::RSCri},
  {ARM::RSCSrs, ARM::RSCrs},
  {ARM::t2ADCSri, ARM::t2ADCri},
  {ARM::t2ADCSrr, ARM::t2ADCrr},
  {ARM::t2ADCSrs, ARM::t2ADCrs},
  {ARM::t2SBCSri, ARM::t2SBCri},
  {ARM::t2SBCSrr, ARM::t2SBCrr},
  {ARM::t2SBCSrs, ARM::t2SBCrs},
  {ARM::t2RSBSri, ARM::t2RSBri},
  {ARM::t2RSBSrs, ARM::t2RSBrs},
};
// Convert an Add or Subtract with Carry and Flags to a generic opcode with
// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
//
// FIXME: Somewhere we should assert that CPSR<def> is in the correct
// position to be recognized by the target description as the 'S' bit.
bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
                                             MachineBasicBlock *BB) const {
  unsigned OldOpc = MI->getOpcode();
  unsigned NewOpc = 0;

  // This is only called for instructions that need remapping, so iterating over
  // the tiny opcode table is not costly.
  static const int NPairs =
    sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
  for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
         *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
    if (OldOpc == Pair->PseudoOpc) {
      NewOpc = Pair->MachineOpc;
      break;
    }
  }
  if (!NewOpc)
    return false;

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
  for (unsigned i = 0; i < MI->getNumOperands(); ++i)
    MIB.addOperand(MI->getOperand(i));
  AddDefaultPred(MIB);
  MIB.addReg(ARM::CPSR, RegState::Define); // S bit
  MI->eraseFromParent();
  return true;
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();
  switch (MI->getOpcode()) {
  default:
    if (RemapAddSubWithFlags(MI, BB))
      return BB;
    llvm_unreachable("Unexpected instr type to insert");

  case ARM::ATOMIC_LOAD_ADD_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
  case ARM::ATOMIC_LOAD_ADD_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
  case ARM::ATOMIC_LOAD_ADD_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);

  case ARM::ATOMIC_LOAD_AND_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
  case ARM::ATOMIC_LOAD_AND_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
  case ARM::ATOMIC_LOAD_AND_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);

  case ARM::ATOMIC_LOAD_OR_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
  case ARM::ATOMIC_LOAD_OR_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
  case ARM::ATOMIC_LOAD_OR_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);

  case ARM::ATOMIC_LOAD_XOR_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
  case ARM::ATOMIC_LOAD_XOR_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
  case ARM::ATOMIC_LOAD_XOR_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);

  case ARM::ATOMIC_LOAD_NAND_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
  case ARM::ATOMIC_LOAD_NAND_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
  case ARM::ATOMIC_LOAD_NAND_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);

  case ARM::ATOMIC_LOAD_SUB_I8:
     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
  case ARM::ATOMIC_LOAD_SUB_I16:
     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
  case ARM::ATOMIC_LOAD_SUB_I32:
     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);

  case ARM::ATOMIC_LOAD_MIN_I8:
     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
  case ARM::ATOMIC_LOAD_MIN_I16:
     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
  case ARM::ATOMIC_LOAD_MIN_I32:
     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);

  case ARM::ATOMIC_LOAD_MAX_I8:
     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
  case ARM::ATOMIC_LOAD_MAX_I16:
     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
  case ARM::ATOMIC_LOAD_MAX_I32:
     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);

  case ARM::ATOMIC_LOAD_UMIN_I8:
     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
  case ARM::ATOMIC_LOAD_UMIN_I16:
     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
  case ARM::ATOMIC_LOAD_UMIN_I32:
     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);

  case ARM::ATOMIC_LOAD_UMAX_I8:
     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
  case ARM::ATOMIC_LOAD_UMAX_I16:
     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
  case ARM::ATOMIC_LOAD_UMAX_I32:
     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);

  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);

  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);

  case ARM::tMOVCCr_pseudo: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB  = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(ARM::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    MI->eraseFromParent();   // The pseudo instruction is gone now.
    return BB;
  }

  case ARM::BCCi64:
  case ARM::BCCZi64: {
    // If there is an unconditional branch to the other successor, remove it.
    BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());

    // Compare both parts that make up the double comparison separately for
    // equality.
    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;

    unsigned LHS1 = MI->getOperand(1).getReg();
    unsigned LHS2 = MI->getOperand(2).getReg();
    if (RHSisZero) {
      AddDefaultPred(BuildMI(BB, dl,
                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                     .addReg(LHS1).addImm(0));
      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
        .addReg(LHS2).addImm(0)
        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
    } else {
      unsigned RHS1 = MI->getOperand(3).getReg();
      unsigned RHS2 = MI->getOperand(4).getReg();
      AddDefaultPred(BuildMI(BB, dl,
                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                     .addReg(LHS1).addReg(RHS1));
      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
        .addReg(LHS2).addReg(RHS2)
        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
    }

    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
    if (MI->getOperand(0).getImm() == ARMCC::NE)
      std::swap(destMBB, exitMBB);

    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
      .addMBB(exitMBB);

    MI->eraseFromParent();   // The pseudo instruction is gone now.
    return BB;
  }
  }
}
//===----------------------------------------------------------------------===//
//                           ARM Optimization Hooks
//===----------------------------------------------------------------------===//

static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  unsigned Opc = N->getOpcode();
  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
  ISD::CondCode CC = ISD::SETCC_INVALID;

  if (isSlctCC) {
    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
  } else {
    SDValue CCOp = Slct.getOperand(0);
    if (CCOp.getOpcode() == ISD::SETCC)
      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
  }

  bool DoXform = false;
  bool InvCC = false;
  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
          "Bad input!");

  if (LHS.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(LHS)->isNullValue()) {
    DoXform = true;
  } else if (CC != ISD::SETCC_INVALID &&
             RHS.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHS)->isNullValue()) {
    std::swap(LHS, RHS);
    SDValue Op0 = Slct.getOperand(0);
    EVT OpVT = isSlctCC ? Op0.getValueType() :
                          Op0.getOperand(0).getValueType();
    bool isInt = OpVT.isInteger();
    CC = ISD::getSetCCInverse(CC, isInt);

    if (!TLI.isCondCodeLegal(CC, OpVT))
      return SDValue();         // Inverse operator isn't legal.

    DoXform = true;
    InvCC = true;
  }

  if (DoXform) {
    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
    if (isSlctCC)
      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
                             Slct.getOperand(0), Slct.getOperand(1), CC);
    SDValue CCOp = Slct.getOperand(0);
    if (InvCC)
      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
                       CCOp, OtherOp, Result);
  }
  return SDValue();
}
// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
// (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {

  // Only perform optimization if after legalize, and if NEON is available. We
  // also expect both operands to be BUILD_VECTORs.
  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
      || N0.getOpcode() != ISD::BUILD_VECTOR
      || N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
  // operands, where N is the size of the formed vector.
  // Each EXTRACT_VECTOR should have the same input vector and odd or even
  // index such that we have a pair wise add pattern.

  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = N0->getOperand(0)->getOperand(0);
  SDNode *V = Vec.getNode();
  unsigned nextIndex = 0;

  // For each operand of the ADD which is a BUILD_VECTOR,
  // check to see if each of their operands is an EXTRACT_VECTOR with
  // the same vector and appropriate index.
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // First operand is the vector, verify it is the same.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // Second is the constant, verify it is correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // For the constant, we want to see all the even or all the odd.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex += 2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get widened type and narrowed type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8:  widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      assert(0 && "Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
}
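// Example of the pattern AddCombineToVPADDL matches: for
//   add (build_vector (extract v, 0), (extract v, 2)),
//       (build_vector (extract v, 1), (extract v, 3))
// every pair of lanes comes from the same source vector at consecutive
// even/odd indices, so the whole add can be replaced by a single vpaddl of v.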
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1.  This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                          const ARMSubtarget *Subtarget){

  // Attempt to create vpaddl for this add.
  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
    if (Result.getNode()) return Result;
  }
  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // If that didn't work, try again with the operands commuted.
  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}

/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}
/// PerformVMULCombine
/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
/// special multiplier accumulator forwarding.
///   vmul d3, d0, d2
///   vmla d3, d1, d2
/// is faster than
///   vadd d3, d0, d1
///   vmul d3, d3, d2
static SDValue PerformVMULCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  if (!Subtarget->hasVMLxForwarding())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
    Opcode = N1.getOpcode();
    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
        Opcode != ISD::FADD && Opcode != ISD::FSUB)
      return SDValue();
    std::swap(N0, N1);
  }

  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();
  SDValue N00 = N0->getOperand(0);
  SDValue N01 = N0->getOperand(1);
  return DAG.getNode(Opcode, DL, VT,
                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}

static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (Subtarget->isThumb1Only())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT.is64BitVector() || VT.is128BitVector())
    return PerformVMULCombine(N, DCI, Subtarget);
  if (VT != MVT::i32)
    return SDValue();

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();

  uint64_t MulAmt = C->getZExtValue();
  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
  ShiftAmt = ShiftAmt & (32 - 1);
  SDValue V = N->getOperand(0);
  DebugLoc DL = N->getDebugLoc();

  SDValue Res;
  MulAmt >>= ShiftAmt;
  if (isPowerOf2_32(MulAmt - 1)) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    Res = DAG.getNode(ISD::ADD, DL, VT,
                      V, DAG.getNode(ISD::SHL, DL, VT,
                                     V, DAG.getConstant(Log2_32(MulAmt-1),
                                                        MVT::i32)));
  } else if (isPowerOf2_32(MulAmt + 1)) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    Res = DAG.getNode(ISD::SUB, DL, VT,
                      DAG.getNode(ISD::SHL, DL, VT,
                                  V, DAG.getConstant(Log2_32(MulAmt+1),
                                                     MVT::i32)),
                      V);
  } else
    return SDValue();

  if (ShiftAmt != 0)
    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
                      DAG.getConstant(ShiftAmt, MVT::i32));

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, Res, false);
  return SDValue();
}
static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // Attempt to use immediate-form VBIC
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      EVT VbicVT;
      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, VbicVT, VT.is128BitVector(),
                                      OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
      }
    }
  }

  return SDValue();
}
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VORR
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && Subtarget->hasNEON() &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      EVT VorrVT;
      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, VorrVT, VT.is128BitVector(),
                                      OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
      }
    }
  }

  SDValue N0 = N->getOperand(0);
  if (N0.getOpcode() != ISD::AND)
    return SDValue();
  SDValue N1 = N->getOperand(1);

  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
    APInt SplatBits0, SplatBits1, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;

    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
                                      HasAnyUndefs) && !HasAnyUndefs) {
      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
                                        HasAnyUndefs) && !HasAnyUndefs &&
          SplatBits0 == ~SplatBits1) {
        // Canonicalize the vector type to make instruction selection simpler.
        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
                                     N0->getOperand(1), N0->getOperand(0),
                                     N1->getOperand(0));
        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
      }
    }
  }

  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.

  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  DebugLoc DL = N->getDebugLoc();
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    if ((Val & ~Mask) != Val)
      return SDValue();

    if (ARM::isBitFieldInvertedMask(Mask)) {
      Val >>= CountTrailingZeros_32(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, MVT::i32),
                        DAG.getConstant(Mask, MVT::i32));

      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
    // as is to match.
    if (ARM::isBitFieldInvertedMask(Mask) &&
        (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
      unsigned amt = CountTrailingZeros_32(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(amt, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                        DAG.getConstant(Mask, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
      unsigned lsb = CountTrailingZeros_32(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                        DAG.getConstant(lsb, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    }
  }

  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
      ARM::isBitFieldInvertedMask(~Mask)) {
    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
    // where lsb(mask) == #shamt and masked bits of B are known zero.
    SDValue ShAmt = N00.getOperand(1);
    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    unsigned LSB = CountTrailingZeros_32(Mask);
    if (ShAmtC != LSB)
      return SDValue();

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
                      DAG.getConstant(~Mask, MVT::i32));

    // Do not add new nodes to DAG combiner worklist.
    DCI.CombineTo(N, Res, false);
  }

  return SDValue();
}
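// Example of PerformORCombine's case (2a): with mask = 0xffffff00 and
// mask2 = 0x000000ff,
//   or (and A, 0xffffff00), (and B, 0x000000ff)
// becomes ARMbfi A, B, 0xffffff00, i.e. the low byte of B is inserted into
// the low byte of A.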
/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N1 = N->getOperand(1);
  if (N1.getOpcode() == ISD::AND) {
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    unsigned LSB = CountTrailingZeros_32(~InvMask);
    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
    unsigned Mask = (1 << Width)-1;
    unsigned Mask2 = N11C->getZExtValue();
    if ((Mask & (~Mask2)) == 0)
      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
                             N->getOperand(0), N1.getOperand(0),
                             N->getOperand(2));
  }
  return SDValue();
}
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  // vmovrrd(vmovdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));

  // vmovrrd(load f64) -> (load i32), (load i32)
  SDNode *InNode = InDouble.getNode();
  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
      InNode->getValueType(0) == MVT::f64 &&
      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
      !cast<LoadSDNode>(InNode)->isVolatile()) {
    // TODO: Should this be done for non-FrameIndex operands?
    LoadSDNode *LD = cast<LoadSDNode>(InNode);

    SelectionDAG &DAG = DCI.DAG;
    DebugLoc DL = LD->getDebugLoc();
    SDValue BasePtr = LD->getBasePtr();
    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
                                 LD->getPointerInfo(), LD->isVolatile(),
                                 LD->isNonTemporal(), LD->getAlignment());

    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(4, MVT::i32));
    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
                                 LD->getPointerInfo(), LD->isVolatile(),
                                 LD->isNonTemporal(),
                                 std::min(4U, LD->getAlignment() / 2));

    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
    DCI.RemoveFromWorklist(LD);
    return Result;
  }

  return SDValue();
}
/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::BITCAST)
    Op1 = Op1.getOperand(0);
  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
      Op0.getNode() == Op1.getNode() &&
      Op0.getResNo() == 0 && Op1.getResNo() == 1)
    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                       N->getValueType(0), Op0.getOperand(0));
  return SDValue();
}
/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  // Bitcast an i64 store extracted from a vector to f64.
  // Otherwise, the i64 value will be legalized to a pair of i32 values.
  StoreSDNode *St = cast<StoreSDNode>(N);
  SDValue StVal = St->getValue();
  if (!ISD::isNormalStore(St) || St->isVolatile())
    return SDValue();

  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
      StVal.getNode()->hasOneUse() && !St->isVolatile()) {
    SelectionDAG &DAG = DCI.DAG;
    DebugLoc DL = St->getDebugLoc();
    SDValue BasePtr = St->getBasePtr();
    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
                                  StVal.getNode()->getOperand(0), BasePtr,
                                  St->getPointerInfo(), St->isVolatile(),
                                  St->isNonTemporal(), St->getAlignment());

    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(4, MVT::i32));
    return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
                        St->isNonTemporal(),
                        std::min(4U, St->getAlignment() / 2));
  }

  if (StVal.getValueType() != MVT::i64 ||
      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = StVal.getDebugLoc();
  SDValue IntVec = StVal.getOperand(0);
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                 IntVec.getValueType().getVectorNumElements());
  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                               Vec, StVal.getOperand(1));
  dl = N->getDebugLoc();
  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  DCI.AddToWorklist(ExtElt.getNode());
  DCI.AddToWorklist(V.getNode());
  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
                      St->getPointerInfo(), St->isVolatile(),
                      St->isNonTemporal(), St->getAlignment(),
                      St->getTBAAInfo());
}
6121 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
6122 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
6123 /// i64 vector to have f64 elements, since the value can then be loaded
6124 /// directly into a VFP register.
6125 static bool hasNormalLoadOperand(SDNode
*N
) {
6126 unsigned NumElts
= N
->getValueType(0).getVectorNumElements();
6127 for (unsigned i
= 0; i
< NumElts
; ++i
) {
6128 SDNode
*Elt
= N
->getOperand(i
).getNode();
6129 if (ISD::isNormalLoad(Elt
) && !cast
<LoadSDNode
>(Elt
)->isVolatile())
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
                                          TargetLowering::DAGCombinerInfo &DCI){
  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
  // into a pair of GPRs, which is fine when the value is used as a scalar,
  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
  SelectionDAG &DAG = DCI.DAG;
  if (N->getNumOperands() == 2) {
    SDValue RV = PerformVMOVDRRCombine(N, DAG);
    if (RV.getNode())
      return RV;
  }

  // Load i64 elements as f64 values so that type legalization does not split
  // them up into i32 values.
  EVT VT = N->getValueType(0);
  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
    return SDValue();
  DebugLoc dl = N->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
    Ops.push_back(V);
    // Make the DAGCombiner fold the bitcast.
    DCI.AddToWorklist(V.getNode());
  }
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
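
// For example, a v2i64 build_vector whose operands are two normal i64 loads
// would otherwise have each element split into a pair of i32 values during
// type legalization; rebuilding it as a v2f64 build_vector of bitcast f64
// elements lets each element be loaded directly into a VFP register, and the
// result is then bitcast back to v2i64.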
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  // Bitcast an i64 load inserted into a vector to f64.
  // Otherwise, the i64 value will be legalized to a pair of i32 values.
  EVT VT = N->getValueType(0);
  SDNode *Elt = N->getOperand(1).getNode();
  if (VT.getVectorElementType() != MVT::i64 ||
      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = N->getDebugLoc();
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                 VT.getVectorNumElements());
  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  DCI.AddToWorklist(V.getNode());
  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
                               Vec, V, N->getOperand(2));
  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
  // The LLVM shufflevector instruction does not require the shuffle mask
  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
  // operands do not match the mask length, they are extended by concatenating
  // them with undef vectors.  That is probably the right thing for other
  // targets, but for NEON it is better to concatenate two double-register
  // size vector operands into a single quad-register size vector.  Do that
  // transformation here:
  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
  //   shuffle(concat(v1, v2), undef)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
      Op0.getNumOperands() != 2 ||
      Op1.getNumOperands() != 2)
    return SDValue();
  SDValue Concat0Op1 = Op0.getOperand(1);
  SDValue Concat1Op1 = Op1.getOperand(1);
  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
      Concat1Op1.getOpcode() != ISD::UNDEF)
    return SDValue();
  // Skip the transformation if any of the types are illegal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isTypeLegal(VT) ||
      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
      !TLI.isTypeLegal(Concat1Op1.getValueType()))
    return SDValue();

  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
                                  Op0.getOperand(0), Op1.getOperand(0));
  // Translate the shuffle mask.
  SmallVector<int, 16> NewMask;
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfElts = NumElts/2;
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  for (unsigned n = 0; n < NumElts; ++n) {
    int MaskElt = SVN->getMaskElt(n);
    int NewElt = -1;
    if (MaskElt < (int)HalfElts)
      NewElt = MaskElt;
    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
      NewElt = HalfElts + MaskElt - NumElts;
    NewMask.push_back(NewElt);
  }
  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
                              DAG.getUNDEF(VT), NewMask.data());
}
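
// Worked example: for a v4i16 shuffle of concat(v1, undef) and
// concat(v2, undef) with mask <0, 1, 4, 5>, NumElts == 4 and HalfElts == 2,
// so mask elements 4 and 5 are remapped to 2 and 3 and the node becomes a
// shuffle of concat(v1, v2) with undef using mask <0, 1, 2, 3>.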
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
/// NEON load/store intrinsics to merge base address updates.
static SDValue CombineBaseUpdate(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
  SDValue Addr = N->getOperand(AddrOpIdx);

  // Search for a use of the address operand that is an increment.
  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User->getOpcode() != ISD::ADD ||
        UI.getUse().getResNo() != Addr.getResNo())
      continue;

    // Check that the add is independent of the load/store.  Otherwise, folding
    // it would create a cycle.
    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
      continue;

    // Find the new opcode for the updating load/store.
    bool isLoad = true;
    bool isLaneOp = false;
    unsigned NewOpc = 0;
    unsigned NumVecs = 0;
    if (isIntrinsic) {
      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
      switch (IntNo) {
      default: assert(0 && "unexpected intrinsic for Neon base update");
      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
        NumVecs = 1; break;
      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
        NumVecs = 2; break;
      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
        NumVecs = 3; break;
      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
        NumVecs = 4; break;
      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
        NumVecs = 2; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
        NumVecs = 3; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
        NumVecs = 4; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
        NumVecs = 1; isLoad = false; break;
      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
        NumVecs = 2; isLoad = false; break;
      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
        NumVecs = 3; isLoad = false; break;
      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
        NumVecs = 4; isLoad = false; break;
      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
        NumVecs = 2; isLoad = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
        NumVecs = 3; isLoad = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
        NumVecs = 4; isLoad = false; isLaneOp = true; break;
      }
    } else {
      isLaneOp = true;
      switch (N->getOpcode()) {
      default: assert(0 && "unexpected opcode for Neon base update");
      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
      }
    }

    // Find the size of memory referenced by the load/store.
    EVT VecTy;
    if (isLoad)
      VecTy = N->getValueType(0);
    else
      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
    if (isLaneOp)
      NumBytes /= VecTy.getVectorNumElements();

    // If the increment is a constant, it must match the memory ref size.
    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
      uint64_t IncVal = CInc->getZExtValue();
      if (IncVal != NumBytes)
        continue;
    } else if (NumBytes >= 3 * 16) {
      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
      // separate instructions that make it harder to use a non-constant update.
      continue;
    }

    // Create the new updating load/store node.
    EVT Tys[6];
    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
    unsigned n;
    for (n = 0; n < NumResultVecs; ++n)
      Tys[n] = VecTy;
    Tys[n++] = MVT::i32;
    Tys[n] = MVT::Other;
    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
    SmallVector<SDValue, 8> Ops;
    Ops.push_back(N->getOperand(0)); // incoming chain
    Ops.push_back(N->getOperand(AddrOpIdx));
    Ops.push_back(Inc);
    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
      Ops.push_back(N->getOperand(i));
    }
    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
                                           Ops.data(), Ops.size(),
                                           MemInt->getMemoryVT(),
                                           MemInt->getMemOperand());

    // Update the uses.
    std::vector<SDValue> NewResults;
    for (unsigned i = 0; i < NumResultVecs; ++i) {
      NewResults.push_back(SDValue(UpdN.getNode(), i));
    }
    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
    DCI.CombineTo(N, NewResults);
    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

    break;
  }
  return SDValue();
}
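
// For example, a vld1 of a single d register followed by an "add r0, r0, #8"
// that bumps the pointer by exactly the access size (8 bytes here) can be
// folded into the updating form VLD1_UPD, roughly "vld1.32 {d16}, [r0]!",
// with the incremented address produced as the extra i32 result of the new
// node and the ADD's uses rewired to it.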
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
/// return true.
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // vldN-dup instructions only support 64-bit vectors for N > 1.
  if (!VT.is64BitVector())
    return false;

  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
  SDNode *VLD = N->getOperand(0).getNode();
  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
    return false;
  unsigned NumVecs = 0;
  unsigned NewOpc = 0;
  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
  if (IntNo == Intrinsic::arm_neon_vld2lane) {
    NumVecs = 2;
    NewOpc = ARMISD::VLD2DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
    NumVecs = 3;
    NewOpc = ARMISD::VLD3DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
    NumVecs = 4;
    NewOpc = ARMISD::VLD4DUP;
  } else {
    return false;
  }

  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
  // numbers match the load.
  unsigned VLDLaneNo =
    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    // Ignore uses of the chain result.
    if (UI.getUse().getResNo() == NumVecs)
      continue;
    SDNode *User = *UI;
    if (User->getOpcode() != ARMISD::VDUPLANE ||
        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
      return false;
  }

  // Create the vldN-dup node.
  EVT Tys[5];
  unsigned n;
  for (n = 0; n < NumVecs; ++n)
    Tys[n] = VT;
  Tys[n] = MVT::Other;
  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
                                           Ops, 2, VLDMemInt->getMemoryVT(),
                                           VLDMemInt->getMemOperand());

  // Update the uses.
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    unsigned ResNo = UI.getUse().getResNo();
    // Ignore uses of the chain result.
    if (ResNo == NumVecs)
      continue;
    SDNode *User = *UI;
    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
  }

  // Now the vldN-lane intrinsic is dead except for its chain result.
  // Update uses of the chain.
  std::vector<SDValue> VLDDupResults;
  for (unsigned n = 0; n < NumVecs; ++n)
    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
  DCI.CombineTo(VLD, VLDDupResults);

  return true;
}
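
// For example, if a vld2lane's vector results are only used by VDUPLANE
// nodes that all splat the lane that was loaded, the lane load plus the
// splats can be replaced by a single vld2-dup, which loads the element once
// and duplicates it across all lanes of each destination register.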
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Op = N->getOperand(0);

  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
  if (CombineVLDDUP(N, DCI))
    return SDValue(N, 0);

  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
  // redundant.  Ignore bit_converts for now; element sizes are checked below.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
    return SDValue();

  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
  // The canonical VMOV for a zero vector uses a 32-bit element size.
  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned EltBits;
  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
    EltSize = 8;
  EVT VT = N->getValueType(0);
  if (EltSize > VT.getVectorElementType().getSizeInBits())
    return SDValue();

  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
{
  integerPart cN;
  integerPart c0 = 0;
  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
       I != E; I++) {
    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
    if (!C)
      return false;

    bool isExact;
    APFloat APF = C->getValueAPF();
    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
        != APFloat::opOK || !isExact)
      return false;

    c0 = (I == 0) ? cN : c0;
    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
      return false;
  }
  C = c0;
  return true;
}
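
// Example: for ConstVec = <float 8.0, float 8.0> every element converts
// exactly to the integer 8, which is a power of 2 with Log2_64(8) == 3, so
// this returns true with C == 8; the callers below then emit #3 as the
// fixed-point bit count (matching the "#3" in the VCVT examples).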
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vmul.f32        d16, d17, d16
///  vcvt.s32.f32    d16, d16
/// becomes:
///  vcvt.s32.f32    d16, d16, #3
static SDValue PerformVCVTCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op = N->getOperand(0);

  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
      Op.getOpcode() != ISD::FMUL)
    return SDValue();

  uint64_t C;
  SDValue N0 = Op->getOperand(0);
  SDValue ConstVec = Op->getOperand(1);
  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;

  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
      !isConstVecPow2(ConstVec, isSigned, C))
    return SDValue();

  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
    Intrinsic::arm_neon_vcvtfp2fxu;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                     N->getValueType(0),
                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
                     DAG.getConstant(Log2_64(C), MVT::i32));
}
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vcvt.f32.s32    d16, d16
///  vdiv.f32        d16, d17, d16
/// becomes:
///  vcvt.f32.s32    d16, d16, #3
static SDValue PerformVDIVCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op = N->getOperand(0);
  unsigned OpOpcode = Op.getNode()->getOpcode();

  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
    return SDValue();

  uint64_t C;
  SDValue ConstVec = N->getOperand(1);
  bool isSigned = OpOpcode == ISD::SINT_TO_FP;

  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
      !isConstVecPow2(ConstVec, isSigned, C))
    return SDValue();

  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
    Intrinsic::arm_neon_vcvtfxu2fp;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                     Op.getValueType(),
                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                      HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation.  That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the shift count must be negative.  The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}
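
// Example ranges for a v8i16 shift (ElementBits == 16): an immediate left
// shift must be 0..15 (or 0..16 for the long vshll form), while a right
// shift count must be 1..16, or 1..8 when narrowing.  Intrinsic right-shift
// counts arrive negated and are flipped back above before the range check.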
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::arm_neon_vshiftls:
  case Intrinsic::arm_neon_vshiftlu:
  case Intrinsic::arm_neon_vshiftn:
  case Intrinsic::arm_neon_vrshifts:
  case Intrinsic::arm_neon_vrshiftu:
  case Intrinsic::arm_neon_vrshiftn:
  case Intrinsic::arm_neon_vqshifts:
  case Intrinsic::arm_neon_vqshiftu:
  case Intrinsic::arm_neon_vqshiftsu:
  case Intrinsic::arm_neon_vqshiftns:
  case Intrinsic::arm_neon_vqshiftnu:
  case Intrinsic::arm_neon_vqshiftnsu:
  case Intrinsic::arm_neon_vqrshiftns:
  case Intrinsic::arm_neon_vqrshiftnu:
  case Intrinsic::arm_neon_vqrshiftnsu: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
        VShiftOpc = ARMISD::VSHL;
        break;
      }
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
                     ARMISD::VSHRs : ARMISD::VSHRu);
        break;
      }
      return SDValue();

    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for vshll intrinsic");

    case Intrinsic::arm_neon_vrshifts:
    case Intrinsic::arm_neon_vrshiftu:
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshifts:
    case Intrinsic::arm_neon_vqshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshiftsu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      llvm_unreachable("invalid shift count for vqshlu intrinsic");

    case Intrinsic::arm_neon_vshiftn:
    case Intrinsic::arm_neon_vrshiftn:
    case Intrinsic::arm_neon_vqshiftns:
    case Intrinsic::arm_neon_vqshiftnu:
    case Intrinsic::arm_neon_vqshiftnsu:
    case Intrinsic::arm_neon_vqrshiftns:
    case Intrinsic::arm_neon_vqrshiftnu:
    case Intrinsic::arm_neon_vqrshiftnsu:
      // Narrowing shifts require an immediate right shift.
      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for narrowing vector shift "
                       "intrinsic");

    default:
      llvm_unreachable("unhandled vector shift");
    }

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      // Opcode already set above.
      break;
    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (Cnt == VT.getVectorElementType().getSizeInBits())
        VShiftOpc = ARMISD::VSHLLi;
      else
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
                     ARMISD::VSHLLs : ARMISD::VSHLLu);
      break;
    case Intrinsic::arm_neon_vshiftn:
      VShiftOpc = ARMISD::VSHRN; break;
    case Intrinsic::arm_neon_vrshifts:
      VShiftOpc = ARMISD::VRSHRs; break;
    case Intrinsic::arm_neon_vrshiftu:
      VShiftOpc = ARMISD::VRSHRu; break;
    case Intrinsic::arm_neon_vrshiftn:
      VShiftOpc = ARMISD::VRSHRN; break;
    case Intrinsic::arm_neon_vqshifts:
      VShiftOpc = ARMISD::VQSHLs; break;
    case Intrinsic::arm_neon_vqshiftu:
      VShiftOpc = ARMISD::VQSHLu; break;
    case Intrinsic::arm_neon_vqshiftsu:
      VShiftOpc = ARMISD::VQSHLsu; break;
    case Intrinsic::arm_neon_vqshiftns:
      VShiftOpc = ARMISD::VQSHRNs; break;
    case Intrinsic::arm_neon_vqshiftnu:
      VShiftOpc = ARMISD::VQSHRNu; break;
    case Intrinsic::arm_neon_vqshiftnsu:
      VShiftOpc = ARMISD::VQSHRNsu; break;
    case Intrinsic::arm_neon_vqrshiftns:
      VShiftOpc = ARMISD::VQRSHRNs; break;
    case Intrinsic::arm_neon_vqrshiftnu:
      VShiftOpc = ARMISD::VQRSHRNu; break;
    case Intrinsic::arm_neon_vqrshiftnsu:
      VShiftOpc = ARMISD::VQRSHRNsu; break;
    }

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vshiftins: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
      VShiftOpc = ARMISD::VSLI;
    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
      VShiftOpc = ARMISD::VSRI;
    else {
      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
    }

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vqrshifts:
  case Intrinsic::arm_neon_vqrshiftu:
    // No immediate versions of these to check for.
    break;
  }

  return SDValue();
}
/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them.  As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  // Nothing to be done for scalar shifts.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!VT.isVector() || !TLI.isTypeLegal(VT))
    return SDValue();

  assert(ST->hasNEON() && "unexpected vector shift");
  int64_t Cnt;

  switch (N->getOpcode()) {
  default: llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    break;

  case ISD::SRA:
  case ISD::SRL:
    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
                            ARMISD::VSHRs : ARMISD::VSHRu);
      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    }
  }
  return SDValue();
}
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements.  NEON supports these directly.  They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType()) &&
        isa<ConstantSDNode>(Lane)) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
    }
  }

  return SDValue();
}
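
// For example, (sext (extract_vector_elt <8 x i16> %v, 3)) to i32 becomes an
// ARMISD::VGETLANEs of lane 3, which selects to a single sign-extending lane
// move (roughly "vmov.s16 r0, d0[3]") instead of an extract followed by a
// separate sxth.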
/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
                                       const ARMSubtarget *ST) {
  // If the target supports NEON, try to use vmax/vmin instructions for f32
  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
  // a NaN; only do the transformation when it matches that behavior.

  // For now only do this when using NEON for FP operations; if using VFP, it
  // is not obvious that the benefit outweighs the cost of switching to the
  // NEON pipeline.
  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
      N->getValueType(0) != MVT::f32)
    return SDValue();

  SDValue CondLHS = N->getOperand(0);
  SDValue CondRHS = N->getOperand(1);
  SDValue LHS = N->getOperand(2);
  SDValue RHS = N->getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

  unsigned Opcode = 0;
  bool IsReversed;
  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
    IsReversed = false; // x CC y ? x : y
  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
    IsReversed = true ; // x CC y ? y : x
  } else {
    return SDValue();
  }

  bool IsUnordered;
  switch (CC) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
    // will return -0, so vmin can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
        !UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
    break;

  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
    // will return +0, so vmax can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
        !UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
    break;
  }

  if (!Opcode)
    return SDValue();
  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
}
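
// Mapping summary: with the select operands in the same order as the compare
// ("x < y ? x : y"), the less-than family produces ARMISD::FMIN and the
// greater-than family produces ARMISD::FMAX; when the operands are swapped
// ("x < y ? y : x"), IsReversed flips the choice to FMAX / FMIN respectively.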
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
  case ISD::AND:        return PerformANDCombine(N, DCI);
  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
  case ISD::STORE:      return PerformSTORECombine(N, DCI);
  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
  case ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return CombineBaseUpdate(N, DCI);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return CombineBaseUpdate(N, DCI);
    default: break;
    }
    break;
  }
  return SDValue();
}
bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
                                                          EVT VT) const {
  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->allowsUnalignedMem())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  // FIXME: VLD1 etc with standard alignment is legal.
  }
}
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    // Scale == 1;
    break;
  case MVT::i16:
    // Scale == 2;
    Scale = 2;
    break;
  case MVT::i32:
    // Scale == 4;
    Scale = 4;
    break;
  }

  if ((V & (Scale - 1)) != 0)
    return false;
  V /= Scale;
  return V == (V & ((1LL << 5) - 1));
}
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  bool isNeg = false;
  if (V < 0) {
    isNeg = true;
    V = - V;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // + imm12 or - imm8
    if (isNeg)
      return V == (V & ((1LL << 8) - 1));
    return V == (V & ((1LL << 12) - 1));
  case MVT::f32:
  case MVT::f64:
    // Same as ARM mode. FIXME: NEON?
    if (!Subtarget->hasVFP2())
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                    const ARMSubtarget *Subtarget) {
  if (V == 0)
    return true;

  if (!VT.isSimple())
    return false;

  if (Subtarget->isThumb1Only())
    return isLegalT1AddressImmediate(V, VT);
  else if (Subtarget->isThumb2())
    return isLegalT2AddressImmediate(V, VT, Subtarget);

  // ARM mode.
  if (V < 0)
    V = - V;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i32:
    // +- imm12
    return V == (V & ((1LL << 12) - 1));
  case MVT::i16:
    // +- imm8
    return V == (V & ((1LL << 8) - 1));
  case MVT::f32:
  case MVT::f64:
    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}
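
// Example offsets in ARM mode: an i32 load/store offset must fit in 12 bits
// (|offset| <= 4095), an i16 offset in 8 bits (|offset| <= 255), and an
// f32/f64 VFP offset must be a multiple of 4 whose scaled value fits in
// 8 bits (|offset| <= 1020).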
bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because arm allows folding a scale into many arithmetic
    // operations.  This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because arm allows folding a scale into many arithmetic
      // operations.  This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
  }
  return true;
}
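
// Example: for an i32 load in ARM mode, "r0 + (r1 << 2)" (base plus scaled
// register) is accepted because the scale of 4 passes the power-of-two
// check, but "r0 + (r1 << 2) + 8" is rejected above because ARM has no
// R+R*scale+imm addressing mode.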
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(Imm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  return Imm >= 0 && Imm <= 255;
}

/// isLegalAddImmediate - Return true if the specified immediate is legal
/// add immediate, that is the target has add instructions which can add
/// a register with the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return ARM_AM::getSOImmVal(Imm) != -1;
}
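
// Example: 0xFF00 is a legal ARM-mode immediate (an 8-bit value rotated by an
// even amount, so getSOImmVal succeeds), while 0x101 is not and would have to
// be materialized into a register first.  Thumb1 icmp immediates are simply
// limited to the range 0..255.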
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
  return false;
}
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT  = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT  = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT  = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  if (Ptr != Base) {
    // Swap base ptr and offset to catch more post-index load / store when
    // it's legal.  In Thumb2 mode, offset must be an immediate.
    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
        !Subtarget->isThumb2())
      std::swap(Base, Offset);

    // Post-indexed load / store update the base pointer.
    if (Ptr != Base)
      return false;
  }

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
                          KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne  &= KnownOneRHS;
    return;
  }
  }
}
//===----------------------------------------------------------------------===//
//                           ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
  // Looking for "rev" which is V6+.
  if (!Subtarget->hasV6Ops())
    return false;

  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
  std::string AsmStr = IA->getAsmString();
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, ";\n");

  switch (AsmPieces.size()) {
  default: return false;
  case 1:
    AsmStr = AsmPieces[0];
    AsmPieces.clear();
    SplitString(AsmStr, AsmPieces, " \t,");

    // rev $0, $1
    if (AsmPieces.size() == 3 &&
        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
      const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
      if (Ty && Ty->getBitWidth() == 32)
        return IntrinsicLowering::LowerToByteSwap(CI);
    }
    break;
  }

  return false;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:  break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    case 'h': return C_RegisterClass;
    case 'x': return C_RegisterClass;
    case 't': return C_RegisterClass;
    case 'j': return C_Other; // Constant for movw.
    }
  } else if (Constraint.size() == 2) {
    switch (Constraint[0]) {
    default: break;
    // All 'U+' constraints are addresses.
    case 'U': return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
ARMTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  const Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'l':
    if (type->isIntegerTy()) {
      if (Subtarget->isThumb())
        weight = CW_SpecificReg;
      else
        weight = CW_Register;
    }
    break;
  case 'w':
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;
  }
  return weight;
}
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l': // Low regs or general regs.
      if (Subtarget->isThumb())
        return RCPair(0U, ARM::tGPRRegisterClass);
      else
        return RCPair(0U, ARM::GPRRegisterClass);
    case 'h': // High regs or no regs.
      if (Subtarget->isThumb())
        return RCPair(0U, ARM::hGPRRegisterClass);
      break;
    case 'r':
      return RCPair(0U, ARM::GPRRegisterClass);
    case 'w':
      if (VT == MVT::f32)
        return RCPair(0U, ARM::SPRRegisterClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, ARM::DPRRegisterClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, ARM::QPRRegisterClass);
      break;
    case 'x':
      if (VT == MVT::f32)
        return RCPair(0U, ARM::SPR_8RegisterClass);
      if (VT.getSizeInBits() == 64)
        return RCPair(0U, ARM::DPR_8RegisterClass);
      if (VT.getSizeInBits() == 128)
        return RCPair(0U, ARM::QPR_8RegisterClass);
      break;
    case 't':
      if (VT == MVT::f32)
        return RCPair(0U, ARM::SPRRegisterClass);
      break;
    }
  }
  if (StringRef("{cc}").equals_lower(Constraint))
    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  // Currently only support length 1 constraints.
  if (Constraint.length() != 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'j':
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits.  Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (ConstraintLetter) {
      case 'j':
        // Constant suitable for movw, must be between 0 and
        // 65535.
        if (Subtarget->hasV6T2Ops())
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32.  This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}
int ARM::getVFPf32Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0x7ffff)
    return -1;
  Mantissa >>= 19;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

int ARM::getVFPf64Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
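
// Encoding example for getVFPf32Imm: 1.0f is 0x3f800000, so Sign == 0,
// Exp == 0 and the 4-bit mantissa field is 0; the biased exponent becomes
// ((0+3) & 0x7) ^ 4 == 7, giving the 8-bit VFP immediate 0x70, which is what
// a VFPv3 "vmov.f32 s0, #1.0" encodes.  A value like 0.1f fails the mantissa
// test above and returns -1.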
bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return 0;
  // there can be 1's on either or both "outsides", all the "inside"
  // bits must be 0's
  unsigned int lsb = 0, msb = 31;
  while (v & (1 << msb)) --msb;
  while (v & (1 << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1 << i))
      return 0;
  }
  return 1;
}
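
// Example: 0xffff00ff returns true (ones on both outsides, a single run of
// zeros in bits 8-15), the mask shape the BFC/BFI patterns can implement,
// while 0xff00ff00 returns false because its zeros are split into two runs.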
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}
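
// With VFP3, constants such as 1.0, 0.5 or -2.0 can therefore be selected
// directly as VMOV immediates, while a value like 0.1 falls back to a
// constant-pool load.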
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_strexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldrexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;