//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }
}
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();
  return new ARMElfTargetObjectFile();
}
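
// Note on createTLOF above: object-file lowering is chosen per subtarget.
// Darwin subtargets use the generic Mach-O lowering object, while everything
// else uses the ARM-specific ELF object-file info from ARMTargetObjectFile.h.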
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL,       MVT::i64, Expand);
    setOperationAction(ISD::MULHU,     MVT::i32, Expand);
    setOperationAction(ISD::MULHS,     MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL,   MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  setOperationAction(ISD::SDIV,    MVT::i32, Expand);
  setOperationAction(ISD::UDIV,    MVT::i32, Expand);
  setOperationAction(ISD::SREM,    MVT::i32, Expand);
  setOperationAction(ISD::UREM,    MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress,       MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,        MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress,    MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,      MVT::Other, Custom);
  setOperationAction(ISD::VAARG,        MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,       MVT::Other, Expand);
  setOperationAction(ISD::VAEND,        MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,  MVT::i32,   Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  if (Subtarget->isThumb())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID,     MVT::Other, Custom);

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow.
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // int <-> fp are custom expanded into bit_convert + ARMISD ops.
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::FMRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::FMRRD:         return "ARMISD::FMRRD";
  case ARMISD::FMDRR:         return "ARMISD::FMDRR";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VLD2D:         return "ARMISD::VLD2D";
  case ARMISD::VLD3D:         return "ARMISD::VLD3D";
  case ARMISD::VLD4D:         return "ARMISD::VLD4D";
  case ARMISD::VST2D:         return "ARMISD::VST2D";
  case ARMISD::VST3D:         return "ARMISD::VST3D";
  case ARMISD::VST4D:         return "ARMISD::VST4D";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  }
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
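
// Note on IntCCToARMCC: the unsigned predicates map onto ARM's carry-flag
// based conditions: HI (unsigned higher), HS (unsigned higher or same, i.e.
// carry set), LO (unsigned lower, carry clear) and LS (unsigned lower or
// same). An unsigned i32 compare therefore becomes a CMP followed by an
// instruction predicated on one of these conditions.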
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
/// returns true if the operands should be inverted to form the proper
/// comparison.
static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  bool Invert = false;
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
  return Invert;
}
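
// Note on FPCCToARMCC: some unordered predicates need two ARM conditions.
// For example SETUEQ ("equal or unordered") yields CondCode = EQ with
// CondCode2 = VS, since a VFP compare against a NaN sets the V flag after
// FMSTAT. Callers that receive CondCode2 != ARMCC::AL are expected to emit a
// second predicated check for that condition.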
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"
// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}
// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}
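
// Note on the AAPCS helpers above: an f64 passed in core registers must
// occupy an even/odd pair, which is why AllocateReg is driven by the
// {R0,R2}/{R1,R3} lists. A v2f64 is treated as two f64 halves: the first
// half may report failure (CanFail == true) so it can fall back to the
// generated calling-convention rules, while the second half
// (CanFail == false) is always assigned here, spilling to an 8-byte aligned
// stack slot when no register pair is left.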
static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}
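
// Note on CCAssignFnForNode: for the default C/Fast conventions the table is
// chosen from the ABI and float-ABI settings. AAPCS targets with VFP hardware
// and a hard float ABI use the VFP variant, where FP values travel in s/d
// registers, except for varargs calls, which always use the base AAPCS rules;
// pre-AAPCS targets fall back to the old APCS tables.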
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   unsigned CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
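
// Note on LowerCallResult: with the GPR-based return convention an f64 comes
// back in two i32 registers and is reassembled with ARMISD::FMDRR (fmdrr: two
// core registers into one d-register). A v2f64 therefore arrives as four i32
// copies that are rebuilt into two f64 values and inserted into the vector
// one lane at a time, as done above.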
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/false, NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) {

  SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA, Flags));
  }
}
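
// Note on PassF64ArgInRegs: ARMISD::FMRRD corresponds to the VFP fmrrd
// instruction, which moves one 64-bit d-register into two core registers; the
// first result carries the low word and the second the high word. The second
// word either goes in the next GPR (NextVA.isRegLoc()) or is stored to the
// outgoing argument area on the stack.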
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             unsigned CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Outs[realArgIdx].Val;
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                           ARMCP::CPStub, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr, NULL, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex,
                                                           ARMCP::CPStub, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr, NULL, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }
  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               unsigned CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
                             PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) {
  GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    // initial exec model
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
                               PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}
ARMTargetLowering::LowerGlobalAddressELF(SDValue Op
,
1252 SelectionDAG
&DAG
) {
1253 EVT PtrVT
= getPointerTy();
1254 DebugLoc dl
= Op
.getDebugLoc();
1255 GlobalValue
*GV
= cast
<GlobalAddressSDNode
>(Op
)->getGlobal();
1256 Reloc::Model RelocM
= getTargetMachine().getRelocationModel();
1257 if (RelocM
== Reloc::PIC_
) {
1258 bool UseGOTOFF
= GV
->hasLocalLinkage() || GV
->hasHiddenVisibility();
1259 ARMConstantPoolValue
*CPV
=
1260 new ARMConstantPoolValue(GV
, ARMCP::CPValue
, UseGOTOFF
? "GOTOFF":"GOT");
1261 SDValue CPAddr
= DAG
.getTargetConstantPool(CPV
, PtrVT
, 4);
1262 CPAddr
= DAG
.getNode(ARMISD::Wrapper
, dl
, MVT::i32
, CPAddr
);
1263 SDValue Result
= DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(),
1265 SDValue Chain
= Result
.getValue(1);
1266 SDValue GOT
= DAG
.getGLOBAL_OFFSET_TABLE(PtrVT
);
1267 Result
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Result
, GOT
);
1269 Result
= DAG
.getLoad(PtrVT
, dl
, Chain
, Result
, NULL
, 0);
1272 SDValue CPAddr
= DAG
.getTargetConstantPool(GV
, PtrVT
, 4);
1273 CPAddr
= DAG
.getNode(ARMISD::Wrapper
, dl
, MVT::i32
, CPAddr
);
1274 return DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(), CPAddr
, NULL
, 0);
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
/// even in non-static mode.
static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
  // If symbol visibility is hidden, the extra load is not needed if
  // the symbol is definitely defined in the current translation unit.
  bool isDecl = GV->isDeclaration() || GV->hasAvailableExternallyLinkage();
  if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
    return false;
  return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
}
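
// Note on GVIsIndirectSymbol: on Darwin, a global that may be defined outside
// the linkage unit is reached through a non-lazy pointer stub. When this
// predicate returns true, LowerGlobalAddressDarwin below uses an
// ARMCP::CPNonLazyPtr constant-pool entry and issues an extra load through
// that pointer (the IsIndirect handling).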
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
                                       : ARMCP::CPValue;
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                         Kind, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }
  if (IsIndirect)
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);

  return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       "_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex,
                                                       ARMCP::CPValue, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
                                     unsigned Opcode) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  DebugLoc dl = Op.getDebugLoc();

  if (!VT.is64BitVector())
    return SDValue(); // unimplemented

  SDValue Ops[] = { Node->getOperand(0),
                    Node->getOperand(2) };
  return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
}

static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
                                     unsigned Opcode, unsigned NumVecs) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getOperand(3).getValueType();
  DebugLoc dl = Op.getDebugLoc();

  if (!VT.is64BitVector())
    return SDValue(); // unimplemented

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(Node->getOperand(0));
  Ops.push_back(Node->getOperand(2));
  for (unsigned N = 0; N < NumVecs; ++N)
    Ops.push_back(Node->getOperand(N + 3));
  return DAG.getNode(Opcode, dl, MVT::Other, Ops.data(), Ops.size());
}
SDValue
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (IntNo) {
  case Intrinsic::arm_neon_vld2:
    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
  case Intrinsic::arm_neon_vld3:
    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
  case Intrinsic::arm_neon_vld4:
    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
  case Intrinsic::arm_neon_vst2:
    return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
  case Intrinsic::arm_neon_vst3:
    return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
  case Intrinsic::arm_neon_vst4:
    return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
  default: return SDValue();    // Don't custom lower most intrinsics.
  }
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    // blah. horrible, horrible hack with the forced magic name.
    // really need to clean this up. It belongs in the target-independent
    // layer somehow that doesn't require the coupling with the asm
    // printer.
    MachineFunction &MF = DAG.getMachineFunction();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMCP::ARMCPKind Kind = ARMCP::CPValue;
    // Save off the LSDA name for the AsmPrinter to use when it's time
    // to emit the table
    std::string LSDAName = "L_lsda_";
    LSDAName += MF.getFunction()->getName();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(*DAG.getContext(), LSDAName.c_str(),
                               ARMPCLabelIndex, Kind, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
    SDValue Chain = Result.getValue(1);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::eh_sjlj_setjmp:
    return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
  }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
                            unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  DebugLoc dl = Op.getDebugLoc();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
}
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  EVT VT = Node->getValueType(0);
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));

  unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
  unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
  if (AlignVal > StackAlign)
    // Do this now since selection pass cannot introduce new target
    // independent node.
    Align = DAG.getConstant(-(uint64_t)AlignVal, VT);

  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
  // using a "add r, sp, r" instead. Negate the size now so we don't have to
  // do even more horrible hack later.
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->isThumb1OnlyFunction()) {
    bool Negate = true;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
    if (C) {
      uint32_t Val = C->getZExtValue();
      if (Val <= 508 && ((Val & 3) == 0))
        Negate = false;
    }
    if (Negate)
      Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
  }

  SDVTList VTList = DAG.getVTList(VT, MVT::Other);
  SDValue Ops1[] = { Chain, Size, Align };
  SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
  Chain = Res.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
                             DAG.getIntPtrConstant(0, true), SDValue());
  SDValue Ops2[] = { Res, Chain };
  return DAG.getMergeValues(Ops2, 2, dl);
}
1498 ARMTargetLowering::GetF64FormalArgument(CCValAssign
&VA
, CCValAssign
&NextVA
,
1499 SDValue
&Root
, SelectionDAG
&DAG
,
1501 MachineFunction
&MF
= DAG
.getMachineFunction();
1502 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1504 TargetRegisterClass
*RC
;
1505 if (AFI
->isThumb1OnlyFunction())
1506 RC
= ARM::tGPRRegisterClass
;
1508 RC
= ARM::GPRRegisterClass
;
1510 // Transform the arguments stored in physical registers into virtual ones.
1511 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1512 SDValue ArgValue
= DAG
.getCopyFromReg(Root
, dl
, Reg
, MVT::i32
);
1515 if (NextVA
.isMemLoc()) {
1516 unsigned ArgSize
= NextVA
.getLocVT().getSizeInBits()/8;
1517 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1518 int FI
= MFI
->CreateFixedObject(ArgSize
, NextVA
.getLocMemOffset());
1520 // Create load node to retrieve arguments from the stack.
1521 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
1522 ArgValue2
= DAG
.getLoad(MVT::i32
, dl
, Root
, FIN
, NULL
, 0);
1524 Reg
= MF
.addLiveIn(NextVA
.getLocReg(), RC
);
1525 ArgValue2
= DAG
.getCopyFromReg(Root
, dl
, Reg
, MVT::i32
);
1528 return DAG
.getNode(ARMISD::FMDRR
, dl
, MVT::f64
, ArgValue
, ArgValue2
);
1532 ARMTargetLowering::LowerFormalArguments(SDValue Chain
,
1533 unsigned CallConv
, bool isVarArg
,
1534 const SmallVectorImpl
<ISD::InputArg
>
1536 DebugLoc dl
, SelectionDAG
&DAG
,
1537 SmallVectorImpl
<SDValue
> &InVals
) {
1539 MachineFunction
&MF
= DAG
.getMachineFunction();
1540 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1542 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1544 // Assign locations to all of the incoming arguments.
1545 SmallVector
<CCValAssign
, 16> ArgLocs
;
1546 CCState
CCInfo(CallConv
, isVarArg
, getTargetMachine(), ArgLocs
,
1548 CCInfo
.AnalyzeFormalArguments(Ins
,
1549 CCAssignFnForNode(CallConv
, /* Return*/ false,
1552 SmallVector
<SDValue
, 16> ArgValues
;
1554 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1555 CCValAssign
&VA
= ArgLocs
[i
];
1557 // Arguments stored in registers.
1558 if (VA
.isRegLoc()) {
1559 EVT RegVT
= VA
.getLocVT();
1562 if (VA
.needsCustom()) {
1563 // f64 and vector types are split up into multiple registers or
1564 // combinations of registers and stack slots.
1567 if (VA
.getLocVT() == MVT::v2f64
) {
1568 SDValue ArgValue1
= GetF64FormalArgument(VA
, ArgLocs
[++i
],
1570 VA
= ArgLocs
[++i
]; // skip ahead to next loc
1571 SDValue ArgValue2
= GetF64FormalArgument(VA
, ArgLocs
[++i
],
1573 ArgValue
= DAG
.getNode(ISD::UNDEF
, dl
, MVT::v2f64
);
1574 ArgValue
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
,
1575 ArgValue
, ArgValue1
, DAG
.getIntPtrConstant(0));
1576 ArgValue
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
,
1577 ArgValue
, ArgValue2
, DAG
.getIntPtrConstant(1));
1579 ArgValue
= GetF64FormalArgument(VA
, ArgLocs
[++i
], Chain
, DAG
, dl
);
1582 TargetRegisterClass
*RC
;
1584 if (RegVT
== MVT::f32
)
1585 RC
= ARM::SPRRegisterClass
;
1586 else if (RegVT
== MVT::f64
)
1587 RC
= ARM::DPRRegisterClass
;
1588 else if (RegVT
== MVT::v2f64
)
1589 RC
= ARM::QPRRegisterClass
;
1590 else if (RegVT
== MVT::i32
)
1591 RC
= (AFI
->isThumb1OnlyFunction() ?
1592 ARM::tGPRRegisterClass
: ARM::GPRRegisterClass
);
1594 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
1596 // Transform the arguments in physical registers into virtual ones.
1597 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1598 ArgValue
= DAG
.getCopyFromReg(Chain
, dl
, Reg
, RegVT
);
1601 // If this is an 8 or 16-bit value, it is really passed promoted
1602 // to 32 bits. Insert an assert[sz]ext to capture this, then
1603 // truncate to the right size.
1604 switch (VA
.getLocInfo()) {
1605 default: llvm_unreachable("Unknown loc info!");
1606 case CCValAssign::Full
: break;
1607 case CCValAssign::BCvt
:
1608 ArgValue
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VA
.getValVT(), ArgValue
);
1610 case CCValAssign::SExt
:
1611 ArgValue
= DAG
.getNode(ISD::AssertSext
, dl
, RegVT
, ArgValue
,
1612 DAG
.getValueType(VA
.getValVT()));
1613 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), ArgValue
);
1615 case CCValAssign::ZExt
:
1616 ArgValue
= DAG
.getNode(ISD::AssertZext
, dl
, RegVT
, ArgValue
,
1617 DAG
.getValueType(VA
.getValVT()));
1618 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), ArgValue
);
1622 InVals
.push_back(ArgValue
);
1624 } else { // VA.isRegLoc()
1627 assert(VA
.isMemLoc());
1628 assert(VA
.getValVT() != MVT::i64
&& "i64 should already be lowered");
1630 unsigned ArgSize
= VA
.getLocVT().getSizeInBits()/8;
1631 int FI
= MFI
->CreateFixedObject(ArgSize
, VA
.getLocMemOffset());
1633 // Create load nodes to retrieve arguments from the stack.
1634 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
1635 InVals
.push_back(DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
, NULL
, 0));
1641 static const unsigned GPRArgRegs
[] = {
1642 ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
1645 unsigned NumGPRs
= CCInfo
.getFirstUnallocated
1646 (GPRArgRegs
, sizeof(GPRArgRegs
) / sizeof(GPRArgRegs
[0]));
1648 unsigned Align
= MF
.getTarget().getFrameInfo()->getStackAlignment();
1649 unsigned VARegSize
= (4 - NumGPRs
) * 4;
1650 unsigned VARegSaveSize
= (VARegSize
+ Align
- 1) & ~(Align
- 1);
1651 unsigned ArgOffset
= 0;
1652 if (VARegSaveSize
) {
1653 // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
1655 // the result of va_next.
1656 AFI
->setVarArgsRegSaveSize(VARegSaveSize
);
1657 ArgOffset
= CCInfo
.getNextStackOffset();
1658 VarArgsFrameIndex
= MFI
->CreateFixedObject(VARegSaveSize
, ArgOffset
+
1659 VARegSaveSize
- VARegSize
);
1660 SDValue FIN
= DAG
.getFrameIndex(VarArgsFrameIndex
, getPointerTy());
1662 SmallVector
<SDValue
, 4> MemOps
;
1663 for (; NumGPRs
< 4; ++NumGPRs
) {
1664 TargetRegisterClass
*RC
;
1665 if (AFI
->isThumb1OnlyFunction())
1666 RC
= ARM::tGPRRegisterClass
;
1668 RC
= ARM::GPRRegisterClass
;
1670 unsigned VReg
= MF
.addLiveIn(GPRArgRegs
[NumGPRs
], RC
);
1671 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i32
);
1672 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, NULL
, 0);
1673 MemOps
.push_back(Store
);
1674 FIN
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(), FIN
,
1675 DAG
.getConstant(4, getPointerTy()));
1677 if (!MemOps
.empty())
1678 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1679 &MemOps
[0], MemOps
.size());
1681 // This will point to the next argument passed via stack.
1682 VarArgsFrameIndex
= MFI
->CreateFixedObject(4, ArgOffset
);
/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }

  return false;
}

static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
  return ( isThumb1Only && (C & ~255U) == 0) ||
         (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
}
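// Examples of what this accepts (illustrative, not exhaustive):
//   Thumb1: cmp rN, #200 is legal (0..255); cmp rN, #300 is not.
//   ARM:    cmp rN, #0xff000000 is legal (an 8-bit value rotated right by an
//           even amount), but #0x101 is not, so getARMCmp below first tries
//           adjusting the constant by one before letting it be materialized
//           in a register.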
1709 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
1710 /// the given operands.
1711 static SDValue
getARMCmp(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
1712 SDValue
&ARMCC
, SelectionDAG
&DAG
, bool isThumb1Only
,
1714 if (ConstantSDNode
*RHSC
= dyn_cast
<ConstantSDNode
>(RHS
.getNode())) {
1715 unsigned C
= RHSC
->getZExtValue();
1716 if (!isLegalCmpImmediate(C
, isThumb1Only
)) {
1717 // Constant does not fit, try adjusting it by one?
1722 if (isLegalCmpImmediate(C
-1, isThumb1Only
)) {
1723 CC
= (CC
== ISD::SETLT
) ? ISD::SETLE
: ISD::SETGT
;
1724 RHS
= DAG
.getConstant(C
-1, MVT::i32
);
1729 if (C
> 0 && isLegalCmpImmediate(C
-1, isThumb1Only
)) {
1730 CC
= (CC
== ISD::SETULT
) ? ISD::SETULE
: ISD::SETUGT
;
1731 RHS
= DAG
.getConstant(C
-1, MVT::i32
);
1736 if (isLegalCmpImmediate(C
+1, isThumb1Only
)) {
1737 CC
= (CC
== ISD::SETLE
) ? ISD::SETLT
: ISD::SETGE
;
1738 RHS
= DAG
.getConstant(C
+1, MVT::i32
);
1743 if (C
< 0xffffffff && isLegalCmpImmediate(C
+1, isThumb1Only
)) {
1744 CC
= (CC
== ISD::SETULE
) ? ISD::SETULT
: ISD::SETUGE
;
1745 RHS
= DAG
.getConstant(C
+1, MVT::i32
);
1752 ARMCC::CondCodes CondCode
= IntCCToARMCC(CC
);
1753 ARMISD::NodeType CompareType
;
1756 CompareType
= ARMISD::CMP
;
1761 CompareType
= ARMISD::CMPZ
;
1764 ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1765 return DAG
.getNode(CompareType
, dl
, MVT::Flag
, LHS
, RHS
);
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                         DebugLoc dl) {
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
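// For orientation, the nodes built here typically select to the pre-UAL VFP
// sequence (exact mnemonics depend on the operand width):
//   fcmpes/fcmped  <lhs>, <rhs>    ; ARMISD::CMPFP
// or, when the RHS is +0.0,
//   fcmpezs/fcmpezd <lhs>          ; ARMISD::CMPFPw0
// followed by
//   fmstat                         ; ARMISD::FMSTAT, copy FPSCR flags to CPSR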
1779 static SDValue
LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
,
1780 const ARMSubtarget
*ST
) {
1781 EVT VT
= Op
.getValueType();
1782 SDValue LHS
= Op
.getOperand(0);
1783 SDValue RHS
= Op
.getOperand(1);
1784 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
1785 SDValue TrueVal
= Op
.getOperand(2);
1786 SDValue FalseVal
= Op
.getOperand(3);
1787 DebugLoc dl
= Op
.getDebugLoc();
1789 if (LHS
.getValueType() == MVT::i32
) {
1791 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1792 SDValue Cmp
= getARMCmp(LHS
, RHS
, CC
, ARMCC
, DAG
, ST
->isThumb1Only(), dl
);
1793 return DAG
.getNode(ARMISD::CMOV
, dl
, VT
, FalseVal
, TrueVal
, ARMCC
, CCR
,Cmp
);
1796 ARMCC::CondCodes CondCode
, CondCode2
;
1797 if (FPCCToARMCC(CC
, CondCode
, CondCode2
))
1798 std::swap(TrueVal
, FalseVal
);
1800 SDValue ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1801 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1802 SDValue Cmp
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1803 SDValue Result
= DAG
.getNode(ARMISD::CMOV
, dl
, VT
, FalseVal
, TrueVal
,
1805 if (CondCode2
!= ARMCC::AL
) {
1806 SDValue ARMCC2
= DAG
.getConstant(CondCode2
, MVT::i32
);
1807 // FIXME: Needs another CMP because flag can have but one use.
1808 SDValue Cmp2
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1809 Result
= DAG
.getNode(ARMISD::CMOV
, dl
, VT
,
1810 Result
, TrueVal
, ARMCC2
, CCR
, Cmp2
);
1815 static SDValue
LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
,
1816 const ARMSubtarget
*ST
) {
1817 SDValue Chain
= Op
.getOperand(0);
1818 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(1))->get();
1819 SDValue LHS
= Op
.getOperand(2);
1820 SDValue RHS
= Op
.getOperand(3);
1821 SDValue Dest
= Op
.getOperand(4);
1822 DebugLoc dl
= Op
.getDebugLoc();
1824 if (LHS
.getValueType() == MVT::i32
) {
1826 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1827 SDValue Cmp
= getARMCmp(LHS
, RHS
, CC
, ARMCC
, DAG
, ST
->isThumb1Only(), dl
);
1828 return DAG
.getNode(ARMISD::BRCOND
, dl
, MVT::Other
,
1829 Chain
, Dest
, ARMCC
, CCR
,Cmp
);
1832 assert(LHS
.getValueType() == MVT::f32
|| LHS
.getValueType() == MVT::f64
);
1833 ARMCC::CondCodes CondCode
, CondCode2
;
1834 if (FPCCToARMCC(CC
, CondCode
, CondCode2
))
1835 // Swap the LHS/RHS of the comparison if needed.
1836 std::swap(LHS
, RHS
);
1838 SDValue Cmp
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1839 SDValue ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1840 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1841 SDVTList VTList
= DAG
.getVTList(MVT::Other
, MVT::Flag
);
1842 SDValue Ops
[] = { Chain
, Dest
, ARMCC
, CCR
, Cmp
};
1843 SDValue Res
= DAG
.getNode(ARMISD::BRCOND
, dl
, VTList
, Ops
, 5);
1844 if (CondCode2
!= ARMCC::AL
) {
1845 ARMCC
= DAG
.getConstant(CondCode2
, MVT::i32
);
1846 SDValue Ops
[] = { Res
, Dest
, ARMCC
, CCR
, Res
.getValue(1) };
1847 Res
= DAG
.getNode(ARMISD::BRCOND
, dl
, VTList
, Ops
, 5);
1852 SDValue
ARMTargetLowering::LowerBR_JT(SDValue Op
, SelectionDAG
&DAG
) {
1853 SDValue Chain
= Op
.getOperand(0);
1854 SDValue Table
= Op
.getOperand(1);
1855 SDValue Index
= Op
.getOperand(2);
1856 DebugLoc dl
= Op
.getDebugLoc();
1858 EVT PTy
= getPointerTy();
1859 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Table
);
1860 ARMFunctionInfo
*AFI
= DAG
.getMachineFunction().getInfo
<ARMFunctionInfo
>();
1861 SDValue UId
= DAG
.getConstant(AFI
->createJumpTableUId(), PTy
);
1862 SDValue JTI
= DAG
.getTargetJumpTable(JT
->getIndex(), PTy
);
1863 Table
= DAG
.getNode(ARMISD::WrapperJT
, dl
, MVT::i32
, JTI
, UId
);
1864 Index
= DAG
.getNode(ISD::MUL
, dl
, PTy
, Index
, DAG
.getConstant(4, PTy
));
1865 SDValue Addr
= DAG
.getNode(ISD::ADD
, dl
, PTy
, Index
, Table
);
1866 if (Subtarget
->isThumb2()) {
1867 // Thumb2 uses a two-level jump. That is, it jumps into the jump table
1868 // which does another jump to the destination. This also makes it easier
1869 // to translate it to TBB / TBH later.
1870 // FIXME: This might not work if the function is extremely large.
1871 return DAG
.getNode(ARMISD::BR2_JT
, dl
, MVT::Other
, Chain
,
1872 Addr
, Op
.getOperand(2), JTI
, UId
);
1874 if (getTargetMachine().getRelocationModel() == Reloc::PIC_
) {
1875 Addr
= DAG
.getLoad((EVT
)MVT::i32
, dl
, Chain
, Addr
, NULL
, 0);
1876 Chain
= Addr
.getValue(1);
1877 Addr
= DAG
.getNode(ISD::ADD
, dl
, PTy
, Addr
, Table
);
1878 return DAG
.getNode(ARMISD::BR_JT
, dl
, MVT::Other
, Chain
, Addr
, JTI
, UId
);
1880 Addr
= DAG
.getLoad(PTy
, dl
, Chain
, Addr
, NULL
, 0);
1881 Chain
= Addr
.getValue(1);
1882 return DAG
.getNode(ARMISD::BR_JT
, dl
, MVT::Other
, Chain
, Addr
, JTI
, UId
);
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc =
    Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
}

static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc =
    Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;

  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}
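// Roughly, for a signed i32 -> f64 conversion the two nodes above become:
//   fmsr   s0, r0        ; bit_convert: move the integer into a VFP register
//   fsitod d0, s0        ; ARMISD::SITOF: convert inside the register file
// so the value never has to round-trip through memory.  FP_TO_INT goes the
// other way: convert first (the fto[su]iz forms), then move the f32 bit
// pattern back out as an i32.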
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDValue Tmp0 = Op.getOperand(0);
  SDValue Tmp1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
  SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
SDValue
ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
    ? ARM::R7 : ARM::R11;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
  return FrameAddr;
}
1933 ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG
&DAG
, DebugLoc dl
,
1935 SDValue Dst
, SDValue Src
,
1936 SDValue Size
, unsigned Align
,
1938 const Value
*DstSV
, uint64_t DstSVOff
,
1939 const Value
*SrcSV
, uint64_t SrcSVOff
){
1940 // Do repeated 4-byte loads and stores. To be improved.
1941 // This requires 4-byte alignment.
1942 if ((Align
& 3) != 0)
  // This requires the copy size to be a constant, preferably
1945 // within a subtarget-specific limit.
1946 ConstantSDNode
*ConstantSize
= dyn_cast
<ConstantSDNode
>(Size
);
1949 uint64_t SizeVal
= ConstantSize
->getZExtValue();
1950 if (!AlwaysInline
&& SizeVal
> getSubtarget()->getMaxInlineSizeThreshold())
1953 unsigned BytesLeft
= SizeVal
& 3;
1954 unsigned NumMemOps
= SizeVal
>> 2;
1955 unsigned EmittedNumMemOps
= 0;
1957 unsigned VTSize
= 4;
1959 const unsigned MAX_LOADS_IN_LDM
= 6;
1960 SDValue TFOps
[MAX_LOADS_IN_LDM
];
1961 SDValue Loads
[MAX_LOADS_IN_LDM
];
1962 uint64_t SrcOff
= 0, DstOff
= 0;
1964 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
1965 // same number of stores. The loads and stores will get combined into
1966 // ldm/stm later on.
1967 while (EmittedNumMemOps
< NumMemOps
) {
1969 i
< MAX_LOADS_IN_LDM
&& EmittedNumMemOps
+ i
< NumMemOps
; ++i
) {
1970 Loads
[i
] = DAG
.getLoad(VT
, dl
, Chain
,
1971 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Src
,
1972 DAG
.getConstant(SrcOff
, MVT::i32
)),
1973 SrcSV
, SrcSVOff
+ SrcOff
);
1974 TFOps
[i
] = Loads
[i
].getValue(1);
1977 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
1980 i
< MAX_LOADS_IN_LDM
&& EmittedNumMemOps
+ i
< NumMemOps
; ++i
) {
1981 TFOps
[i
] = DAG
.getStore(Chain
, dl
, Loads
[i
],
1982 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Dst
,
1983 DAG
.getConstant(DstOff
, MVT::i32
)),
1984 DstSV
, DstSVOff
+ DstOff
);
1987 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
1989 EmittedNumMemOps
+= i
;
1995 // Issue loads / stores for the trailing (1 - 3) bytes.
1996 unsigned BytesLeftSave
= BytesLeft
;
1999 if (BytesLeft
>= 2) {
2007 Loads
[i
] = DAG
.getLoad(VT
, dl
, Chain
,
2008 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Src
,
2009 DAG
.getConstant(SrcOff
, MVT::i32
)),
2010 SrcSV
, SrcSVOff
+ SrcOff
);
2011 TFOps
[i
] = Loads
[i
].getValue(1);
2014 BytesLeft
-= VTSize
;
2016 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
2019 BytesLeft
= BytesLeftSave
;
2021 if (BytesLeft
>= 2) {
2029 TFOps
[i
] = DAG
.getStore(Chain
, dl
, Loads
[i
],
2030 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Dst
,
2031 DAG
.getConstant(DstOff
, MVT::i32
)),
2032 DstSV
, DstSVOff
+ DstOff
);
2035 BytesLeft
-= VTSize
;
2037 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
  SDValue Op = N->getOperand(0);
  DebugLoc dl = N->getDebugLoc();
  if (N->getValueType(0) == MVT::f64) {
    // Turn i64->f64 into FMDRR.
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(0, MVT::i32));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(1, MVT::i32));
    return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
  }

  // Turn f64->i64 into FMRRD.
  SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
                            DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
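// Illustration: bit-converting the i64 value 0x4008000000000000 (the IEEE-754
// encoding of 3.0) to f64 extracts Lo = 0x00000000 and Hi = 0x40080000 and
// feeds them to FMDRR, which selects to something like
//   fmdrr d0, r0, r1    ; d0 = {r1:r0}
// The reverse direction uses FMRRD to split a D register back into two GPRs.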
/// getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  // Zero vectors are used to represent vector negation and in those cases
  // will be implemented with the NEON VNEG instruction.  However, VNEG does
  // not support i64 elements, so sometimes the zero vectors will need to be
  // explicitly constructed.  For those cases, and potentially other uses in
  // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
  // to their dest type.  This ensures they get CSE'd.
  SDValue Vec;
  SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
  if (VT.getSizeInBits() == 64)
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
  else
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type.  This ensures they get CSE'd.
  SDValue Vec;
  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
  if (VT.getSizeInBits() == 64)
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
  else
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
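// Example: a zero vector of type v8i8 is built as
//   (v8i8 (bit_convert (v2i32 (build_vector 0, 0))))
// rather than as an eight-element v8i8 build_vector, so all 64-bit zero
// vectors share one canonical node regardless of element type.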
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // Lower vector shifts on NEON to use VSHL.
  if (VT.isVector()) {
    assert(ST->hasNEON() && "unexpected vector shift");

    // Left shifts translate directly to the vshiftu intrinsic.
    if (N->getOpcode() == ISD::SHL)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
                         N->getOperand(0), N->getOperand(1));

    assert((N->getOpcode() == ISD::SRA ||
            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

    // NEON uses the same intrinsics for both left and right shifts.  For
    // right shifts, the shift amounts are negative, so negate the vector of
    // shift amounts.
    EVT ShiftVT = N->getOperand(1).getValueType();
    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                       getZeroVector(ShiftVT, DAG, dl),
                                       N->getOperand(1));
    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
                               Intrinsic::arm_neon_vshifts :
                               Intrinsic::arm_neon_vshiftu);
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(vshiftInt, MVT::i32),
                       N->getOperand(0), NegatedCount);
  }

  assert(VT == MVT::i64 &&
         (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG : ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
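// Worked example: (srl i64:x, 1) with x = 0x00000003_00000001 becomes
//   Hi' = Hi >> 1            ; flag-setting lsr #1, bit 0 of Hi -> carry
//   Lo' = carry:Lo >> 1      ; rrx rotates the saved carry into bit 31
// giving Hi' = 0x00000001 and Lo' = 0x80000000, i.e. 0x00000001_80000000,
// which is exactly x >> 1.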
2161 static SDValue
LowerVSETCC(SDValue Op
, SelectionDAG
&DAG
) {
2162 SDValue TmpOp0
, TmpOp1
;
2163 bool Invert
= false;
2167 SDValue Op0
= Op
.getOperand(0);
2168 SDValue Op1
= Op
.getOperand(1);
2169 SDValue CC
= Op
.getOperand(2);
2170 EVT VT
= Op
.getValueType();
2171 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
2172 DebugLoc dl
= Op
.getDebugLoc();
2174 if (Op
.getOperand(1).getValueType().isFloatingPoint()) {
2175 switch (SetCCOpcode
) {
2176 default: llvm_unreachable("Illegal FP comparison"); break;
2178 case ISD::SETNE
: Invert
= true; // Fallthrough
2180 case ISD::SETEQ
: Opc
= ARMISD::VCEQ
; break;
2182 case ISD::SETLT
: Swap
= true; // Fallthrough
2184 case ISD::SETGT
: Opc
= ARMISD::VCGT
; break;
2186 case ISD::SETLE
: Swap
= true; // Fallthrough
2188 case ISD::SETGE
: Opc
= ARMISD::VCGE
; break;
2189 case ISD::SETUGE
: Swap
= true; // Fallthrough
2190 case ISD::SETULE
: Invert
= true; Opc
= ARMISD::VCGT
; break;
2191 case ISD::SETUGT
: Swap
= true; // Fallthrough
2192 case ISD::SETULT
: Invert
= true; Opc
= ARMISD::VCGE
; break;
2193 case ISD::SETUEQ
: Invert
= true; // Fallthrough
2195 // Expand this to (OLT | OGT).
2199 Op0
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp1
, TmpOp0
);
2200 Op1
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp0
, TmpOp1
);
2202 case ISD::SETUO
: Invert
= true; // Fallthrough
2204 // Expand this to (OLT | OGE).
2208 Op0
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp1
, TmpOp0
);
2209 Op1
= DAG
.getNode(ARMISD::VCGE
, dl
, VT
, TmpOp0
, TmpOp1
);
2213 // Integer comparisons.
2214 switch (SetCCOpcode
) {
2215 default: llvm_unreachable("Illegal integer comparison"); break;
2216 case ISD::SETNE
: Invert
= true;
2217 case ISD::SETEQ
: Opc
= ARMISD::VCEQ
; break;
2218 case ISD::SETLT
: Swap
= true;
2219 case ISD::SETGT
: Opc
= ARMISD::VCGT
; break;
2220 case ISD::SETLE
: Swap
= true;
2221 case ISD::SETGE
: Opc
= ARMISD::VCGE
; break;
2222 case ISD::SETULT
: Swap
= true;
2223 case ISD::SETUGT
: Opc
= ARMISD::VCGTU
; break;
2224 case ISD::SETULE
: Swap
= true;
2225 case ISD::SETUGE
: Opc
= ARMISD::VCGEU
; break;
2228 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2229 if (Opc
== ARMISD::VCEQ
) {
2232 if (ISD::isBuildVectorAllZeros(Op1
.getNode()))
2234 else if (ISD::isBuildVectorAllZeros(Op0
.getNode()))
2237 // Ignore bitconvert.
2238 if (AndOp
.getNode() && AndOp
.getOpcode() == ISD::BIT_CONVERT
)
2239 AndOp
= AndOp
.getOperand(0);
2241 if (AndOp
.getNode() && AndOp
.getOpcode() == ISD::AND
) {
2243 Op0
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, AndOp
.getOperand(0));
2244 Op1
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, AndOp
.getOperand(1));
2251 std::swap(Op0
, Op1
);
2253 SDValue Result
= DAG
.getNode(Opc
, dl
, VT
, Op0
, Op1
);
2256 Result
= DAG
.getNOT(dl
, Result
, VT
);
/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
/// VMOV instruction, and if so, return the constant being splatted.
static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
                           unsigned SplatBitSize, SelectionDAG &DAG) {
  switch (SplatBitSize) {
  case 8:
    // Any 1-byte value is OK.
    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
    return DAG.getTargetConstant(SplatBits, MVT::i8);

  case 16:
    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
    if ((SplatBits & ~0xff) == 0 ||
        (SplatBits & ~0xff00) == 0)
      return DAG.getTargetConstant(SplatBits, MVT::i16);
    break;

  case 32:
    // NEON's 32-bit VMOV supports splat values where:
    // * only one byte is nonzero, or
    // * the least significant byte is 0xff and the second byte is nonzero, or
    // * the least significant 2 bytes are 0xff and the third is nonzero.
    if ((SplatBits & ~0xff) == 0 ||
        (SplatBits & ~0xff00) == 0 ||
        (SplatBits & ~0xff0000) == 0 ||
        (SplatBits & ~0xff000000) == 0)
      return DAG.getTargetConstant(SplatBits, MVT::i32);

    if ((SplatBits & ~0xffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xff) == 0xff)
      return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);

    if ((SplatBits & ~0xffffff) == 0 &&
        ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
      return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);

    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
    // VMOV.I32.  A (very) minor optimization would be to replicate the value
    // and fall through here to test for a valid 64-bit splat.  But, then the
    // caller would also need to check and handle the change in size.
    break;

  case 64: {
    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
    uint64_t BitMask = 0xff;
    uint64_t Val = 0;
    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
      if (((SplatBits | SplatUndef) & BitMask) == BitMask)
        Val |= BitMask;
      else if ((SplatBits & BitMask) != 0)
        return SDValue();
      BitMask <<= 8;
    }
    return DAG.getTargetConstant(Val, MVT::i64);
  }

  default:
    llvm_unreachable("unexpected size for isVMOVSplat");
    break;
  }

  return SDValue();
}
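// A few illustrative 32-bit splat values:
//   0x00000041 -> accepted (only one nonzero byte).
//   0x000041ff -> accepted via the "second byte plus all-ones low byte" form,
//                 returned as the constant 0x41ff.
//   0x00410041 -> rejected here (two nonzero bytes in a pattern VMOV.I32
//                 cannot encode), so the caller falls back to other lowering.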
2326 /// getVMOVImm - If this is a build_vector of constants which can be
2327 /// formed by using a VMOV instruction of the specified element size,
2328 /// return the constant being splatted. The ByteSize field indicates the
2329 /// number of bytes of each element [1248].
2330 SDValue
ARM::getVMOVImm(SDNode
*N
, unsigned ByteSize
, SelectionDAG
&DAG
) {
2331 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(N
);
2332 APInt SplatBits
, SplatUndef
;
2333 unsigned SplatBitSize
;
2335 if (! BVN
|| ! BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
,
2336 HasAnyUndefs
, ByteSize
* 8))
2339 if (SplatBitSize
> ByteSize
* 8)
2342 return isVMOVSplat(SplatBits
.getZExtValue(), SplatUndef
.getZExtValue(),
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize.  (The order of the elements
/// within each block of the vector is reversed.)
static bool isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) {
  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  unsigned BlockElts = N->getMaskElt(0) + 1;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0; i < NumElts; ++i) {
    if ((unsigned) N->getMaskElt(i) !=
        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
      return false;
  }

  return true;
}
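// Example: for a v8i8 shuffle, the mask <3,2,1,0,7,6,5,4> reverses each
// 32-bit block (BlockElts = 4), so it matches VREV32.8; the full reversal
// <7,6,5,4,3,2,1,0> has BlockElts = 8 and matches VREV64.8 instead.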
2370 static SDValue
BuildSplat(SDValue Val
, EVT VT
, SelectionDAG
&DAG
, DebugLoc dl
) {
2371 // Canonicalize all-zeros and all-ones vectors.
2372 ConstantSDNode
*ConstVal
= cast
<ConstantSDNode
>(Val
.getNode());
2373 if (ConstVal
->isNullValue())
2374 return getZeroVector(VT
, DAG
, dl
);
2375 if (ConstVal
->isAllOnesValue())
2376 return getOnesVector(VT
, DAG
, dl
);
2379 if (VT
.is64BitVector()) {
2380 switch (Val
.getValueType().getSizeInBits()) {
2381 case 8: CanonicalVT
= MVT::v8i8
; break;
2382 case 16: CanonicalVT
= MVT::v4i16
; break;
2383 case 32: CanonicalVT
= MVT::v2i32
; break;
2384 case 64: CanonicalVT
= MVT::v1i64
; break;
2385 default: llvm_unreachable("unexpected splat element type"); break;
2388 assert(VT
.is128BitVector() && "unknown splat vector size");
2389 switch (Val
.getValueType().getSizeInBits()) {
2390 case 8: CanonicalVT
= MVT::v16i8
; break;
2391 case 16: CanonicalVT
= MVT::v8i16
; break;
2392 case 32: CanonicalVT
= MVT::v4i32
; break;
2393 case 64: CanonicalVT
= MVT::v2i64
; break;
2394 default: llvm_unreachable("unexpected splat element type"); break;
2398 // Build a canonical splat for this value.
2399 SmallVector
<SDValue
, 8> Ops
;
2400 Ops
.assign(CanonicalVT
.getVectorNumElements(), Val
);
2401 SDValue Res
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, CanonicalVT
, &Ops
[0],
2403 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, Res
);
2406 // If this is a case we can't handle, return null and let the default
2407 // expansion code take care of it.
2408 static SDValue
LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) {
2409 BuildVectorSDNode
*BVN
= cast
<BuildVectorSDNode
>(Op
.getNode());
2410 DebugLoc dl
= Op
.getDebugLoc();
2411 EVT VT
= Op
.getValueType();
2413 APInt SplatBits
, SplatUndef
;
2414 unsigned SplatBitSize
;
2416 if (BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
)) {
2417 SDValue Val
= isVMOVSplat(SplatBits
.getZExtValue(),
2418 SplatUndef
.getZExtValue(), SplatBitSize
, DAG
);
2420 return BuildSplat(Val
, VT
, DAG
, dl
);
2423 // If there are only 2 elements in a 128-bit vector, insert them into an
2424 // undef vector. This handles the common case for 128-bit vector argument
2425 // passing, where the insertions should be translated to subreg accesses
2426 // with no real instructions.
2427 if (VT
.is128BitVector() && Op
.getNumOperands() == 2) {
2428 SDValue Val
= DAG
.getUNDEF(VT
);
2429 SDValue Op0
= Op
.getOperand(0);
2430 SDValue Op1
= Op
.getOperand(1);
2431 if (Op0
.getOpcode() != ISD::UNDEF
)
2432 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, VT
, Val
, Op0
,
2433 DAG
.getIntPtrConstant(0));
2434 if (Op1
.getOpcode() != ISD::UNDEF
)
2435 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, VT
, Val
, Op1
,
2436 DAG
.getIntPtrConstant(1));
2443 static SDValue
LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
) {
2444 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(Op
.getNode());
2445 DebugLoc dl
= Op
.getDebugLoc();
2446 EVT VT
= Op
.getValueType();
2448 // Convert shuffles that are directly supported on NEON to target-specific
2449 // DAG nodes, instead of keeping them as shuffles and matching them again
2450 // during code selection. This is more efficient and avoids the possibility
2451 // of inconsistencies between legalization and selection.
2452 // FIXME: floating-point vectors should be canonicalized to integer vectors
  // of the same type so that they get CSE'd properly.
2454 if (SVN
->isSplat()) {
2455 int Lane
= SVN
->getSplatIndex();
2456 SDValue Op0
= SVN
->getOperand(0);
2457 if (Lane
== 0 && Op0
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
2458 return DAG
.getNode(ARMISD::VDUP
, dl
, VT
, Op0
.getOperand(0));
2460 return DAG
.getNode(ARMISD::VDUPLANE
, dl
, VT
, SVN
->getOperand(0),
2461 DAG
.getConstant(Lane
, MVT::i32
));
2463 if (isVREVMask(SVN
, 64))
2464 return DAG
.getNode(ARMISD::VREV64
, dl
, VT
, SVN
->getOperand(0));
2465 if (isVREVMask(SVN
, 32))
2466 return DAG
.getNode(ARMISD::VREV32
, dl
, VT
, SVN
->getOperand(0));
2467 if (isVREVMask(SVN
, 16))
2468 return DAG
.getNode(ARMISD::VREV16
, dl
, VT
, SVN
->getOperand(0));
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  return Op;
}

static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  assert((VT == MVT::i8 || VT == MVT::i16) &&
         "unexpected type for custom-lowering vector extract");
  SDValue Vec = Op.getOperand(0);
  SDValue Lane = Op.getOperand(1);
  Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
  Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  // The only time a CONCAT_VECTORS operation can have legal types is when
  // two 64-bit vectors are concatenated to a 128-bit vector.
  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
         "unexpected CONCAT_VECTORS");
  DebugLoc dl = Op.getDebugLoc();
  SDValue Val = DAG.getUNDEF(MVT::v2f64);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  if (Op0.getOpcode() != ISD::UNDEF)
    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
                      DAG.getIntPtrConstant(0));
  if (Op1.getOpcode() != ISD::UNDEF)
    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
                      DAG.getIntPtrConstant(1));
  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
2509 SDValue
ARMTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) {
2510 switch (Op
.getOpcode()) {
2511 default: llvm_unreachable("Don't know how to custom lower this!");
2512 case ISD::ConstantPool
: return LowerConstantPool(Op
, DAG
);
2513 case ISD::GlobalAddress
:
2514 return Subtarget
->isTargetDarwin() ? LowerGlobalAddressDarwin(Op
, DAG
) :
2515 LowerGlobalAddressELF(Op
, DAG
);
2516 case ISD::GlobalTLSAddress
: return LowerGlobalTLSAddress(Op
, DAG
);
2517 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
, Subtarget
);
2518 case ISD::BR_CC
: return LowerBR_CC(Op
, DAG
, Subtarget
);
2519 case ISD::BR_JT
: return LowerBR_JT(Op
, DAG
);
2520 case ISD::DYNAMIC_STACKALLOC
: return LowerDYNAMIC_STACKALLOC(Op
, DAG
);
2521 case ISD::VASTART
: return LowerVASTART(Op
, DAG
, VarArgsFrameIndex
);
2522 case ISD::SINT_TO_FP
:
2523 case ISD::UINT_TO_FP
: return LowerINT_TO_FP(Op
, DAG
);
2524 case ISD::FP_TO_SINT
:
2525 case ISD::FP_TO_UINT
: return LowerFP_TO_INT(Op
, DAG
);
2526 case ISD::FCOPYSIGN
: return LowerFCOPYSIGN(Op
, DAG
);
2527 case ISD::RETURNADDR
: break;
2528 case ISD::FRAMEADDR
: return LowerFRAMEADDR(Op
, DAG
);
2529 case ISD::GLOBAL_OFFSET_TABLE
: return LowerGLOBAL_OFFSET_TABLE(Op
, DAG
);
2530 case ISD::INTRINSIC_VOID
:
2531 case ISD::INTRINSIC_W_CHAIN
: return LowerINTRINSIC_W_CHAIN(Op
, DAG
);
2532 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
2533 case ISD::BIT_CONVERT
: return ExpandBIT_CONVERT(Op
.getNode(), DAG
);
2536 case ISD::SRA
: return LowerShift(Op
.getNode(), DAG
, Subtarget
);
2537 case ISD::VSETCC
: return LowerVSETCC(Op
, DAG
);
2538 case ISD::BUILD_VECTOR
: return LowerBUILD_VECTOR(Op
, DAG
);
2539 case ISD::VECTOR_SHUFFLE
: return LowerVECTOR_SHUFFLE(Op
, DAG
);
2540 case ISD::SCALAR_TO_VECTOR
: return LowerSCALAR_TO_VECTOR(Op
, DAG
);
2541 case ISD::EXTRACT_VECTOR_ELT
: return LowerEXTRACT_VECTOR_ELT(Op
, DAG
);
2542 case ISD::CONCAT_VECTORS
: return LowerCONCAT_VECTORS(Op
, DAG
);
2547 /// ReplaceNodeResults - Replace the results of node with an illegal result
2548 /// type with new values built out of custom code.
2549 void ARMTargetLowering::ReplaceNodeResults(SDNode
*N
,
2550 SmallVectorImpl
<SDValue
>&Results
,
2551 SelectionDAG
&DAG
) {
2552 switch (N
->getOpcode()) {
2554 llvm_unreachable("Don't know how to custom expand this!");
2556 case ISD::BIT_CONVERT
:
2557 Results
.push_back(ExpandBIT_CONVERT(N
, DAG
));
2561 SDValue Res
= LowerShift(N
, DAG
, Subtarget
);
2563 Results
.push_back(Res
);
2569 //===----------------------------------------------------------------------===//
2570 // ARM Scheduler Hooks
2571 //===----------------------------------------------------------------------===//
2574 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr
*MI
,
2575 MachineBasicBlock
*BB
) const {
2576 const TargetInstrInfo
*TII
= getTargetMachine().getInstrInfo();
2577 DebugLoc dl
= MI
->getDebugLoc();
2578 switch (MI
->getOpcode()) {
2580 llvm_unreachable("Unexpected instr type to insert");
2581 case ARM::tMOVCCr_pseudo
: {
2582 // To "insert" a SELECT_CC instruction, we actually have to insert the
2583 // diamond control-flow pattern. The incoming instruction knows the
2584 // destination vreg to set, the condition code register to branch on, the
2585 // true/false values to select between, and a branch opcode to use.
2586 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
2587 MachineFunction::iterator It
= BB
;
2593 // cmpTY ccX, r1, r2
2595 // fallthrough --> copy0MBB
2596 MachineBasicBlock
*thisMBB
= BB
;
2597 MachineFunction
*F
= BB
->getParent();
2598 MachineBasicBlock
*copy0MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2599 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2600 BuildMI(BB
, dl
, TII
->get(ARM::tBcc
)).addMBB(sinkMBB
)
2601 .addImm(MI
->getOperand(3).getImm()).addReg(MI
->getOperand(4).getReg());
2602 F
->insert(It
, copy0MBB
);
2603 F
->insert(It
, sinkMBB
);
2604 // Update machine-CFG edges by first adding all successors of the current
2605 // block to the new block which will contain the Phi node for the select.
2606 for(MachineBasicBlock::succ_iterator i
= BB
->succ_begin(),
2607 e
= BB
->succ_end(); i
!= e
; ++i
)
2608 sinkMBB
->addSuccessor(*i
);
2609 // Next, remove all successors of the current block, and add the true
2610 // and fallthrough blocks as its successors.
2611 while(!BB
->succ_empty())
2612 BB
->removeSuccessor(BB
->succ_begin());
2613 BB
->addSuccessor(copy0MBB
);
2614 BB
->addSuccessor(sinkMBB
);
2617 // %FalseValue = ...
2618 // # fallthrough to sinkMBB
2621 // Update machine-CFG edges
2622 BB
->addSuccessor(sinkMBB
);
2625 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
2628 BuildMI(BB
, dl
, TII
->get(ARM::PHI
), MI
->getOperand(0).getReg())
2629 .addReg(MI
->getOperand(1).getReg()).addMBB(copy0MBB
)
2630 .addReg(MI
->getOperand(2).getReg()).addMBB(thisMBB
);
2632 F
->DeleteMachineInstr(MI
); // The pseudo instruction is gone now.
2639 case ARM::t2SUBrSPi_
:
2640 case ARM::t2SUBrSPi12_
:
2641 case ARM::t2SUBrSPs_
: {
2642 MachineFunction
*MF
= BB
->getParent();
2643 unsigned DstReg
= MI
->getOperand(0).getReg();
2644 unsigned SrcReg
= MI
->getOperand(1).getReg();
2645 bool DstIsDead
= MI
->getOperand(0).isDead();
2646 bool SrcIsKill
= MI
->getOperand(1).isKill();
2648 if (SrcReg
!= ARM::SP
) {
2649 // Copy the source to SP from virtual register.
2650 const TargetRegisterClass
*RC
= MF
->getRegInfo().getRegClass(SrcReg
);
2651 unsigned CopyOpc
= (RC
== ARM::tGPRRegisterClass
)
2652 ? ARM::tMOVtgpr2gpr
: ARM::tMOVgpr2gpr
;
2653 BuildMI(BB
, dl
, TII
->get(CopyOpc
), ARM::SP
)
2654 .addReg(SrcReg
, getKillRegState(SrcIsKill
));
2658 bool NeedPred
= false, NeedCC
= false, NeedOp3
= false;
2659 switch (MI
->getOpcode()) {
2661 llvm_unreachable("Unexpected pseudo instruction!");
2667 OpOpc
= ARM::tADDspr
;
2670 OpOpc
= ARM::tSUBspi
;
2672 case ARM::t2SUBrSPi_
:
2673 OpOpc
= ARM::t2SUBrSPi
;
2674 NeedPred
= true; NeedCC
= true;
2676 case ARM::t2SUBrSPi12_
:
2677 OpOpc
= ARM::t2SUBrSPi12
;
2680 case ARM::t2SUBrSPs_
:
2681 OpOpc
= ARM::t2SUBrSPs
;
2682 NeedPred
= true; NeedCC
= true; NeedOp3
= true;
2685 MachineInstrBuilder MIB
= BuildMI(BB
, dl
, TII
->get(OpOpc
), ARM::SP
);
2686 if (OpOpc
== ARM::tAND
)
2687 AddDefaultT1CC(MIB
);
2688 MIB
.addReg(ARM::SP
);
2689 MIB
.addOperand(MI
->getOperand(2));
2691 MIB
.addOperand(MI
->getOperand(3));
2693 AddDefaultPred(MIB
);
2697 // Copy the result from SP to virtual register.
2698 const TargetRegisterClass
*RC
= MF
->getRegInfo().getRegClass(DstReg
);
2699 unsigned CopyOpc
= (RC
== ARM::tGPRRegisterClass
)
2700 ? ARM::tMOVgpr2tgpr
: ARM::tMOVgpr2gpr
;
2701 BuildMI(BB
, dl
, TII
->get(CopyOpc
))
2702 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstIsDead
))
2704 MF
->DeleteMachineInstr(MI
); // The pseudo instruction is gone now.
2710 //===----------------------------------------------------------------------===//
2711 // ARM Optimization Hooks
2712 //===----------------------------------------------------------------------===//
2715 SDValue
combineSelectAndUse(SDNode
*N
, SDValue Slct
, SDValue OtherOp
,
2716 TargetLowering::DAGCombinerInfo
&DCI
) {
2717 SelectionDAG
&DAG
= DCI
.DAG
;
2718 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
2719 EVT VT
= N
->getValueType(0);
2720 unsigned Opc
= N
->getOpcode();
2721 bool isSlctCC
= Slct
.getOpcode() == ISD::SELECT_CC
;
2722 SDValue LHS
= isSlctCC
? Slct
.getOperand(2) : Slct
.getOperand(1);
2723 SDValue RHS
= isSlctCC
? Slct
.getOperand(3) : Slct
.getOperand(2);
2724 ISD::CondCode CC
= ISD::SETCC_INVALID
;
2727 CC
= cast
<CondCodeSDNode
>(Slct
.getOperand(4))->get();
2729 SDValue CCOp
= Slct
.getOperand(0);
2730 if (CCOp
.getOpcode() == ISD::SETCC
)
2731 CC
= cast
<CondCodeSDNode
>(CCOp
.getOperand(2))->get();
2734 bool DoXform
= false;
2736 assert ((Opc
== ISD::ADD
|| (Opc
== ISD::SUB
&& Slct
== N
->getOperand(1))) &&
2739 if (LHS
.getOpcode() == ISD::Constant
&&
2740 cast
<ConstantSDNode
>(LHS
)->isNullValue()) {
2742 } else if (CC
!= ISD::SETCC_INVALID
&&
2743 RHS
.getOpcode() == ISD::Constant
&&
2744 cast
<ConstantSDNode
>(RHS
)->isNullValue()) {
2745 std::swap(LHS
, RHS
);
2746 SDValue Op0
= Slct
.getOperand(0);
2747 EVT OpVT
= isSlctCC
? Op0
.getValueType() :
2748 Op0
.getOperand(0).getValueType();
2749 bool isInt
= OpVT
.isInteger();
2750 CC
= ISD::getSetCCInverse(CC
, isInt
);
2752 if (!TLI
.isCondCodeLegal(CC
, OpVT
))
2753 return SDValue(); // Inverse operator isn't legal.
2760 SDValue Result
= DAG
.getNode(Opc
, RHS
.getDebugLoc(), VT
, OtherOp
, RHS
);
2762 return DAG
.getSelectCC(N
->getDebugLoc(), OtherOp
, Result
,
2763 Slct
.getOperand(0), Slct
.getOperand(1), CC
);
2764 SDValue CCOp
= Slct
.getOperand(0);
2766 CCOp
= DAG
.getSetCC(Slct
.getDebugLoc(), CCOp
.getValueType(),
2767 CCOp
.getOperand(0), CCOp
.getOperand(1), CC
);
2768 return DAG
.getNode(ISD::SELECT
, N
->getDebugLoc(), VT
,
2769 CCOp
, OtherOp
, Result
);
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // added by evan in r37685 with no testcase.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}

/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static SDValue PerformSUBCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // added by evan in r37685 with no testcase.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}
/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
static SDValue PerformFMRRDCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  // fmrrd(fmdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::FMDRR)
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
  return SDValue();
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BIT_CONVERT)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                      HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation.  That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the value count must be negative.  The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}
2867 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
2868 static SDValue
PerformIntrinsicCombine(SDNode
*N
, SelectionDAG
&DAG
) {
2869 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
2872 // Don't do anything for most intrinsics.
2875 // Vector shifts: check for immediate versions and lower them.
2876 // Note: This is done during DAG combining instead of DAG legalizing because
2877 // the build_vectors for 64-bit vector element shift counts are generally
2878 // not legal, and it is hard to see their values after they get legalized to
2879 // loads from a constant pool.
2880 case Intrinsic::arm_neon_vshifts
:
2881 case Intrinsic::arm_neon_vshiftu
:
2882 case Intrinsic::arm_neon_vshiftls
:
2883 case Intrinsic::arm_neon_vshiftlu
:
2884 case Intrinsic::arm_neon_vshiftn
:
2885 case Intrinsic::arm_neon_vrshifts
:
2886 case Intrinsic::arm_neon_vrshiftu
:
2887 case Intrinsic::arm_neon_vrshiftn
:
2888 case Intrinsic::arm_neon_vqshifts
:
2889 case Intrinsic::arm_neon_vqshiftu
:
2890 case Intrinsic::arm_neon_vqshiftsu
:
2891 case Intrinsic::arm_neon_vqshiftns
:
2892 case Intrinsic::arm_neon_vqshiftnu
:
2893 case Intrinsic::arm_neon_vqshiftnsu
:
2894 case Intrinsic::arm_neon_vqrshiftns
:
2895 case Intrinsic::arm_neon_vqrshiftnu
:
2896 case Intrinsic::arm_neon_vqrshiftnsu
: {
2897 EVT VT
= N
->getOperand(1).getValueType();
2899 unsigned VShiftOpc
= 0;
2902 case Intrinsic::arm_neon_vshifts
:
2903 case Intrinsic::arm_neon_vshiftu
:
2904 if (isVShiftLImm(N
->getOperand(2), VT
, false, Cnt
)) {
2905 VShiftOpc
= ARMISD::VSHL
;
2908 if (isVShiftRImm(N
->getOperand(2), VT
, false, true, Cnt
)) {
2909 VShiftOpc
= (IntNo
== Intrinsic::arm_neon_vshifts
?
2910 ARMISD::VSHRs
: ARMISD::VSHRu
);
2915 case Intrinsic::arm_neon_vshiftls
:
2916 case Intrinsic::arm_neon_vshiftlu
:
2917 if (isVShiftLImm(N
->getOperand(2), VT
, true, Cnt
))
2919 llvm_unreachable("invalid shift count for vshll intrinsic");
2921 case Intrinsic::arm_neon_vrshifts
:
2922 case Intrinsic::arm_neon_vrshiftu
:
2923 if (isVShiftRImm(N
->getOperand(2), VT
, false, true, Cnt
))
2927 case Intrinsic::arm_neon_vqshifts
:
2928 case Intrinsic::arm_neon_vqshiftu
:
2929 if (isVShiftLImm(N
->getOperand(2), VT
, false, Cnt
))
2933 case Intrinsic::arm_neon_vqshiftsu
:
2934 if (isVShiftLImm(N
->getOperand(2), VT
, false, Cnt
))
2936 llvm_unreachable("invalid shift count for vqshlu intrinsic");
2938 case Intrinsic::arm_neon_vshiftn
:
2939 case Intrinsic::arm_neon_vrshiftn
:
2940 case Intrinsic::arm_neon_vqshiftns
:
2941 case Intrinsic::arm_neon_vqshiftnu
:
2942 case Intrinsic::arm_neon_vqshiftnsu
:
2943 case Intrinsic::arm_neon_vqrshiftns
:
2944 case Intrinsic::arm_neon_vqrshiftnu
:
2945 case Intrinsic::arm_neon_vqrshiftnsu
:
2946 // Narrowing shifts require an immediate right shift.
2947 if (isVShiftRImm(N
->getOperand(2), VT
, true, true, Cnt
))
2949 llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
2952 llvm_unreachable("unhandled vector shift");
2956 case Intrinsic::arm_neon_vshifts
:
2957 case Intrinsic::arm_neon_vshiftu
:
2958 // Opcode already set above.
2960 case Intrinsic::arm_neon_vshiftls
:
2961 case Intrinsic::arm_neon_vshiftlu
:
2962 if (Cnt
== VT
.getVectorElementType().getSizeInBits())
2963 VShiftOpc
= ARMISD::VSHLLi
;
2965 VShiftOpc
= (IntNo
== Intrinsic::arm_neon_vshiftls
?
2966 ARMISD::VSHLLs
: ARMISD::VSHLLu
);
2968 case Intrinsic::arm_neon_vshiftn
:
2969 VShiftOpc
= ARMISD::VSHRN
; break;
2970 case Intrinsic::arm_neon_vrshifts
:
2971 VShiftOpc
= ARMISD::VRSHRs
; break;
2972 case Intrinsic::arm_neon_vrshiftu
:
2973 VShiftOpc
= ARMISD::VRSHRu
; break;
2974 case Intrinsic::arm_neon_vrshiftn
:
2975 VShiftOpc
= ARMISD::VRSHRN
; break;
2976 case Intrinsic::arm_neon_vqshifts
:
2977 VShiftOpc
= ARMISD::VQSHLs
; break;
2978 case Intrinsic::arm_neon_vqshiftu
:
2979 VShiftOpc
= ARMISD::VQSHLu
; break;
2980 case Intrinsic::arm_neon_vqshiftsu
:
2981 VShiftOpc
= ARMISD::VQSHLsu
; break;
2982 case Intrinsic::arm_neon_vqshiftns
:
2983 VShiftOpc
= ARMISD::VQSHRNs
; break;
2984 case Intrinsic::arm_neon_vqshiftnu
:
2985 VShiftOpc
= ARMISD::VQSHRNu
; break;
2986 case Intrinsic::arm_neon_vqshiftnsu
:
2987 VShiftOpc
= ARMISD::VQSHRNsu
; break;
2988 case Intrinsic::arm_neon_vqrshiftns
:
2989 VShiftOpc
= ARMISD::VQRSHRNs
; break;
2990 case Intrinsic::arm_neon_vqrshiftnu
:
2991 VShiftOpc
= ARMISD::VQRSHRNu
; break;
2992 case Intrinsic::arm_neon_vqrshiftnsu
:
2993 VShiftOpc
= ARMISD::VQRSHRNsu
; break;
2996 return DAG
.getNode(VShiftOpc
, N
->getDebugLoc(), N
->getValueType(0),
2997 N
->getOperand(1), DAG
.getConstant(Cnt
, MVT::i32
));
3000 case Intrinsic::arm_neon_vshiftins
: {
3001 EVT VT
= N
->getOperand(1).getValueType();
3003 unsigned VShiftOpc
= 0;
3005 if (isVShiftLImm(N
->getOperand(3), VT
, false, Cnt
))
3006 VShiftOpc
= ARMISD::VSLI
;
3007 else if (isVShiftRImm(N
->getOperand(3), VT
, false, true, Cnt
))
3008 VShiftOpc
= ARMISD::VSRI
;
3010 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
3013 return DAG
.getNode(VShiftOpc
, N
->getDebugLoc(), N
->getValueType(0),
3014 N
->getOperand(1), N
->getOperand(2),
3015 DAG
.getConstant(Cnt
, MVT::i32
));
3018 case Intrinsic::arm_neon_vqrshifts
:
3019 case Intrinsic::arm_neon_vqrshiftu
:
3020 // No immediate versions of these to check for.
3027 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
3028 /// lowers them. As with the vector shift intrinsics, this is done during DAG
3029 /// combining instead of DAG legalizing because the build_vectors for 64-bit
3030 /// vector element shift counts are generally not legal, and it is hard to see
3031 /// their values after they get legalized to loads from a constant pool.
3032 static SDValue
PerformShiftCombine(SDNode
*N
, SelectionDAG
&DAG
,
3033 const ARMSubtarget
*ST
) {
3034 EVT VT
= N
->getValueType(0);
3036 // Nothing to be done for scalar shifts.
3037 if (! VT
.isVector())
3040 assert(ST
->hasNEON() && "unexpected vector shift");
3043 switch (N
->getOpcode()) {
3044 default: llvm_unreachable("unexpected shift opcode");
3047 if (isVShiftLImm(N
->getOperand(1), VT
, false, Cnt
))
3048 return DAG
.getNode(ARMISD::VSHL
, N
->getDebugLoc(), VT
, N
->getOperand(0),
3049 DAG
.getConstant(Cnt
, MVT::i32
));
3054 if (isVShiftRImm(N
->getOperand(1), VT
, false, false, Cnt
)) {
3055 unsigned VShiftOpc
= (N
->getOpcode() == ISD::SRA
?
3056 ARMISD::VSHRs
: ARMISD::VSHRu
);
3057 return DAG
.getNode(VShiftOpc
, N
->getDebugLoc(), VT
, N
->getOperand(0),
3058 DAG
.getConstant(Cnt
, MVT::i32
));
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements.  NEON supports these directly.  They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType())) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
    }
  }

  return SDValue();
}

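// Worked example (illustration only): for
//   %e = extractelement <8 x i8> %v, i32 1
//   %z = zext i8 %e to i32
// the zero-extension is folded into the lane read as ARMISD::VGETLANEu,
// which selects to a single "vmov.u8 rN, dM[1]" instead of an extract
// followed by a separate zero-extend.
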
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:        return PerformADDCombine(N, DCI);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ARMISD::FMRRD:   return PerformFMRRDCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN:
    return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return PerformExtendCombine(N, DCI.DAG, Subtarget);
  }
  return SDValue();
}

bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->hasV6Ops())
    // Pre-v6 does not support unaligned mem access.
    return false;
  else if (!Subtarget->hasV7Ops()) {
    // v6 may or may not support unaligned mem access.
    if (!Subtarget->isTargetDarwin())
      return false;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  // FIXME: VLD1 etc with standard alignment is legal.
  }
}

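// Illustrative consequence (not original source text): on an ARMv7 target,
// or a Darwin ARMv6 target, allowsUnalignedMemoryAccesses(MVT::i32) returns
// true, so a misaligned 32-bit load can stay a single LDR; on ARMv5TE it
// returns false and generic legalization instead expands the access into
// smaller aligned loads combined with shifts and ORs.
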
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    // Scale == 1;
    break;
  case MVT::i16:
    Scale = 2;
    break;
  case MVT::i32:
    Scale = 4;
    break;
  }

  if ((V & (Scale - 1)) != 0)
    return false;
  V /= Scale;
  return V == (V & ((1LL << 5) - 1));
}

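// Worked example (illustration only): for an i32 access, Scale is 4, so the
// offset must be a non-negative multiple of 4 whose scaled value fits in the
// 5-bit field: 0, 4, ..., 124 are accepted, while 2 (misaligned) and 128
// (out of range) are rejected.  This mirrors the Thumb-1 word-offset
// immediate encoding.
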
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  bool isNeg = false;
  if (V < 0) {
    isNeg = true;
    V = - V;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // + imm12 or - imm8
    if (isNeg)
      return V == (V & ((1LL << 8) - 1));
    return V == (V & ((1LL << 12) - 1));
  case MVT::f32:
  case MVT::f64:
    // Same as ARM mode. FIXME: NEON?
    if (!Subtarget->hasVFP2())
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

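// Worked example (illustration only): Thumb-2 integer loads accept an
// asymmetric offset range, so +4095 is legal (imm12) but -4095 is not;
// negative offsets are limited to -255 (imm8).  VFP accesses need a
// word-aligned offset of at most 255 words, i.e. within +/-1020 bytes.
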
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                    const ARMSubtarget *Subtarget) {
  if (V == 0)
    return true;

  if (!VT.isSimple())
    return false;

  if (Subtarget->isThumb1Only())
    return isLegalT1AddressImmediate(V, VT);
  else if (Subtarget->isThumb2())
    return isLegalT2AddressImmediate(V, VT, Subtarget);

  // ARM mode.
  if (V < 0)
    V = - V;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i32:
    // +- imm12
    return V == (V & ((1LL << 12) - 1));
  case MVT::i16:
    // +- imm8
    return V == (V & ((1LL << 8) - 1));
  case MVT::f32:
  case MVT::f64:
    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

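// Worked example (illustration only): in ARM mode an i32 or i8 access may use
// any offset in [-4095, 4095], but an i16 access (LDRH/STRH) is limited to
// [-255, 255], and an f64 VLDR/VSTR needs a multiple of 4 within +/-1020.
// So [r0, #-300] is accepted for a word load but rejected for a halfword load.
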
bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because arm allows folding a scale into many arithmetic
    // operations. This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because arm allows folding a scale into many arithmetic
      // operations. This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
    break;
  }
  return true;
}

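// Worked example (illustration only): for an i32 load in ARM mode, an
// addressing mode of base + index<<2 (AM.Scale == 4, no base offset) is
// accepted via isPowerOf2_32(Scale & ~1) and can be selected as
// "ldr r0, [r1, r2, lsl #2]", while base + index*4 + 8 is rejected because
// ARM has no R+R*scale+imm form.
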
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
  return false;
}

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT  = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}

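// Worked example (illustration only): a load whose pointer is (add r1, 4),
// where the incremented pointer is also reused afterwards, can be reported
// here as Base = r1, Offset = 4, AM = ISD::PRE_INC, letting the selector
// form a single pre-indexed access such as "ldr r0, [r1, #4]!".
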
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT  = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT  = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}

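// Worked example (illustration only): when the original pointer is loaded
// from and then advanced, e.g. a load from r1 followed by (add r1, 4), the
// add node (Op) can be folded into the memory operation as ISD::POST_INC,
// giving a post-indexed form such as "ldr r0, [r1], #4".
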
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
                          KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne  &= KnownOneRHS;
    return;
  }
  }
}

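// Worked example (illustration only): if both inputs of an ARMISD::CMOV are
// values masked with 0xFF, each operand reports its top 24 bits as known
// zero, so the intersection computed above lets the combiner treat the
// select result as an 8-bit quantity and drop a redundant following AND.
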
//===----------------------------------------------------------------------===//
//                           ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:  break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l':
      if (Subtarget->isThumb1Only())
        return std::make_pair(0U, ARM::tGPRRegisterClass);
      else
        return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'r':
      return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'w':
      if (VT == MVT::f32)
        return std::make_pair(0U, ARM::SPRRegisterClass);
      if (VT == MVT::f64)
        return std::make_pair(0U, ARM::DPRRegisterClass);
      break;
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

std::vector<unsigned> ARMTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  if (Constraint.size() != 1)
    return std::vector<unsigned>();

  switch (Constraint[0]) {      // GCC ARM Constraint Letters
  default: break;
  case 'l':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 0);
  case 'r':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
                                 ARM::R12, ARM::LR, 0);
  case 'w':
    if (VT == MVT::f32)
      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
    if (VT == MVT::f64)
      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
    break;
  }

  return std::vector<unsigned>();
}

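// Illustrative usage (not part of the original source): an inline asm such as
//   asm ("vadd.f32 %0, %1, %2" : "=w"(r) : "w"(a), "w"(b));
// with 'float' operands is matched by the 'w' handling above, so each operand
// is constrained to the S register file (SPRRegisterClass), while the same
// constraint on a 'double' operand would pick a D register instead.
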
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits.  Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (Constraint) {
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32.  This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }

    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}