//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);
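
/// addTypeForNEON - Set up the legalize actions for a NEON vector type:
/// loads, stores, and bitwise operations are promoted to the given wider
/// types, and the remaining vector operations are marked Custom or Expand
/// as shown below.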
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }
}
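
/// addDRTypeForNEON - Register a 64-bit NEON vector type in the D register
/// class and set up its operation actions.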
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
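
/// addQRTypeForNEON - Register a 128-bit NEON vector type in the Q register
/// class and set up its operation actions.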
void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
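
/// createTLOF - Pick the object-file lowering for this target: Mach-O on
/// Darwin, ELF everywhere else.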
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();
  return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
  }
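
  // With the register classes (and NEON vector types, if any) registered
  // above, let TargetLowering compute the derived per-type register
  // information.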
  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL,       MVT::i64, Expand);
    setOperationAction(ISD::MULHU,     MVT::i32, Expand);
    setOperationAction(ISD::MULHS,     MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL,   MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  setOperationAction(ISD::SDIV,    MVT::i32, Expand);
  setOperationAction(ISD::UDIV,    MVT::i32, Expand);
  setOperationAction(ISD::SREM,    MVT::i32, Expand);
  setOperationAction(ISD::UREM,    MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,      MVT::Other, Custom);
  setOperationAction(ISD::VAARG,        MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,       MVT::Other, Expand);
  setOperationAction(ISD::VAEND,        MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,  MVT::i32,   Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  if (Subtarget->isThumb())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID,     MVT::Other, Custom);

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,  MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,  MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // int <-> fp are custom expanded into bit_convert + ARMISD ops.
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::FMRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}
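
/// getTargetNodeName - Return a human-readable name for the given
/// target-specific (ARMISD) DAG node opcode, used for debug printing.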
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::FMRRD:         return "ARMISD::FMRRD";
  case ARMISD::FMDRR:         return "ARMISD::FMDRR";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  }
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
/// returns true if the operands should be inverted to form the proper
/// comparison.
static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  bool Invert = false;
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
  return Invert;
}
//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"
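
// The custom f64 handlers below place an f64 (or each half of a v2f64) into
// consecutive core registers, possibly splitting it to the stack, since the
// APCS and AAPCS base variants do not pass floating-point values in VFP
// registers.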
// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}
// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}
static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}
687 /// given CallingConvention value.
688 CCAssignFn
*ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC
,
690 bool isVarArg
) const {
693 llvm_unreachable("Unsupported calling convention");
695 case CallingConv::Fast
:
696 // Use target triple & subtarget features to do actual dispatch.
697 if (Subtarget
->isAAPCS_ABI()) {
698 if (Subtarget
->hasVFP2() &&
699 FloatABIType
== FloatABI::Hard
&& !isVarArg
)
700 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
702 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
704 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
705 case CallingConv::ARM_AAPCS_VFP
:
706 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
707 case CallingConv::ARM_AAPCS
:
708 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
709 case CallingConv::ARM_APCS
:
710 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/false, NULL, 0, NULL, 0);
}
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) {

  SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA, Flags));
  }
}
845 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
848 ARMTargetLowering::LowerCall(SDValue Chain
, SDValue Callee
,
849 CallingConv::ID CallConv
, bool isVarArg
,
851 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
852 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
853 DebugLoc dl
, SelectionDAG
&DAG
,
854 SmallVectorImpl
<SDValue
> &InVals
) {
856 // Analyze operands of the call, assigning locations to each operand.
857 SmallVector
<CCValAssign
, 16> ArgLocs
;
858 CCState
CCInfo(CallConv
, isVarArg
, getTargetMachine(), ArgLocs
,
860 CCInfo
.AnalyzeCallOperands(Outs
,
861 CCAssignFnForNode(CallConv
, /* Return*/ false,
864 // Get a count of how many bytes are to be pushed on the stack.
865 unsigned NumBytes
= CCInfo
.getNextStackOffset();
867 // Adjust the stack pointer for the new arguments...
868 // These operations are automatically eliminated by the prolog/epilog pass
869 Chain
= DAG
.getCALLSEQ_START(Chain
, DAG
.getIntPtrConstant(NumBytes
, true));
871 SDValue StackPtr
= DAG
.getRegister(ARM::SP
, MVT::i32
);
873 RegsToPassVector RegsToPass
;
874 SmallVector
<SDValue
, 8> MemOpChains
;
876 // Walk the register/memloc assignments, inserting copies/loads. In the case
877 // of tail call optimization, arguments are handled later.
878 for (unsigned i
= 0, realArgIdx
= 0, e
= ArgLocs
.size();
881 CCValAssign
&VA
= ArgLocs
[i
];
882 SDValue Arg
= Outs
[realArgIdx
].Val
;
883 ISD::ArgFlagsTy Flags
= Outs
[realArgIdx
].Flags
;
885 // Promote the value if needed.
886 switch (VA
.getLocInfo()) {
887 default: llvm_unreachable("Unknown loc info!");
888 case CCValAssign::Full
: break;
889 case CCValAssign::SExt
:
890 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
892 case CCValAssign::ZExt
:
893 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
895 case CCValAssign::AExt
:
896 Arg
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VA
.getLocVT(), Arg
);
898 case CCValAssign::BCvt
:
899 Arg
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VA
.getLocVT(), Arg
);
903 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
904 if (VA
.needsCustom()) {
905 if (VA
.getLocVT() == MVT::v2f64
) {
906 SDValue Op0
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::f64
, Arg
,
907 DAG
.getConstant(0, MVT::i32
));
908 SDValue Op1
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::f64
, Arg
,
909 DAG
.getConstant(1, MVT::i32
));
911 PassF64ArgInRegs(dl
, DAG
, Chain
, Op0
, RegsToPass
,
912 VA
, ArgLocs
[++i
], StackPtr
, MemOpChains
, Flags
);
914 VA
= ArgLocs
[++i
]; // skip ahead to next loc
916 PassF64ArgInRegs(dl
, DAG
, Chain
, Op1
, RegsToPass
,
917 VA
, ArgLocs
[++i
], StackPtr
, MemOpChains
, Flags
);
919 assert(VA
.isMemLoc());
920 if (StackPtr
.getNode() == 0)
921 StackPtr
= DAG
.getCopyFromReg(Chain
, dl
, ARM::SP
, getPointerTy());
923 MemOpChains
.push_back(LowerMemOpCallTo(Chain
, StackPtr
, Op1
,
924 dl
, DAG
, VA
, Flags
));
927 PassF64ArgInRegs(dl
, DAG
, Chain
, Arg
, RegsToPass
, VA
, ArgLocs
[++i
],
928 StackPtr
, MemOpChains
, Flags
);
930 } else if (VA
.isRegLoc()) {
931 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
933 assert(VA
.isMemLoc());
934 if (StackPtr
.getNode() == 0)
935 StackPtr
= DAG
.getCopyFromReg(Chain
, dl
, ARM::SP
, getPointerTy());
937 MemOpChains
.push_back(LowerMemOpCallTo(Chain
, StackPtr
, Arg
,
938 dl
, DAG
, VA
, Flags
));
942 if (!MemOpChains
.empty())
943 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
944 &MemOpChains
[0], MemOpChains
.size());
946 // Build a sequence of copy-to-reg nodes chained together with token chain
947 // and flag operands which copy the outgoing args into the appropriate regs.
949 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
950 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
951 RegsToPass
[i
].second
, InFlag
);
952 InFlag
= Chain
.getValue(1);
955 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
956 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
957 // node so that legalize doesn't hack it.
958 bool isDirect
= false;
959 bool isARMFunc
= false;
960 bool isLocalARMFunc
= false;
961 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
962 GlobalValue
*GV
= G
->getGlobal();
964 bool isExt
= GV
->isDeclaration() || GV
->isWeakForLinker();
965 bool isStub
= (isExt
&& Subtarget
->isTargetDarwin()) &&
966 getTargetMachine().getRelocationModel() != Reloc::Static
;
967 isARMFunc
= !Subtarget
->isThumb() || isStub
;
968 // ARM call to a local ARM function is predicable.
969 isLocalARMFunc
= !Subtarget
->isThumb() && !isExt
;
970 // tBX takes a register source operand.
971 if (isARMFunc
&& Subtarget
->isThumb1Only() && !Subtarget
->hasV5TOps()) {
972 ARMConstantPoolValue
*CPV
= new ARMConstantPoolValue(GV
,
975 SDValue CPAddr
= DAG
.getTargetConstantPool(CPV
, getPointerTy(), 4);
976 CPAddr
= DAG
.getNode(ARMISD::Wrapper
, dl
, MVT::i32
, CPAddr
);
977 Callee
= DAG
.getLoad(getPointerTy(), dl
,
978 DAG
.getEntryNode(), CPAddr
, NULL
, 0);
979 SDValue PICLabel
= DAG
.getConstant(ARMPCLabelIndex
++, MVT::i32
);
980 Callee
= DAG
.getNode(ARMISD::PIC_ADD
, dl
,
981 getPointerTy(), Callee
, PICLabel
);
983 Callee
= DAG
.getTargetGlobalAddress(GV
, getPointerTy());
984 } else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
986 bool isStub
= Subtarget
->isTargetDarwin() &&
987 getTargetMachine().getRelocationModel() != Reloc::Static
;
988 isARMFunc
= !Subtarget
->isThumb() || isStub
;
989 // tBX takes a register source operand.
990 const char *Sym
= S
->getSymbol();
991 if (isARMFunc
&& Subtarget
->isThumb1Only() && !Subtarget
->hasV5TOps()) {
992 ARMConstantPoolValue
*CPV
= new ARMConstantPoolValue(*DAG
.getContext(),
993 Sym
, ARMPCLabelIndex
, 4);
994 SDValue CPAddr
= DAG
.getTargetConstantPool(CPV
, getPointerTy(), 4);
995 CPAddr
= DAG
.getNode(ARMISD::Wrapper
, dl
, MVT::i32
, CPAddr
);
996 Callee
= DAG
.getLoad(getPointerTy(), dl
,
997 DAG
.getEntryNode(), CPAddr
, NULL
, 0);
998 SDValue PICLabel
= DAG
.getConstant(ARMPCLabelIndex
++, MVT::i32
);
999 Callee
= DAG
.getNode(ARMISD::PIC_ADD
, dl
,
1000 getPointerTy(), Callee
, PICLabel
);
1002 Callee
= DAG
.getTargetExternalSymbol(Sym
, getPointerTy());
1005 // FIXME: handle tail calls differently.
1007 if (Subtarget
->isThumb()) {
1008 if ((!isDirect
|| isARMFunc
) && !Subtarget
->hasV5TOps())
1009 CallOpc
= ARMISD::CALL_NOLINK
;
1011 CallOpc
= isARMFunc
? ARMISD::CALL
: ARMISD::tCALL
;
1013 CallOpc
= (isDirect
|| Subtarget
->hasV5TOps())
1014 ? (isLocalARMFunc
? ARMISD::CALL_PRED
: ARMISD::CALL
)
1015 : ARMISD::CALL_NOLINK
;
1017 if (CallOpc
== ARMISD::CALL_NOLINK
&& !Subtarget
->isThumb1Only()) {
1018 // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
1019 Chain
= DAG
.getCopyToReg(Chain
, dl
, ARM::LR
, DAG
.getUNDEF(MVT::i32
),InFlag
);
1020 InFlag
= Chain
.getValue(1);
1023 std::vector
<SDValue
> Ops
;
1024 Ops
.push_back(Chain
);
1025 Ops
.push_back(Callee
);
1027 // Add argument registers to the end of the list so that they are known live
1029 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
)
1030 Ops
.push_back(DAG
.getRegister(RegsToPass
[i
].first
,
1031 RegsToPass
[i
].second
.getValueType()));
1033 if (InFlag
.getNode())
1034 Ops
.push_back(InFlag
);
1035 // Returns a chain and a flag for retval copy to use.
1036 Chain
= DAG
.getNode(CallOpc
, dl
, DAG
.getVTList(MVT::Other
, MVT::Flag
),
1037 &Ops
[0], Ops
.size());
1038 InFlag
= Chain
.getValue(1);
1040 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, true),
1041 DAG
.getIntPtrConstant(0, true), InFlag
);
1043 InFlag
= Chain
.getValue(1);
1045 // Handle result values, copying them out of physregs into vregs that we
1047 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
, Ins
,
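
/// LowerReturn - Lower outgoing return values into the return registers
/// chosen by the calling convention, splitting f64 and v2f64 values into
/// GPR pairs as needed.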
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) {
  GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    // initial exec model
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 CPAddr, NULL, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
    return Result;
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  }
}
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }
  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
  return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       "_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
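
// NEON load/store intrinsic lowering: the helpers below either leave the
// intrinsic alone (64-bit vectors), rewrite the lane-number operand into a
// TargetConstant, or bail out where 128-bit expansion is not implemented yet.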
static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
                                     unsigned NumVecs) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);

  // No expansion needed for 64-bit vectors.
  if (VT.is64BitVector())
    return SDValue();

  // FIXME: We need to expand VLD3 and VLD4 of 128-bit vectors into separate
  // operations to load the even and odd registers.
  return SDValue();
}

static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
                                     unsigned NumVecs) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getOperand(3).getValueType();

  // No expansion needed for 64-bit vectors.
  if (VT.is64BitVector())
    return SDValue();

  // FIXME: We need to expand VST3 and VST4 of 128-bit vectors into separate
  // operations to store the even and odd registers.
  return SDValue();
}
static SDValue LowerNeonVLDLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
                                         unsigned NumVecs) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);

  if (!VT.is64BitVector())
    return SDValue(); // unimplemented

  // Change the lane number operand to be a TargetConstant; otherwise it
  // will be legalized into a register.
  ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
  if (!Lane) {
    assert(false && "vld lane number must be a constant");
    return SDValue();
  }
  SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
  Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
  return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
}
static SDValue LowerNeonVSTLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
                                         unsigned NumVecs) {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getOperand(3).getValueType();

  if (!VT.is64BitVector())
    return SDValue(); // unimplemented

  // Change the lane number operand to be a TargetConstant; otherwise it
  // will be legalized into a register.
  ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
  if (!Lane) {
    assert(false && "vst lane number must be a constant");
    return SDValue();
  }
  SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
  Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
  return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
}
SDValue
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (IntNo) {
  case Intrinsic::arm_neon_vld3:
    return LowerNeonVLDIntrinsic(Op, DAG, 3);
  case Intrinsic::arm_neon_vld4:
    return LowerNeonVLDIntrinsic(Op, DAG, 4);
  case Intrinsic::arm_neon_vld2lane:
    return LowerNeonVLDLaneIntrinsic(Op, DAG, 2);
  case Intrinsic::arm_neon_vld3lane:
    return LowerNeonVLDLaneIntrinsic(Op, DAG, 3);
  case Intrinsic::arm_neon_vld4lane:
    return LowerNeonVLDLaneIntrinsic(Op, DAG, 4);
  case Intrinsic::arm_neon_vst3:
    return LowerNeonVSTIntrinsic(Op, DAG, 3);
  case Intrinsic::arm_neon_vst4:
    return LowerNeonVSTIntrinsic(Op, DAG, 4);
  case Intrinsic::arm_neon_vst2lane:
    return LowerNeonVSTLaneIntrinsic(Op, DAG, 2);
  case Intrinsic::arm_neon_vst3lane:
    return LowerNeonVSTLaneIntrinsic(Op, DAG, 3);
  case Intrinsic::arm_neon_vst4lane:
    return LowerNeonVSTLaneIntrinsic(Op, DAG, 4);
  default: return SDValue();    // Don't custom lower most intrinsics.
  }
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
                               ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
    SDValue Chain = Result.getValue(1);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::eh_sjlj_setjmp:
    return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
  }
}
LowerVASTART(SDValue Op
, SelectionDAG
&DAG
,
1460 unsigned VarArgsFrameIndex
) {
1461 // vastart just stores the address of the VarArgsFrameIndex slot into the
1462 // memory location argument.
1463 DebugLoc dl
= Op
.getDebugLoc();
1464 EVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy();
1465 SDValue FR
= DAG
.getFrameIndex(VarArgsFrameIndex
, PtrVT
);
1466 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
1467 return DAG
.getStore(Op
.getOperand(0), dl
, FR
, Op
.getOperand(1), SV
, 0);
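
/// LowerDYNAMIC_STACKALLOC - Lower a dynamic stack allocation to an
/// ARMISD::DYN_ALLOC node wrapped in a callseq, negating the size on Thumb1
/// (except for small word-aligned constants) so the allocation can be done
/// with an "add r, sp, r".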
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  EVT VT = Node->getValueType(0);
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));

  unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
  unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
  if (AlignVal > StackAlign)
    // Do this now since selection pass cannot introduce new target
    // independent node.
    Align = DAG.getConstant(-(uint64_t)AlignVal, VT);

  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
  // using a "add r, sp, r" instead. Negate the size now so we don't have to
  // do even more horrible hack later.
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->isThumb1OnlyFunction()) {
    bool Negate = true;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
    if (C) {
      uint32_t Val = C->getZExtValue();
      if (Val <= 508 && ((Val & 3) == 0))
        Negate = false;
    }
    if (Negate)
      Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
  }

  SDVTList VTList = DAG.getVTList(VT, MVT::Other);
  SDValue Ops1[] = { Chain, Size, Align };
  SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
  Chain = Res.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
                             DAG.getIntPtrConstant(0, true), SDValue());
  SDValue Ops2[] = { Res, Chain };
  return DAG.getMergeValues(Ops2, 2, dl);
}
1518 ARMTargetLowering::GetF64FormalArgument(CCValAssign
&VA
, CCValAssign
&NextVA
,
1519 SDValue
&Root
, SelectionDAG
&DAG
,
1521 MachineFunction
&MF
= DAG
.getMachineFunction();
1522 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1524 TargetRegisterClass
*RC
;
1525 if (AFI
->isThumb1OnlyFunction())
1526 RC
= ARM::tGPRRegisterClass
;
1528 RC
= ARM::GPRRegisterClass
;
1530 // Transform the arguments stored in physical registers into virtual ones.
1531 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1532 SDValue ArgValue
= DAG
.getCopyFromReg(Root
, dl
, Reg
, MVT::i32
);
1535 if (NextVA
.isMemLoc()) {
1536 unsigned ArgSize
= NextVA
.getLocVT().getSizeInBits()/8;
1537 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1538 int FI
= MFI
->CreateFixedObject(ArgSize
, NextVA
.getLocMemOffset());
1540 // Create load node to retrieve arguments from the stack.
1541 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
1542 ArgValue2
= DAG
.getLoad(MVT::i32
, dl
, Root
, FIN
, NULL
, 0);
1544 Reg
= MF
.addLiveIn(NextVA
.getLocReg(), RC
);
1545 ArgValue2
= DAG
.getCopyFromReg(Root
, dl
, Reg
, MVT::i32
);
1548 return DAG
.getNode(ARMISD::FMDRR
, dl
, MVT::f64
, ArgValue
, ArgValue2
);
1552 ARMTargetLowering::LowerFormalArguments(SDValue Chain
,
1553 CallingConv::ID CallConv
, bool isVarArg
,
1554 const SmallVectorImpl
<ISD::InputArg
>
1556 DebugLoc dl
, SelectionDAG
&DAG
,
1557 SmallVectorImpl
<SDValue
> &InVals
) {
1559 MachineFunction
&MF
= DAG
.getMachineFunction();
1560 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1562 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1564 // Assign locations to all of the incoming arguments.
1565 SmallVector
<CCValAssign
, 16> ArgLocs
;
1566 CCState
CCInfo(CallConv
, isVarArg
, getTargetMachine(), ArgLocs
,
1568 CCInfo
.AnalyzeFormalArguments(Ins
,
1569 CCAssignFnForNode(CallConv
, /* Return*/ false,
1572 SmallVector
<SDValue
, 16> ArgValues
;
1574 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1575 CCValAssign
&VA
= ArgLocs
[i
];
1577 // Arguments stored in registers.
1578 if (VA
.isRegLoc()) {
1579 EVT RegVT
= VA
.getLocVT();
1582 if (VA
.needsCustom()) {
1583 // f64 and vector types are split up into multiple registers or
1584 // combinations of registers and stack slots.
1587 if (VA
.getLocVT() == MVT::v2f64
) {
1588 SDValue ArgValue1
= GetF64FormalArgument(VA
, ArgLocs
[++i
],
1590 VA
= ArgLocs
[++i
]; // skip ahead to next loc
1591 SDValue ArgValue2
= GetF64FormalArgument(VA
, ArgLocs
[++i
],
1593 ArgValue
= DAG
.getNode(ISD::UNDEF
, dl
, MVT::v2f64
);
1594 ArgValue
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
,
1595 ArgValue
, ArgValue1
, DAG
.getIntPtrConstant(0));
1596 ArgValue
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
,
1597 ArgValue
, ArgValue2
, DAG
.getIntPtrConstant(1));
1599 ArgValue
= GetF64FormalArgument(VA
, ArgLocs
[++i
], Chain
, DAG
, dl
);
1602 TargetRegisterClass
*RC
;
1604 if (RegVT
== MVT::f32
)
1605 RC
= ARM::SPRRegisterClass
;
1606 else if (RegVT
== MVT::f64
)
1607 RC
= ARM::DPRRegisterClass
;
1608 else if (RegVT
== MVT::v2f64
)
1609 RC
= ARM::QPRRegisterClass
;
1610 else if (RegVT
== MVT::i32
)
1611 RC
= (AFI
->isThumb1OnlyFunction() ?
1612 ARM::tGPRRegisterClass
: ARM::GPRRegisterClass
);
1614 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
1616 // Transform the arguments in physical registers into virtual ones.
1617 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
1618 ArgValue
= DAG
.getCopyFromReg(Chain
, dl
, Reg
, RegVT
);
1621 // If this is an 8 or 16-bit value, it is really passed promoted
1622 // to 32 bits. Insert an assert[sz]ext to capture this, then
1623 // truncate to the right size.
1624 switch (VA
.getLocInfo()) {
1625 default: llvm_unreachable("Unknown loc info!");
1626 case CCValAssign::Full
: break;
1627 case CCValAssign::BCvt
:
1628 ArgValue
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VA
.getValVT(), ArgValue
);
1630 case CCValAssign::SExt
:
1631 ArgValue
= DAG
.getNode(ISD::AssertSext
, dl
, RegVT
, ArgValue
,
1632 DAG
.getValueType(VA
.getValVT()));
1633 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), ArgValue
);
1635 case CCValAssign::ZExt
:
1636 ArgValue
= DAG
.getNode(ISD::AssertZext
, dl
, RegVT
, ArgValue
,
1637 DAG
.getValueType(VA
.getValVT()));
1638 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), ArgValue
);
1642 InVals
.push_back(ArgValue
);
1644 } else { // VA.isRegLoc()
1647 assert(VA
.isMemLoc());
1648 assert(VA
.getValVT() != MVT::i64
&& "i64 should already be lowered");
1650 unsigned ArgSize
= VA
.getLocVT().getSizeInBits()/8;
1651 int FI
= MFI
->CreateFixedObject(ArgSize
, VA
.getLocMemOffset());
1653 // Create load nodes to retrieve arguments from the stack.
1654 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy());
1655 InVals
.push_back(DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
, NULL
, 0));
1661 static const unsigned GPRArgRegs
[] = {
1662 ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
1665 unsigned NumGPRs
= CCInfo
.getFirstUnallocated
1666 (GPRArgRegs
, sizeof(GPRArgRegs
) / sizeof(GPRArgRegs
[0]));
1668 unsigned Align
= MF
.getTarget().getFrameInfo()->getStackAlignment();
1669 unsigned VARegSize
= (4 - NumGPRs
) * 4;
1670 unsigned VARegSaveSize
= (VARegSize
+ Align
- 1) & ~(Align
- 1);
1671 unsigned ArgOffset
= 0;
1672 if (VARegSaveSize
) {
1673 // If this function is vararg, store any remaining integer argument regs
1674 // to their spots on the stack so that they may be loaded by deferencing
1675 // the result of va_next.
1676 AFI
->setVarArgsRegSaveSize(VARegSaveSize
);
1677 ArgOffset
= CCInfo
.getNextStackOffset();
1678 VarArgsFrameIndex
= MFI
->CreateFixedObject(VARegSaveSize
, ArgOffset
+
1679 VARegSaveSize
- VARegSize
);
1680 SDValue FIN
= DAG
.getFrameIndex(VarArgsFrameIndex
, getPointerTy());
1682 SmallVector
<SDValue
, 4> MemOps
;
1683 for (; NumGPRs
< 4; ++NumGPRs
) {
1684 TargetRegisterClass
*RC
;
1685 if (AFI
->isThumb1OnlyFunction())
1686 RC
= ARM::tGPRRegisterClass
;
1688 RC
= ARM::GPRRegisterClass
;
1690 unsigned VReg
= MF
.addLiveIn(GPRArgRegs
[NumGPRs
], RC
);
1691 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i32
);
1692 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, NULL
, 0);
1693 MemOps
.push_back(Store
);
1694 FIN
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(), FIN
,
1695 DAG
.getConstant(4, getPointerTy()));
1697 if (!MemOps
.empty())
1698 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1699 &MemOps
[0], MemOps
.size());
1701 // This will point to the next argument passed via stack.
1702 VarArgsFrameIndex
= MFI
->CreateFixedObject(4, ArgOffset
);
1708 /// isFloatingPointZero - Return true if this is +0.0.
1709 static bool isFloatingPointZero(SDValue Op
) {
1710 if (ConstantFPSDNode
*CFP
= dyn_cast
<ConstantFPSDNode
>(Op
))
1711 return CFP
->getValueAPF().isPosZero();
1712 else if (ISD::isEXTLoad(Op
.getNode()) || ISD::isNON_EXTLoad(Op
.getNode())) {
1713 // Maybe this has already been legalized into the constant pool?
1714 if (Op
.getOperand(1).getOpcode() == ARMISD::Wrapper
) {
1715 SDValue WrapperOp
= Op
.getOperand(1).getOperand(0);
1716 if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(WrapperOp
))
1717 if (ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(CP
->getConstVal()))
1718 return CFP
->getValueAPF().isPosZero();
1724 static bool isLegalCmpImmediate(unsigned C
, bool isThumb1Only
) {
1725 return ( isThumb1Only
&& (C
& ~255U) == 0) ||
1726 (!isThumb1Only
&& ARM_AM::getSOImmVal(C
) != -1);
1729 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
1730 /// the given operands.
1731 static SDValue
getARMCmp(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
1732 SDValue
&ARMCC
, SelectionDAG
&DAG
, bool isThumb1Only
,
1734 if (ConstantSDNode
*RHSC
= dyn_cast
<ConstantSDNode
>(RHS
.getNode())) {
1735 unsigned C
= RHSC
->getZExtValue();
1736 if (!isLegalCmpImmediate(C
, isThumb1Only
)) {
1737 // Constant does not fit, try adjusting it by one?
1742 if (isLegalCmpImmediate(C
-1, isThumb1Only
)) {
1743 CC
= (CC
== ISD::SETLT
) ? ISD::SETLE
: ISD::SETGT
;
1744 RHS
= DAG
.getConstant(C
-1, MVT::i32
);
1749 if (C
> 0 && isLegalCmpImmediate(C
-1, isThumb1Only
)) {
1750 CC
= (CC
== ISD::SETULT
) ? ISD::SETULE
: ISD::SETUGT
;
1751 RHS
= DAG
.getConstant(C
-1, MVT::i32
);
1756 if (isLegalCmpImmediate(C
+1, isThumb1Only
)) {
1757 CC
= (CC
== ISD::SETLE
) ? ISD::SETLT
: ISD::SETGE
;
1758 RHS
= DAG
.getConstant(C
+1, MVT::i32
);
1763 if (C
< 0xffffffff && isLegalCmpImmediate(C
+1, isThumb1Only
)) {
1764 CC
= (CC
== ISD::SETULE
) ? ISD::SETULT
: ISD::SETUGE
;
1765 RHS
= DAG
.getConstant(C
+1, MVT::i32
);
1772 ARMCC::CondCodes CondCode
= IntCCToARMCC(CC
);
1773 ARMISD::NodeType CompareType
;
1776 CompareType
= ARMISD::CMP
;
1781 CompareType
= ARMISD::CMPZ
;
1784 ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1785 return DAG
.getNode(CompareType
, dl
, MVT::Flag
, LHS
, RHS
);
1788 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
1789 static SDValue
getVFPCmp(SDValue LHS
, SDValue RHS
, SelectionDAG
&DAG
,
1792 if (!isFloatingPointZero(RHS
))
1793 Cmp
= DAG
.getNode(ARMISD::CMPFP
, dl
, MVT::Flag
, LHS
, RHS
);
1795 Cmp
= DAG
.getNode(ARMISD::CMPFPw0
, dl
, MVT::Flag
, LHS
);
1796 return DAG
.getNode(ARMISD::FMSTAT
, dl
, MVT::Flag
, Cmp
);
1799 static SDValue
LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
,
1800 const ARMSubtarget
*ST
) {
1801 EVT VT
= Op
.getValueType();
1802 SDValue LHS
= Op
.getOperand(0);
1803 SDValue RHS
= Op
.getOperand(1);
1804 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
1805 SDValue TrueVal
= Op
.getOperand(2);
1806 SDValue FalseVal
= Op
.getOperand(3);
1807 DebugLoc dl
= Op
.getDebugLoc();
1809 if (LHS
.getValueType() == MVT::i32
) {
1811 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1812 SDValue Cmp
= getARMCmp(LHS
, RHS
, CC
, ARMCC
, DAG
, ST
->isThumb1Only(), dl
);
1813 return DAG
.getNode(ARMISD::CMOV
, dl
, VT
, FalseVal
, TrueVal
, ARMCC
, CCR
,Cmp
);
1816 ARMCC::CondCodes CondCode
, CondCode2
;
1817 if (FPCCToARMCC(CC
, CondCode
, CondCode2
))
1818 std::swap(TrueVal
, FalseVal
);
1820 SDValue ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1821 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1822 SDValue Cmp
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1823 SDValue Result
= DAG
.getNode(ARMISD::CMOV
, dl
, VT
, FalseVal
, TrueVal
,
1825 if (CondCode2
!= ARMCC::AL
) {
1826 SDValue ARMCC2
= DAG
.getConstant(CondCode2
, MVT::i32
);
1827 // FIXME: Needs another CMP because flag can have but one use.
1828 SDValue Cmp2
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1829 Result
= DAG
.getNode(ARMISD::CMOV
, dl
, VT
,
1830 Result
, TrueVal
, ARMCC2
, CCR
, Cmp2
);
1835 static SDValue
LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
,
1836 const ARMSubtarget
*ST
) {
1837 SDValue Chain
= Op
.getOperand(0);
1838 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(1))->get();
1839 SDValue LHS
= Op
.getOperand(2);
1840 SDValue RHS
= Op
.getOperand(3);
1841 SDValue Dest
= Op
.getOperand(4);
1842 DebugLoc dl
= Op
.getDebugLoc();
1844 if (LHS
.getValueType() == MVT::i32
) {
1846 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1847 SDValue Cmp
= getARMCmp(LHS
, RHS
, CC
, ARMCC
, DAG
, ST
->isThumb1Only(), dl
);
1848 return DAG
.getNode(ARMISD::BRCOND
, dl
, MVT::Other
,
1849 Chain
, Dest
, ARMCC
, CCR
,Cmp
);
1852 assert(LHS
.getValueType() == MVT::f32
|| LHS
.getValueType() == MVT::f64
);
1853 ARMCC::CondCodes CondCode
, CondCode2
;
1854 if (FPCCToARMCC(CC
, CondCode
, CondCode2
))
1855 // Swap the LHS/RHS of the comparison if needed.
1856 std::swap(LHS
, RHS
);
1858 SDValue Cmp
= getVFPCmp(LHS
, RHS
, DAG
, dl
);
1859 SDValue ARMCC
= DAG
.getConstant(CondCode
, MVT::i32
);
1860 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1861 SDVTList VTList
= DAG
.getVTList(MVT::Other
, MVT::Flag
);
1862 SDValue Ops
[] = { Chain
, Dest
, ARMCC
, CCR
, Cmp
};
1863 SDValue Res
= DAG
.getNode(ARMISD::BRCOND
, dl
, VTList
, Ops
, 5);
1864 if (CondCode2
!= ARMCC::AL
) {
1865 ARMCC
= DAG
.getConstant(CondCode2
, MVT::i32
);
1866 SDValue Ops
[] = { Res
, Dest
, ARMCC
, CCR
, Res
.getValue(1) };
1867 Res
= DAG
.getNode(ARMISD::BRCOND
, dl
, VTList
, Ops
, 5);
1872 SDValue
ARMTargetLowering::LowerBR_JT(SDValue Op
, SelectionDAG
&DAG
) {
1873 SDValue Chain
= Op
.getOperand(0);
1874 SDValue Table
= Op
.getOperand(1);
1875 SDValue Index
= Op
.getOperand(2);
1876 DebugLoc dl
= Op
.getDebugLoc();
1878 EVT PTy
= getPointerTy();
1879 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Table
);
1880 ARMFunctionInfo
*AFI
= DAG
.getMachineFunction().getInfo
<ARMFunctionInfo
>();
1881 SDValue UId
= DAG
.getConstant(AFI
->createJumpTableUId(), PTy
);
1882 SDValue JTI
= DAG
.getTargetJumpTable(JT
->getIndex(), PTy
);
1883 Table
= DAG
.getNode(ARMISD::WrapperJT
, dl
, MVT::i32
, JTI
, UId
);
1884 Index
= DAG
.getNode(ISD::MUL
, dl
, PTy
, Index
, DAG
.getConstant(4, PTy
));
1885 SDValue Addr
= DAG
.getNode(ISD::ADD
, dl
, PTy
, Index
, Table
);
1886 if (Subtarget
->isThumb2()) {
1887 // Thumb2 uses a two-level jump. That is, it jumps into the jump table
1888 // which does another jump to the destination. This also makes it easier
1889 // to translate it to TBB / TBH later.
1890 // FIXME: This might not work if the function is extremely large.
1891 return DAG
.getNode(ARMISD::BR2_JT
, dl
, MVT::Other
, Chain
,
1892 Addr
, Op
.getOperand(2), JTI
, UId
);
1894 if (getTargetMachine().getRelocationModel() == Reloc::PIC_
) {
1895 Addr
= DAG
.getLoad((EVT
)MVT::i32
, dl
, Chain
, Addr
, NULL
, 0);
1896 Chain
= Addr
.getValue(1);
1897 Addr
= DAG
.getNode(ISD::ADD
, dl
, PTy
, Addr
, Table
);
1898 return DAG
.getNode(ARMISD::BR_JT
, dl
, MVT::Other
, Chain
, Addr
, JTI
, UId
);
1900 Addr
= DAG
.getLoad(PTy
, dl
, Chain
, Addr
, NULL
, 0);
1901 Chain
= Addr
.getValue(1);
1902 return DAG
.getNode(ARMISD::BR_JT
, dl
, MVT::Other
, Chain
, Addr
, JTI
, UId
);
1906 static SDValue
LowerFP_TO_INT(SDValue Op
, SelectionDAG
&DAG
) {
1907 DebugLoc dl
= Op
.getDebugLoc();
1909 Op
.getOpcode() == ISD::FP_TO_SINT
? ARMISD::FTOSI
: ARMISD::FTOUI
;
1910 Op
= DAG
.getNode(Opc
, dl
, MVT::f32
, Op
.getOperand(0));
1911 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, MVT::i32
, Op
);
1914 static SDValue
LowerINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) {
1915 EVT VT
= Op
.getValueType();
1916 DebugLoc dl
= Op
.getDebugLoc();
1918 Op
.getOpcode() == ISD::SINT_TO_FP
? ARMISD::SITOF
: ARMISD::UITOF
;
1920 Op
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, MVT::f32
, Op
.getOperand(0));
1921 return DAG
.getNode(Opc
, dl
, VT
, Op
);
1924 static SDValue
LowerFCOPYSIGN(SDValue Op
, SelectionDAG
&DAG
) {
1925 // Implement fcopysign with a fabs and a conditional fneg.
1926 SDValue Tmp0
= Op
.getOperand(0);
1927 SDValue Tmp1
= Op
.getOperand(1);
1928 DebugLoc dl
= Op
.getDebugLoc();
1929 EVT VT
= Op
.getValueType();
1930 EVT SrcVT
= Tmp1
.getValueType();
1931 SDValue AbsVal
= DAG
.getNode(ISD::FABS
, dl
, VT
, Tmp0
);
1932 SDValue Cmp
= getVFPCmp(Tmp1
, DAG
.getConstantFP(0.0, SrcVT
), DAG
, dl
);
1933 SDValue ARMCC
= DAG
.getConstant(ARMCC::LT
, MVT::i32
);
1934 SDValue CCR
= DAG
.getRegister(ARM::CPSR
, MVT::i32
);
1935 return DAG
.getNode(ARMISD::CNEG
, dl
, VT
, AbsVal
, AbsVal
, ARMCC
, CCR
, Cmp
);
1938 SDValue
ARMTargetLowering::LowerFRAMEADDR(SDValue Op
, SelectionDAG
&DAG
) {
1939 MachineFrameInfo
*MFI
= DAG
.getMachineFunction().getFrameInfo();
1940 MFI
->setFrameAddressIsTaken(true);
1941 EVT VT
= Op
.getValueType();
1942 DebugLoc dl
= Op
.getDebugLoc(); // FIXME probably not meaningful
1943 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
1944 unsigned FrameReg
= (Subtarget
->isThumb() || Subtarget
->isTargetDarwin())
1945 ? ARM::R7
: ARM::R11
;
1946 SDValue FrameAddr
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, FrameReg
, VT
);
1948 FrameAddr
= DAG
.getLoad(VT
, dl
, DAG
.getEntryNode(), FrameAddr
, NULL
, 0);
1953 ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG
&DAG
, DebugLoc dl
,
1955 SDValue Dst
, SDValue Src
,
1956 SDValue Size
, unsigned Align
,
1958 const Value
*DstSV
, uint64_t DstSVOff
,
1959 const Value
*SrcSV
, uint64_t SrcSVOff
){
1960 // Do repeated 4-byte loads and stores. To be improved.
1961 // This requires 4-byte alignment.
1962 if ((Align
& 3) != 0)
1964 // This requires the copy size to be a constant, preferrably
1965 // within a subtarget-specific limit.
1966 ConstantSDNode
*ConstantSize
= dyn_cast
<ConstantSDNode
>(Size
);
1969 uint64_t SizeVal
= ConstantSize
->getZExtValue();
1970 if (!AlwaysInline
&& SizeVal
> getSubtarget()->getMaxInlineSizeThreshold())
1973 unsigned BytesLeft
= SizeVal
& 3;
1974 unsigned NumMemOps
= SizeVal
>> 2;
1975 unsigned EmittedNumMemOps
= 0;
1977 unsigned VTSize
= 4;
1979 const unsigned MAX_LOADS_IN_LDM
= 6;
1980 SDValue TFOps
[MAX_LOADS_IN_LDM
];
1981 SDValue Loads
[MAX_LOADS_IN_LDM
];
1982 uint64_t SrcOff
= 0, DstOff
= 0;
1984 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
1985 // same number of stores. The loads and stores will get combined into
1986 // ldm/stm later on.
1987 while (EmittedNumMemOps
< NumMemOps
) {
1989 i
< MAX_LOADS_IN_LDM
&& EmittedNumMemOps
+ i
< NumMemOps
; ++i
) {
1990 Loads
[i
] = DAG
.getLoad(VT
, dl
, Chain
,
1991 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Src
,
1992 DAG
.getConstant(SrcOff
, MVT::i32
)),
1993 SrcSV
, SrcSVOff
+ SrcOff
);
1994 TFOps
[i
] = Loads
[i
].getValue(1);
1997 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
2000 i
< MAX_LOADS_IN_LDM
&& EmittedNumMemOps
+ i
< NumMemOps
; ++i
) {
2001 TFOps
[i
] = DAG
.getStore(Chain
, dl
, Loads
[i
],
2002 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Dst
,
2003 DAG
.getConstant(DstOff
, MVT::i32
)),
2004 DstSV
, DstSVOff
+ DstOff
);
2007 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
2009 EmittedNumMemOps
+= i
;
2015 // Issue loads / stores for the trailing (1 - 3) bytes.
2016 unsigned BytesLeftSave
= BytesLeft
;
2019 if (BytesLeft
>= 2) {
2027 Loads
[i
] = DAG
.getLoad(VT
, dl
, Chain
,
2028 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Src
,
2029 DAG
.getConstant(SrcOff
, MVT::i32
)),
2030 SrcSV
, SrcSVOff
+ SrcOff
);
2031 TFOps
[i
] = Loads
[i
].getValue(1);
2034 BytesLeft
-= VTSize
;
2036 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
2039 BytesLeft
= BytesLeftSave
;
2041 if (BytesLeft
>= 2) {
2049 TFOps
[i
] = DAG
.getStore(Chain
, dl
, Loads
[i
],
2050 DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, Dst
,
2051 DAG
.getConstant(DstOff
, MVT::i32
)),
2052 DstSV
, DstSVOff
+ DstOff
);
2055 BytesLeft
-= VTSize
;
2057 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, &TFOps
[0], i
);
2060 static SDValue
ExpandBIT_CONVERT(SDNode
*N
, SelectionDAG
&DAG
) {
2061 SDValue Op
= N
->getOperand(0);
2062 DebugLoc dl
= N
->getDebugLoc();
2063 if (N
->getValueType(0) == MVT::f64
) {
2064 // Turn i64->f64 into FMDRR.
2065 SDValue Lo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, MVT::i32
, Op
,
2066 DAG
.getConstant(0, MVT::i32
));
2067 SDValue Hi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, MVT::i32
, Op
,
2068 DAG
.getConstant(1, MVT::i32
));
2069 return DAG
.getNode(ARMISD::FMDRR
, dl
, MVT::f64
, Lo
, Hi
);
2072 // Turn f64->i64 into FMRRD.
2073 SDValue Cvt
= DAG
.getNode(ARMISD::FMRRD
, dl
,
2074 DAG
.getVTList(MVT::i32
, MVT::i32
), &Op
, 1);
2076 // Merge the pieces into a single i64 value.
2077 return DAG
.getNode(ISD::BUILD_PAIR
, dl
, MVT::i64
, Cvt
, Cvt
.getValue(1));
2080 /// getZeroVector - Returns a vector of specified type with all zero elements.
2082 static SDValue
getZeroVector(EVT VT
, SelectionDAG
&DAG
, DebugLoc dl
) {
2083 assert(VT
.isVector() && "Expected a vector type");
2085 // Zero vectors are used to represent vector negation and in those cases
2086 // will be implemented with the NEON VNEG instruction. However, VNEG does
2087 // not support i64 elements, so sometimes the zero vectors will need to be
2088 // explicitly constructed. For those cases, and potentially other uses in
2089 // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
2090 // to their dest type. This ensures they get CSE'd.
2092 SDValue Cst
= DAG
.getTargetConstant(0, MVT::i32
);
2093 if (VT
.getSizeInBits() == 64)
2094 Vec
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v2i32
, Cst
, Cst
);
2096 Vec
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v4i32
, Cst
, Cst
, Cst
, Cst
);
2098 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, Vec
);
2101 /// getOnesVector - Returns a vector of specified type with all bits set.
2103 static SDValue
getOnesVector(EVT VT
, SelectionDAG
&DAG
, DebugLoc dl
) {
2104 assert(VT
.isVector() && "Expected a vector type");
2106 // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
2107 // type. This ensures they get CSE'd.
2109 SDValue Cst
= DAG
.getTargetConstant(~0U, MVT::i32
);
2110 if (VT
.getSizeInBits() == 64)
2111 Vec
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v2i32
, Cst
, Cst
);
2113 Vec
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v4i32
, Cst
, Cst
, Cst
, Cst
);
2115 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, Vec
);
2118 static SDValue
LowerShift(SDNode
*N
, SelectionDAG
&DAG
,
2119 const ARMSubtarget
*ST
) {
2120 EVT VT
= N
->getValueType(0);
2121 DebugLoc dl
= N
->getDebugLoc();
2123 // Lower vector shifts on NEON to use VSHL.
2124 if (VT
.isVector()) {
2125 assert(ST
->hasNEON() && "unexpected vector shift");
2127 // Left shifts translate directly to the vshiftu intrinsic.
2128 if (N
->getOpcode() == ISD::SHL
)
2129 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, VT
,
2130 DAG
.getConstant(Intrinsic::arm_neon_vshiftu
, MVT::i32
),
2131 N
->getOperand(0), N
->getOperand(1));
2133 assert((N
->getOpcode() == ISD::SRA
||
2134 N
->getOpcode() == ISD::SRL
) && "unexpected vector shift opcode");
2136 // NEON uses the same intrinsics for both left and right shifts. For
2137 // right shifts, the shift amounts are negative, so negate the vector of
2139 EVT ShiftVT
= N
->getOperand(1).getValueType();
2140 SDValue NegatedCount
= DAG
.getNode(ISD::SUB
, dl
, ShiftVT
,
2141 getZeroVector(ShiftVT
, DAG
, dl
),
2143 Intrinsic::ID vshiftInt
= (N
->getOpcode() == ISD::SRA
?
2144 Intrinsic::arm_neon_vshifts
:
2145 Intrinsic::arm_neon_vshiftu
);
2146 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, VT
,
2147 DAG
.getConstant(vshiftInt
, MVT::i32
),
2148 N
->getOperand(0), NegatedCount
);
2151 // We can get here for a node like i32 = ISD::SHL i32, i64
2155 assert((N
->getOpcode() == ISD::SRL
|| N
->getOpcode() == ISD::SRA
) &&
2156 "Unknown shift to lower!");
2158 // We only lower SRA, SRL of 1 here, all others use generic lowering.
2159 if (!isa
<ConstantSDNode
>(N
->getOperand(1)) ||
2160 cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue() != 1)
2163 // If we are in thumb mode, we don't have RRX.
2164 if (ST
->isThumb1Only()) return SDValue();
2166 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
2167 SDValue Lo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, MVT::i32
, N
->getOperand(0),
2168 DAG
.getConstant(0, MVT::i32
));
2169 SDValue Hi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, MVT::i32
, N
->getOperand(0),
2170 DAG
.getConstant(1, MVT::i32
));
2172 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2173 // captures the result into a carry flag.
2174 unsigned Opc
= N
->getOpcode() == ISD::SRL
? ARMISD::SRL_FLAG
:ARMISD::SRA_FLAG
;
2175 Hi
= DAG
.getNode(Opc
, dl
, DAG
.getVTList(MVT::i32
, MVT::Flag
), &Hi
, 1);
2177 // The low part is an ARMISD::RRX operand, which shifts the carry in.
2178 Lo
= DAG
.getNode(ARMISD::RRX
, dl
, MVT::i32
, Lo
, Hi
.getValue(1));
2180 // Merge the pieces into a single i64 value.
2181 return DAG
.getNode(ISD::BUILD_PAIR
, dl
, MVT::i64
, Lo
, Hi
);
2184 static SDValue
LowerVSETCC(SDValue Op
, SelectionDAG
&DAG
) {
2185 SDValue TmpOp0
, TmpOp1
;
2186 bool Invert
= false;
2190 SDValue Op0
= Op
.getOperand(0);
2191 SDValue Op1
= Op
.getOperand(1);
2192 SDValue CC
= Op
.getOperand(2);
2193 EVT VT
= Op
.getValueType();
2194 ISD::CondCode SetCCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
2195 DebugLoc dl
= Op
.getDebugLoc();
2197 if (Op
.getOperand(1).getValueType().isFloatingPoint()) {
2198 switch (SetCCOpcode
) {
2199 default: llvm_unreachable("Illegal FP comparison"); break;
2201 case ISD::SETNE
: Invert
= true; // Fallthrough
2203 case ISD::SETEQ
: Opc
= ARMISD::VCEQ
; break;
2205 case ISD::SETLT
: Swap
= true; // Fallthrough
2207 case ISD::SETGT
: Opc
= ARMISD::VCGT
; break;
2209 case ISD::SETLE
: Swap
= true; // Fallthrough
2211 case ISD::SETGE
: Opc
= ARMISD::VCGE
; break;
2212 case ISD::SETUGE
: Swap
= true; // Fallthrough
2213 case ISD::SETULE
: Invert
= true; Opc
= ARMISD::VCGT
; break;
2214 case ISD::SETUGT
: Swap
= true; // Fallthrough
2215 case ISD::SETULT
: Invert
= true; Opc
= ARMISD::VCGE
; break;
2216 case ISD::SETUEQ
: Invert
= true; // Fallthrough
2218 // Expand this to (OLT | OGT).
2222 Op0
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp1
, TmpOp0
);
2223 Op1
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp0
, TmpOp1
);
2225 case ISD::SETUO
: Invert
= true; // Fallthrough
2227 // Expand this to (OLT | OGE).
2231 Op0
= DAG
.getNode(ARMISD::VCGT
, dl
, VT
, TmpOp1
, TmpOp0
);
2232 Op1
= DAG
.getNode(ARMISD::VCGE
, dl
, VT
, TmpOp0
, TmpOp1
);
2236 // Integer comparisons.
2237 switch (SetCCOpcode
) {
2238 default: llvm_unreachable("Illegal integer comparison"); break;
2239 case ISD::SETNE
: Invert
= true;
2240 case ISD::SETEQ
: Opc
= ARMISD::VCEQ
; break;
2241 case ISD::SETLT
: Swap
= true;
2242 case ISD::SETGT
: Opc
= ARMISD::VCGT
; break;
2243 case ISD::SETLE
: Swap
= true;
2244 case ISD::SETGE
: Opc
= ARMISD::VCGE
; break;
2245 case ISD::SETULT
: Swap
= true;
2246 case ISD::SETUGT
: Opc
= ARMISD::VCGTU
; break;
2247 case ISD::SETULE
: Swap
= true;
2248 case ISD::SETUGE
: Opc
= ARMISD::VCGEU
; break;
2251 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2252 if (Opc
== ARMISD::VCEQ
) {
2255 if (ISD::isBuildVectorAllZeros(Op1
.getNode()))
2257 else if (ISD::isBuildVectorAllZeros(Op0
.getNode()))
2260 // Ignore bitconvert.
2261 if (AndOp
.getNode() && AndOp
.getOpcode() == ISD::BIT_CONVERT
)
2262 AndOp
= AndOp
.getOperand(0);
2264 if (AndOp
.getNode() && AndOp
.getOpcode() == ISD::AND
) {
2266 Op0
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, AndOp
.getOperand(0));
2267 Op1
= DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, AndOp
.getOperand(1));
2274 std::swap(Op0
, Op1
);
2276 SDValue Result
= DAG
.getNode(Opc
, dl
, VT
, Op0
, Op1
);
2279 Result
= DAG
.getNOT(dl
, Result
, VT
);
2284 /// isVMOVSplat - Check if the specified splat value corresponds to an immediate
2285 /// VMOV instruction, and if so, return the constant being splatted.
2286 static SDValue
isVMOVSplat(uint64_t SplatBits
, uint64_t SplatUndef
,
2287 unsigned SplatBitSize
, SelectionDAG
&DAG
) {
2288 switch (SplatBitSize
) {
2290 // Any 1-byte value is OK.
2291 assert((SplatBits
& ~0xff) == 0 && "one byte splat value is too big");
2292 return DAG
.getTargetConstant(SplatBits
, MVT::i8
);
2295 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2296 if ((SplatBits
& ~0xff) == 0 ||
2297 (SplatBits
& ~0xff00) == 0)
2298 return DAG
.getTargetConstant(SplatBits
, MVT::i16
);
2302 // NEON's 32-bit VMOV supports splat values where:
2303 // * only one byte is nonzero, or
2304 // * the least significant byte is 0xff and the second byte is nonzero, or
2305 // * the least significant 2 bytes are 0xff and the third is nonzero.
2306 if ((SplatBits
& ~0xff) == 0 ||
2307 (SplatBits
& ~0xff00) == 0 ||
2308 (SplatBits
& ~0xff0000) == 0 ||
2309 (SplatBits
& ~0xff000000) == 0)
2310 return DAG
.getTargetConstant(SplatBits
, MVT::i32
);
2312 if ((SplatBits
& ~0xffff) == 0 &&
2313 ((SplatBits
| SplatUndef
) & 0xff) == 0xff)
2314 return DAG
.getTargetConstant(SplatBits
| 0xff, MVT::i32
);
2316 if ((SplatBits
& ~0xffffff) == 0 &&
2317 ((SplatBits
| SplatUndef
) & 0xffff) == 0xffff)
2318 return DAG
.getTargetConstant(SplatBits
| 0xffff, MVT::i32
);
2320 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2321 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2322 // VMOV.I32. A (very) minor optimization would be to replicate the value
2323 // and fall through here to test for a valid 64-bit splat. But, then the
2324 // caller would also need to check and handle the change in size.
2328 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
2329 uint64_t BitMask
= 0xff;
2331 for (int ByteNum
= 0; ByteNum
< 8; ++ByteNum
) {
2332 if (((SplatBits
| SplatUndef
) & BitMask
) == BitMask
)
2334 else if ((SplatBits
& BitMask
) != 0)
2338 return DAG
.getTargetConstant(Val
, MVT::i64
);
2342 llvm_unreachable("unexpected size for isVMOVSplat");
2349 /// getVMOVImm - If this is a build_vector of constants which can be
2350 /// formed by using a VMOV instruction of the specified element size,
2351 /// return the constant being splatted. The ByteSize field indicates the
2352 /// number of bytes of each element [1248].
2353 SDValue
ARM::getVMOVImm(SDNode
*N
, unsigned ByteSize
, SelectionDAG
&DAG
) {
2354 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(N
);
2355 APInt SplatBits
, SplatUndef
;
2356 unsigned SplatBitSize
;
2358 if (! BVN
|| ! BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
,
2359 HasAnyUndefs
, ByteSize
* 8))
2362 if (SplatBitSize
> ByteSize
* 8)
2365 return isVMOVSplat(SplatBits
.getZExtValue(), SplatUndef
.getZExtValue(),
2369 static bool isVEXTMask(const SmallVectorImpl
<int> &M
, EVT VT
,
2370 bool &ReverseVEXT
, unsigned &Imm
) {
2371 unsigned NumElts
= VT
.getVectorNumElements();
2372 ReverseVEXT
= false;
2375 // If this is a VEXT shuffle, the immediate value is the index of the first
2376 // element. The other shuffle indices must be the successive elements after
2378 unsigned ExpectedElt
= Imm
;
2379 for (unsigned i
= 1; i
< NumElts
; ++i
) {
2380 // Increment the expected index. If it wraps around, it may still be
2381 // a VEXT but the source vectors must be swapped.
2383 if (ExpectedElt
== NumElts
* 2) {
2388 if (ExpectedElt
!= static_cast<unsigned>(M
[i
]))
2392 // Adjust the index value if the source operands will be swapped.
2399 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
2400 /// instruction with the specified blocksize. (The order of the elements
2401 /// within each block of the vector is reversed.)
2402 static bool isVREVMask(const SmallVectorImpl
<int> &M
, EVT VT
,
2403 unsigned BlockSize
) {
2404 assert((BlockSize
==16 || BlockSize
==32 || BlockSize
==64) &&
2405 "Only possible block sizes for VREV are: 16, 32, 64");
2407 unsigned NumElts
= VT
.getVectorNumElements();
2408 unsigned EltSz
= VT
.getVectorElementType().getSizeInBits();
2409 unsigned BlockElts
= M
[0] + 1;
2411 if (BlockSize
<= EltSz
|| BlockSize
!= BlockElts
* EltSz
)
2414 for (unsigned i
= 0; i
< NumElts
; ++i
) {
2415 if ((unsigned) M
[i
] !=
2416 (i
- i
%BlockElts
) + (BlockElts
- 1 - i
%BlockElts
))
2423 static bool isVTRNMask(const SmallVectorImpl
<int> &M
, EVT VT
,
2424 unsigned &WhichResult
) {
2425 unsigned NumElts
= VT
.getVectorNumElements();
2426 WhichResult
= (M
[0] == 0 ? 0 : 1);
2427 for (unsigned i
= 0; i
< NumElts
; i
+= 2) {
2428 if ((unsigned) M
[i
] != i
+ WhichResult
||
2429 (unsigned) M
[i
+1] != i
+ NumElts
+ WhichResult
)
2435 static bool isVUZPMask(const SmallVectorImpl
<int> &M
, EVT VT
,
2436 unsigned &WhichResult
) {
2437 unsigned NumElts
= VT
.getVectorNumElements();
2438 WhichResult
= (M
[0] == 0 ? 0 : 1);
2439 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
2440 if ((unsigned) M
[i
] != 2 * i
+ WhichResult
)
2444 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2445 if (VT
.is64BitVector() && VT
.getVectorElementType().getSizeInBits() == 32)
2451 static bool isVZIPMask(const SmallVectorImpl
<int> &M
, EVT VT
,
2452 unsigned &WhichResult
) {
2453 unsigned NumElts
= VT
.getVectorNumElements();
2454 WhichResult
= (M
[0] == 0 ? 0 : 1);
2455 unsigned Idx
= WhichResult
* NumElts
/ 2;
2456 for (unsigned i
= 0; i
!= NumElts
; i
+= 2) {
2457 if ((unsigned) M
[i
] != Idx
||
2458 (unsigned) M
[i
+1] != Idx
+ NumElts
)
2463 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2464 if (VT
.is64BitVector() && VT
.getVectorElementType().getSizeInBits() == 32)
2470 static SDValue
BuildSplat(SDValue Val
, EVT VT
, SelectionDAG
&DAG
, DebugLoc dl
) {
2471 // Canonicalize all-zeros and all-ones vectors.
2472 ConstantSDNode
*ConstVal
= cast
<ConstantSDNode
>(Val
.getNode());
2473 if (ConstVal
->isNullValue())
2474 return getZeroVector(VT
, DAG
, dl
);
2475 if (ConstVal
->isAllOnesValue())
2476 return getOnesVector(VT
, DAG
, dl
);
2479 if (VT
.is64BitVector()) {
2480 switch (Val
.getValueType().getSizeInBits()) {
2481 case 8: CanonicalVT
= MVT::v8i8
; break;
2482 case 16: CanonicalVT
= MVT::v4i16
; break;
2483 case 32: CanonicalVT
= MVT::v2i32
; break;
2484 case 64: CanonicalVT
= MVT::v1i64
; break;
2485 default: llvm_unreachable("unexpected splat element type"); break;
2488 assert(VT
.is128BitVector() && "unknown splat vector size");
2489 switch (Val
.getValueType().getSizeInBits()) {
2490 case 8: CanonicalVT
= MVT::v16i8
; break;
2491 case 16: CanonicalVT
= MVT::v8i16
; break;
2492 case 32: CanonicalVT
= MVT::v4i32
; break;
2493 case 64: CanonicalVT
= MVT::v2i64
; break;
2494 default: llvm_unreachable("unexpected splat element type"); break;
2498 // Build a canonical splat for this value.
2499 SmallVector
<SDValue
, 8> Ops
;
2500 Ops
.assign(CanonicalVT
.getVectorNumElements(), Val
);
2501 SDValue Res
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, CanonicalVT
, &Ops
[0],
2503 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, VT
, Res
);
2506 // If this is a case we can't handle, return null and let the default
2507 // expansion code take care of it.
2508 static SDValue
LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) {
2509 BuildVectorSDNode
*BVN
= cast
<BuildVectorSDNode
>(Op
.getNode());
2510 DebugLoc dl
= Op
.getDebugLoc();
2511 EVT VT
= Op
.getValueType();
2513 APInt SplatBits
, SplatUndef
;
2514 unsigned SplatBitSize
;
2516 if (BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
)) {
2517 if (SplatBitSize
<= 64) {
2518 SDValue Val
= isVMOVSplat(SplatBits
.getZExtValue(),
2519 SplatUndef
.getZExtValue(), SplatBitSize
, DAG
);
2521 return BuildSplat(Val
, VT
, DAG
, dl
);
2525 // If there are only 2 elements in a 128-bit vector, insert them into an
2526 // undef vector. This handles the common case for 128-bit vector argument
2527 // passing, where the insertions should be translated to subreg accesses
2528 // with no real instructions.
2529 if (VT
.is128BitVector() && Op
.getNumOperands() == 2) {
2530 SDValue Val
= DAG
.getUNDEF(VT
);
2531 SDValue Op0
= Op
.getOperand(0);
2532 SDValue Op1
= Op
.getOperand(1);
2533 if (Op0
.getOpcode() != ISD::UNDEF
)
2534 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, VT
, Val
, Op0
,
2535 DAG
.getIntPtrConstant(0));
2536 if (Op1
.getOpcode() != ISD::UNDEF
)
2537 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, VT
, Val
, Op1
,
2538 DAG
.getIntPtrConstant(1));
2545 /// isShuffleMaskLegal - Targets can use this to indicate that they only
2546 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
2547 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
2548 /// are assumed to be legal.
2550 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl
<int> &M
,
2552 if (VT
.getVectorNumElements() == 4 &&
2553 (VT
.is128BitVector() || VT
.is64BitVector())) {
2554 unsigned PFIndexes
[4];
2555 for (unsigned i
= 0; i
!= 4; ++i
) {
2559 PFIndexes
[i
] = M
[i
];
2562 // Compute the index in the perfect shuffle table.
2563 unsigned PFTableIndex
=
2564 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
2565 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
2566 unsigned Cost
= (PFEntry
>> 30);
2573 unsigned Imm
, WhichResult
;
2575 return (ShuffleVectorSDNode::isSplatMask(&M
[0], VT
) ||
2576 isVREVMask(M
, VT
, 64) ||
2577 isVREVMask(M
, VT
, 32) ||
2578 isVREVMask(M
, VT
, 16) ||
2579 isVEXTMask(M
, VT
, ReverseVEXT
, Imm
) ||
2580 isVTRNMask(M
, VT
, WhichResult
) ||
2581 isVUZPMask(M
, VT
, WhichResult
) ||
2582 isVZIPMask(M
, VT
, WhichResult
));
2585 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
2586 /// the specified operations to build the shuffle.
2587 static SDValue
GeneratePerfectShuffle(unsigned PFEntry
, SDValue LHS
,
2588 SDValue RHS
, SelectionDAG
&DAG
,
2590 unsigned OpNum
= (PFEntry
>> 26) & 0x0F;
2591 unsigned LHSID
= (PFEntry
>> 13) & ((1 << 13)-1);
2592 unsigned RHSID
= (PFEntry
>> 0) & ((1 << 13)-1);
2595 OP_COPY
= 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
2604 OP_VUZPL
, // VUZP, left result
2605 OP_VUZPR
, // VUZP, right result
2606 OP_VZIPL
, // VZIP, left result
2607 OP_VZIPR
, // VZIP, right result
2608 OP_VTRNL
, // VTRN, left result
2609 OP_VTRNR
// VTRN, right result
2612 if (OpNum
== OP_COPY
) {
2613 if (LHSID
== (1*9+2)*9+3) return LHS
;
2614 assert(LHSID
== ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
2618 SDValue OpLHS
, OpRHS
;
2619 OpLHS
= GeneratePerfectShuffle(PerfectShuffleTable
[LHSID
], LHS
, RHS
, DAG
, dl
);
2620 OpRHS
= GeneratePerfectShuffle(PerfectShuffleTable
[RHSID
], LHS
, RHS
, DAG
, dl
);
2621 EVT VT
= OpLHS
.getValueType();
2624 default: llvm_unreachable("Unknown shuffle opcode!");
2626 return DAG
.getNode(ARMISD::VREV64
, dl
, VT
, OpLHS
);
2631 return DAG
.getNode(ARMISD::VDUPLANE
, dl
, VT
,
2632 OpLHS
, DAG
.getConstant(OpNum
-OP_VDUP0
, MVT::i32
));
2636 return DAG
.getNode(ARMISD::VEXT
, dl
, VT
,
2638 DAG
.getConstant(OpNum
-OP_VEXT1
+1, MVT::i32
));
2641 return DAG
.getNode(ARMISD::VUZP
, dl
, DAG
.getVTList(VT
, VT
),
2642 OpLHS
, OpRHS
).getValue(OpNum
-OP_VUZPL
);
2645 return DAG
.getNode(ARMISD::VZIP
, dl
, DAG
.getVTList(VT
, VT
),
2646 OpLHS
, OpRHS
).getValue(OpNum
-OP_VZIPL
);
2649 return DAG
.getNode(ARMISD::VTRN
, dl
, DAG
.getVTList(VT
, VT
),
2650 OpLHS
, OpRHS
).getValue(OpNum
-OP_VTRNL
);
2654 static SDValue
LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
) {
2655 SDValue V1
= Op
.getOperand(0);
2656 SDValue V2
= Op
.getOperand(1);
2657 DebugLoc dl
= Op
.getDebugLoc();
2658 EVT VT
= Op
.getValueType();
2659 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(Op
.getNode());
2660 SmallVector
<int, 8> ShuffleMask
;
2662 // Convert shuffles that are directly supported on NEON to target-specific
2663 // DAG nodes, instead of keeping them as shuffles and matching them again
2664 // during code selection. This is more efficient and avoids the possibility
2665 // of inconsistencies between legalization and selection.
2666 // FIXME: floating-point vectors should be canonicalized to integer vectors
2667 // of the same time so that they get CSEd properly.
2668 SVN
->getMask(ShuffleMask
);
2670 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask
[0], VT
)) {
2671 int Lane
= SVN
->getSplatIndex();
2672 if (Lane
== 0 && V1
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
2673 return DAG
.getNode(ARMISD::VDUP
, dl
, VT
, V1
.getOperand(0));
2675 return DAG
.getNode(ARMISD::VDUPLANE
, dl
, VT
, V1
,
2676 DAG
.getConstant(Lane
, MVT::i32
));
2681 if (isVEXTMask(ShuffleMask
, VT
, ReverseVEXT
, Imm
)) {
2684 return DAG
.getNode(ARMISD::VEXT
, dl
, VT
, V1
, V2
,
2685 DAG
.getConstant(Imm
, MVT::i32
));
2688 if (isVREVMask(ShuffleMask
, VT
, 64))
2689 return DAG
.getNode(ARMISD::VREV64
, dl
, VT
, V1
);
2690 if (isVREVMask(ShuffleMask
, VT
, 32))
2691 return DAG
.getNode(ARMISD::VREV32
, dl
, VT
, V1
);
2692 if (isVREVMask(ShuffleMask
, VT
, 16))
2693 return DAG
.getNode(ARMISD::VREV16
, dl
, VT
, V1
);
2695 // Check for Neon shuffles that modify both input vectors in place.
2696 // If both results are used, i.e., if there are two shuffles with the same
2697 // source operands and with masks corresponding to both results of one of
2698 // these operations, DAG memoization will ensure that a single node is
2699 // used for both shuffles.
2700 unsigned WhichResult
;
2701 if (isVTRNMask(ShuffleMask
, VT
, WhichResult
))
2702 return DAG
.getNode(ARMISD::VTRN
, dl
, DAG
.getVTList(VT
, VT
),
2703 V1
, V2
).getValue(WhichResult
);
2704 if (isVUZPMask(ShuffleMask
, VT
, WhichResult
))
2705 return DAG
.getNode(ARMISD::VUZP
, dl
, DAG
.getVTList(VT
, VT
),
2706 V1
, V2
).getValue(WhichResult
);
2707 if (isVZIPMask(ShuffleMask
, VT
, WhichResult
))
2708 return DAG
.getNode(ARMISD::VZIP
, dl
, DAG
.getVTList(VT
, VT
),
2709 V1
, V2
).getValue(WhichResult
);
2711 // If the shuffle is not directly supported and it has 4 elements, use
2712 // the PerfectShuffle-generated table to synthesize it from other shuffles.
2713 if (VT
.getVectorNumElements() == 4 &&
2714 (VT
.is128BitVector() || VT
.is64BitVector())) {
2715 unsigned PFIndexes
[4];
2716 for (unsigned i
= 0; i
!= 4; ++i
) {
2717 if (ShuffleMask
[i
] < 0)
2720 PFIndexes
[i
] = ShuffleMask
[i
];
2723 // Compute the index in the perfect shuffle table.
2724 unsigned PFTableIndex
=
2725 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
2727 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
2728 unsigned Cost
= (PFEntry
>> 30);
2731 return GeneratePerfectShuffle(PFEntry
, V1
, V2
, DAG
, dl
);
2737 static SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) {
2738 EVT VT
= Op
.getValueType();
2739 DebugLoc dl
= Op
.getDebugLoc();
2740 SDValue Vec
= Op
.getOperand(0);
2741 SDValue Lane
= Op
.getOperand(1);
2743 // FIXME: This is invalid for 8 and 16-bit elements - the information about
2744 // sign / zero extension is lost!
2745 Op
= DAG
.getNode(ARMISD::VGETLANEu
, dl
, MVT::i32
, Vec
, Lane
);
2746 Op
= DAG
.getNode(ISD::AssertZext
, dl
, MVT::i32
, Op
, DAG
.getValueType(VT
));
2748 if (VT
.bitsLT(MVT::i32
))
2749 Op
= DAG
.getNode(ISD::TRUNCATE
, dl
, VT
, Op
);
2750 else if (VT
.bitsGT(MVT::i32
))
2751 Op
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
, Op
);
2756 static SDValue
LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) {
2757 // The only time a CONCAT_VECTORS operation can have legal types is when
2758 // two 64-bit vectors are concatenated to a 128-bit vector.
2759 assert(Op
.getValueType().is128BitVector() && Op
.getNumOperands() == 2 &&
2760 "unexpected CONCAT_VECTORS");
2761 DebugLoc dl
= Op
.getDebugLoc();
2762 SDValue Val
= DAG
.getUNDEF(MVT::v2f64
);
2763 SDValue Op0
= Op
.getOperand(0);
2764 SDValue Op1
= Op
.getOperand(1);
2765 if (Op0
.getOpcode() != ISD::UNDEF
)
2766 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
, Val
,
2767 DAG
.getNode(ISD::BIT_CONVERT
, dl
, MVT::f64
, Op0
),
2768 DAG
.getIntPtrConstant(0));
2769 if (Op1
.getOpcode() != ISD::UNDEF
)
2770 Val
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
, MVT::v2f64
, Val
,
2771 DAG
.getNode(ISD::BIT_CONVERT
, dl
, MVT::f64
, Op1
),
2772 DAG
.getIntPtrConstant(1));
2773 return DAG
.getNode(ISD::BIT_CONVERT
, dl
, Op
.getValueType(), Val
);
2776 SDValue
ARMTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) {
2777 switch (Op
.getOpcode()) {
2778 default: llvm_unreachable("Don't know how to custom lower this!");
2779 case ISD::ConstantPool
: return LowerConstantPool(Op
, DAG
);
2780 case ISD::GlobalAddress
:
2781 return Subtarget
->isTargetDarwin() ? LowerGlobalAddressDarwin(Op
, DAG
) :
2782 LowerGlobalAddressELF(Op
, DAG
);
2783 case ISD::GlobalTLSAddress
: return LowerGlobalTLSAddress(Op
, DAG
);
2784 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
, Subtarget
);
2785 case ISD::BR_CC
: return LowerBR_CC(Op
, DAG
, Subtarget
);
2786 case ISD::BR_JT
: return LowerBR_JT(Op
, DAG
);
2787 case ISD::DYNAMIC_STACKALLOC
: return LowerDYNAMIC_STACKALLOC(Op
, DAG
);
2788 case ISD::VASTART
: return LowerVASTART(Op
, DAG
, VarArgsFrameIndex
);
2789 case ISD::SINT_TO_FP
:
2790 case ISD::UINT_TO_FP
: return LowerINT_TO_FP(Op
, DAG
);
2791 case ISD::FP_TO_SINT
:
2792 case ISD::FP_TO_UINT
: return LowerFP_TO_INT(Op
, DAG
);
2793 case ISD::FCOPYSIGN
: return LowerFCOPYSIGN(Op
, DAG
);
2794 case ISD::RETURNADDR
: break;
2795 case ISD::FRAMEADDR
: return LowerFRAMEADDR(Op
, DAG
);
2796 case ISD::GLOBAL_OFFSET_TABLE
: return LowerGLOBAL_OFFSET_TABLE(Op
, DAG
);
2797 case ISD::INTRINSIC_VOID
:
2798 case ISD::INTRINSIC_W_CHAIN
: return LowerINTRINSIC_W_CHAIN(Op
, DAG
);
2799 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
2800 case ISD::BIT_CONVERT
: return ExpandBIT_CONVERT(Op
.getNode(), DAG
);
2803 case ISD::SRA
: return LowerShift(Op
.getNode(), DAG
, Subtarget
);
2804 case ISD::VSETCC
: return LowerVSETCC(Op
, DAG
);
2805 case ISD::BUILD_VECTOR
: return LowerBUILD_VECTOR(Op
, DAG
);
2806 case ISD::VECTOR_SHUFFLE
: return LowerVECTOR_SHUFFLE(Op
, DAG
);
2807 case ISD::EXTRACT_VECTOR_ELT
: return LowerEXTRACT_VECTOR_ELT(Op
, DAG
);
2808 case ISD::CONCAT_VECTORS
: return LowerCONCAT_VECTORS(Op
, DAG
);
2813 /// ReplaceNodeResults - Replace the results of node with an illegal result
2814 /// type with new values built out of custom code.
2815 void ARMTargetLowering::ReplaceNodeResults(SDNode
*N
,
2816 SmallVectorImpl
<SDValue
>&Results
,
2817 SelectionDAG
&DAG
) {
2818 switch (N
->getOpcode()) {
2820 llvm_unreachable("Don't know how to custom expand this!");
2822 case ISD::BIT_CONVERT
:
2823 Results
.push_back(ExpandBIT_CONVERT(N
, DAG
));
2827 SDValue Res
= LowerShift(N
, DAG
, Subtarget
);
2829 Results
.push_back(Res
);
2835 //===----------------------------------------------------------------------===//
2836 // ARM Scheduler Hooks
2837 //===----------------------------------------------------------------------===//
2840 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr
*MI
,
2841 MachineBasicBlock
*BB
) const {
2842 const TargetInstrInfo
*TII
= getTargetMachine().getInstrInfo();
2843 DebugLoc dl
= MI
->getDebugLoc();
2844 switch (MI
->getOpcode()) {
2846 llvm_unreachable("Unexpected instr type to insert");
2847 case ARM::tMOVCCr_pseudo
: {
2848 // To "insert" a SELECT_CC instruction, we actually have to insert the
2849 // diamond control-flow pattern. The incoming instruction knows the
2850 // destination vreg to set, the condition code register to branch on, the
2851 // true/false values to select between, and a branch opcode to use.
2852 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
2853 MachineFunction::iterator It
= BB
;
2859 // cmpTY ccX, r1, r2
2861 // fallthrough --> copy0MBB
2862 MachineBasicBlock
*thisMBB
= BB
;
2863 MachineFunction
*F
= BB
->getParent();
2864 MachineBasicBlock
*copy0MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2865 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2866 BuildMI(BB
, dl
, TII
->get(ARM::tBcc
)).addMBB(sinkMBB
)
2867 .addImm(MI
->getOperand(3).getImm()).addReg(MI
->getOperand(4).getReg());
2868 F
->insert(It
, copy0MBB
);
2869 F
->insert(It
, sinkMBB
);
2870 // Update machine-CFG edges by first adding all successors of the current
2871 // block to the new block which will contain the Phi node for the select.
2872 for(MachineBasicBlock::succ_iterator i
= BB
->succ_begin(),
2873 e
= BB
->succ_end(); i
!= e
; ++i
)
2874 sinkMBB
->addSuccessor(*i
);
2875 // Next, remove all successors of the current block, and add the true
2876 // and fallthrough blocks as its successors.
2877 while(!BB
->succ_empty())
2878 BB
->removeSuccessor(BB
->succ_begin());
2879 BB
->addSuccessor(copy0MBB
);
2880 BB
->addSuccessor(sinkMBB
);
2883 // %FalseValue = ...
2884 // # fallthrough to sinkMBB
2887 // Update machine-CFG edges
2888 BB
->addSuccessor(sinkMBB
);
2891 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
2894 BuildMI(BB
, dl
, TII
->get(ARM::PHI
), MI
->getOperand(0).getReg())
2895 .addReg(MI
->getOperand(1).getReg()).addMBB(copy0MBB
)
2896 .addReg(MI
->getOperand(2).getReg()).addMBB(thisMBB
);
2898 F
->DeleteMachineInstr(MI
); // The pseudo instruction is gone now.
2905 case ARM::t2SUBrSPi_
:
2906 case ARM::t2SUBrSPi12_
:
2907 case ARM::t2SUBrSPs_
: {
2908 MachineFunction
*MF
= BB
->getParent();
2909 unsigned DstReg
= MI
->getOperand(0).getReg();
2910 unsigned SrcReg
= MI
->getOperand(1).getReg();
2911 bool DstIsDead
= MI
->getOperand(0).isDead();
2912 bool SrcIsKill
= MI
->getOperand(1).isKill();
2914 if (SrcReg
!= ARM::SP
) {
2915 // Copy the source to SP from virtual register.
2916 const TargetRegisterClass
*RC
= MF
->getRegInfo().getRegClass(SrcReg
);
2917 unsigned CopyOpc
= (RC
== ARM::tGPRRegisterClass
)
2918 ? ARM::tMOVtgpr2gpr
: ARM::tMOVgpr2gpr
;
2919 BuildMI(BB
, dl
, TII
->get(CopyOpc
), ARM::SP
)
2920 .addReg(SrcReg
, getKillRegState(SrcIsKill
));
2924 bool NeedPred
= false, NeedCC
= false, NeedOp3
= false;
2925 switch (MI
->getOpcode()) {
2927 llvm_unreachable("Unexpected pseudo instruction!");
2933 OpOpc
= ARM::tADDspr
;
2936 OpOpc
= ARM::tSUBspi
;
2938 case ARM::t2SUBrSPi_
:
2939 OpOpc
= ARM::t2SUBrSPi
;
2940 NeedPred
= true; NeedCC
= true;
2942 case ARM::t2SUBrSPi12_
:
2943 OpOpc
= ARM::t2SUBrSPi12
;
2946 case ARM::t2SUBrSPs_
:
2947 OpOpc
= ARM::t2SUBrSPs
;
2948 NeedPred
= true; NeedCC
= true; NeedOp3
= true;
2951 MachineInstrBuilder MIB
= BuildMI(BB
, dl
, TII
->get(OpOpc
), ARM::SP
);
2952 if (OpOpc
== ARM::tAND
)
2953 AddDefaultT1CC(MIB
);
2954 MIB
.addReg(ARM::SP
);
2955 MIB
.addOperand(MI
->getOperand(2));
2957 MIB
.addOperand(MI
->getOperand(3));
2959 AddDefaultPred(MIB
);
2963 // Copy the result from SP to virtual register.
2964 const TargetRegisterClass
*RC
= MF
->getRegInfo().getRegClass(DstReg
);
2965 unsigned CopyOpc
= (RC
== ARM::tGPRRegisterClass
)
2966 ? ARM::tMOVgpr2tgpr
: ARM::tMOVgpr2gpr
;
2967 BuildMI(BB
, dl
, TII
->get(CopyOpc
))
2968 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstIsDead
))
2970 MF
->DeleteMachineInstr(MI
); // The pseudo instruction is gone now.
2976 //===----------------------------------------------------------------------===//
2977 // ARM Optimization Hooks
2978 //===----------------------------------------------------------------------===//
2981 SDValue
combineSelectAndUse(SDNode
*N
, SDValue Slct
, SDValue OtherOp
,
2982 TargetLowering::DAGCombinerInfo
&DCI
) {
2983 SelectionDAG
&DAG
= DCI
.DAG
;
2984 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
2985 EVT VT
= N
->getValueType(0);
2986 unsigned Opc
= N
->getOpcode();
2987 bool isSlctCC
= Slct
.getOpcode() == ISD::SELECT_CC
;
2988 SDValue LHS
= isSlctCC
? Slct
.getOperand(2) : Slct
.getOperand(1);
2989 SDValue RHS
= isSlctCC
? Slct
.getOperand(3) : Slct
.getOperand(2);
2990 ISD::CondCode CC
= ISD::SETCC_INVALID
;
2993 CC
= cast
<CondCodeSDNode
>(Slct
.getOperand(4))->get();
2995 SDValue CCOp
= Slct
.getOperand(0);
2996 if (CCOp
.getOpcode() == ISD::SETCC
)
2997 CC
= cast
<CondCodeSDNode
>(CCOp
.getOperand(2))->get();
3000 bool DoXform
= false;
3002 assert ((Opc
== ISD::ADD
|| (Opc
== ISD::SUB
&& Slct
== N
->getOperand(1))) &&
3005 if (LHS
.getOpcode() == ISD::Constant
&&
3006 cast
<ConstantSDNode
>(LHS
)->isNullValue()) {
3008 } else if (CC
!= ISD::SETCC_INVALID
&&
3009 RHS
.getOpcode() == ISD::Constant
&&
3010 cast
<ConstantSDNode
>(RHS
)->isNullValue()) {
3011 std::swap(LHS
, RHS
);
3012 SDValue Op0
= Slct
.getOperand(0);
3013 EVT OpVT
= isSlctCC
? Op0
.getValueType() :
3014 Op0
.getOperand(0).getValueType();
3015 bool isInt
= OpVT
.isInteger();
3016 CC
= ISD::getSetCCInverse(CC
, isInt
);
3018 if (!TLI
.isCondCodeLegal(CC
, OpVT
))
3019 return SDValue(); // Inverse operator isn't legal.
3026 SDValue Result
= DAG
.getNode(Opc
, RHS
.getDebugLoc(), VT
, OtherOp
, RHS
);
3028 return DAG
.getSelectCC(N
->getDebugLoc(), OtherOp
, Result
,
3029 Slct
.getOperand(0), Slct
.getOperand(1), CC
);
3030 SDValue CCOp
= Slct
.getOperand(0);
3032 CCOp
= DAG
.getSetCC(Slct
.getDebugLoc(), CCOp
.getValueType(),
3033 CCOp
.getOperand(0), CCOp
.getOperand(1), CC
);
3034 return DAG
.getNode(ISD::SELECT
, N
->getDebugLoc(), VT
,
3035 CCOp
, OtherOp
, Result
);
3040 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3041 static SDValue
PerformADDCombine(SDNode
*N
,
3042 TargetLowering::DAGCombinerInfo
&DCI
) {
3043 // added by evan in r37685 with no testcase.
3044 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
3046 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
3047 if (N0
.getOpcode() == ISD::SELECT
&& N0
.getNode()->hasOneUse()) {
3048 SDValue Result
= combineSelectAndUse(N
, N0
, N1
, DCI
);
3049 if (Result
.getNode()) return Result
;
3051 if (N1
.getOpcode() == ISD::SELECT
&& N1
.getNode()->hasOneUse()) {
3052 SDValue Result
= combineSelectAndUse(N
, N1
, N0
, DCI
);
3053 if (Result
.getNode()) return Result
;
3059 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
3060 static SDValue
PerformSUBCombine(SDNode
*N
,
3061 TargetLowering::DAGCombinerInfo
&DCI
) {
3062 // added by evan in r37685 with no testcase.
3063 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
3065 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
3066 if (N1
.getOpcode() == ISD::SELECT
&& N1
.getNode()->hasOneUse()) {
3067 SDValue Result
= combineSelectAndUse(N
, N1
, N0
, DCI
);
3068 if (Result
.getNode()) return Result
;
3075 /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
3076 static SDValue
PerformFMRRDCombine(SDNode
*N
,
3077 TargetLowering::DAGCombinerInfo
&DCI
) {
3078 // fmrrd(fmdrr x, y) -> x,y
3079 SDValue InDouble
= N
->getOperand(0);
3080 if (InDouble
.getOpcode() == ARMISD::FMDRR
)
3081 return DCI
.CombineTo(N
, InDouble
.getOperand(0), InDouble
.getOperand(1));
3085 /// getVShiftImm - Check if this is a valid build_vector for the immediate
3086 /// operand of a vector shift operation, where all the elements of the
3087 /// build_vector must have the same constant integer value.
3088 static bool getVShiftImm(SDValue Op
, unsigned ElementBits
, int64_t &Cnt
) {
3089 // Ignore bit_converts.
3090 while (Op
.getOpcode() == ISD::BIT_CONVERT
)
3091 Op
= Op
.getOperand(0);
3092 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(Op
.getNode());
3093 APInt SplatBits
, SplatUndef
;
3094 unsigned SplatBitSize
;
3096 if (! BVN
|| ! BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
,
3097 HasAnyUndefs
, ElementBits
) ||
3098 SplatBitSize
> ElementBits
)
3100 Cnt
= SplatBits
.getSExtValue();
3104 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
3105 /// operand of a vector shift left operation. That value must be in the range:
3106 /// 0 <= Value < ElementBits for a left shift; or
3107 /// 0 <= Value <= ElementBits for a long left shift.
3108 static bool isVShiftLImm(SDValue Op
, EVT VT
, bool isLong
, int64_t &Cnt
) {
3109 assert(VT
.isVector() && "vector shift count is not a vector type");
3110 unsigned ElementBits
= VT
.getVectorElementType().getSizeInBits();
3111 if (! getVShiftImm(Op
, ElementBits
, Cnt
))
3113 return (Cnt
>= 0 && (isLong
? Cnt
-1 : Cnt
) < ElementBits
);

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the value count must be negative.  The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}
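
// Illustrative example (not from the original source): for a v8i16 shift
// (ElementBits == 16), isVShiftLImm accepts splat counts 0-15 (or 0-16 for
// the long vshll form), while isVShiftRImm accepts 1-16 (or 1-8 for the
// narrowing forms).  NEON shift intrinsics encode right shifts as negative
// counts, which is why isVShiftRImm negates Cnt when isIntrinsic is set.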

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::arm_neon_vshiftls:
  case Intrinsic::arm_neon_vshiftlu:
  case Intrinsic::arm_neon_vshiftn:
  case Intrinsic::arm_neon_vrshifts:
  case Intrinsic::arm_neon_vrshiftu:
  case Intrinsic::arm_neon_vrshiftn:
  case Intrinsic::arm_neon_vqshifts:
  case Intrinsic::arm_neon_vqshiftu:
  case Intrinsic::arm_neon_vqshiftsu:
  case Intrinsic::arm_neon_vqshiftns:
  case Intrinsic::arm_neon_vqshiftnu:
  case Intrinsic::arm_neon_vqshiftnsu:
  case Intrinsic::arm_neon_vqrshiftns:
  case Intrinsic::arm_neon_vqrshiftnu:
  case Intrinsic::arm_neon_vqrshiftnsu: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
        VShiftOpc = ARMISD::VSHL;
        break;
      }
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
                     ARMISD::VSHRs : ARMISD::VSHRu);
        break;
      }
      return SDValue();

    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for vshll intrinsic");

    case Intrinsic::arm_neon_vrshifts:
    case Intrinsic::arm_neon_vrshiftu:
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshifts:
    case Intrinsic::arm_neon_vqshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshiftsu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      llvm_unreachable("invalid shift count for vqshlu intrinsic");

    case Intrinsic::arm_neon_vshiftn:
    case Intrinsic::arm_neon_vrshiftn:
    case Intrinsic::arm_neon_vqshiftns:
    case Intrinsic::arm_neon_vqshiftnu:
    case Intrinsic::arm_neon_vqshiftnsu:
    case Intrinsic::arm_neon_vqrshiftns:
    case Intrinsic::arm_neon_vqrshiftnu:
    case Intrinsic::arm_neon_vqrshiftnsu:
      // Narrowing shifts require an immediate right shift.
      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");

    default:
      llvm_unreachable("unhandled vector shift");
    }

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      // Opcode already set above.
      break;
    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (Cnt == VT.getVectorElementType().getSizeInBits())
        VShiftOpc = ARMISD::VSHLLi;
      else
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
                     ARMISD::VSHLLs : ARMISD::VSHLLu);
      break;
    case Intrinsic::arm_neon_vshiftn:
      VShiftOpc = ARMISD::VSHRN; break;
    case Intrinsic::arm_neon_vrshifts:
      VShiftOpc = ARMISD::VRSHRs; break;
    case Intrinsic::arm_neon_vrshiftu:
      VShiftOpc = ARMISD::VRSHRu; break;
    case Intrinsic::arm_neon_vrshiftn:
      VShiftOpc = ARMISD::VRSHRN; break;
    case Intrinsic::arm_neon_vqshifts:
      VShiftOpc = ARMISD::VQSHLs; break;
    case Intrinsic::arm_neon_vqshiftu:
      VShiftOpc = ARMISD::VQSHLu; break;
    case Intrinsic::arm_neon_vqshiftsu:
      VShiftOpc = ARMISD::VQSHLsu; break;
    case Intrinsic::arm_neon_vqshiftns:
      VShiftOpc = ARMISD::VQSHRNs; break;
    case Intrinsic::arm_neon_vqshiftnu:
      VShiftOpc = ARMISD::VQSHRNu; break;
    case Intrinsic::arm_neon_vqshiftnsu:
      VShiftOpc = ARMISD::VQSHRNsu; break;
    case Intrinsic::arm_neon_vqrshiftns:
      VShiftOpc = ARMISD::VQRSHRNs; break;
    case Intrinsic::arm_neon_vqrshiftnu:
      VShiftOpc = ARMISD::VQRSHRNu; break;
    case Intrinsic::arm_neon_vqrshiftnsu:
      VShiftOpc = ARMISD::VQRSHRNsu; break;
    }

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vshiftins: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
      VShiftOpc = ARMISD::VSLI;
    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
      VShiftOpc = ARMISD::VSRI;
    else
      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vqrshifts:
  case Intrinsic::arm_neon_vqrshiftu:
    // No immediate versions of these to check for.
    break;
  }

  return SDValue();
}
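
// Illustrative example (not from the original source): a call to
// @llvm.arm.neon.vshifts.v2i64 whose shift-amount operand is a build_vector
// splat of 3 is rewritten above into (ARMISD::VSHL x, 3), which selects to an
// immediate-form shift (e.g. "vshl.i64 q0, q1, #3") instead of first
// materializing the 64-bit shift-count vector from a constant pool.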

/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them.  As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  // Nothing to be done for scalar shifts.
  if (! VT.isVector())
    return SDValue();

  assert(ST->hasNEON() && "unexpected vector shift");
  int64_t Cnt;

  switch (N->getOpcode()) {
  default: llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    break;

  case ISD::SRA:
  case ISD::SRL:
    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
                            ARMISD::VSHRs : ARMISD::VSHRu);
      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    }
  }
  return SDValue();
}

/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements.  NEON supports these directly.  They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType())) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
    }
  }

  return SDValue();
}
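
// Illustrative example (not from the original source):
//   (sign_extend (extract_vector_elt v8i16 %v, 3))
// becomes (ARMISD::VGETLANEs %v, 3) above, which can be selected to a single
// "vmov.s16 r0, d0[3]" rather than an extract followed by a separate
// sign-extension of the 16-bit element.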

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:      return PerformADDCombine(N, DCI);
  case ISD::SUB:      return PerformSUBCombine(N, DCI);
  case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN:
    return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return PerformExtendCombine(N, DCI.DAG, Subtarget);
  }
  return SDValue();
}

bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->hasV6Ops())
    // Pre-v6 does not support unaligned mem access.
    return false;
  else if (!Subtarget->hasV7Ops()) {
    // v6 may or may not support unaligned mem access.
    if (!Subtarget->isTargetDarwin())
      return false;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  // FIXME: VLD1 etc with standard alignment is legal.
  }
}

static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    // Scale == 1;
    break;
  case MVT::i16:
    Scale = 2;
    break;
  case MVT::i32:
    Scale = 4;
    break;
  }

  if ((V & (Scale - 1)) != 0)
    return false;
  V /= Scale;
  return V == (V & ((1LL << 5) - 1));
}
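
// Illustrative example (not from the original source): Thumb-1 load/store
// offsets are unsigned 5-bit fields scaled by the access size, so the legal
// byte offsets accepted above are multiples of 4 in [0, 124] for i32,
// multiples of 2 in [0, 62] for i16, and [0, 31] for i8.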

static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  bool isNeg = false;
  if (V < 0) {
    isNeg = true;
    V = - V;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // + imm12 or - imm8
    if (isNeg)
      return V == (V & ((1LL << 8) - 1));
    return V == (V & ((1LL << 12) - 1));
  case MVT::f32:
  case MVT::f64:
    // Same as ARM mode. FIXME: NEON?
    if (!Subtarget->hasVFP2())
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                    const ARMSubtarget *Subtarget) {
  if (V == 0)
    return true;

  if (!VT.isSimple())
    return false;

  if (Subtarget->isThumb1Only())
    return isLegalT1AddressImmediate(V, VT);
  else if (Subtarget->isThumb2())
    return isLegalT2AddressImmediate(V, VT, Subtarget);

  // ARM mode.
  if (V < 0)
    V = - V;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i32:
    // +- imm12
    return V == (V & ((1LL << 12) - 1));
  case MVT::i16:
    // +- imm8
    return V == (V & ((1LL << 8) - 1));
  case MVT::f32:
  case MVT::f64:
    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because arm allows folding a scale into many arithmetic
    // operations.  This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;
    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because arm allows folding a scale into many arithmetic
      // operations.  This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
    break;
  }
  return true;
}
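
// Illustrative example (not from the original source): for an i32 load in ARM
// mode, a base-plus-scaled-register mode such as "r0 + (r1 << 2)" (Scale == 4)
// is accepted above, but combining a scaled register with a nonzero immediate
// offset is rejected because ARM has no R+R*scale+imm addressing form.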

static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
  return false;
}

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT  = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}
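
// Illustrative example (not from the original source): a load from "p + 4"
// whose updated pointer value is also used lets the combiner form a
// pre-indexed access here, which selects to "ldr r0, [r1, #4]!" so the base
// register update is folded into the load.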

/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}

void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
                          KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne  &= KnownOneRHS;
    return;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:  break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l':
      if (Subtarget->isThumb1Only())
        return std::make_pair(0U, ARM::tGPRRegisterClass);
      else
        return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'r':
      return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'w':
      if (VT == MVT::f32)
        return std::make_pair(0U, ARM::SPRRegisterClass);
      if (VT == MVT::f64)
        return std::make_pair(0U, ARM::DPRRegisterClass);
      break;
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

std::vector<unsigned> ARMTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  if (Constraint.size() != 1)
    return std::vector<unsigned>();

  switch (Constraint[0]) {      // GCC ARM Constraint Letters
  default: break;
  case 'l':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 0);
  case 'r':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
                                 ARM::R12, ARM::LR, 0);
  case 'w':
    if (VT == MVT::f32)
      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
    if (VT == MVT::f64)
      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
    break;
  }

  return std::vector<unsigned>();
}
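
// Illustrative example (not from the original source): with these hooks,
// GCC-style inline assembly such as
//   asm("add %0, %1, %2" : "=r"(d) : "r"(s), "I"(255));
// resolves "r" to the GPR register class above, and the 'I' operand is only
// accepted below if the constant is encodable as an ARM/Thumb data-processing
// immediate.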

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits.  Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (Constraint) {
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32.  This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }

    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,