//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  // Divide and remainder are lowered to use div or idiv in legalize in
  // order to expose the intermediate computations to trivial CSE. This is
  // most noticeable when both x/y and x%y are being computed; they can be
  // done with a single div or idiv.
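  // For example, when a function computes both x/y and x%y, a single 32-bit
  // idiv produces the quotient in EAX and the remainder in EDX, so exposing
  // the lowering here lets CSE fold the two operations into one divide.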
  setOperationAction(ISD::SDIV, MVT::i8, Custom);
  setOperationAction(ISD::UDIV, MVT::i8, Custom);
  setOperationAction(ISD::SREM, MVT::i8, Custom);
  setOperationAction(ISD::UREM, MVT::i8, Custom);
  setOperationAction(ISD::SDIV, MVT::i16, Custom);
  setOperationAction(ISD::UDIV, MVT::i16, Custom);
  setOperationAction(ISD::SREM, MVT::i16, Custom);
  setOperationAction(ISD::UREM, MVT::i16, Custom);
  setOperationAction(ISD::SDIV, MVT::i32, Custom);
  setOperationAction(ISD::UDIV, MVT::i32, Custom);
  setOperationAction(ISD::SREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTLZ, MVT::i8, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTLZ, MVT::i16, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);

  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);

  setOperationAction(ISD::FSIN, MVT::f80, Expand);
  setOperationAction(ISD::FCOS, MVT::f80, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
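  // With these limits, a small constant-length llvm.memset/memcpy/memmove may
  // be expanded inline into up to 16 stores instead of a library call.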
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"
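
// X86GenCallingConv.inc is generated by tablegen from X86CallingConv.td; it
// provides the CC_X86_* and RetCC_X86 functions used by the CCState analyses
// below.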

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
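  // The RET node's operands are the chain followed by one pair of operands
  // per returned value, hence the odd operand count above and the i*2+1
  // indexing below.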

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}

//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall calling convention seems to be standard for many Windows' API
//  routines and around. It differs from C calling convention just a little:
//  callee should clean up the stack, not caller. Symbols should also be
//  decorated in some fancy way :) It doesn't support any vector arguments.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;          // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
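  // The two constants passed to CALLSEQ_END below are the bytes the caller
  // reserved for arguments and the bytes the callee itself pops on return
  // (0 for the plain C convention, where the caller cleans up).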
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
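//
// For example, a fastcall call site with 8 bytes of stack arguments pads the
// pop count to 12 (8*1+4); together with the 4-byte return address, the stack
// pointer then moves by a multiple of 8 across the call.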

SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes to make sure the start of the
    // arguments and the arguments after the retaddr has been pushed are aligned.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);

    return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
                       AlignNode);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes to make sure the start of the
    // arguments and the arguments after the retaddr has been pushed are aligned.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
1398 // Store the integer parameter registers.
1399 SmallVector
<SDOperand
, 8> MemOps
;
1400 SDOperand RSFIN
= DAG
.getFrameIndex(RegSaveFrameIndex
, getPointerTy());
1401 SDOperand FIN
= DAG
.getNode(ISD::ADD
, getPointerTy(), RSFIN
,
1402 DAG
.getConstant(VarArgsGPOffset
, getPointerTy()));
1403 for (; NumIntRegs
!= 6; ++NumIntRegs
) {
1404 unsigned VReg
= AddLiveIn(MF
, GPR64ArgRegs
[NumIntRegs
],
1405 X86::GR64RegisterClass
);
1406 SDOperand Val
= DAG
.getCopyFromReg(Root
, VReg
, MVT::i64
);
1407 SDOperand Store
= DAG
.getStore(Val
.getValue(1), Val
, FIN
, NULL
, 0);
1408 MemOps
.push_back(Store
);
1409 FIN
= DAG
.getNode(ISD::ADD
, getPointerTy(), FIN
,
1410 DAG
.getConstant(8, getPointerTy()));
1413 // Now store the XMM (fp + vector) parameter registers.
1414 FIN
= DAG
.getNode(ISD::ADD
, getPointerTy(), RSFIN
,
1415 DAG
.getConstant(VarArgsFPOffset
, getPointerTy()));
1416 for (; NumXMMRegs
!= 8; ++NumXMMRegs
) {
1417 unsigned VReg
= AddLiveIn(MF
, XMMArgRegs
[NumXMMRegs
],
1418 X86::VR128RegisterClass
);
1419 SDOperand Val
= DAG
.getCopyFromReg(Root
, VReg
, MVT::v4f32
);
1420 SDOperand Store
= DAG
.getStore(Val
.getValue(1), Val
, FIN
, NULL
, 0);
1421 MemOps
.push_back(Store
);
1422 FIN
= DAG
.getNode(ISD::ADD
, getPointerTy(), FIN
,
1423 DAG
.getConstant(16, getPointerTy()));
1425 if (!MemOps
.empty())
1426 Root
= DAG
.getNode(ISD::TokenFactor
, MVT::Other
,
1427 &MemOps
[0], MemOps
.size());
1430 ArgValues
.push_back(Root
);
1432 BytesToPopOnReturn
= 0; // Callee pops nothing.
1433 BytesCallerReserves
= StackSize
;
1435 X86MachineFunctionInfo
*FuncInfo
= MF
.getInfo
<X86MachineFunctionInfo
>();
1436 FuncInfo
->setBytesToPopOnReturn(BytesToPopOnReturn
);
1438 // Return the new list of results.
1439 return DAG
.getNode(ISD::MERGE_VALUES
, Op
.Val
->getVTList(),
1440 &ArgValues
[0], ArgValues
.size()).getValue(Op
.ResNo
);
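// Illustrative note (not part of the original source): the register save
// area created above for varargs is laid out as 6 x 8 bytes of integer
// registers (RDI, RSI, RDX, RCX, R8, R9) followed by 8 x 16 bytes of XMM
// registers, 176 bytes total.  For a function that already consumed, say,
// two integer and one XMM argument, VarArgsGPOffset would be 16 and
// VarArgsFPOffset would be 48 + 16 = 64, which is what the va_arg expansion
// later reads back.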

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain   = Op.getOperand(0);
  bool isVarArg     = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall   = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee  = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}
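// Illustrative note (not part of the original source): for a variadic call
// such as printf("%f %f\n", x, y), the two double arguments land in XMM0 and
// XMM1, so the CopyToReg above moves the constant 2 into AL.  Any value that
// is at least the number of XMM registers actually used (up to 8) would also
// satisfy the ABI, since AL only needs to be an upper bound.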
//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
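// Illustrative note (not part of the original source): the fixed objects
// created above describe the slot that the CALL instruction itself pushed,
// so they sit at a negative offset from the incoming stack pointer -- 8
// bytes at offset -8 in 64-bit mode, 4 bytes at offset -4 in 32-bit mode.
// Lowering of llvm.returnaddress-style operations can then simply load from
// this frame index.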
/// translateX86CC - do a one to one translation of an ISD::CondCode to the X86
/// specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
/// required.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}
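// Illustrative note (not part of the original source): UCOMISS/UCOMISD only
// set ZF/PF/CF as shown in the table above, so an ordered "less than" has no
// direct condition code.  translateX86CC handles (setolt X, Y) by setting
// Flip, swapping the operands, and using COND_A: after comparing Y with X,
// "above" is true exactly when X < Y and is false on unordered, which is the
// required SETOLT behaviour.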
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. Current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check if the value doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;

  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}
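// Illustrative note (not part of the original source): for a 4-element
// shuffle, isSHUFPMask accepts masks such as <1, 3, 4, 6> -- the low half
// picks from V1 (indices 0-3) and the high half from V2 (indices 4-7), which
// is exactly the operand order SHUFPS expects.  The commuted form
// <4, 6, 1, 3> is what isCommutedSHUFP detects so the caller can swap V1 and
// V2 first.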
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}
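// Illustrative note (not part of the original source): with
// V1 = <a0, a1, a2, a3> and V2 = <b0, b1, b2, b3>, the MOVHLPS pattern
// <6, 7, 2, 3> produces <b2, b3, a2, a3> (the high half of V2 moved into the
// low half of the result), while the MOVLPS pattern <4, 5, 2, 3> replaces
// only the low half of V1 with the low half of V2.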
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}
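// Illustrative note (not part of the original source): UNPCKLPS interleaves
// the low halves of its inputs, so for V1 = <a0, a1, a2, a3> and
// V2 = <b0, b1, b2, b3> the mask <0, 4, 1, 5> yields <a0, b0, a1, b1>, and
// the v_undef form <0, 0, 1, 1> duplicates each low element of a single
// input.  UNPCKHPS does the same for the high halves with <2, 6, 3, 7>.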
2010 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2011 /// specifies a shuffle of elements that is suitable for input to MOVSS,
2012 /// MOVSD, and MOVD, i.e. setting the lowest element.
2013 static bool isMOVLMask(const SDOperand
*Elts
, unsigned NumElts
) {
2014 if (NumElts
!= 2 && NumElts
!= 4 && NumElts
!= 8 && NumElts
!= 16)
2017 if (!isUndefOrEqual(Elts
[0], NumElts
))
2020 for (unsigned i
= 1; i
< NumElts
; ++i
) {
2021 if (!isUndefOrEqual(Elts
[i
], i
))
2028 bool X86::isMOVLMask(SDNode
*N
) {
2029 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2030 return ::isMOVLMask(N
->op_begin(), N
->getNumOperands());
2033 /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
2034 /// of what x86 movss want. X86 movs requires the lowest element to be lowest
2035 /// element of vector 2 and the other elements to come from vector 1 in order.
2036 static bool isCommutedMOVL(const SDOperand
*Ops
, unsigned NumOps
,
2037 bool V2IsSplat
= false,
2038 bool V2IsUndef
= false) {
2039 if (NumOps
!= 2 && NumOps
!= 4 && NumOps
!= 8 && NumOps
!= 16)
2042 if (!isUndefOrEqual(Ops
[0], 0))
2045 for (unsigned i
= 1; i
< NumOps
; ++i
) {
2046 SDOperand Arg
= Ops
[i
];
2047 if (!(isUndefOrEqual(Arg
, i
+NumOps
) ||
2048 (V2IsUndef
&& isUndefOrInRange(Arg
, NumOps
, NumOps
*2)) ||
2049 (V2IsSplat
&& isUndefOrEqual(Arg
, NumOps
))))
2056 static bool isCommutedMOVL(SDNode
*N
, bool V2IsSplat
= false,
2057 bool V2IsUndef
= false) {
2058 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2059 return isCommutedMOVL(N
->op_begin(), N
->getNumOperands(),
2060 V2IsSplat
, V2IsUndef
);
2063 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2064 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2065 bool X86::isMOVSHDUPMask(SDNode
*N
) {
2066 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2068 if (N
->getNumOperands() != 4)
2071 // Expect 1, 1, 3, 3
2072 for (unsigned i
= 0; i
< 2; ++i
) {
2073 SDOperand Arg
= N
->getOperand(i
);
2074 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2075 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2076 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2077 if (Val
!= 1) return false;
2081 for (unsigned i
= 2; i
< 4; ++i
) {
2082 SDOperand Arg
= N
->getOperand(i
);
2083 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2084 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2085 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2086 if (Val
!= 3) return false;
2090 // Don't use movshdup if it can be done with a shufps.
2094 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2095 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2096 bool X86::isMOVSLDUPMask(SDNode
*N
) {
2097 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2099 if (N
->getNumOperands() != 4)
2102 // Expect 0, 0, 2, 2
2103 for (unsigned i
= 0; i
< 2; ++i
) {
2104 SDOperand Arg
= N
->getOperand(i
);
2105 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2106 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2107 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2108 if (Val
!= 0) return false;
2112 for (unsigned i
= 2; i
< 4; ++i
) {
2113 SDOperand Arg
= N
->getOperand(i
);
2114 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2115 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2116 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2117 if (Val
!= 2) return false;
2121 // Don't use movshdup if it can be done with a shufps.
2125 /// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
2126 /// specifies a identity operation on the LHS or RHS.
2127 static bool isIdentityMask(SDNode
*N
, bool RHS
= false) {
2128 unsigned NumElems
= N
->getNumOperands();
2129 for (unsigned i
= 0; i
< NumElems
; ++i
)
2130 if (!isUndefOrEqual(N
->getOperand(i
), i
+ (RHS
? NumElems
: 0)))
2135 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2136 /// a splat of a single element.
2137 static bool isSplatMask(SDNode
*N
) {
2138 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2140 // This is a splat operation if each element of the permute is the same, and
2141 // if the value doesn't reference the second vector.
2142 unsigned NumElems
= N
->getNumOperands();
2143 SDOperand ElementBase
;
2145 for (; i
!= NumElems
; ++i
) {
2146 SDOperand Elt
= N
->getOperand(i
);
2147 if (isa
<ConstantSDNode
>(Elt
)) {
2153 if (!ElementBase
.Val
)
2156 for (; i
!= NumElems
; ++i
) {
2157 SDOperand Arg
= N
->getOperand(i
);
2158 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2159 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2160 if (Arg
!= ElementBase
) return false;
2163 // Make sure it is a splat of the first vector operand.
2164 return cast
<ConstantSDNode
>(ElementBase
)->getValue() < NumElems
;
2167 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2168 /// a splat of a single element and it's a 2 or 4 element mask.
2169 bool X86::isSplatMask(SDNode
*N
) {
2170 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2172 // We can only splat 64-bit, and 32-bit quantities with a single instruction.
2173 if (N
->getNumOperands() != 4 && N
->getNumOperands() != 2)
2175 return ::isSplatMask(N
);
2178 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2179 /// specifies a splat of zero element.
2180 bool X86::isSplatLoMask(SDNode
*N
) {
2181 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2183 for (unsigned i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
)
2184 if (!isUndefOrEqual(N
->getOperand(i
), 0))
2189 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2190 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2192 unsigned X86::getShuffleSHUFImmediate(SDNode
*N
) {
2193 unsigned NumOperands
= N
->getNumOperands();
2194 unsigned Shift
= (NumOperands
== 4) ? 2 : 1;
2196 for (unsigned i
= 0; i
< NumOperands
; ++i
) {
2198 SDOperand Arg
= N
->getOperand(NumOperands
-i
-1);
2199 if (Arg
.getOpcode() != ISD::UNDEF
)
2200 Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2201 if (Val
>= NumOperands
) Val
-= NumOperands
;
2203 if (i
!= NumOperands
- 1)
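// Illustrative note (not part of the original source): the SHUF immediate is
// built by packing each mask element into two bits, with element 0 in the
// lowest bits.  For the reversal mask <3, 2, 1, 0> the loop above walks the
// operands from last to first and produces 0b00011011 = 0x1B, i.e. the
// familiar "pshufd $0x1b" / "shufps $0x1b" element reversal.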
2210 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2211 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2213 unsigned X86::getShufflePSHUFHWImmediate(SDNode
*N
) {
2215 // 8 nodes, but we only care about the last 4.
2216 for (unsigned i
= 7; i
>= 4; --i
) {
2218 SDOperand Arg
= N
->getOperand(i
);
2219 if (Arg
.getOpcode() != ISD::UNDEF
)
2220 Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2229 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2230 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2232 unsigned X86::getShufflePSHUFLWImmediate(SDNode
*N
) {
2234 // 8 nodes, but we only care about the first 4.
2235 for (int i
= 3; i
>= 0; --i
) {
2237 SDOperand Arg
= N
->getOperand(i
);
2238 if (Arg
.getOpcode() != ISD::UNDEF
)
2239 Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2248 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2249 /// specifies a 8 element shuffle that can be broken into a pair of
2250 /// PSHUFHW and PSHUFLW.
2251 static bool isPSHUFHW_PSHUFLWMask(SDNode
*N
) {
2252 assert(N
->getOpcode() == ISD::BUILD_VECTOR
);
2254 if (N
->getNumOperands() != 8)
2257 // Lower quadword shuffled.
2258 for (unsigned i
= 0; i
!= 4; ++i
) {
2259 SDOperand Arg
= N
->getOperand(i
);
2260 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2261 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2262 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2267 // Upper quadword shuffled.
2268 for (unsigned i
= 4; i
!= 8; ++i
) {
2269 SDOperand Arg
= N
->getOperand(i
);
2270 if (Arg
.getOpcode() == ISD::UNDEF
) continue;
2271 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2272 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2273 if (Val
< 4 || Val
> 7)
2280 /// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
2281 /// values in ther permute mask.
2282 static SDOperand
CommuteVectorShuffle(SDOperand Op
, SDOperand
&V1
,
2283 SDOperand
&V2
, SDOperand
&Mask
,
2284 SelectionDAG
&DAG
) {
2285 MVT::ValueType VT
= Op
.getValueType();
2286 MVT::ValueType MaskVT
= Mask
.getValueType();
2287 MVT::ValueType EltVT
= MVT::getVectorElementType(MaskVT
);
2288 unsigned NumElems
= Mask
.getNumOperands();
2289 SmallVector
<SDOperand
, 8> MaskVec
;
2291 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
2292 SDOperand Arg
= Mask
.getOperand(i
);
2293 if (Arg
.getOpcode() == ISD::UNDEF
) {
2294 MaskVec
.push_back(DAG
.getNode(ISD::UNDEF
, EltVT
));
2297 assert(isa
<ConstantSDNode
>(Arg
) && "Invalid VECTOR_SHUFFLE mask!");
2298 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2300 MaskVec
.push_back(DAG
.getConstant(Val
+ NumElems
, EltVT
));
2302 MaskVec
.push_back(DAG
.getConstant(Val
- NumElems
, EltVT
));
2306 Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
, &MaskVec
[0], MaskVec
.size());
2307 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, Mask
);
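// Illustrative note (not part of the original source): commuting a shuffle
// swaps V1 and V2 and rewrites every mask entry across the NumElems
// boundary, so vector_shuffle V1, V2, <1, 3, 4, 6> becomes
// vector_shuffle V2, V1, <5, 7, 0, 2> -- the same result, but now in the
// operand order the SHUFP/MOVL matching code expects.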
2310 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2311 /// match movhlps. The lower half elements should come from upper half of
2312 /// V1 (and in order), and the upper half elements should come from the upper
2313 /// half of V2 (and in order).
2314 static bool ShouldXformToMOVHLPS(SDNode
*Mask
) {
2315 unsigned NumElems
= Mask
->getNumOperands();
2318 for (unsigned i
= 0, e
= 2; i
!= e
; ++i
)
2319 if (!isUndefOrEqual(Mask
->getOperand(i
), i
+2))
2321 for (unsigned i
= 2; i
!= 4; ++i
)
2322 if (!isUndefOrEqual(Mask
->getOperand(i
), i
+4))
2327 /// isScalarLoadToVector - Returns true if the node is a scalar load that
2328 /// is promoted to a vector.
2329 static inline bool isScalarLoadToVector(SDNode
*N
) {
2330 if (N
->getOpcode() == ISD::SCALAR_TO_VECTOR
) {
2331 N
= N
->getOperand(0).Val
;
2332 return ISD::isNON_EXTLoad(N
);
2337 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2338 /// match movlp{s|d}. The lower half elements should come from lower half of
2339 /// V1 (and in order), and the upper half elements should come from the upper
2340 /// half of V2 (and in order). And since V1 will become the source of the
2341 /// MOVLP, it must be either a vector load or a scalar load to vector.
2342 static bool ShouldXformToMOVLP(SDNode
*V1
, SDNode
*V2
, SDNode
*Mask
) {
2343 if (!ISD::isNON_EXTLoad(V1
) && !isScalarLoadToVector(V1
))
2345 // Is V2 is a vector load, don't do this transformation. We will try to use
2346 // load folding shufps op.
2347 if (ISD::isNON_EXTLoad(V2
))
2350 unsigned NumElems
= Mask
->getNumOperands();
2351 if (NumElems
!= 2 && NumElems
!= 4)
2353 for (unsigned i
= 0, e
= NumElems
/2; i
!= e
; ++i
)
2354 if (!isUndefOrEqual(Mask
->getOperand(i
), i
))
2356 for (unsigned i
= NumElems
/2; i
!= NumElems
; ++i
)
2357 if (!isUndefOrEqual(Mask
->getOperand(i
), i
+NumElems
))
2362 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2364 static bool isSplatVector(SDNode
*N
) {
2365 if (N
->getOpcode() != ISD::BUILD_VECTOR
)
2368 SDOperand SplatValue
= N
->getOperand(0);
2369 for (unsigned i
= 1, e
= N
->getNumOperands(); i
!= e
; ++i
)
2370 if (N
->getOperand(i
) != SplatValue
)
2375 /// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2377 static bool isUndefShuffle(SDNode
*N
) {
2378 if (N
->getOpcode() != ISD::VECTOR_SHUFFLE
)
2381 SDOperand V1
= N
->getOperand(0);
2382 SDOperand V2
= N
->getOperand(1);
2383 SDOperand Mask
= N
->getOperand(2);
2384 unsigned NumElems
= Mask
.getNumOperands();
2385 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
2386 SDOperand Arg
= Mask
.getOperand(i
);
2387 if (Arg
.getOpcode() != ISD::UNDEF
) {
2388 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2389 if (Val
< NumElems
&& V1
.getOpcode() != ISD::UNDEF
)
2391 else if (Val
>= NumElems
&& V2
.getOpcode() != ISD::UNDEF
)
2398 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
2400 static inline bool isZeroNode(SDOperand Elt
) {
2401 return ((isa
<ConstantSDNode
>(Elt
) &&
2402 cast
<ConstantSDNode
>(Elt
)->getValue() == 0) ||
2403 (isa
<ConstantFPSDNode
>(Elt
) &&
2404 cast
<ConstantFPSDNode
>(Elt
)->getValueAPF().isPosZero()));
2407 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2408 /// to an zero vector.
2409 static bool isZeroShuffle(SDNode
*N
) {
2410 if (N
->getOpcode() != ISD::VECTOR_SHUFFLE
)
2413 SDOperand V1
= N
->getOperand(0);
2414 SDOperand V2
= N
->getOperand(1);
2415 SDOperand Mask
= N
->getOperand(2);
2416 unsigned NumElems
= Mask
.getNumOperands();
2417 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
2418 SDOperand Arg
= Mask
.getOperand(i
);
2419 if (Arg
.getOpcode() != ISD::UNDEF
) {
2420 unsigned Idx
= cast
<ConstantSDNode
>(Arg
)->getValue();
2421 if (Idx
< NumElems
) {
2422 unsigned Opc
= V1
.Val
->getOpcode();
2423 if (Opc
== ISD::UNDEF
)
2425 if (Opc
!= ISD::BUILD_VECTOR
||
2426 !isZeroNode(V1
.Val
->getOperand(Idx
)))
2428 } else if (Idx
>= NumElems
) {
2429 unsigned Opc
= V2
.Val
->getOpcode();
2430 if (Opc
== ISD::UNDEF
)
2432 if (Opc
!= ISD::BUILD_VECTOR
||
2433 !isZeroNode(V2
.Val
->getOperand(Idx
- NumElems
)))
2441 /// getZeroVector - Returns a vector of specified type with all zero elements.
2443 static SDOperand
getZeroVector(MVT::ValueType VT
, SelectionDAG
&DAG
) {
2444 assert(MVT::isVector(VT
) && "Expected a vector type");
2445 unsigned NumElems
= MVT::getVectorNumElements(VT
);
2446 MVT::ValueType EVT
= MVT::getVectorElementType(VT
);
2447 bool isFP
= MVT::isFloatingPoint(EVT
);
2448 SDOperand Zero
= isFP
? DAG
.getConstantFP(0.0, EVT
) : DAG
.getConstant(0, EVT
);
2449 SmallVector
<SDOperand
, 8> ZeroVec(NumElems
, Zero
);
2450 return DAG
.getNode(ISD::BUILD_VECTOR
, VT
, &ZeroVec
[0], ZeroVec
.size());
2453 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2454 /// that point to V2 points to its first element.
2455 static SDOperand
NormalizeMask(SDOperand Mask
, SelectionDAG
&DAG
) {
2456 assert(Mask
.getOpcode() == ISD::BUILD_VECTOR
);
2458 bool Changed
= false;
2459 SmallVector
<SDOperand
, 8> MaskVec
;
2460 unsigned NumElems
= Mask
.getNumOperands();
2461 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
2462 SDOperand Arg
= Mask
.getOperand(i
);
2463 if (Arg
.getOpcode() != ISD::UNDEF
) {
2464 unsigned Val
= cast
<ConstantSDNode
>(Arg
)->getValue();
2465 if (Val
> NumElems
) {
2466 Arg
= DAG
.getConstant(NumElems
, Arg
.getValueType());
2470 MaskVec
.push_back(Arg
);
2474 Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, Mask
.getValueType(),
2475 &MaskVec
[0], MaskVec
.size());
2479 /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
2480 /// operation of specified width.
2481 static SDOperand
getMOVLMask(unsigned NumElems
, SelectionDAG
&DAG
) {
2482 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2483 MVT::ValueType BaseVT
= MVT::getVectorElementType(MaskVT
);
2485 SmallVector
<SDOperand
, 8> MaskVec
;
2486 MaskVec
.push_back(DAG
.getConstant(NumElems
, BaseVT
));
2487 for (unsigned i
= 1; i
!= NumElems
; ++i
)
2488 MaskVec
.push_back(DAG
.getConstant(i
, BaseVT
));
2489 return DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
, &MaskVec
[0], MaskVec
.size());
2492 /// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2493 /// of specified width.
2494 static SDOperand
getUnpacklMask(unsigned NumElems
, SelectionDAG
&DAG
) {
2495 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2496 MVT::ValueType BaseVT
= MVT::getVectorElementType(MaskVT
);
2497 SmallVector
<SDOperand
, 8> MaskVec
;
2498 for (unsigned i
= 0, e
= NumElems
/2; i
!= e
; ++i
) {
2499 MaskVec
.push_back(DAG
.getConstant(i
, BaseVT
));
2500 MaskVec
.push_back(DAG
.getConstant(i
+ NumElems
, BaseVT
));
2502 return DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
, &MaskVec
[0], MaskVec
.size());
2505 /// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2506 /// of specified width.
2507 static SDOperand
getUnpackhMask(unsigned NumElems
, SelectionDAG
&DAG
) {
2508 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2509 MVT::ValueType BaseVT
= MVT::getVectorElementType(MaskVT
);
2510 unsigned Half
= NumElems
/2;
2511 SmallVector
<SDOperand
, 8> MaskVec
;
2512 for (unsigned i
= 0; i
!= Half
; ++i
) {
2513 MaskVec
.push_back(DAG
.getConstant(i
+ Half
, BaseVT
));
2514 MaskVec
.push_back(DAG
.getConstant(i
+ NumElems
+ Half
, BaseVT
));
2516 return DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
, &MaskVec
[0], MaskVec
.size());
2519 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2521 static SDOperand
PromoteSplat(SDOperand Op
, SelectionDAG
&DAG
) {
2522 SDOperand V1
= Op
.getOperand(0);
2523 SDOperand Mask
= Op
.getOperand(2);
2524 MVT::ValueType VT
= Op
.getValueType();
2525 unsigned NumElems
= Mask
.getNumOperands();
2526 Mask
= getUnpacklMask(NumElems
, DAG
);
2527 while (NumElems
!= 4) {
2528 V1
= DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V1
, Mask
);
2531 V1
= DAG
.getNode(ISD::BIT_CONVERT
, MVT::v4i32
, V1
);
2533 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(4);
2534 Mask
= getZeroVector(MaskVT
, DAG
);
2535 SDOperand Shuffle
= DAG
.getNode(ISD::VECTOR_SHUFFLE
, MVT::v4i32
, V1
,
2536 DAG
.getNode(ISD::UNDEF
, MVT::v4i32
), Mask
);
2537 return DAG
.getNode(ISD::BIT_CONVERT
, VT
, Shuffle
);
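// Illustrative note (not part of the original source): PromoteSplat widens a
// narrow splat so it can be done with a single 32-bit shuffle.  The vector
// is repeatedly unpacked against itself until it has four 32-bit-sized
// lanes, bitcast to v4i32, splatted there with an all-zero shuffle mask, and
// finally bitcast back to the original type (e.g. v8i16 or v16i8).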
2540 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2541 /// vector of zero or undef vector.
2542 static SDOperand
getShuffleVectorZeroOrUndef(SDOperand V2
, MVT::ValueType VT
,
2543 unsigned NumElems
, unsigned Idx
,
2544 bool isZero
, SelectionDAG
&DAG
) {
2545 SDOperand V1
= isZero
? getZeroVector(VT
, DAG
) : DAG
.getNode(ISD::UNDEF
, VT
);
2546 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2547 MVT::ValueType EVT
= MVT::getVectorElementType(MaskVT
);
2548 SDOperand Zero
= DAG
.getConstant(0, EVT
);
2549 SmallVector
<SDOperand
, 8> MaskVec(NumElems
, Zero
);
2550 MaskVec
[Idx
] = DAG
.getConstant(NumElems
, EVT
);
2551 SDOperand Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
2552 &MaskVec
[0], MaskVec
.size());
2553 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, Mask
);
2556 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2558 static SDOperand
LowerBuildVectorv16i8(SDOperand Op
, unsigned NonZeros
,
2559 unsigned NumNonZero
, unsigned NumZero
,
2560 SelectionDAG
&DAG
, TargetLowering
&TLI
) {
2566 for (unsigned i
= 0; i
< 16; ++i
) {
2567 bool ThisIsNonZero
= (NonZeros
& (1 << i
)) != 0;
2568 if (ThisIsNonZero
&& First
) {
2570 V
= getZeroVector(MVT::v8i16
, DAG
);
2572 V
= DAG
.getNode(ISD::UNDEF
, MVT::v8i16
);
2577 SDOperand
ThisElt(0, 0), LastElt(0, 0);
2578 bool LastIsNonZero
= (NonZeros
& (1 << (i
-1))) != 0;
2579 if (LastIsNonZero
) {
2580 LastElt
= DAG
.getNode(ISD::ZERO_EXTEND
, MVT::i16
, Op
.getOperand(i
-1));
2582 if (ThisIsNonZero
) {
2583 ThisElt
= DAG
.getNode(ISD::ZERO_EXTEND
, MVT::i16
, Op
.getOperand(i
));
2584 ThisElt
= DAG
.getNode(ISD::SHL
, MVT::i16
,
2585 ThisElt
, DAG
.getConstant(8, MVT::i8
));
2587 ThisElt
= DAG
.getNode(ISD::OR
, MVT::i16
, ThisElt
, LastElt
);
2592 V
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, MVT::v8i16
, V
, ThisElt
,
2593 DAG
.getConstant(i
/2, TLI
.getPointerTy()));
2597 return DAG
.getNode(ISD::BIT_CONVERT
, MVT::v16i8
, V
);
2600 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2602 static SDOperand
LowerBuildVectorv8i16(SDOperand Op
, unsigned NonZeros
,
2603 unsigned NumNonZero
, unsigned NumZero
,
2604 SelectionDAG
&DAG
, TargetLowering
&TLI
) {
2610 for (unsigned i
= 0; i
< 8; ++i
) {
2611 bool isNonZero
= (NonZeros
& (1 << i
)) != 0;
2615 V
= getZeroVector(MVT::v8i16
, DAG
);
2617 V
= DAG
.getNode(ISD::UNDEF
, MVT::v8i16
);
2620 V
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, MVT::v8i16
, V
, Op
.getOperand(i
),
2621 DAG
.getConstant(i
, TLI
.getPointerTy()));
2629 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op
, SelectionDAG
&DAG
) {
2630 // All zero's are handled with pxor.
2631 if (ISD::isBuildVectorAllZeros(Op
.Val
))
2634 // All one's are handled with pcmpeqd.
2635 if (ISD::isBuildVectorAllOnes(Op
.Val
))
2638 MVT::ValueType VT
= Op
.getValueType();
2639 MVT::ValueType EVT
= MVT::getVectorElementType(VT
);
2640 unsigned EVTBits
= MVT::getSizeInBits(EVT
);
2642 unsigned NumElems
= Op
.getNumOperands();
2643 unsigned NumZero
= 0;
2644 unsigned NumNonZero
= 0;
2645 unsigned NonZeros
= 0;
2646 unsigned NumNonZeroImms
= 0;
2647 std::set
<SDOperand
> Values
;
2648 for (unsigned i
= 0; i
< NumElems
; ++i
) {
2649 SDOperand Elt
= Op
.getOperand(i
);
2650 if (Elt
.getOpcode() != ISD::UNDEF
) {
2652 if (isZeroNode(Elt
))
2655 NonZeros
|= (1 << i
);
2657 if (Elt
.getOpcode() == ISD::Constant
||
2658 Elt
.getOpcode() == ISD::ConstantFP
)
2664 if (NumNonZero
== 0) {
2666 // All undef vector. Return an UNDEF.
2667 return DAG
.getNode(ISD::UNDEF
, VT
);
2669 // A mix of zero and undef. Return a zero vector.
2670 return getZeroVector(VT
, DAG
);
2673 // Splat is obviously ok. Let legalizer expand it to a shuffle.
2674 if (Values
.size() == 1)
2677 // Special case for single non-zero element.
2678 if (NumNonZero
== 1) {
2679 unsigned Idx
= CountTrailingZeros_32(NonZeros
);
2680 SDOperand Item
= Op
.getOperand(Idx
);
2681 Item
= DAG
.getNode(ISD::SCALAR_TO_VECTOR
, VT
, Item
);
2683 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2684 return getShuffleVectorZeroOrUndef(Item
, VT
, NumElems
, Idx
,
2687 if (EVTBits
== 32) {
2688 // Turn it into a shuffle of zero and zero-extended scalar to vector.
2689 Item
= getShuffleVectorZeroOrUndef(Item
, VT
, NumElems
, 0, NumZero
> 0,
2691 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2692 MVT::ValueType MaskEVT
= MVT::getVectorElementType(MaskVT
);
2693 SmallVector
<SDOperand
, 8> MaskVec
;
2694 for (unsigned i
= 0; i
< NumElems
; i
++)
2695 MaskVec
.push_back(DAG
.getConstant((i
== Idx
) ? 0 : 1, MaskEVT
));
2696 SDOperand Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
2697 &MaskVec
[0], MaskVec
.size());
2698 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, Item
,
2699 DAG
.getNode(ISD::UNDEF
, VT
), Mask
);
2703 // A vector full of immediates; various special cases are already
2704 // handled, so this is best done with a single constant-pool load.
2705 if (NumNonZero
== NumNonZeroImms
)
2708 // Let legalizer expand 2-wide build_vectors.
2712 // If element VT is < 32 bits, convert it to inserts into a zero vector.
2713 if (EVTBits
== 8 && NumElems
== 16) {
2714 SDOperand V
= LowerBuildVectorv16i8(Op
, NonZeros
,NumNonZero
,NumZero
, DAG
,
2716 if (V
.Val
) return V
;
2719 if (EVTBits
== 16 && NumElems
== 8) {
2720 SDOperand V
= LowerBuildVectorv8i16(Op
, NonZeros
,NumNonZero
,NumZero
, DAG
,
2722 if (V
.Val
) return V
;
2725 // If element VT is == 32 bits, turn it into a number of shuffles.
2726 SmallVector
<SDOperand
, 8> V
;
2728 if (NumElems
== 4 && NumZero
> 0) {
2729 for (unsigned i
= 0; i
< 4; ++i
) {
2730 bool isZero
= !(NonZeros
& (1 << i
));
2732 V
[i
] = getZeroVector(VT
, DAG
);
2734 V
[i
] = DAG
.getNode(ISD::SCALAR_TO_VECTOR
, VT
, Op
.getOperand(i
));
2737 for (unsigned i
= 0; i
< 2; ++i
) {
2738 switch ((NonZeros
& (0x3 << i
*2)) >> (i
*2)) {
2741 V
[i
] = V
[i
*2]; // Must be a zero vector.
2744 V
[i
] = DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V
[i
*2+1], V
[i
*2],
2745 getMOVLMask(NumElems
, DAG
));
2748 V
[i
] = DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V
[i
*2], V
[i
*2+1],
2749 getMOVLMask(NumElems
, DAG
));
2752 V
[i
] = DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V
[i
*2], V
[i
*2+1],
2753 getUnpacklMask(NumElems
, DAG
));
2758 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
2759 // clears the upper bits.
2760 // FIXME: we can do the same for v4f32 case when we know both parts of
2761 // the lower half come from scalar_to_vector (loadf32). We should do
2762 // that in post legalizer dag combiner with target specific hooks.
2763 if (MVT::isInteger(EVT
) && (NonZeros
& (0x3 << 2)) == 0)
2765 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2766 MVT::ValueType EVT
= MVT::getVectorElementType(MaskVT
);
2767 SmallVector
<SDOperand
, 8> MaskVec
;
2768 bool Reverse
= (NonZeros
& 0x3) == 2;
2769 for (unsigned i
= 0; i
< 2; ++i
)
2771 MaskVec
.push_back(DAG
.getConstant(1-i
, EVT
));
2773 MaskVec
.push_back(DAG
.getConstant(i
, EVT
));
2774 Reverse
= ((NonZeros
& (0x3 << 2)) >> 2) == 2;
2775 for (unsigned i
= 0; i
< 2; ++i
)
2777 MaskVec
.push_back(DAG
.getConstant(1-i
+NumElems
, EVT
));
2779 MaskVec
.push_back(DAG
.getConstant(i
+NumElems
, EVT
));
2780 SDOperand ShufMask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
2781 &MaskVec
[0], MaskVec
.size());
2782 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V
[0], V
[1], ShufMask
);
2785 if (Values
.size() > 2) {
2786 // Expand into a number of unpckl*.
2788 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2789 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2790 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
2791 SDOperand UnpckMask
= getUnpacklMask(NumElems
, DAG
);
2792 for (unsigned i
= 0; i
< NumElems
; ++i
)
2793 V
[i
] = DAG
.getNode(ISD::SCALAR_TO_VECTOR
, VT
, Op
.getOperand(i
));
2795 while (NumElems
!= 0) {
2796 for (unsigned i
= 0; i
< NumElems
; ++i
)
2797 V
[i
] = DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V
[i
], V
[i
+ NumElems
],
2808 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op
, SelectionDAG
&DAG
) {
2809 SDOperand V1
= Op
.getOperand(0);
2810 SDOperand V2
= Op
.getOperand(1);
2811 SDOperand PermMask
= Op
.getOperand(2);
2812 MVT::ValueType VT
= Op
.getValueType();
2813 unsigned NumElems
= PermMask
.getNumOperands();
2814 bool V1IsUndef
= V1
.getOpcode() == ISD::UNDEF
;
2815 bool V2IsUndef
= V2
.getOpcode() == ISD::UNDEF
;
2816 bool V1IsSplat
= false;
2817 bool V2IsSplat
= false;
2819 if (isUndefShuffle(Op
.Val
))
2820 return DAG
.getNode(ISD::UNDEF
, VT
);
2822 if (isZeroShuffle(Op
.Val
))
2823 return getZeroVector(VT
, DAG
);
2825 if (isIdentityMask(PermMask
.Val
))
2827 else if (isIdentityMask(PermMask
.Val
, true))
2830 if (isSplatMask(PermMask
.Val
)) {
2831 if (NumElems
<= 4) return Op
;
2832 // Promote it to a v4i32 splat.
2833 return PromoteSplat(Op
, DAG
);
2836 if (X86::isMOVLMask(PermMask
.Val
))
2837 return (V1IsUndef
) ? V2
: Op
;
2839 if (X86::isMOVSHDUPMask(PermMask
.Val
) ||
2840 X86::isMOVSLDUPMask(PermMask
.Val
) ||
2841 X86::isMOVHLPSMask(PermMask
.Val
) ||
2842 X86::isMOVHPMask(PermMask
.Val
) ||
2843 X86::isMOVLPMask(PermMask
.Val
))
2846 if (ShouldXformToMOVHLPS(PermMask
.Val
) ||
2847 ShouldXformToMOVLP(V1
.Val
, V2
.Val
, PermMask
.Val
))
2848 return CommuteVectorShuffle(Op
, V1
, V2
, PermMask
, DAG
);
2850 bool Commuted
= false;
2851 V1IsSplat
= isSplatVector(V1
.Val
);
2852 V2IsSplat
= isSplatVector(V2
.Val
);
2853 if ((V1IsSplat
|| V1IsUndef
) && !(V2IsSplat
|| V2IsUndef
)) {
2854 Op
= CommuteVectorShuffle(Op
, V1
, V2
, PermMask
, DAG
);
2855 std::swap(V1IsSplat
, V2IsSplat
);
2856 std::swap(V1IsUndef
, V2IsUndef
);
2860 if (isCommutedMOVL(PermMask
.Val
, V2IsSplat
, V2IsUndef
)) {
2861 if (V2IsUndef
) return V1
;
2862 Op
= CommuteVectorShuffle(Op
, V1
, V2
, PermMask
, DAG
);
2864 // V2 is a splat, so the mask may be malformed. That is, it may point
2865 // to any V2 element. The instruction selectior won't like this. Get
2866 // a corrected mask and commute to form a proper MOVS{S|D}.
2867 SDOperand NewMask
= getMOVLMask(NumElems
, DAG
);
2868 if (NewMask
.Val
!= PermMask
.Val
)
2869 Op
= DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, NewMask
);
2874 if (X86::isUNPCKL_v_undef_Mask(PermMask
.Val
) ||
2875 X86::isUNPCKH_v_undef_Mask(PermMask
.Val
) ||
2876 X86::isUNPCKLMask(PermMask
.Val
) ||
2877 X86::isUNPCKHMask(PermMask
.Val
))
2881 // Normalize mask so all entries that point to V2 points to its first
2882 // element then try to match unpck{h|l} again. If match, return a
2883 // new vector_shuffle with the corrected mask.
2884 SDOperand NewMask
= NormalizeMask(PermMask
, DAG
);
2885 if (NewMask
.Val
!= PermMask
.Val
) {
2886 if (X86::isUNPCKLMask(PermMask
.Val
, true)) {
2887 SDOperand NewMask
= getUnpacklMask(NumElems
, DAG
);
2888 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, NewMask
);
2889 } else if (X86::isUNPCKHMask(PermMask
.Val
, true)) {
2890 SDOperand NewMask
= getUnpackhMask(NumElems
, DAG
);
2891 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, NewMask
);
2896 // Normalize the node to match x86 shuffle ops if needed
2897 if (V2
.getOpcode() != ISD::UNDEF
&& isCommutedSHUFP(PermMask
.Val
))
2898 Op
= CommuteVectorShuffle(Op
, V1
, V2
, PermMask
, DAG
);
2901 // Commute is back and try unpck* again.
2902 Op
= CommuteVectorShuffle(Op
, V1
, V2
, PermMask
, DAG
);
2903 if (X86::isUNPCKL_v_undef_Mask(PermMask
.Val
) ||
2904 X86::isUNPCKH_v_undef_Mask(PermMask
.Val
) ||
2905 X86::isUNPCKLMask(PermMask
.Val
) ||
2906 X86::isUNPCKHMask(PermMask
.Val
))
2910 // If VT is integer, try PSHUF* first, then SHUFP*.
2911 if (MVT::isInteger(VT
)) {
2912 // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
2913 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
2914 if (((MVT::getSizeInBits(VT
) != 64 || NumElems
== 4) &&
2915 X86::isPSHUFDMask(PermMask
.Val
)) ||
2916 X86::isPSHUFHWMask(PermMask
.Val
) ||
2917 X86::isPSHUFLWMask(PermMask
.Val
)) {
2918 if (V2
.getOpcode() != ISD::UNDEF
)
2919 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
,
2920 DAG
.getNode(ISD::UNDEF
, V1
.getValueType()),PermMask
);
2924 if (X86::isSHUFPMask(PermMask
.Val
) &&
2925 MVT::getSizeInBits(VT
) != 64) // Don't do this for MMX.
2928 // Handle v8i16 shuffle high / low shuffle node pair.
2929 if (VT
== MVT::v8i16
&& isPSHUFHW_PSHUFLWMask(PermMask
.Val
)) {
2930 MVT::ValueType MaskVT
= MVT::getIntVectorWithNumElements(NumElems
);
2931 MVT::ValueType BaseVT
= MVT::getVectorElementType(MaskVT
);
2932 SmallVector
<SDOperand
, 8> MaskVec
;
2933 for (unsigned i
= 0; i
!= 4; ++i
)
2934 MaskVec
.push_back(PermMask
.getOperand(i
));
2935 for (unsigned i
= 4; i
!= 8; ++i
)
2936 MaskVec
.push_back(DAG
.getConstant(i
, BaseVT
));
2937 SDOperand Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
2938 &MaskVec
[0], MaskVec
.size());
2939 V1
= DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, Mask
);
2941 for (unsigned i
= 0; i
!= 4; ++i
)
2942 MaskVec
.push_back(DAG
.getConstant(i
, BaseVT
));
2943 for (unsigned i
= 4; i
!= 8; ++i
)
2944 MaskVec
.push_back(PermMask
.getOperand(i
));
2945 Mask
= DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
, &MaskVec
[0],MaskVec
.size());
2946 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
, Mask
);
2949 // Floating point cases in the other order.
2950 if (X86::isSHUFPMask(PermMask
.Val
))
2952 if (X86::isPSHUFDMask(PermMask
.Val
) ||
2953 X86::isPSHUFHWMask(PermMask
.Val
) ||
2954 X86::isPSHUFLWMask(PermMask
.Val
)) {
2955 if (V2
.getOpcode() != ISD::UNDEF
)
2956 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
,
2957 DAG
.getNode(ISD::UNDEF
, V1
.getValueType()),PermMask
);
2962 if (NumElems
== 4 &&
2963 // Don't do this for MMX.
2964 MVT::getSizeInBits(VT
) != 64) {
2965 MVT::ValueType MaskVT
= PermMask
.getValueType();
2966 MVT::ValueType MaskEVT
= MVT::getVectorElementType(MaskVT
);
2967 SmallVector
<std::pair
<int, int>, 8> Locs
;
2968 Locs
.reserve(NumElems
);
2969 SmallVector
<SDOperand
, 8> Mask1(NumElems
, DAG
.getNode(ISD::UNDEF
, MaskEVT
));
2970 SmallVector
<SDOperand
, 8> Mask2(NumElems
, DAG
.getNode(ISD::UNDEF
, MaskEVT
));
2973 // If no more than two elements come from either vector. This can be
2974 // implemented with two shuffles. First shuffle gather the elements.
2975 // The second shuffle, which takes the first shuffle as both of its
2976 // vector operands, put the elements into the right order.
2977 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
2978 SDOperand Elt
= PermMask
.getOperand(i
);
2979 if (Elt
.getOpcode() == ISD::UNDEF
) {
2980 Locs
[i
] = std::make_pair(-1, -1);
2982 unsigned Val
= cast
<ConstantSDNode
>(Elt
)->getValue();
2983 if (Val
< NumElems
) {
2984 Locs
[i
] = std::make_pair(0, NumLo
);
2988 Locs
[i
] = std::make_pair(1, NumHi
);
2989 if (2+NumHi
< NumElems
)
2990 Mask1
[2+NumHi
] = Elt
;
2995 if (NumLo
<= 2 && NumHi
<= 2) {
2996 V1
= DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
,
2997 DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
2998 &Mask1
[0], Mask1
.size()));
2999 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
3000 if (Locs
[i
].first
== -1)
3003 unsigned Idx
= (i
< NumElems
/2) ? 0 : NumElems
;
3004 Idx
+= Locs
[i
].first
* (NumElems
/2) + Locs
[i
].second
;
3005 Mask2
[i
] = DAG
.getConstant(Idx
, MaskEVT
);
3009 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V1
,
3010 DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
3011 &Mask2
[0], Mask2
.size()));
3014 // Break it into (shuffle shuffle_hi, shuffle_lo).
3016 SmallVector
<SDOperand
,8> LoMask(NumElems
, DAG
.getNode(ISD::UNDEF
, MaskEVT
));
3017 SmallVector
<SDOperand
,8> HiMask(NumElems
, DAG
.getNode(ISD::UNDEF
, MaskEVT
));
3018 SmallVector
<SDOperand
,8> *MaskPtr
= &LoMask
;
3019 unsigned MaskIdx
= 0;
3021 unsigned HiIdx
= NumElems
/2;
3022 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
3023 if (i
== NumElems
/2) {
3029 SDOperand Elt
= PermMask
.getOperand(i
);
3030 if (Elt
.getOpcode() == ISD::UNDEF
) {
3031 Locs
[i
] = std::make_pair(-1, -1);
3032 } else if (cast
<ConstantSDNode
>(Elt
)->getValue() < NumElems
) {
3033 Locs
[i
] = std::make_pair(MaskIdx
, LoIdx
);
3034 (*MaskPtr
)[LoIdx
] = Elt
;
3037 Locs
[i
] = std::make_pair(MaskIdx
, HiIdx
);
3038 (*MaskPtr
)[HiIdx
] = Elt
;
3043 SDOperand LoShuffle
=
3044 DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
,
3045 DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
3046 &LoMask
[0], LoMask
.size()));
3047 SDOperand HiShuffle
=
3048 DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, V1
, V2
,
3049 DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
3050 &HiMask
[0], HiMask
.size()));
3051 SmallVector
<SDOperand
, 8> MaskOps
;
3052 for (unsigned i
= 0; i
!= NumElems
; ++i
) {
3053 if (Locs
[i
].first
== -1) {
3054 MaskOps
.push_back(DAG
.getNode(ISD::UNDEF
, MaskEVT
));
3056 unsigned Idx
= Locs
[i
].first
* NumElems
+ Locs
[i
].second
;
3057 MaskOps
.push_back(DAG
.getConstant(Idx
, MaskEVT
));
3060 return DAG
.getNode(ISD::VECTOR_SHUFFLE
, VT
, LoShuffle
, HiShuffle
,
3061 DAG
.getNode(ISD::BUILD_VECTOR
, MaskVT
,
3062 &MaskOps
[0], MaskOps
.size()));
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it match pextrw which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                    DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  }

  return SDOperand();
}
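// Insert-element lowering: 16-bit elements map directly onto X86ISD::PINSRW
// (which expects the value in a GR32); 32-bit elements are either blended in
// with a shuffle (element 0) or written as two PINSRW halves.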
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it match pinsrw which expects a 16-bit value in a GR32
  // as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size()));
    } else {
      // Use two pinsrw instructions to insert a 32 bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
        N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
        N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                         DAG.getConstant(0, getPointerTy()));
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, getPointerTy()));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, getPointerTy()));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}
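// SCALAR_TO_VECTOR is lowered by any-extending the scalar to i32 and wrapping
// it in an X86ISD::S2VEC node, which instruction selection typically turns
// into a movd.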
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. The platform check is inside the
  // GVRequiresExtraLoad() call. The same applies for external symbols during
  // PIC codegen.
  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);

  return Result;
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model.
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                              const MVT::ValueType PtrVT) {
  SDOperand InFlag;
  SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Ops[] = { Chain, TGA, InFlag };
  SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops1[] = { Chain,
                       DAG.getTargetExternalSymbol("___tls_get_addr",
                                                   PtrVT),
                       DAG.getRegister(X86::EAX, PtrVT),
                       DAG.getRegister(X86::EBX, PtrVT),
                       InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                    const MVT::ValueType PtrVT) {
  // Get the Thread Pointer
  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
  // (initial exec).
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}
SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for pic executables
  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF and 64-bit targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS model,
  // otherwise use the "Local Exec" TLS model.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
  else
    return LowerToTLSExecModel(GA, DAG, getPointerTy());
}
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}
SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}
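// LowerShift splits an i64 shift into two i32 halves.  The "shift amount < 32"
// result is formed with SHLD/SHRD plus an ordinary shift, and a CMOV keyed on
// (ShAmt & 32) selects between that and the "shift amount >= 32" result.
// Roughly, for SHL_PARTS (operand order here is only a sketch):
//   shld hi, lo, cl ; shl lo, cl ; test cl, 32 ; cmovne hi, lo ; cmovne lo, 0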
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt  = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ?
    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
    DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
  }

  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
                                  DAG.getConstant(32, MVT::i8));
  SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
  SDOperand Cond = NewCCModeling
    ? DAG.getNode(X86ISD::CMP_NEW, MVT::i32,
                  AndNode, DAG.getConstant(0, MVT::i8))
    : DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);

  SDOperand Hi, Lo;
  SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
  unsigned Opc = NewCCModeling ? X86ISD::CMOV_NEW : X86ISD::CMOV;
  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    if (NewCCModeling)
      Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
    else {
      Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size());
      Cond = Hi.getValue(1);
    }

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    if (NewCCModeling)
      Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
    else
      Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size());
  } else {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    if (NewCCModeling)
      Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
    else {
      Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size());
      Cond = Lo.getValue(1);
    }

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    if (NewCCModeling)
      Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
    else
      Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size());
  }

  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
  Ops.clear();
  Ops.push_back(Lo);
  Ops.push_back(Hi);
  return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand
X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) {
  unsigned Opcode = Op.getOpcode();
  MVT::ValueType NVT = Op.getValueType();
  bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
  bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
  unsigned Opc  = isSigned ? X86ISD::IDIV : X86ISD::DIV;

  SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2);

  if (isDiv)
    return DR;

  return SDOperand(DR.Val, 1);
}
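// SINT_TO_FP is lowered by spilling the integer to a stack slot and reloading
// it with FILD.  When the result must live in an SSE register, FILD_FLAG is
// used and the x87 value is stored back and reloaded as the SSE type.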
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
                                 StackSlot, NULL, 0);

  // These are really Legal; caller falls through into that case.
  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
    return Result;
  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
    return Result;
  if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
      Subtarget->is64Bit())
    return Result;

  // Build the FILD
  SDVTList Tys;
  bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
                (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
  if (useSSE)
    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
  else
    Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
                       Tys, &Ops[0], Ops.size());

  if (useSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    Tys = DAG.getVTList(MVT::Other);
    SmallVector<SDOperand, 8> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
  }

  return Result;
}
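// FP_TO_SINT is lowered to one of the FP_TO_INT*_IN_MEM pseudo ops, i.e. a
// FIST store into a temporary stack slot followed by an integer reload.  SSE
// register sources are first spilled and reloaded onto the x87 stack.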
SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
  // stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  // These are really Legal.
  if (Op.getValueType() == MVT::i32 &&
      X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
    return SDOperand();
  if (Op.getValueType() == MVT::i32 &&
      X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
    return SDOperand();
  if (Subtarget->is64Bit() &&
      Op.getValueType() == MVT::i64 &&
      Op.getOperand(0).getValueType() != MVT::f80)
    return SDOperand();

  unsigned Opc;
  switch (Op.getValueType()) {
  default: assert(0 && "Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
      (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
    SDOperand Ops[] = {
      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
    };
    Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  SDOperand Ops[] = { Chain, Value, StackSlot };
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);

  // Load the result.
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
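// FABS is implemented as an FAND with a constant-pool mask that clears only
// the sign bit of each element, avoiding any branch or libm call.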
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EltVT = VT;
  if (MVT::isVector(VT))
    EltVT = MVT::getVectorElementType(VT);
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
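// FNEG is the complementary trick: XOR with a mask that has only the sign bit
// set flips the sign and leaves the rest of the value untouched.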
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EltVT = VT;
  unsigned EltNum = 1;
  if (MVT::isVector(VT)) {
    EltVT = MVT::getVectorElementType(VT);
    EltNum = MVT::getVectorNumElements(VT);
  }
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  if (MVT::isVector(VT)) {
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::XOR, MVT::v2i64,
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
  } else {
    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
  }
}
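// FCOPYSIGN combines the two masks above: AND the sign operand with a
// sign-bit-only mask, AND the magnitude operand with the inverted mask, then
// OR the two pieces (with an extra shift when the operand widths differ).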
SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType SrcVT = Op1.getValueType();
  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);

  // If second operand is smaller, extend it first.
  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
    SrcVT = VT;
    SrcTy = MVT::getTypeForValueType(SrcVT);
  }

  // First get the sign bit of second operand.
  std::vector<Constant*> CV;
  if (SrcVT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);

  // Shift sign bit right or left if the two operands have different types.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    // Op0 is MVT::f32, Op1 is MVT::f64.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
                          DAG.getConstant(0, getPointerTy()));
  }

  // Clear first operand sign bit.
  CV.clear();
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  }
  C = ConstantVector::get(CV);
  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);

  // Or the value with the sign bit.
  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
}
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                        SDOperand Chain) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    SDOperand Ops1[] = { Chain, Op0, Op1 };
    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
  }

  assert(isFP && "Illegal integer SetCC!");

  SDOperand COps[] = { Chain, Op0, Op1 };
  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);

  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}
SDOperand X86TargetLowering::LowerSETCC_New(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1);
    return DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  }

  assert(isFP && "Illegal integer SetCC!");

  Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1);
  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                                 DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                                 DAG.getConstant(X86::COND_E, MVT::i8), Cond);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                                 DAG.getConstant(X86::COND_P, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                                 DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = DAG.getEntryNode();
  SDOperand Cond  = Op.getOperand(0);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    bool IllegalFPCMov =
      ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) ||
         (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
        !IllegalFPCMov) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }

  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond.getValue(1));
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSELECT_New(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Cond  = Op.getOperand(0);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC_New(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC_NEW) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    bool IllegalFPCMov =
      ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) ||
         (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP_NEW ||
         Opc == X86ISD::COMI_NEW ||
         Opc == X86ISD::UCOMI_NEW) &&
        !IllegalFPCMov) {
      Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1));
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond,
                       DAG.getConstant(0, MVT::i8));
  }

  const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
                                                    MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV_NEW, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Cond, Op.getOperand(2), CC, Cond.getValue(1));
}
SDOperand X86TargetLowering::LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC_New(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC_NEW) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP_NEW ||
        Opc == X86ISD::COMI_NEW ||
        Opc == X86ISD::UCOMI_NEW) {
      Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1));
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
  }
  return DAG.getNode(X86ISD::BRCOND_NEW, Op.getValueType(),
                     Chain, Op.getOperand(2), CC, Cond);
}
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      // TODO: Implement fastcc
      // Falls through
    case CallingConv::C:
    case CallingConv::X86_StdCall:
      return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, CallingConv);
    }
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                           SelectionDAG &DAG) {
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");

  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size  = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDOperand Flag;

  MVT::ValueType IntPtr = getPointerTy();
  MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);

  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops[] = { Chain,
                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
                      DAG.getRegister(X86::EAX, IntPtr),
                      Flag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
  Flag = Chain.getValue(1);

  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(SPTy);
  Tys.push_back(MVT::Other);
  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch (CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      // TODO: implement fastcc.
      // Falls through
    case CallingConv::C:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCCArguments(Op, DAG);
    }
}
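// LowerMEMSET expands small, sufficiently aligned, constant-size memsets
// inline with rep;stos, widening the stored value to 16/32/64 bits as the
// alignment allows and finishing the leftover bytes with scalar stores;
// everything else is forwarded to the libc memset call below.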
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned or size is more than the threshold, call memset.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 ||
      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op.getOperand(1);
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    // Extend the unsigned i8 argument to be an int value for the call.
    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Op.getOperand(3);
    Args.push_back(Entry);
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        ValReg = X86::AX;
        Val = (Val << 8) | Val;
        break;
      case 0:   // DWORD aligned
        AVT = MVT::i32;
        ValReg = X86::EAX;
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
          AVT = MVT::i64;
          ValReg = X86::RAX;
          Val = (Val << 32) | Val;
        }
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        ValReg = X86::AL;
        Count = Op.getOperand(3);
        break;
    }

    if (AVT > MVT::i8) {
      if (I) {
        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
        BytesLeft = I->getValue() % UBytes;
      } else {
        assert(AVT >= MVT::i32 &&
               "Do not use rep;stos if not at least DWORD aligned");
        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 7 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 4) {
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      Value = DAG.getConstant(Val, MVT::i32);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      Offset += 4;
      BytesLeft -= 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      Offset += 2;
      BytesLeft -= 2;
    }
    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
    }
  }

  return Chain;
}
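// LowerMEMCPY follows the same pattern as LowerMEMSET: rep;movs with the
// widest element the alignment permits, scalar loads/stores for the tail
// bytes, and a libc memcpy call for unaligned or over-threshold copies.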
SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned or size is more than the threshold, call memcpy.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  // With glibc 2.6.1 on a core 2, copying an array of 100M longs was 30%
  // faster.
  if ((Align & 3) != 0 ||
      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Ty = getTargetData()->getIntPtrType();
    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
        AVT = MVT::i64;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
  }

  if (AVT > MVT::i8) {
    if (I) {
      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
      BytesLeft = I->getValue() % UBytes;
    } else {
      assert(AVT >= MVT::i32 &&
             "Do not use rep;movs if not at least DWORD aligned");
      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
      TwoRepMovs = true;
    }
  }

  SDOperand InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                            Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());

  if (TwoRepMovs) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 7 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      Offset += 4;
      BytesLeft -= 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      Offset += 2;
      BytesLeft -= 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
    }
  }

  return Chain;
}
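// READCYCLECOUNTER: RDTSC leaves the timestamp in EDX:EAX.  On x86-64 the two
// halves are shifted and OR'd into one i64 result; on 32-bit targets the pair
// is returned as two i32 values.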
SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand TheOp = Op.getOperand(0);
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
  if (Subtarget->is64Bit()) {
    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
                                         MVT::i64, Copy1.getValue(2));
    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
                                DAG.getConstant(32, MVT::i8));
    SDOperand Ops[] = {
      DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
    };

    Tys = DAG.getVTList(MVT::i64, MVT::Other);
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
  }

  SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
  SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
                                       MVT::i32, Copy1.getValue(2));
  SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
  Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
                        SV->getOffset());
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  SmallVector<SDOperand, 8> MemOps;
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
  SDOperand Chain = Op.getOperand(0);
  SDOperand DstPtr = Op.getOperand(1);
  SDOperand SrcPtr = Op.getOperand(2);
  SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
  SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));

  SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
                       SrcSV->getValue(), SrcSV->getOffset());
  Chain = SrcPtr.getValue(1);
  for (unsigned i = 0; i < 3; ++i) {
    SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
                                SrcSV->getValue(), SrcSV->getOffset());
    Chain = Val.getValue(1);
    Chain = DAG.getStore(Chain, Val, DstPtr,
                         DstSV->getValue(), DstSV->getOffset());
    SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
                         DAG.getConstant(8, getPointerTy()));
    DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
                         DAG.getConstant(8, getPointerTy()));
  }

  return Chain;
}
SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETEQ;  break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETLT;  break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETLE;  break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETGT;  break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETGE;  break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;  CC = ISD::SETNE;  break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETEQ;  break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETLT;  break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETLE;  break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETGT;  break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETGE;  break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI; CC = ISD::SETNE;  break;
    }

    unsigned X86CC;
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);

    if (NewCCModeling) {
      Opc = (Opc == X86ISD::UCOMI) ? X86ISD::UCOMI_NEW : X86ISD::COMI_NEW;
      SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
      SDOperand SetCC = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
                                    DAG.getConstant(X86CC, MVT::i8), Cond);
      return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
    }
    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}
SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  // Just load the return address
  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}
SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
                     DAG.getConstant(4, getPointerTy()));
}
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
                                                       SelectionDAG &DAG) {
  // Is not yet supported on x86-64
  if (Subtarget->is64Bit())
    return SDOperand();

  return DAG.getConstant(8, getPointerTy());
}
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
{
  assert(!Subtarget->is64Bit() &&
         "Lowering of eh_return builtin is not supported yet on x86-64");

  MachineFunction &MF = DAG.getMachineFunction();
  SDOperand Chain     = Op.getOperand(0);
  SDOperand Offset    = Op.getOperand(1);
  SDOperand Handler   = Op.getOperand(2);

  SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
                                    getPointerTy());

  SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
                                    DAG.getConstant(-4UL, getPointerTy()));
  StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
  Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
  Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
  MF.addLiveOut(X86::ECX);

  return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
                     Chain, DAG.getRegister(X86::ECX, getPointerTy()));
}
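// The 32-bit trampoline emitted below is a 10-byte stub (sketch):
//   mov $nest, %ecx/%eax      ; 0xB8+reg, imm32
//   jmp nested_function       ; 0xE9, rel32
// The opcode bytes are taken from the instruction descriptions so they stay
// in sync with the rest of the backend.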
SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
                                             SelectionDAG &DAG) {
  SDOperand Root = Op.getOperand(0);
  SDOperand Trmp = Op.getOperand(1); // trampoline
  SDOperand FPtr = Op.getOperand(2); // nested function
  SDOperand Nest = Op.getOperand(3); // 'nest' parameter value

  SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));

  if (Subtarget->is64Bit()) {
    return SDOperand(); // not yet supported
  } else {
    Function *Func = (Function *)
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    unsigned CC = Func->getCallingConv();
    unsigned NestReg;

    switch (CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::X86_StdCall: {
      // Pass 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::ECX;

      // Check that ECX wasn't needed by an 'inreg' parameter.
      const FunctionType *FTy = Func->getFunctionType();
      const ParamAttrsList *Attrs = FTy->getParamAttrs();

      if (Attrs && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 1;

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
            // FIXME: should only count parameters that are lowered to integers.
            InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          cerr << "Nest register in use - reduce number of inreg parameters!\n";
          abort();
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    const X86InstrInfo *TII =
      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();

    SDOperand OutChains[4];
    SDOperand Addr, Disp;

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    unsigned char N86Reg  = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpSV->getValue(), TrmpSV->getOffset());

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 1, false, 1);

    unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpSV->getValue(), TrmpSV->getOffset() + 5);

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 6, false, 1);

    SDOperand Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
  }
}

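// The four stores above materialize a 10-byte 32-bit trampoline:
//   byte 0:     MOV32ri opcode with the nest register encoded in its low bits
//   bytes 1-4:  the 'nest' parameter value (imm32)
//   byte 5:     JMP rel32 opcode
//   bytes 6-9:  displacement to the nested function, computed relative to the
//               end of the trampoline (Trmp + 10)
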
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return LowerIntegerDivOrRem(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
  case ISD::SETCC:              return NewCCModeling
                                  ? LowerSETCC_New(Op, DAG)
                                  : LowerSETCC(Op, DAG, DAG.getEntryNode());
  case ISD::SELECT:             return NewCCModeling
                                  ? LowerSELECT_New(Op, DAG)
                                  : LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return NewCCModeling
                                  ? LowerBRCOND_New(Op, DAG)
                                  : LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FOR:                return "X86ISD::FOR";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FSRL:               return "X86ISD::FSRL";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::CMP_NEW:            return "X86ISD::CMP_NEW";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::COMI_NEW:           return "X86ISD::COMI_NEW";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::UCOMI_NEW:          return "X86ISD::UCOMI_NEW";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::SETCC_NEW:          return "X86ISD::SETCC_NEW";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::CMOV_NEW:           return "X86ISD::CMOV_NEW";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::BRCOND_NEW:         return "X86ISD::BRCOND_NEW";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  case X86ISD::DIV:                return "X86ISD::DIV";
  case X86ISD::IDIV:               return "X86ISD::IDIV";
  }
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0: case 1: case 2: case 4: case 8:
    // These scales always work.
    break;
  case 3: case 5: case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}

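// For example, a scale of 3 can only be encoded as basereg + 2*scalereg with
// the same register in both slots (x + 2*x), so it is rejected whenever the
// addressing mode already has a base register of its own.
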
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isIdentityMask(Mask.Val) ||
          isIdentityMask(Mask.Val, true) ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKH_v_undef_Mask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64:

  case X86::NEW_CMOV_FR32:
  case X86::NEW_CMOV_FR64:
  case X86::NEW_CMOV_V4F32:
  case X86::NEW_CMOV_V2F64:
  case X86::NEW_CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
        e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

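// The expansion above exists because the x87 unit rounds to nearest by
// default, while FP-to-integer conversion must truncate: the current control
// word is spilled to a stack slot, a copy with the rounding-control field
// forced to "round toward zero" is loaded, the fistp-style store (IST_Fp*)
// is emitted, and the original control word is reloaded afterwards.
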
//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
  case X86ISD::SETCC_NEW:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

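// X86ISD::SETCC produces 0 or 1 in an i8 register, so every bit of the result
// except bit 0 is known to be zero; getIntVTBitMask(VT) ^ 1 is exactly that
// "all bits but the lowest" mask.
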
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(), DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// plus Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  }

  GlobalValue *GV1 = NULL;
  GlobalValue *GV2 = NULL;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
  bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Size);
  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);

  assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
  int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
  if (BFI < 0)
    // Fixed objects do not specify alignment, however the offsets are known.
    return ((Subtarget->getStackAlignment() % 16) == 0 &&
            (MFI->getObjectOffset(BFI) % 16) == 0);
  return MFI->getObjectAlignment(BFI) >= 16;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
      continue;
    }

    SDOperand Arg =
      getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
    if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
      return SDOperand();
    if (i == 0)
      Base = Arg.Val;
    else if (!isConsecutiveLoad(Arg.Val, Base,
                                i, MVT::getSizeInBits(EVT)/8, MFI))
      return SDOperand();
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}

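// For example, a v4f32 shuffle whose elements resolve to four adjacent,
// non-extending f32 loads in order <0, 1, 2, 3> is replaced by one v4f32
// load from the first address; when the base is known to be 16-byte aligned
// the load is emitted without an alignment override, otherwise the original
// (smaller) alignment is carried over so an unaligned vector load is used.
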
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}

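// Only the strict comparisons (e.g. select(setolt(X, Y), X, Y)) are combined
// unconditionally; the <=/>= variants fall through to FMIN/FMAX only under
// -enable-unsafe-fp-math, since SSE min/max do not honour IEEE semantics for
// NaNs and signed zeros in those cases.
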
SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    // Letters that name an X86 register class (see the helpers below).
    case 'A':
    case 'r': case 'R': case 'l': case 'q':
    case 'x': case 'y': case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand> &Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':   // 0..31 literal
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':   // 0..255 literal
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global (with
    // an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match it.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

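// For example, an inline-asm operand written with the 'N' constraint handled
// above, such as "N"(0x80), is folded into a target constant, while a value
// over 255 (or a non-constant operand) adds nothing to Ops and so is rejected
// for that constraint.
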
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }
    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;