1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Subclass of MipsTargetLowering specialized for mips32/64.
12 //===----------------------------------------------------------------------===//
14 #include "MipsSEISelLowering.h"
15 #include "MipsMachineFunction.h"
16 #include "MipsRegisterInfo.h"
17 #include "MipsSubtarget.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Triple.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineMemOperand.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/SelectionDAG.h"
32 #include "llvm/CodeGen/SelectionDAGNodes.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/CodeGen/ValueTypes.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/MachineValueType.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Support/raw_ostream.h"
53 #define DEBUG_TYPE "mips-isel"
56 UseMipsTailCalls("mips-tail-calls", cl::Hidden
,
57 cl::desc("MIPS: permit tail calls."), cl::init(false));
59 static cl::opt
<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
60 cl::desc("Expand double precision loads and "
61 "stores to their single precision "
64 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine
&TM
,
65 const MipsSubtarget
&STI
)
66 : MipsTargetLowering(TM
, STI
) {
67 // Set up the register classes
68 addRegisterClass(MVT::i32
, &Mips::GPR32RegClass
);
70 if (Subtarget
.isGP64bit())
71 addRegisterClass(MVT::i64
, &Mips::GPR64RegClass
);
73 if (Subtarget
.hasDSP() || Subtarget
.hasMSA()) {
74 // Expand all truncating stores and extending loads.
75 for (MVT VT0
: MVT::vector_valuetypes()) {
76 for (MVT VT1
: MVT::vector_valuetypes()) {
77 setTruncStoreAction(VT0
, VT1
, Expand
);
78 setLoadExtAction(ISD::SEXTLOAD
, VT0
, VT1
, Expand
);
79 setLoadExtAction(ISD::ZEXTLOAD
, VT0
, VT1
, Expand
);
80 setLoadExtAction(ISD::EXTLOAD
, VT0
, VT1
, Expand
);
85 if (Subtarget
.hasDSP()) {
86 MVT::SimpleValueType VecTys
[2] = {MVT::v2i16
, MVT::v4i8
};
88 for (unsigned i
= 0; i
< array_lengthof(VecTys
); ++i
) {
89 addRegisterClass(VecTys
[i
], &Mips::DSPRRegClass
);
91 // Expand all builtin opcodes.
92 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
93 setOperationAction(Opc
, VecTys
[i
], Expand
);
95 setOperationAction(ISD::ADD
, VecTys
[i
], Legal
);
96 setOperationAction(ISD::SUB
, VecTys
[i
], Legal
);
97 setOperationAction(ISD::LOAD
, VecTys
[i
], Legal
);
98 setOperationAction(ISD::STORE
, VecTys
[i
], Legal
);
99 setOperationAction(ISD::BITCAST
, VecTys
[i
], Legal
);
102 setTargetDAGCombine(ISD::SHL
);
103 setTargetDAGCombine(ISD::SRA
);
104 setTargetDAGCombine(ISD::SRL
);
105 setTargetDAGCombine(ISD::SETCC
);
106 setTargetDAGCombine(ISD::VSELECT
);
108 if (Subtarget
.hasMips32r2()) {
109 setOperationAction(ISD::ADDC
, MVT::i32
, Legal
);
110 setOperationAction(ISD::ADDE
, MVT::i32
, Legal
);
114 if (Subtarget
.hasDSPR2())
115 setOperationAction(ISD::MUL
, MVT::v2i16
, Legal
);
117 if (Subtarget
.hasMSA()) {
118 addMSAIntType(MVT::v16i8
, &Mips::MSA128BRegClass
);
119 addMSAIntType(MVT::v8i16
, &Mips::MSA128HRegClass
);
120 addMSAIntType(MVT::v4i32
, &Mips::MSA128WRegClass
);
121 addMSAIntType(MVT::v2i64
, &Mips::MSA128DRegClass
);
122 addMSAFloatType(MVT::v8f16
, &Mips::MSA128HRegClass
);
123 addMSAFloatType(MVT::v4f32
, &Mips::MSA128WRegClass
);
124 addMSAFloatType(MVT::v2f64
, &Mips::MSA128DRegClass
);
126 // f16 is a storage-only type, always promote it to f32.
127 addRegisterClass(MVT::f16
, &Mips::MSA128HRegClass
);
128 setOperationAction(ISD::SETCC
, MVT::f16
, Promote
);
129 setOperationAction(ISD::BR_CC
, MVT::f16
, Promote
);
130 setOperationAction(ISD::SELECT_CC
, MVT::f16
, Promote
);
131 setOperationAction(ISD::SELECT
, MVT::f16
, Promote
);
132 setOperationAction(ISD::FADD
, MVT::f16
, Promote
);
133 setOperationAction(ISD::FSUB
, MVT::f16
, Promote
);
134 setOperationAction(ISD::FMUL
, MVT::f16
, Promote
);
135 setOperationAction(ISD::FDIV
, MVT::f16
, Promote
);
136 setOperationAction(ISD::FREM
, MVT::f16
, Promote
);
137 setOperationAction(ISD::FMA
, MVT::f16
, Promote
);
138 setOperationAction(ISD::FNEG
, MVT::f16
, Promote
);
139 setOperationAction(ISD::FABS
, MVT::f16
, Promote
);
140 setOperationAction(ISD::FCEIL
, MVT::f16
, Promote
);
141 setOperationAction(ISD::FCOPYSIGN
, MVT::f16
, Promote
);
142 setOperationAction(ISD::FCOS
, MVT::f16
, Promote
);
143 setOperationAction(ISD::FP_EXTEND
, MVT::f16
, Promote
);
144 setOperationAction(ISD::FFLOOR
, MVT::f16
, Promote
);
145 setOperationAction(ISD::FNEARBYINT
, MVT::f16
, Promote
);
146 setOperationAction(ISD::FPOW
, MVT::f16
, Promote
);
147 setOperationAction(ISD::FPOWI
, MVT::f16
, Promote
);
148 setOperationAction(ISD::FRINT
, MVT::f16
, Promote
);
149 setOperationAction(ISD::FSIN
, MVT::f16
, Promote
);
150 setOperationAction(ISD::FSINCOS
, MVT::f16
, Promote
);
151 setOperationAction(ISD::FSQRT
, MVT::f16
, Promote
);
152 setOperationAction(ISD::FEXP
, MVT::f16
, Promote
);
153 setOperationAction(ISD::FEXP2
, MVT::f16
, Promote
);
154 setOperationAction(ISD::FLOG
, MVT::f16
, Promote
);
155 setOperationAction(ISD::FLOG2
, MVT::f16
, Promote
);
156 setOperationAction(ISD::FLOG10
, MVT::f16
, Promote
);
157 setOperationAction(ISD::FROUND
, MVT::f16
, Promote
);
158 setOperationAction(ISD::FTRUNC
, MVT::f16
, Promote
);
159 setOperationAction(ISD::FMINNUM
, MVT::f16
, Promote
);
160 setOperationAction(ISD::FMAXNUM
, MVT::f16
, Promote
);
161 setOperationAction(ISD::FMINNAN
, MVT::f16
, Promote
);
162 setOperationAction(ISD::FMAXNAN
, MVT::f16
, Promote
);
164 setTargetDAGCombine(ISD::AND
);
165 setTargetDAGCombine(ISD::OR
);
166 setTargetDAGCombine(ISD::SRA
);
167 setTargetDAGCombine(ISD::VSELECT
);
168 setTargetDAGCombine(ISD::XOR
);
171 if (!Subtarget
.useSoftFloat()) {
172 addRegisterClass(MVT::f32
, &Mips::FGR32RegClass
);
174 // When dealing with single precision only, use libcalls
175 if (!Subtarget
.isSingleFloat()) {
176 if (Subtarget
.isFP64bit())
177 addRegisterClass(MVT::f64
, &Mips::FGR64RegClass
);
179 addRegisterClass(MVT::f64
, &Mips::AFGR64RegClass
);
183 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Custom
);
184 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Custom
);
185 setOperationAction(ISD::MULHS
, MVT::i32
, Custom
);
186 setOperationAction(ISD::MULHU
, MVT::i32
, Custom
);
188 if (Subtarget
.hasCnMips())
189 setOperationAction(ISD::MUL
, MVT::i64
, Legal
);
190 else if (Subtarget
.isGP64bit())
191 setOperationAction(ISD::MUL
, MVT::i64
, Custom
);
193 if (Subtarget
.isGP64bit()) {
194 setOperationAction(ISD::SMUL_LOHI
, MVT::i64
, Custom
);
195 setOperationAction(ISD::UMUL_LOHI
, MVT::i64
, Custom
);
196 setOperationAction(ISD::MULHS
, MVT::i64
, Custom
);
197 setOperationAction(ISD::MULHU
, MVT::i64
, Custom
);
198 setOperationAction(ISD::SDIVREM
, MVT::i64
, Custom
);
199 setOperationAction(ISD::UDIVREM
, MVT::i64
, Custom
);
202 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::i64
, Custom
);
203 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::i64
, Custom
);
205 setOperationAction(ISD::SDIVREM
, MVT::i32
, Custom
);
206 setOperationAction(ISD::UDIVREM
, MVT::i32
, Custom
);
207 setOperationAction(ISD::ATOMIC_FENCE
, MVT::Other
, Custom
);
208 setOperationAction(ISD::LOAD
, MVT::i32
, Custom
);
209 setOperationAction(ISD::STORE
, MVT::i32
, Custom
);
211 setTargetDAGCombine(ISD::MUL
);
213 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
214 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::Other
, Custom
);
215 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
218 setOperationAction(ISD::LOAD
, MVT::f64
, Custom
);
219 setOperationAction(ISD::STORE
, MVT::f64
, Custom
);
222 if (Subtarget
.hasMips32r6()) {
223 // MIPS32r6 replaces the accumulator-based multiplies with a three register
225 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Expand
);
226 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Expand
);
227 setOperationAction(ISD::MUL
, MVT::i32
, Legal
);
228 setOperationAction(ISD::MULHS
, MVT::i32
, Legal
);
229 setOperationAction(ISD::MULHU
, MVT::i32
, Legal
);
231 // MIPS32r6 replaces the accumulator-based division/remainder with separate
232 // three register division and remainder instructions.
233 setOperationAction(ISD::SDIVREM
, MVT::i32
, Expand
);
234 setOperationAction(ISD::UDIVREM
, MVT::i32
, Expand
);
235 setOperationAction(ISD::SDIV
, MVT::i32
, Legal
);
236 setOperationAction(ISD::UDIV
, MVT::i32
, Legal
);
237 setOperationAction(ISD::SREM
, MVT::i32
, Legal
);
238 setOperationAction(ISD::UREM
, MVT::i32
, Legal
);
240 // MIPS32r6 replaces conditional moves with an equivalent that removes the
241 // need for three GPR read ports.
242 setOperationAction(ISD::SETCC
, MVT::i32
, Legal
);
243 setOperationAction(ISD::SELECT
, MVT::i32
, Legal
);
244 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Expand
);
246 setOperationAction(ISD::SETCC
, MVT::f32
, Legal
);
247 setOperationAction(ISD::SELECT
, MVT::f32
, Legal
);
248 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Expand
);
250 assert(Subtarget
.isFP64bit() && "FR=1 is required for MIPS32r6");
251 setOperationAction(ISD::SETCC
, MVT::f64
, Legal
);
252 setOperationAction(ISD::SELECT
, MVT::f64
, Custom
);
253 setOperationAction(ISD::SELECT_CC
, MVT::f64
, Expand
);
255 setOperationAction(ISD::BRCOND
, MVT::Other
, Legal
);
257 // Floating point > and >= are supported via < and <=
258 setCondCodeAction(ISD::SETOGE
, MVT::f32
, Expand
);
259 setCondCodeAction(ISD::SETOGT
, MVT::f32
, Expand
);
260 setCondCodeAction(ISD::SETUGE
, MVT::f32
, Expand
);
261 setCondCodeAction(ISD::SETUGT
, MVT::f32
, Expand
);
263 setCondCodeAction(ISD::SETOGE
, MVT::f64
, Expand
);
264 setCondCodeAction(ISD::SETOGT
, MVT::f64
, Expand
);
265 setCondCodeAction(ISD::SETUGE
, MVT::f64
, Expand
);
266 setCondCodeAction(ISD::SETUGT
, MVT::f64
, Expand
);
269 if (Subtarget
.hasMips64r6()) {
270 // MIPS64r6 replaces the accumulator-based multiplies with a three register
272 setOperationAction(ISD::SMUL_LOHI
, MVT::i64
, Expand
);
273 setOperationAction(ISD::UMUL_LOHI
, MVT::i64
, Expand
);
274 setOperationAction(ISD::MUL
, MVT::i64
, Legal
);
275 setOperationAction(ISD::MULHS
, MVT::i64
, Legal
);
276 setOperationAction(ISD::MULHU
, MVT::i64
, Legal
);
278 // MIPS64r6 replaces the accumulator-based division/remainder with separate
279 // three register division and remainder instructions.
280 setOperationAction(ISD::SDIVREM
, MVT::i64
, Expand
);
281 setOperationAction(ISD::UDIVREM
, MVT::i64
, Expand
);
282 setOperationAction(ISD::SDIV
, MVT::i64
, Legal
);
283 setOperationAction(ISD::UDIV
, MVT::i64
, Legal
);
284 setOperationAction(ISD::SREM
, MVT::i64
, Legal
);
285 setOperationAction(ISD::UREM
, MVT::i64
, Legal
);
287 // MIPS64r6 replaces conditional moves with an equivalent that removes the
288 // need for three GPR read ports.
289 setOperationAction(ISD::SETCC
, MVT::i64
, Legal
);
290 setOperationAction(ISD::SELECT
, MVT::i64
, Legal
);
291 setOperationAction(ISD::SELECT_CC
, MVT::i64
, Expand
);
294 computeRegisterProperties(Subtarget
.getRegisterInfo());
297 const MipsTargetLowering
*
298 llvm::createMipsSETargetLowering(const MipsTargetMachine
&TM
,
299 const MipsSubtarget
&STI
) {
300 return new MipsSETargetLowering(TM
, STI
);
303 const TargetRegisterClass
*
304 MipsSETargetLowering::getRepRegClassFor(MVT VT
) const {
305 if (VT
== MVT::Untyped
)
306 return Subtarget
.hasDSP() ? &Mips::ACC64DSPRegClass
: &Mips::ACC64RegClass
;
308 return TargetLowering::getRepRegClassFor(VT
);
311 // Enable MSA support for the given integer type and Register class.
312 void MipsSETargetLowering::
313 addMSAIntType(MVT::SimpleValueType Ty
, const TargetRegisterClass
*RC
) {
314 addRegisterClass(Ty
, RC
);
316 // Expand all builtin opcodes.
317 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
318 setOperationAction(Opc
, Ty
, Expand
);
320 setOperationAction(ISD::BITCAST
, Ty
, Legal
);
321 setOperationAction(ISD::LOAD
, Ty
, Legal
);
322 setOperationAction(ISD::STORE
, Ty
, Legal
);
323 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, Ty
, Custom
);
324 setOperationAction(ISD::INSERT_VECTOR_ELT
, Ty
, Legal
);
325 setOperationAction(ISD::BUILD_VECTOR
, Ty
, Custom
);
327 setOperationAction(ISD::ADD
, Ty
, Legal
);
328 setOperationAction(ISD::AND
, Ty
, Legal
);
329 setOperationAction(ISD::CTLZ
, Ty
, Legal
);
330 setOperationAction(ISD::CTPOP
, Ty
, Legal
);
331 setOperationAction(ISD::MUL
, Ty
, Legal
);
332 setOperationAction(ISD::OR
, Ty
, Legal
);
333 setOperationAction(ISD::SDIV
, Ty
, Legal
);
334 setOperationAction(ISD::SREM
, Ty
, Legal
);
335 setOperationAction(ISD::SHL
, Ty
, Legal
);
336 setOperationAction(ISD::SRA
, Ty
, Legal
);
337 setOperationAction(ISD::SRL
, Ty
, Legal
);
338 setOperationAction(ISD::SUB
, Ty
, Legal
);
339 setOperationAction(ISD::SMAX
, Ty
, Legal
);
340 setOperationAction(ISD::SMIN
, Ty
, Legal
);
341 setOperationAction(ISD::UDIV
, Ty
, Legal
);
342 setOperationAction(ISD::UREM
, Ty
, Legal
);
343 setOperationAction(ISD::UMAX
, Ty
, Legal
);
344 setOperationAction(ISD::UMIN
, Ty
, Legal
);
345 setOperationAction(ISD::VECTOR_SHUFFLE
, Ty
, Custom
);
346 setOperationAction(ISD::VSELECT
, Ty
, Legal
);
347 setOperationAction(ISD::XOR
, Ty
, Legal
);
349 if (Ty
== MVT::v4i32
|| Ty
== MVT::v2i64
) {
350 setOperationAction(ISD::FP_TO_SINT
, Ty
, Legal
);
351 setOperationAction(ISD::FP_TO_UINT
, Ty
, Legal
);
352 setOperationAction(ISD::SINT_TO_FP
, Ty
, Legal
);
353 setOperationAction(ISD::UINT_TO_FP
, Ty
, Legal
);
356 setOperationAction(ISD::SETCC
, Ty
, Legal
);
357 setCondCodeAction(ISD::SETNE
, Ty
, Expand
);
358 setCondCodeAction(ISD::SETGE
, Ty
, Expand
);
359 setCondCodeAction(ISD::SETGT
, Ty
, Expand
);
360 setCondCodeAction(ISD::SETUGE
, Ty
, Expand
);
361 setCondCodeAction(ISD::SETUGT
, Ty
, Expand
);
364 // Enable MSA support for the given floating-point type and Register class.
365 void MipsSETargetLowering::
366 addMSAFloatType(MVT::SimpleValueType Ty
, const TargetRegisterClass
*RC
) {
367 addRegisterClass(Ty
, RC
);
369 // Expand all builtin opcodes.
370 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
371 setOperationAction(Opc
, Ty
, Expand
);
373 setOperationAction(ISD::LOAD
, Ty
, Legal
);
374 setOperationAction(ISD::STORE
, Ty
, Legal
);
375 setOperationAction(ISD::BITCAST
, Ty
, Legal
);
376 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, Ty
, Legal
);
377 setOperationAction(ISD::INSERT_VECTOR_ELT
, Ty
, Legal
);
378 setOperationAction(ISD::BUILD_VECTOR
, Ty
, Custom
);
380 if (Ty
!= MVT::v8f16
) {
381 setOperationAction(ISD::FABS
, Ty
, Legal
);
382 setOperationAction(ISD::FADD
, Ty
, Legal
);
383 setOperationAction(ISD::FDIV
, Ty
, Legal
);
384 setOperationAction(ISD::FEXP2
, Ty
, Legal
);
385 setOperationAction(ISD::FLOG2
, Ty
, Legal
);
386 setOperationAction(ISD::FMA
, Ty
, Legal
);
387 setOperationAction(ISD::FMUL
, Ty
, Legal
);
388 setOperationAction(ISD::FRINT
, Ty
, Legal
);
389 setOperationAction(ISD::FSQRT
, Ty
, Legal
);
390 setOperationAction(ISD::FSUB
, Ty
, Legal
);
391 setOperationAction(ISD::VSELECT
, Ty
, Legal
);
393 setOperationAction(ISD::SETCC
, Ty
, Legal
);
394 setCondCodeAction(ISD::SETOGE
, Ty
, Expand
);
395 setCondCodeAction(ISD::SETOGT
, Ty
, Expand
);
396 setCondCodeAction(ISD::SETUGE
, Ty
, Expand
);
397 setCondCodeAction(ISD::SETUGT
, Ty
, Expand
);
398 setCondCodeAction(ISD::SETGE
, Ty
, Expand
);
399 setCondCodeAction(ISD::SETGT
, Ty
, Expand
);
403 SDValue
MipsSETargetLowering::lowerSELECT(SDValue Op
, SelectionDAG
&DAG
) const {
404 if(!Subtarget
.hasMips32r6())
405 return MipsTargetLowering::LowerOperation(Op
, DAG
);
407 EVT ResTy
= Op
->getValueType(0);
410 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
411 // floating point register are undefined. Not really an issue as sel.d, which
412 // is produced from an FSELECT node, only looks at bit 0.
413 SDValue Tmp
= DAG
.getNode(MipsISD::MTC1_D64
, DL
, MVT::f64
, Op
->getOperand(0));
414 return DAG
.getNode(MipsISD::FSELECT
, DL
, ResTy
, Tmp
, Op
->getOperand(1),
419 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT
,
423 MVT::SimpleValueType SVT
= VT
.getSimpleVT().SimpleTy
;
425 if (Subtarget
.systemSupportsUnalignedAccess()) {
426 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
427 // implementation defined whether this is handled by hardware, software, or
428 // a hybrid of the two but it's expected that most implementations will
429 // handle the majority of cases in hardware.
446 SDValue
MipsSETargetLowering::LowerOperation(SDValue Op
,
447 SelectionDAG
&DAG
) const {
448 switch(Op
.getOpcode()) {
449 case ISD::LOAD
: return lowerLOAD(Op
, DAG
);
450 case ISD::STORE
: return lowerSTORE(Op
, DAG
);
451 case ISD::SMUL_LOHI
: return lowerMulDiv(Op
, MipsISD::Mult
, true, true, DAG
);
452 case ISD::UMUL_LOHI
: return lowerMulDiv(Op
, MipsISD::Multu
, true, true, DAG
);
453 case ISD::MULHS
: return lowerMulDiv(Op
, MipsISD::Mult
, false, true, DAG
);
454 case ISD::MULHU
: return lowerMulDiv(Op
, MipsISD::Multu
, false, true, DAG
);
455 case ISD::MUL
: return lowerMulDiv(Op
, MipsISD::Mult
, true, false, DAG
);
456 case ISD::SDIVREM
: return lowerMulDiv(Op
, MipsISD::DivRem
, true, true, DAG
);
457 case ISD::UDIVREM
: return lowerMulDiv(Op
, MipsISD::DivRemU
, true, true,
459 case ISD::INTRINSIC_WO_CHAIN
: return lowerINTRINSIC_WO_CHAIN(Op
, DAG
);
460 case ISD::INTRINSIC_W_CHAIN
: return lowerINTRINSIC_W_CHAIN(Op
, DAG
);
461 case ISD::INTRINSIC_VOID
: return lowerINTRINSIC_VOID(Op
, DAG
);
462 case ISD::EXTRACT_VECTOR_ELT
: return lowerEXTRACT_VECTOR_ELT(Op
, DAG
);
463 case ISD::BUILD_VECTOR
: return lowerBUILD_VECTOR(Op
, DAG
);
464 case ISD::VECTOR_SHUFFLE
: return lowerVECTOR_SHUFFLE(Op
, DAG
);
465 case ISD::SELECT
: return lowerSELECT(Op
, DAG
);
468 return MipsTargetLowering::LowerOperation(Op
, DAG
);
471 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
473 // Performs the following transformations:
474 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
475 // sign/zero-extension is completely overwritten by the new one performed by
477 // - Removes redundant zero extensions performed by an ISD::AND.
478 static SDValue
performANDCombine(SDNode
*N
, SelectionDAG
&DAG
,
479 TargetLowering::DAGCombinerInfo
&DCI
,
480 const MipsSubtarget
&Subtarget
) {
481 if (!Subtarget
.hasMSA())
484 SDValue Op0
= N
->getOperand(0);
485 SDValue Op1
= N
->getOperand(1);
486 unsigned Op0Opcode
= Op0
->getOpcode();
488 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
489 // where $d + 1 == 2^n and n == 32
490 // or $d + 1 == 2^n and n <= 32 and ZExt
491 // -> (MipsVExtractZExt $a, $b, $c)
492 if (Op0Opcode
== MipsISD::VEXTRACT_SEXT_ELT
||
493 Op0Opcode
== MipsISD::VEXTRACT_ZEXT_ELT
) {
494 ConstantSDNode
*Mask
= dyn_cast
<ConstantSDNode
>(Op1
);
499 int32_t Log2IfPositive
= (Mask
->getAPIntValue() + 1).exactLogBase2();
501 if (Log2IfPositive
<= 0)
502 return SDValue(); // Mask+1 is not a power of 2
504 SDValue Op0Op2
= Op0
->getOperand(2);
505 EVT ExtendTy
= cast
<VTSDNode
>(Op0Op2
)->getVT();
506 unsigned ExtendTySize
= ExtendTy
.getSizeInBits();
507 unsigned Log2
= Log2IfPositive
;
509 if ((Op0Opcode
== MipsISD::VEXTRACT_ZEXT_ELT
&& Log2
>= ExtendTySize
) ||
510 Log2
== ExtendTySize
) {
511 SDValue Ops
[] = { Op0
->getOperand(0), Op0
->getOperand(1), Op0Op2
};
512 return DAG
.getNode(MipsISD::VEXTRACT_ZEXT_ELT
, SDLoc(Op0
),
514 makeArrayRef(Ops
, Op0
->getNumOperands()));
521 // Determine if the specified node is a constant vector splat.
523 // Returns true and sets Imm if:
524 // * N is an ISD::BUILD_VECTOR representing a constant splat
526 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
527 // differences are that it assumes the MSA has already been checked and the
528 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and
529 // must not be in order for binsri.d to be selectable).
530 static bool isVSplat(SDValue N
, APInt
&Imm
, bool IsLittleEndian
) {
531 BuildVectorSDNode
*Node
= dyn_cast
<BuildVectorSDNode
>(N
.getNode());
536 APInt SplatValue
, SplatUndef
;
537 unsigned SplatBitSize
;
540 if (!Node
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
,
549 // Test whether the given node is an all-ones build_vector.
550 static bool isVectorAllOnes(SDValue N
) {
551 // Look through bitcasts. Endianness doesn't matter because we are looking
552 // for an all-ones value.
553 if (N
->getOpcode() == ISD::BITCAST
)
554 N
= N
->getOperand(0);
556 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(N
);
561 APInt SplatValue
, SplatUndef
;
562 unsigned SplatBitSize
;
565 // Endianness doesn't matter in this context because we are looking for
566 // an all-ones value.
567 if (BVN
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
))
568 return SplatValue
.isAllOnesValue();
573 // Test whether N is the bitwise inverse of OfNode.
574 static bool isBitwiseInverse(SDValue N
, SDValue OfNode
) {
575 if (N
->getOpcode() != ISD::XOR
)
578 if (isVectorAllOnes(N
->getOperand(0)))
579 return N
->getOperand(1) == OfNode
;
581 if (isVectorAllOnes(N
->getOperand(1)))
582 return N
->getOperand(0) == OfNode
;
587 // Perform combines where ISD::OR is the root node.
589 // Performs the following transformations:
590 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
591 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
593 static SDValue
performORCombine(SDNode
*N
, SelectionDAG
&DAG
,
594 TargetLowering::DAGCombinerInfo
&DCI
,
595 const MipsSubtarget
&Subtarget
) {
596 if (!Subtarget
.hasMSA())
599 EVT Ty
= N
->getValueType(0);
601 if (!Ty
.is128BitVector())
604 SDValue Op0
= N
->getOperand(0);
605 SDValue Op1
= N
->getOperand(1);
607 if (Op0
->getOpcode() == ISD::AND
&& Op1
->getOpcode() == ISD::AND
) {
608 SDValue Op0Op0
= Op0
->getOperand(0);
609 SDValue Op0Op1
= Op0
->getOperand(1);
610 SDValue Op1Op0
= Op1
->getOperand(0);
611 SDValue Op1Op1
= Op1
->getOperand(1);
612 bool IsLittleEndian
= !Subtarget
.isLittle();
614 SDValue IfSet
, IfClr
, Cond
;
615 bool IsConstantMask
= false;
618 // If Op0Op0 is an appropriate mask, try to find its inverse in either
619 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
621 // IfClr will be set if we find a valid match.
622 if (isVSplat(Op0Op0
, Mask
, IsLittleEndian
)) {
626 if (isVSplat(Op1Op0
, InvMask
, IsLittleEndian
) &&
627 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
629 else if (isVSplat(Op1Op1
, InvMask
, IsLittleEndian
) &&
630 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
633 IsConstantMask
= true;
636 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
637 // thing again using this mask.
638 // IfClr will be set if we find a valid match.
639 if (!IfClr
.getNode() && isVSplat(Op0Op1
, Mask
, IsLittleEndian
)) {
643 if (isVSplat(Op1Op0
, InvMask
, IsLittleEndian
) &&
644 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
646 else if (isVSplat(Op1Op1
, InvMask
, IsLittleEndian
) &&
647 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
650 IsConstantMask
= true;
653 // If IfClr is not yet set, try looking for a non-constant match.
654 // IfClr will be set if we find a valid match amongst the eight
656 if (!IfClr
.getNode()) {
657 if (isBitwiseInverse(Op0Op0
, Op1Op0
)) {
661 } else if (isBitwiseInverse(Op0Op1
, Op1Op0
)) {
665 } else if (isBitwiseInverse(Op0Op0
, Op1Op1
)) {
669 } else if (isBitwiseInverse(Op0Op1
, Op1Op1
)) {
673 } else if (isBitwiseInverse(Op1Op0
, Op0Op0
)) {
677 } else if (isBitwiseInverse(Op1Op1
, Op0Op0
)) {
681 } else if (isBitwiseInverse(Op1Op0
, Op0Op1
)) {
685 } else if (isBitwiseInverse(Op1Op1
, Op0Op1
)) {
692 // At this point, IfClr will be set if we have a valid match.
693 if (!IfClr
.getNode())
696 assert(Cond
.getNode() && IfSet
.getNode());
698 // Fold degenerate cases.
699 if (IsConstantMask
) {
700 if (Mask
.isAllOnesValue())
706 // Transform the DAG into an equivalent VSELECT.
707 return DAG
.getNode(ISD::VSELECT
, SDLoc(N
), Ty
, Cond
, IfSet
, IfClr
);
713 static bool shouldTransformMulToShiftsAddsSubs(APInt C
, EVT VT
,
715 const MipsSubtarget
&Subtarget
) {
716 // Estimate the number of operations the below transform will turn a
717 // constant multiply into. The number is approximately how many powers
718 // of two summed together that the constant can be broken down into.
720 SmallVector
<APInt
, 16> WorkStack(1, C
);
722 unsigned BitWidth
= C
.getBitWidth();
724 while (!WorkStack
.empty()) {
725 APInt Val
= WorkStack
.pop_back_val();
727 if (Val
== 0 || Val
== 1)
730 if (Val
.isPowerOf2()) {
735 APInt Floor
= APInt(BitWidth
, 1) << Val
.logBase2();
736 APInt Ceil
= Val
.isNegative() ? APInt(BitWidth
, 0)
737 : APInt(BitWidth
, 1) << C
.ceilLogBase2();
739 if ((Val
- Floor
).ule(Ceil
- Val
)) {
740 WorkStack
.push_back(Floor
);
741 WorkStack
.push_back(Val
- Floor
);
746 WorkStack
.push_back(Ceil
);
747 WorkStack
.push_back(Ceil
- Val
);
750 // If we have taken more than 12[1] / 8[2] steps to attempt the
751 // optimization for a native sized value, it is more than likely that this
752 // optimization will make things worse.
754 // [1] MIPS64 requires 6 instructions at most to materialize any constant,
755 // multiplication requires at least 4 cycles, but another cycle (or two)
756 // to retrieve the result from the HI/LO registers.
758 // [2] For MIPS32, more than 8 steps is expensive as the constant could be
759 // materialized in 2 instructions, multiplication requires at least 4
760 // cycles, but another cycle (or two) to retrieve the result from the
763 if (Steps
> 12 && (Subtarget
.isABI_N32() || Subtarget
.isABI_N64()))
766 if (Steps
> 8 && Subtarget
.isABI_O32())
770 // If the value being multiplied is not supported natively, we have to pay
771 // an additional legalization cost, conservatively assume an increase in the
772 // cost of 3 instructions per step. The values for this heuristic were
773 // determined experimentally.
774 unsigned RegisterSize
= DAG
.getTargetLoweringInfo()
775 .getRegisterType(*DAG
.getContext(), VT
)
777 Steps
*= (VT
.getSizeInBits() != RegisterSize
) * 3;
784 static SDValue
genConstMult(SDValue X
, APInt C
, const SDLoc
&DL
, EVT VT
,
785 EVT ShiftTy
, SelectionDAG
&DAG
) {
788 return DAG
.getConstant(0, DL
, VT
);
794 // If c is power of 2, return (shl x, log2(c)).
796 return DAG
.getNode(ISD::SHL
, DL
, VT
, X
,
797 DAG
.getConstant(C
.logBase2(), DL
, ShiftTy
));
799 unsigned BitWidth
= C
.getBitWidth();
800 APInt Floor
= APInt(BitWidth
, 1) << C
.logBase2();
801 APInt Ceil
= C
.isNegative() ? APInt(BitWidth
, 0) :
802 APInt(BitWidth
, 1) << C
.ceilLogBase2();
804 // If |c - floor_c| <= |c - ceil_c|,
805 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
806 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
807 if ((C
- Floor
).ule(Ceil
- C
)) {
808 SDValue Op0
= genConstMult(X
, Floor
, DL
, VT
, ShiftTy
, DAG
);
809 SDValue Op1
= genConstMult(X
, C
- Floor
, DL
, VT
, ShiftTy
, DAG
);
810 return DAG
.getNode(ISD::ADD
, DL
, VT
, Op0
, Op1
);
813 // If |c - floor_c| > |c - ceil_c|,
814 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
815 SDValue Op0
= genConstMult(X
, Ceil
, DL
, VT
, ShiftTy
, DAG
);
816 SDValue Op1
= genConstMult(X
, Ceil
- C
, DL
, VT
, ShiftTy
, DAG
);
817 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op0
, Op1
);
820 static SDValue
performMULCombine(SDNode
*N
, SelectionDAG
&DAG
,
821 const TargetLowering::DAGCombinerInfo
&DCI
,
822 const MipsSETargetLowering
*TL
,
823 const MipsSubtarget
&Subtarget
) {
824 EVT VT
= N
->getValueType(0);
826 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
827 if (!VT
.isVector() && shouldTransformMulToShiftsAddsSubs(
828 C
->getAPIntValue(), VT
, DAG
, Subtarget
))
829 return genConstMult(N
->getOperand(0), C
->getAPIntValue(), SDLoc(N
), VT
,
830 TL
->getScalarShiftAmountTy(DAG
.getDataLayout(), VT
),
833 return SDValue(N
, 0);
836 static SDValue
performDSPShiftCombine(unsigned Opc
, SDNode
*N
, EVT Ty
,
838 const MipsSubtarget
&Subtarget
) {
839 // See if this is a vector splat immediate node.
840 APInt SplatValue
, SplatUndef
;
841 unsigned SplatBitSize
;
843 unsigned EltSize
= Ty
.getScalarSizeInBits();
844 BuildVectorSDNode
*BV
= dyn_cast
<BuildVectorSDNode
>(N
->getOperand(1));
846 if (!Subtarget
.hasDSP())
850 !BV
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
,
851 EltSize
, !Subtarget
.isLittle()) ||
852 (SplatBitSize
!= EltSize
) ||
853 (SplatValue
.getZExtValue() >= EltSize
))
857 return DAG
.getNode(Opc
, DL
, Ty
, N
->getOperand(0),
858 DAG
.getConstant(SplatValue
.getZExtValue(), DL
, MVT::i32
));
861 static SDValue
performSHLCombine(SDNode
*N
, SelectionDAG
&DAG
,
862 TargetLowering::DAGCombinerInfo
&DCI
,
863 const MipsSubtarget
&Subtarget
) {
864 EVT Ty
= N
->getValueType(0);
866 if ((Ty
!= MVT::v2i16
) && (Ty
!= MVT::v4i8
))
869 return performDSPShiftCombine(MipsISD::SHLL_DSP
, N
, Ty
, DAG
, Subtarget
);
872 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
873 // constant splats into MipsISD::SHRA_DSP for DSPr2.
875 // Performs the following transformations:
876 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
877 // sign/zero-extension is completely overwritten by the new one performed by
878 // the ISD::SRA and ISD::SHL nodes.
879 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
882 // See performDSPShiftCombine for more information about the transformation
884 static SDValue
performSRACombine(SDNode
*N
, SelectionDAG
&DAG
,
885 TargetLowering::DAGCombinerInfo
&DCI
,
886 const MipsSubtarget
&Subtarget
) {
887 EVT Ty
= N
->getValueType(0);
889 if (Subtarget
.hasMSA()) {
890 SDValue Op0
= N
->getOperand(0);
891 SDValue Op1
= N
->getOperand(1);
893 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
894 // where $d + sizeof($c) == 32
895 // or $d + sizeof($c) <= 32 and SExt
896 // -> (MipsVExtractSExt $a, $b, $c)
897 if (Op0
->getOpcode() == ISD::SHL
&& Op1
== Op0
->getOperand(1)) {
898 SDValue Op0Op0
= Op0
->getOperand(0);
899 ConstantSDNode
*ShAmount
= dyn_cast
<ConstantSDNode
>(Op1
);
904 if (Op0Op0
->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT
&&
905 Op0Op0
->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT
)
908 EVT ExtendTy
= cast
<VTSDNode
>(Op0Op0
->getOperand(2))->getVT();
909 unsigned TotalBits
= ShAmount
->getZExtValue() + ExtendTy
.getSizeInBits();
911 if (TotalBits
== 32 ||
912 (Op0Op0
->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT
&&
914 SDValue Ops
[] = { Op0Op0
->getOperand(0), Op0Op0
->getOperand(1),
915 Op0Op0
->getOperand(2) };
916 return DAG
.getNode(MipsISD::VEXTRACT_SEXT_ELT
, SDLoc(Op0Op0
),
918 makeArrayRef(Ops
, Op0Op0
->getNumOperands()));
923 if ((Ty
!= MVT::v2i16
) && ((Ty
!= MVT::v4i8
) || !Subtarget
.hasDSPR2()))
926 return performDSPShiftCombine(MipsISD::SHRA_DSP
, N
, Ty
, DAG
, Subtarget
);
930 static SDValue
performSRLCombine(SDNode
*N
, SelectionDAG
&DAG
,
931 TargetLowering::DAGCombinerInfo
&DCI
,
932 const MipsSubtarget
&Subtarget
) {
933 EVT Ty
= N
->getValueType(0);
935 if (((Ty
!= MVT::v2i16
) || !Subtarget
.hasDSPR2()) && (Ty
!= MVT::v4i8
))
938 return performDSPShiftCombine(MipsISD::SHRL_DSP
, N
, Ty
, DAG
, Subtarget
);
941 static bool isLegalDSPCondCode(EVT Ty
, ISD::CondCode CC
) {
942 bool IsV216
= (Ty
== MVT::v2i16
);
946 case ISD::SETNE
: return true;
950 case ISD::SETGE
: return IsV216
;
954 case ISD::SETUGE
: return !IsV216
;
955 default: return false;
959 static SDValue
performSETCCCombine(SDNode
*N
, SelectionDAG
&DAG
) {
960 EVT Ty
= N
->getValueType(0);
962 if ((Ty
!= MVT::v2i16
) && (Ty
!= MVT::v4i8
))
965 if (!isLegalDSPCondCode(Ty
, cast
<CondCodeSDNode
>(N
->getOperand(2))->get()))
968 return DAG
.getNode(MipsISD::SETCC_DSP
, SDLoc(N
), Ty
, N
->getOperand(0),
969 N
->getOperand(1), N
->getOperand(2));
972 static SDValue
performVSELECTCombine(SDNode
*N
, SelectionDAG
&DAG
) {
973 EVT Ty
= N
->getValueType(0);
975 if (Ty
== MVT::v2i16
|| Ty
== MVT::v4i8
) {
976 SDValue SetCC
= N
->getOperand(0);
978 if (SetCC
.getOpcode() != MipsISD::SETCC_DSP
)
981 return DAG
.getNode(MipsISD::SELECT_CC_DSP
, SDLoc(N
), Ty
,
982 SetCC
.getOperand(0), SetCC
.getOperand(1),
983 N
->getOperand(1), N
->getOperand(2), SetCC
.getOperand(2));
989 static SDValue
performXORCombine(SDNode
*N
, SelectionDAG
&DAG
,
990 const MipsSubtarget
&Subtarget
) {
991 EVT Ty
= N
->getValueType(0);
993 if (Subtarget
.hasMSA() && Ty
.is128BitVector() && Ty
.isInteger()) {
994 // Try the following combines:
995 // (xor (or $a, $b), (build_vector allones))
996 // (xor (or $a, $b), (bitcast (build_vector allones)))
997 SDValue Op0
= N
->getOperand(0);
998 SDValue Op1
= N
->getOperand(1);
1001 if (ISD::isBuildVectorAllOnes(Op0
.getNode()))
1003 else if (ISD::isBuildVectorAllOnes(Op1
.getNode()))
1008 if (NotOp
->getOpcode() == ISD::OR
)
1009 return DAG
.getNode(MipsISD::VNOR
, SDLoc(N
), Ty
, NotOp
->getOperand(0),
1010 NotOp
->getOperand(1));
1017 MipsSETargetLowering::PerformDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
1018 SelectionDAG
&DAG
= DCI
.DAG
;
1021 switch (N
->getOpcode()) {
1023 Val
= performANDCombine(N
, DAG
, DCI
, Subtarget
);
1026 Val
= performORCombine(N
, DAG
, DCI
, Subtarget
);
1029 return performMULCombine(N
, DAG
, DCI
, this, Subtarget
);
1031 Val
= performSHLCombine(N
, DAG
, DCI
, Subtarget
);
1034 return performSRACombine(N
, DAG
, DCI
, Subtarget
);
1036 return performSRLCombine(N
, DAG
, DCI
, Subtarget
);
1038 return performVSELECTCombine(N
, DAG
);
1040 Val
= performXORCombine(N
, DAG
, Subtarget
);
1043 Val
= performSETCCCombine(N
, DAG
);
1047 if (Val
.getNode()) {
1048 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1049 N
->printrWithDepth(dbgs(), &DAG
); dbgs() << "\n=> \n";
1050 Val
.getNode()->printrWithDepth(dbgs(), &DAG
); dbgs() << "\n");
1054 return MipsTargetLowering::PerformDAGCombine(N
, DCI
);
1058 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr
&MI
,
1059 MachineBasicBlock
*BB
) const {
1060 switch (MI
.getOpcode()) {
1062 return MipsTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
1063 case Mips::BPOSGE32_PSEUDO
:
1064 return emitBPOSGE32(MI
, BB
);
1065 case Mips::SNZ_B_PSEUDO
:
1066 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_B
);
1067 case Mips::SNZ_H_PSEUDO
:
1068 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_H
);
1069 case Mips::SNZ_W_PSEUDO
:
1070 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_W
);
1071 case Mips::SNZ_D_PSEUDO
:
1072 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_D
);
1073 case Mips::SNZ_V_PSEUDO
:
1074 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_V
);
1075 case Mips::SZ_B_PSEUDO
:
1076 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_B
);
1077 case Mips::SZ_H_PSEUDO
:
1078 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_H
);
1079 case Mips::SZ_W_PSEUDO
:
1080 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_W
);
1081 case Mips::SZ_D_PSEUDO
:
1082 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_D
);
1083 case Mips::SZ_V_PSEUDO
:
1084 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_V
);
1085 case Mips::COPY_FW_PSEUDO
:
1086 return emitCOPY_FW(MI
, BB
);
1087 case Mips::COPY_FD_PSEUDO
:
1088 return emitCOPY_FD(MI
, BB
);
1089 case Mips::INSERT_FW_PSEUDO
:
1090 return emitINSERT_FW(MI
, BB
);
1091 case Mips::INSERT_FD_PSEUDO
:
1092 return emitINSERT_FD(MI
, BB
);
1093 case Mips::INSERT_B_VIDX_PSEUDO
:
1094 case Mips::INSERT_B_VIDX64_PSEUDO
:
1095 return emitINSERT_DF_VIDX(MI
, BB
, 1, false);
1096 case Mips::INSERT_H_VIDX_PSEUDO
:
1097 case Mips::INSERT_H_VIDX64_PSEUDO
:
1098 return emitINSERT_DF_VIDX(MI
, BB
, 2, false);
1099 case Mips::INSERT_W_VIDX_PSEUDO
:
1100 case Mips::INSERT_W_VIDX64_PSEUDO
:
1101 return emitINSERT_DF_VIDX(MI
, BB
, 4, false);
1102 case Mips::INSERT_D_VIDX_PSEUDO
:
1103 case Mips::INSERT_D_VIDX64_PSEUDO
:
1104 return emitINSERT_DF_VIDX(MI
, BB
, 8, false);
1105 case Mips::INSERT_FW_VIDX_PSEUDO
:
1106 case Mips::INSERT_FW_VIDX64_PSEUDO
:
1107 return emitINSERT_DF_VIDX(MI
, BB
, 4, true);
1108 case Mips::INSERT_FD_VIDX_PSEUDO
:
1109 case Mips::INSERT_FD_VIDX64_PSEUDO
:
1110 return emitINSERT_DF_VIDX(MI
, BB
, 8, true);
1111 case Mips::FILL_FW_PSEUDO
:
1112 return emitFILL_FW(MI
, BB
);
1113 case Mips::FILL_FD_PSEUDO
:
1114 return emitFILL_FD(MI
, BB
);
1115 case Mips::FEXP2_W_1_PSEUDO
:
1116 return emitFEXP2_W_1(MI
, BB
);
1117 case Mips::FEXP2_D_1_PSEUDO
:
1118 return emitFEXP2_D_1(MI
, BB
);
1120 return emitST_F16_PSEUDO(MI
, BB
);
1122 return emitLD_F16_PSEUDO(MI
, BB
);
1123 case Mips::MSA_FP_EXTEND_W_PSEUDO
:
1124 return emitFPEXTEND_PSEUDO(MI
, BB
, false);
1125 case Mips::MSA_FP_ROUND_W_PSEUDO
:
1126 return emitFPROUND_PSEUDO(MI
, BB
, false);
1127 case Mips::MSA_FP_EXTEND_D_PSEUDO
:
1128 return emitFPEXTEND_PSEUDO(MI
, BB
, true);
1129 case Mips::MSA_FP_ROUND_D_PSEUDO
:
1130 return emitFPROUND_PSEUDO(MI
, BB
, true);
1134 bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1135 const CCState
&CCInfo
, unsigned NextStackOffset
,
1136 const MipsFunctionInfo
&FI
) const {
1137 if (!UseMipsTailCalls
)
1140 // Exception has to be cleared with eret.
1144 // Return false if either the callee or caller has a byval argument.
1145 if (CCInfo
.getInRegsParamsCount() > 0 || FI
.hasByvalArg())
1148 // Return true if the callee's argument area is no larger than the
1150 return NextStackOffset
<= FI
.getIncomingArgSize();
1153 void MipsSETargetLowering::
1154 getOpndList(SmallVectorImpl
<SDValue
> &Ops
,
1155 std::deque
<std::pair
<unsigned, SDValue
>> &RegsToPass
,
1156 bool IsPICCall
, bool GlobalOrExternal
, bool InternalLinkage
,
1157 bool IsCallReloc
, CallLoweringInfo
&CLI
, SDValue Callee
,
1158 SDValue Chain
) const {
1159 Ops
.push_back(Callee
);
1160 MipsTargetLowering::getOpndList(Ops
, RegsToPass
, IsPICCall
, GlobalOrExternal
,
1161 InternalLinkage
, IsCallReloc
, CLI
, Callee
,
1165 SDValue
MipsSETargetLowering::lowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
1166 LoadSDNode
&Nd
= *cast
<LoadSDNode
>(Op
);
1168 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1169 return MipsTargetLowering::lowerLOAD(Op
, DAG
);
1171 // Replace a double precision load with two i32 loads and a buildpair64.
1173 SDValue Ptr
= Nd
.getBasePtr(), Chain
= Nd
.getChain();
1174 EVT PtrVT
= Ptr
.getValueType();
1176 // i32 load from lower address.
1177 SDValue Lo
= DAG
.getLoad(MVT::i32
, DL
, Chain
, Ptr
, MachinePointerInfo(),
1178 Nd
.getAlignment(), Nd
.getMemOperand()->getFlags());
1180 // i32 load from higher address.
1181 Ptr
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Ptr
, DAG
.getConstant(4, DL
, PtrVT
));
1182 SDValue Hi
= DAG
.getLoad(
1183 MVT::i32
, DL
, Lo
.getValue(1), Ptr
, MachinePointerInfo(),
1184 std::min(Nd
.getAlignment(), 4U), Nd
.getMemOperand()->getFlags());
1186 if (!Subtarget
.isLittle())
1189 SDValue BP
= DAG
.getNode(MipsISD::BuildPairF64
, DL
, MVT::f64
, Lo
, Hi
);
1190 SDValue Ops
[2] = {BP
, Hi
.getValue(1)};
1191 return DAG
.getMergeValues(Ops
, DL
);
1194 SDValue
MipsSETargetLowering::lowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const {
1195 StoreSDNode
&Nd
= *cast
<StoreSDNode
>(Op
);
1197 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1198 return MipsTargetLowering::lowerSTORE(Op
, DAG
);
1200 // Replace a double precision store with two extractelement64s and i32 stores.
1202 SDValue Val
= Nd
.getValue(), Ptr
= Nd
.getBasePtr(), Chain
= Nd
.getChain();
1203 EVT PtrVT
= Ptr
.getValueType();
1204 SDValue Lo
= DAG
.getNode(MipsISD::ExtractElementF64
, DL
, MVT::i32
,
1205 Val
, DAG
.getConstant(0, DL
, MVT::i32
));
1206 SDValue Hi
= DAG
.getNode(MipsISD::ExtractElementF64
, DL
, MVT::i32
,
1207 Val
, DAG
.getConstant(1, DL
, MVT::i32
));
1209 if (!Subtarget
.isLittle())
1212 // i32 store to lower address.
1214 DAG
.getStore(Chain
, DL
, Lo
, Ptr
, MachinePointerInfo(), Nd
.getAlignment(),
1215 Nd
.getMemOperand()->getFlags(), Nd
.getAAInfo());
1217 // i32 store to higher address.
1218 Ptr
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Ptr
, DAG
.getConstant(4, DL
, PtrVT
));
1219 return DAG
.getStore(Chain
, DL
, Hi
, Ptr
, MachinePointerInfo(),
1220 std::min(Nd
.getAlignment(), 4U),
1221 Nd
.getMemOperand()->getFlags(), Nd
.getAAInfo());
1224 SDValue
MipsSETargetLowering::lowerMulDiv(SDValue Op
, unsigned NewOpc
,
1225 bool HasLo
, bool HasHi
,
1226 SelectionDAG
&DAG
) const {
1227 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1228 assert(!Subtarget
.hasMips32r6());
1230 EVT Ty
= Op
.getOperand(0).getValueType();
1232 SDValue Mult
= DAG
.getNode(NewOpc
, DL
, MVT::Untyped
,
1233 Op
.getOperand(0), Op
.getOperand(1));
1237 Lo
= DAG
.getNode(MipsISD::MFLO
, DL
, Ty
, Mult
);
1239 Hi
= DAG
.getNode(MipsISD::MFHI
, DL
, Ty
, Mult
);
1241 if (!HasLo
|| !HasHi
)
1242 return HasLo
? Lo
: Hi
;
1244 SDValue Vals
[] = { Lo
, Hi
};
1245 return DAG
.getMergeValues(Vals
, DL
);
1248 static SDValue
initAccumulator(SDValue In
, const SDLoc
&DL
, SelectionDAG
&DAG
) {
1249 SDValue InLo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, DL
, MVT::i32
, In
,
1250 DAG
.getConstant(0, DL
, MVT::i32
));
1251 SDValue InHi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, DL
, MVT::i32
, In
,
1252 DAG
.getConstant(1, DL
, MVT::i32
));
1253 return DAG
.getNode(MipsISD::MTLOHI
, DL
, MVT::Untyped
, InLo
, InHi
);
1256 static SDValue
extractLOHI(SDValue Op
, const SDLoc
&DL
, SelectionDAG
&DAG
) {
1257 SDValue Lo
= DAG
.getNode(MipsISD::MFLO
, DL
, MVT::i32
, Op
);
1258 SDValue Hi
= DAG
.getNode(MipsISD::MFHI
, DL
, MVT::i32
, Op
);
1259 return DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
, Lo
, Hi
);
1262 // This function expands mips intrinsic nodes which have 64-bit input operands
1263 // or output values.
1265 // out64 = intrinsic-node in64
1267 // lo = copy (extract-element (in64, 0))
1268 // hi = copy (extract-element (in64, 1))
1269 // mips-specific-node
1272 // out64 = merge-values (v0, v1)
1274 static SDValue
lowerDSPIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Opc
) {
1276 bool HasChainIn
= Op
->getOperand(0).getValueType() == MVT::Other
;
1277 SmallVector
<SDValue
, 3> Ops
;
1280 // See if Op has a chain input.
1282 Ops
.push_back(Op
->getOperand(OpNo
++));
1284 // The next operand is the intrinsic opcode.
1285 assert(Op
->getOperand(OpNo
).getOpcode() == ISD::TargetConstant
);
1287 // See if the next operand has type i64.
1288 SDValue Opnd
= Op
->getOperand(++OpNo
), In64
;
1290 if (Opnd
.getValueType() == MVT::i64
)
1291 In64
= initAccumulator(Opnd
, DL
, DAG
);
1293 Ops
.push_back(Opnd
);
1295 // Push the remaining operands.
1296 for (++OpNo
; OpNo
< Op
->getNumOperands(); ++OpNo
)
1297 Ops
.push_back(Op
->getOperand(OpNo
));
1299 // Add In64 to the end of the list.
1301 Ops
.push_back(In64
);
1304 SmallVector
<EVT
, 2> ResTys
;
1306 for (SDNode::value_iterator I
= Op
->value_begin(), E
= Op
->value_end();
1308 ResTys
.push_back((*I
== MVT::i64
) ? MVT::Untyped
: *I
);
1311 SDValue Val
= DAG
.getNode(Opc
, DL
, ResTys
, Ops
);
1312 SDValue Out
= (ResTys
[0] == MVT::Untyped
) ? extractLOHI(Val
, DL
, DAG
) : Val
;
1317 assert(Val
->getValueType(1) == MVT::Other
);
1318 SDValue Vals
[] = { Out
, SDValue(Val
.getNode(), 1) };
1319 return DAG
.getMergeValues(Vals
, DL
);
1322 // Lower an MSA copy intrinsic into the specified SelectionDAG node
1323 static SDValue
lowerMSACopyIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Opc
) {
1325 SDValue Vec
= Op
->getOperand(1);
1326 SDValue Idx
= Op
->getOperand(2);
1327 EVT ResTy
= Op
->getValueType(0);
1328 EVT EltTy
= Vec
->getValueType(0).getVectorElementType();
1330 SDValue Result
= DAG
.getNode(Opc
, DL
, ResTy
, Vec
, Idx
,
1331 DAG
.getValueType(EltTy
));
1336 static SDValue
lowerMSASplatZExt(SDValue Op
, unsigned OpNr
, SelectionDAG
&DAG
) {
1337 EVT ResVecTy
= Op
->getValueType(0);
1338 EVT ViaVecTy
= ResVecTy
;
1339 bool BigEndian
= !DAG
.getSubtarget().getTargetTriple().isLittleEndian();
1342 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1343 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
1345 SDValue LaneA
= Op
->getOperand(OpNr
);
1348 if (ResVecTy
== MVT::v2i64
) {
1349 // In case of the index being passed as an immediate value, set the upper
1350 // lane to 0 so that the splati.d instruction can be matched.
1351 if (isa
<ConstantSDNode
>(LaneA
))
1352 LaneB
= DAG
.getConstant(0, DL
, MVT::i32
);
1353 // Having the index passed in a register, set the upper lane to the same
1354 // value as the lower - this results in the BUILD_VECTOR node not being
1355 // expanded through stack. This way we are able to pattern match the set of
1356 // nodes created here to splat.d.
1359 ViaVecTy
= MVT::v4i32
;
1361 std::swap(LaneA
, LaneB
);
1365 SDValue Ops
[16] = { LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
,
1366 LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
};
1368 SDValue Result
= DAG
.getBuildVector(
1369 ViaVecTy
, DL
, makeArrayRef(Ops
, ViaVecTy
.getVectorNumElements()));
1371 if (ViaVecTy
!= ResVecTy
) {
1372 SDValue One
= DAG
.getConstant(1, DL
, ViaVecTy
);
1373 Result
= DAG
.getNode(ISD::BITCAST
, DL
, ResVecTy
,
1374 DAG
.getNode(ISD::AND
, DL
, ViaVecTy
, Result
, One
));
1380 static SDValue
lowerMSASplatImm(SDValue Op
, unsigned ImmOp
, SelectionDAG
&DAG
,
1381 bool IsSigned
= false) {
1382 return DAG
.getConstant(
1383 APInt(Op
->getValueType(0).getScalarType().getSizeInBits(),
1384 Op
->getConstantOperandVal(ImmOp
), IsSigned
),
1385 SDLoc(Op
), Op
->getValueType(0));
1388 static SDValue
getBuildVectorSplat(EVT VecTy
, SDValue SplatValue
,
1389 bool BigEndian
, SelectionDAG
&DAG
) {
1390 EVT ViaVecTy
= VecTy
;
1391 SDValue SplatValueA
= SplatValue
;
1392 SDValue SplatValueB
= SplatValue
;
1393 SDLoc
DL(SplatValue
);
1395 if (VecTy
== MVT::v2i64
) {
1396 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
1397 ViaVecTy
= MVT::v4i32
;
1399 SplatValueA
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, SplatValue
);
1400 SplatValueB
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, SplatValue
,
1401 DAG
.getConstant(32, DL
, MVT::i32
));
1402 SplatValueB
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, SplatValueB
);
1405 // We currently hold the parts in little endian order. Swap them if
1408 std::swap(SplatValueA
, SplatValueB
);
1410 SDValue Ops
[16] = { SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1411 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1412 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1413 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
};
1415 SDValue Result
= DAG
.getBuildVector(
1416 ViaVecTy
, DL
, makeArrayRef(Ops
, ViaVecTy
.getVectorNumElements()));
1418 if (VecTy
!= ViaVecTy
)
1419 Result
= DAG
.getNode(ISD::BITCAST
, DL
, VecTy
, Result
);
1424 static SDValue
lowerMSABinaryBitImmIntr(SDValue Op
, SelectionDAG
&DAG
,
1425 unsigned Opc
, SDValue Imm
,
1427 EVT VecTy
= Op
->getValueType(0);
1431 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1433 if (VecTy
== MVT::v2i64
) {
1434 if (ConstantSDNode
*CImm
= dyn_cast
<ConstantSDNode
>(Imm
)) {
1435 APInt BitImm
= APInt(64, 1) << CImm
->getAPIntValue();
1437 SDValue BitImmHiOp
= DAG
.getConstant(BitImm
.lshr(32).trunc(32), DL
,
1439 SDValue BitImmLoOp
= DAG
.getConstant(BitImm
.trunc(32), DL
, MVT::i32
);
1442 std::swap(BitImmLoOp
, BitImmHiOp
);
1444 Exp2Imm
= DAG
.getNode(
1445 ISD::BITCAST
, DL
, MVT::v2i64
,
1446 DAG
.getBuildVector(MVT::v4i32
, DL
,
1447 {BitImmLoOp
, BitImmHiOp
, BitImmLoOp
, BitImmHiOp
}));
1451 if (!Exp2Imm
.getNode()) {
1452 // We couldnt constant fold, do a vector shift instead
1454 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1455 // only values 0-63 are valid.
1456 if (VecTy
== MVT::v2i64
)
1457 Imm
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, Imm
);
1459 Exp2Imm
= getBuildVectorSplat(VecTy
, Imm
, BigEndian
, DAG
);
1461 Exp2Imm
= DAG
.getNode(ISD::SHL
, DL
, VecTy
, DAG
.getConstant(1, DL
, VecTy
),
1465 return DAG
.getNode(Opc
, DL
, VecTy
, Op
->getOperand(1), Exp2Imm
);
1468 static SDValue
truncateVecElts(SDValue Op
, SelectionDAG
&DAG
) {
1470 EVT ResTy
= Op
->getValueType(0);
1471 SDValue Vec
= Op
->getOperand(2);
1472 bool BigEndian
= !DAG
.getSubtarget().getTargetTriple().isLittleEndian();
1473 MVT ResEltTy
= ResTy
== MVT::v2i64
? MVT::i64
: MVT::i32
;
1474 SDValue ConstValue
= DAG
.getConstant(Vec
.getScalarValueSizeInBits() - 1,
1476 SDValue SplatVec
= getBuildVectorSplat(ResTy
, ConstValue
, BigEndian
, DAG
);
1478 return DAG
.getNode(ISD::AND
, DL
, ResTy
, Vec
, SplatVec
);
1481 static SDValue
lowerMSABitClear(SDValue Op
, SelectionDAG
&DAG
) {
1482 EVT ResTy
= Op
->getValueType(0);
1484 SDValue One
= DAG
.getConstant(1, DL
, ResTy
);
1485 SDValue Bit
= DAG
.getNode(ISD::SHL
, DL
, ResTy
, One
, truncateVecElts(Op
, DAG
));
1487 return DAG
.getNode(ISD::AND
, DL
, ResTy
, Op
->getOperand(1),
1488 DAG
.getNOT(DL
, Bit
, ResTy
));
1491 static SDValue
lowerMSABitClearImm(SDValue Op
, SelectionDAG
&DAG
) {
1493 EVT ResTy
= Op
->getValueType(0);
1494 APInt BitImm
= APInt(ResTy
.getScalarSizeInBits(), 1)
1495 << cast
<ConstantSDNode
>(Op
->getOperand(2))->getAPIntValue();
1496 SDValue BitMask
= DAG
.getConstant(~BitImm
, DL
, ResTy
);
1498 return DAG
.getNode(ISD::AND
, DL
, ResTy
, Op
->getOperand(1), BitMask
);
1501 SDValue
MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
1502 SelectionDAG
&DAG
) const {
1504 unsigned Intrinsic
= cast
<ConstantSDNode
>(Op
->getOperand(0))->getZExtValue();
1505 switch (Intrinsic
) {
1508 case Intrinsic::mips_shilo
:
1509 return lowerDSPIntr(Op
, DAG
, MipsISD::SHILO
);
1510 case Intrinsic::mips_dpau_h_qbl
:
1511 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBL
);
1512 case Intrinsic::mips_dpau_h_qbr
:
1513 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBR
);
1514 case Intrinsic::mips_dpsu_h_qbl
:
1515 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBL
);
1516 case Intrinsic::mips_dpsu_h_qbr
:
1517 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBR
);
1518 case Intrinsic::mips_dpa_w_ph
:
1519 return lowerDSPIntr(Op
, DAG
, MipsISD::DPA_W_PH
);
1520 case Intrinsic::mips_dps_w_ph
:
1521 return lowerDSPIntr(Op
, DAG
, MipsISD::DPS_W_PH
);
1522 case Intrinsic::mips_dpax_w_ph
:
1523 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAX_W_PH
);
1524 case Intrinsic::mips_dpsx_w_ph
:
1525 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSX_W_PH
);
1526 case Intrinsic::mips_mulsa_w_ph
:
1527 return lowerDSPIntr(Op
, DAG
, MipsISD::MULSA_W_PH
);
1528 case Intrinsic::mips_mult
:
1529 return lowerDSPIntr(Op
, DAG
, MipsISD::Mult
);
1530 case Intrinsic::mips_multu
:
1531 return lowerDSPIntr(Op
, DAG
, MipsISD::Multu
);
1532 case Intrinsic::mips_madd
:
1533 return lowerDSPIntr(Op
, DAG
, MipsISD::MAdd
);
1534 case Intrinsic::mips_maddu
:
1535 return lowerDSPIntr(Op
, DAG
, MipsISD::MAddu
);
1536 case Intrinsic::mips_msub
:
1537 return lowerDSPIntr(Op
, DAG
, MipsISD::MSub
);
1538 case Intrinsic::mips_msubu
:
1539 return lowerDSPIntr(Op
, DAG
, MipsISD::MSubu
);
1540 case Intrinsic::mips_addv_b
:
1541 case Intrinsic::mips_addv_h
:
1542 case Intrinsic::mips_addv_w
:
1543 case Intrinsic::mips_addv_d
:
1544 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1546 case Intrinsic::mips_addvi_b
:
1547 case Intrinsic::mips_addvi_h
:
1548 case Intrinsic::mips_addvi_w
:
1549 case Intrinsic::mips_addvi_d
:
1550 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1551 lowerMSASplatImm(Op
, 2, DAG
));
1552 case Intrinsic::mips_and_v
:
1553 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1555 case Intrinsic::mips_andi_b
:
1556 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1557 lowerMSASplatImm(Op
, 2, DAG
));
1558 case Intrinsic::mips_bclr_b
:
1559 case Intrinsic::mips_bclr_h
:
1560 case Intrinsic::mips_bclr_w
:
1561 case Intrinsic::mips_bclr_d
:
1562 return lowerMSABitClear(Op
, DAG
);
1563 case Intrinsic::mips_bclri_b
:
1564 case Intrinsic::mips_bclri_h
:
1565 case Intrinsic::mips_bclri_w
:
1566 case Intrinsic::mips_bclri_d
:
1567 return lowerMSABitClearImm(Op
, DAG
);
1568 case Intrinsic::mips_binsli_b
:
1569 case Intrinsic::mips_binsli_h
:
1570 case Intrinsic::mips_binsli_w
:
1571 case Intrinsic::mips_binsli_d
: {
1572 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1573 EVT VecTy
= Op
->getValueType(0);
1574 EVT EltTy
= VecTy
.getVectorElementType();
1575 if (Op
->getConstantOperandVal(3) >= EltTy
.getSizeInBits())
1576 report_fatal_error("Immediate out of range");
1577 APInt Mask
= APInt::getHighBitsSet(EltTy
.getSizeInBits(),
1578 Op
->getConstantOperandVal(3) + 1);
1579 return DAG
.getNode(ISD::VSELECT
, DL
, VecTy
,
1580 DAG
.getConstant(Mask
, DL
, VecTy
, true),
1581 Op
->getOperand(2), Op
->getOperand(1));
1583 case Intrinsic::mips_binsri_b
:
1584 case Intrinsic::mips_binsri_h
:
1585 case Intrinsic::mips_binsri_w
:
1586 case Intrinsic::mips_binsri_d
: {
1587 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1588 EVT VecTy
= Op
->getValueType(0);
1589 EVT EltTy
= VecTy
.getVectorElementType();
1590 if (Op
->getConstantOperandVal(3) >= EltTy
.getSizeInBits())
1591 report_fatal_error("Immediate out of range");
1592 APInt Mask
= APInt::getLowBitsSet(EltTy
.getSizeInBits(),
1593 Op
->getConstantOperandVal(3) + 1);
1594 return DAG
.getNode(ISD::VSELECT
, DL
, VecTy
,
1595 DAG
.getConstant(Mask
, DL
, VecTy
, true),
1596 Op
->getOperand(2), Op
->getOperand(1));
1598 case Intrinsic::mips_bmnz_v
:
1599 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0), Op
->getOperand(3),
1600 Op
->getOperand(2), Op
->getOperand(1));
1601 case Intrinsic::mips_bmnzi_b
:
1602 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1603 lowerMSASplatImm(Op
, 3, DAG
), Op
->getOperand(2),
1605 case Intrinsic::mips_bmz_v
:
1606 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0), Op
->getOperand(3),
1607 Op
->getOperand(1), Op
->getOperand(2));
1608 case Intrinsic::mips_bmzi_b
:
1609 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1610 lowerMSASplatImm(Op
, 3, DAG
), Op
->getOperand(1),
1612 case Intrinsic::mips_bneg_b
:
1613 case Intrinsic::mips_bneg_h
:
1614 case Intrinsic::mips_bneg_w
:
1615 case Intrinsic::mips_bneg_d
: {
1616 EVT VecTy
= Op
->getValueType(0);
1617 SDValue One
= DAG
.getConstant(1, DL
, VecTy
);
1619 return DAG
.getNode(ISD::XOR
, DL
, VecTy
, Op
->getOperand(1),
1620 DAG
.getNode(ISD::SHL
, DL
, VecTy
, One
,
1621 truncateVecElts(Op
, DAG
)));
1623 case Intrinsic::mips_bnegi_b
:
1624 case Intrinsic::mips_bnegi_h
:
1625 case Intrinsic::mips_bnegi_w
:
1626 case Intrinsic::mips_bnegi_d
:
1627 return lowerMSABinaryBitImmIntr(Op
, DAG
, ISD::XOR
, Op
->getOperand(2),
1628 !Subtarget
.isLittle());
1629 case Intrinsic::mips_bnz_b
:
1630 case Intrinsic::mips_bnz_h
:
1631 case Intrinsic::mips_bnz_w
:
1632 case Intrinsic::mips_bnz_d
:
1633 return DAG
.getNode(MipsISD::VALL_NONZERO
, DL
, Op
->getValueType(0),
1635 case Intrinsic::mips_bnz_v
:
1636 return DAG
.getNode(MipsISD::VANY_NONZERO
, DL
, Op
->getValueType(0),
1638 case Intrinsic::mips_bsel_v
:
1639 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1640 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1641 Op
->getOperand(1), Op
->getOperand(3),
1643 case Intrinsic::mips_bseli_b
:
1644 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1645 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1646 Op
->getOperand(1), lowerMSASplatImm(Op
, 3, DAG
),
1648 case Intrinsic::mips_bset_b
:
1649 case Intrinsic::mips_bset_h
:
1650 case Intrinsic::mips_bset_w
:
1651 case Intrinsic::mips_bset_d
: {
1652 EVT VecTy
= Op
->getValueType(0);
1653 SDValue One
= DAG
.getConstant(1, DL
, VecTy
);
1655 return DAG
.getNode(ISD::OR
, DL
, VecTy
, Op
->getOperand(1),
1656 DAG
.getNode(ISD::SHL
, DL
, VecTy
, One
,
1657 truncateVecElts(Op
, DAG
)));
1659 case Intrinsic::mips_bseti_b
:
1660 case Intrinsic::mips_bseti_h
:
1661 case Intrinsic::mips_bseti_w
:
1662 case Intrinsic::mips_bseti_d
:
1663 return lowerMSABinaryBitImmIntr(Op
, DAG
, ISD::OR
, Op
->getOperand(2),
1664 !Subtarget
.isLittle());
1665 case Intrinsic::mips_bz_b
:
1666 case Intrinsic::mips_bz_h
:
1667 case Intrinsic::mips_bz_w
:
1668 case Intrinsic::mips_bz_d
:
1669 return DAG
.getNode(MipsISD::VALL_ZERO
, DL
, Op
->getValueType(0),
1671 case Intrinsic::mips_bz_v
:
1672 return DAG
.getNode(MipsISD::VANY_ZERO
, DL
, Op
->getValueType(0),
1674 case Intrinsic::mips_ceq_b
:
1675 case Intrinsic::mips_ceq_h
:
1676 case Intrinsic::mips_ceq_w
:
1677 case Intrinsic::mips_ceq_d
:
1678 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1679 Op
->getOperand(2), ISD::SETEQ
);
1680 case Intrinsic::mips_ceqi_b
:
1681 case Intrinsic::mips_ceqi_h
:
1682 case Intrinsic::mips_ceqi_w
:
1683 case Intrinsic::mips_ceqi_d
:
1684 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1685 lowerMSASplatImm(Op
, 2, DAG
, true), ISD::SETEQ
);
1686 case Intrinsic::mips_cle_s_b
:
1687 case Intrinsic::mips_cle_s_h
:
1688 case Intrinsic::mips_cle_s_w
:
1689 case Intrinsic::mips_cle_s_d
:
1690 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1691 Op
->getOperand(2), ISD::SETLE
);
1692 case Intrinsic::mips_clei_s_b
:
1693 case Intrinsic::mips_clei_s_h
:
1694 case Intrinsic::mips_clei_s_w
:
1695 case Intrinsic::mips_clei_s_d
:
1696 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1697 lowerMSASplatImm(Op
, 2, DAG
, true), ISD::SETLE
);
1698 case Intrinsic::mips_cle_u_b
:
1699 case Intrinsic::mips_cle_u_h
:
1700 case Intrinsic::mips_cle_u_w
:
1701 case Intrinsic::mips_cle_u_d
:
1702 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1703 Op
->getOperand(2), ISD::SETULE
);
1704 case Intrinsic::mips_clei_u_b
:
1705 case Intrinsic::mips_clei_u_h
:
1706 case Intrinsic::mips_clei_u_w
:
1707 case Intrinsic::mips_clei_u_d
:
1708 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1709 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULE
);
1710 case Intrinsic::mips_clt_s_b
:
1711 case Intrinsic::mips_clt_s_h
:
1712 case Intrinsic::mips_clt_s_w
:
1713 case Intrinsic::mips_clt_s_d
:
1714 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1715 Op
->getOperand(2), ISD::SETLT
);
1716 case Intrinsic::mips_clti_s_b
:
1717 case Intrinsic::mips_clti_s_h
:
1718 case Intrinsic::mips_clti_s_w
:
1719 case Intrinsic::mips_clti_s_d
:
1720 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1721 lowerMSASplatImm(Op
, 2, DAG
, true), ISD::SETLT
);
1722 case Intrinsic::mips_clt_u_b
:
1723 case Intrinsic::mips_clt_u_h
:
1724 case Intrinsic::mips_clt_u_w
:
1725 case Intrinsic::mips_clt_u_d
:
1726 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1727 Op
->getOperand(2), ISD::SETULT
);
1728 case Intrinsic::mips_clti_u_b
:
1729 case Intrinsic::mips_clti_u_h
:
1730 case Intrinsic::mips_clti_u_w
:
1731 case Intrinsic::mips_clti_u_d
:
1732 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1733 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULT
);
1734 case Intrinsic::mips_copy_s_b
:
1735 case Intrinsic::mips_copy_s_h
:
1736 case Intrinsic::mips_copy_s_w
:
1737 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1738 case Intrinsic::mips_copy_s_d
:
1739 if (Subtarget
.hasMips64())
1740 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1741 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1743 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1744 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1745 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1746 Op
->getValueType(0), Op
->getOperand(1),
1749 case Intrinsic::mips_copy_u_b
:
1750 case Intrinsic::mips_copy_u_h
:
1751 case Intrinsic::mips_copy_u_w
:
1752 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1753 case Intrinsic::mips_copy_u_d
:
1754 if (Subtarget
.hasMips64())
1755 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1756 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1758 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1759 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1760 // Note: When i64 is illegal, this results in copy_s.w instructions
1761 // instead of copy_u.w instructions. This makes no difference to the
1762 // behaviour since i64 is only illegal when the register file is 32-bit.
1763 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1764 Op
->getValueType(0), Op
->getOperand(1),
1767 case Intrinsic::mips_div_s_b
:
1768 case Intrinsic::mips_div_s_h
:
1769 case Intrinsic::mips_div_s_w
:
1770 case Intrinsic::mips_div_s_d
:
1771 return DAG
.getNode(ISD::SDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1773 case Intrinsic::mips_div_u_b
:
1774 case Intrinsic::mips_div_u_h
:
1775 case Intrinsic::mips_div_u_w
:
1776 case Intrinsic::mips_div_u_d
:
1777 return DAG
.getNode(ISD::UDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1779 case Intrinsic::mips_fadd_w
:
1780 case Intrinsic::mips_fadd_d
:
1781 // TODO: If intrinsics have fast-math-flags, propagate them.
1782 return DAG
.getNode(ISD::FADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1784 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1785 case Intrinsic::mips_fceq_w
:
1786 case Intrinsic::mips_fceq_d
:
1787 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1788 Op
->getOperand(2), ISD::SETOEQ
);
1789 case Intrinsic::mips_fcle_w
:
1790 case Intrinsic::mips_fcle_d
:
1791 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1792 Op
->getOperand(2), ISD::SETOLE
);
1793 case Intrinsic::mips_fclt_w
:
1794 case Intrinsic::mips_fclt_d
:
1795 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1796 Op
->getOperand(2), ISD::SETOLT
);
1797 case Intrinsic::mips_fcne_w
:
1798 case Intrinsic::mips_fcne_d
:
1799 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1800 Op
->getOperand(2), ISD::SETONE
);
1801 case Intrinsic::mips_fcor_w
:
1802 case Intrinsic::mips_fcor_d
:
1803 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1804 Op
->getOperand(2), ISD::SETO
);
1805 case Intrinsic::mips_fcueq_w
:
1806 case Intrinsic::mips_fcueq_d
:
1807 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1808 Op
->getOperand(2), ISD::SETUEQ
);
1809 case Intrinsic::mips_fcule_w
:
1810 case Intrinsic::mips_fcule_d
:
1811 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1812 Op
->getOperand(2), ISD::SETULE
);
1813 case Intrinsic::mips_fcult_w
:
1814 case Intrinsic::mips_fcult_d
:
1815 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1816 Op
->getOperand(2), ISD::SETULT
);
1817 case Intrinsic::mips_fcun_w
:
1818 case Intrinsic::mips_fcun_d
:
1819 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1820 Op
->getOperand(2), ISD::SETUO
);
1821 case Intrinsic::mips_fcune_w
:
1822 case Intrinsic::mips_fcune_d
:
1823 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1824 Op
->getOperand(2), ISD::SETUNE
);
1825 case Intrinsic::mips_fdiv_w
:
1826 case Intrinsic::mips_fdiv_d
:
1827 // TODO: If intrinsics have fast-math-flags, propagate them.
1828 return DAG
.getNode(ISD::FDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1830 case Intrinsic::mips_ffint_u_w
:
1831 case Intrinsic::mips_ffint_u_d
:
1832 return DAG
.getNode(ISD::UINT_TO_FP
, DL
, Op
->getValueType(0),
1834 case Intrinsic::mips_ffint_s_w
:
1835 case Intrinsic::mips_ffint_s_d
:
1836 return DAG
.getNode(ISD::SINT_TO_FP
, DL
, Op
->getValueType(0),
1838 case Intrinsic::mips_fill_b
:
1839 case Intrinsic::mips_fill_h
:
1840 case Intrinsic::mips_fill_w
:
1841 case Intrinsic::mips_fill_d
: {
1842 EVT ResTy
= Op
->getValueType(0);
1843 SmallVector
<SDValue
, 16> Ops(ResTy
.getVectorNumElements(),
1846 // If ResTy is v2i64 then the type legalizer will break this node down into
1847 // an equivalent v4i32.
1848 return DAG
.getBuildVector(ResTy
, DL
, Ops
);
1850 case Intrinsic::mips_fexp2_w
:
1851 case Intrinsic::mips_fexp2_d
: {
1852 // TODO: If intrinsics have fast-math-flags, propagate them.
1853 EVT ResTy
= Op
->getValueType(0);
1855 ISD::FMUL
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1856 DAG
.getNode(ISD::FEXP2
, SDLoc(Op
), ResTy
, Op
->getOperand(2)));
1858 case Intrinsic::mips_flog2_w
:
1859 case Intrinsic::mips_flog2_d
:
1860 return DAG
.getNode(ISD::FLOG2
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1861 case Intrinsic::mips_fmadd_w
:
1862 case Intrinsic::mips_fmadd_d
:
1863 return DAG
.getNode(ISD::FMA
, SDLoc(Op
), Op
->getValueType(0),
1864 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
1865 case Intrinsic::mips_fmul_w
:
1866 case Intrinsic::mips_fmul_d
:
1867 // TODO: If intrinsics have fast-math-flags, propagate them.
1868 return DAG
.getNode(ISD::FMUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1870 case Intrinsic::mips_fmsub_w
:
1871 case Intrinsic::mips_fmsub_d
: {
1872 // TODO: If intrinsics have fast-math-flags, propagate them.
1873 return DAG
.getNode(MipsISD::FMS
, SDLoc(Op
), Op
->getValueType(0),
1874 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
1876 case Intrinsic::mips_frint_w
:
1877 case Intrinsic::mips_frint_d
:
1878 return DAG
.getNode(ISD::FRINT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1879 case Intrinsic::mips_fsqrt_w
:
1880 case Intrinsic::mips_fsqrt_d
:
1881 return DAG
.getNode(ISD::FSQRT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1882 case Intrinsic::mips_fsub_w
:
1883 case Intrinsic::mips_fsub_d
:
1884 // TODO: If intrinsics have fast-math-flags, propagate them.
1885 return DAG
.getNode(ISD::FSUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1887 case Intrinsic::mips_ftrunc_u_w
:
1888 case Intrinsic::mips_ftrunc_u_d
:
1889 return DAG
.getNode(ISD::FP_TO_UINT
, DL
, Op
->getValueType(0),
1891 case Intrinsic::mips_ftrunc_s_w
:
1892 case Intrinsic::mips_ftrunc_s_d
:
1893 return DAG
.getNode(ISD::FP_TO_SINT
, DL
, Op
->getValueType(0),
1895 case Intrinsic::mips_ilvev_b
:
1896 case Intrinsic::mips_ilvev_h
:
1897 case Intrinsic::mips_ilvev_w
:
1898 case Intrinsic::mips_ilvev_d
:
1899 return DAG
.getNode(MipsISD::ILVEV
, DL
, Op
->getValueType(0),
1900 Op
->getOperand(1), Op
->getOperand(2));
1901 case Intrinsic::mips_ilvl_b
:
1902 case Intrinsic::mips_ilvl_h
:
1903 case Intrinsic::mips_ilvl_w
:
1904 case Intrinsic::mips_ilvl_d
:
1905 return DAG
.getNode(MipsISD::ILVL
, DL
, Op
->getValueType(0),
1906 Op
->getOperand(1), Op
->getOperand(2));
1907 case Intrinsic::mips_ilvod_b
:
1908 case Intrinsic::mips_ilvod_h
:
1909 case Intrinsic::mips_ilvod_w
:
1910 case Intrinsic::mips_ilvod_d
:
1911 return DAG
.getNode(MipsISD::ILVOD
, DL
, Op
->getValueType(0),
1912 Op
->getOperand(1), Op
->getOperand(2));
1913 case Intrinsic::mips_ilvr_b
:
1914 case Intrinsic::mips_ilvr_h
:
1915 case Intrinsic::mips_ilvr_w
:
1916 case Intrinsic::mips_ilvr_d
:
1917 return DAG
.getNode(MipsISD::ILVR
, DL
, Op
->getValueType(0),
1918 Op
->getOperand(1), Op
->getOperand(2));
1919 case Intrinsic::mips_insert_b
:
1920 case Intrinsic::mips_insert_h
:
1921 case Intrinsic::mips_insert_w
:
1922 case Intrinsic::mips_insert_d
:
1923 return DAG
.getNode(ISD::INSERT_VECTOR_ELT
, SDLoc(Op
), Op
->getValueType(0),
1924 Op
->getOperand(1), Op
->getOperand(3), Op
->getOperand(2));
1925 case Intrinsic::mips_insve_b
:
1926 case Intrinsic::mips_insve_h
:
1927 case Intrinsic::mips_insve_w
:
1928 case Intrinsic::mips_insve_d
: {
1929 // Report an error for out of range values.
1931 switch (Intrinsic
) {
1932 case Intrinsic::mips_insve_b
: Max
= 15; break;
1933 case Intrinsic::mips_insve_h
: Max
= 7; break;
1934 case Intrinsic::mips_insve_w
: Max
= 3; break;
1935 case Intrinsic::mips_insve_d
: Max
= 1; break;
1936 default: llvm_unreachable("Unmatched intrinsic");
1938 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
1939 if (Value
< 0 || Value
> Max
)
1940 report_fatal_error("Immediate out of range");
1941 return DAG
.getNode(MipsISD::INSVE
, DL
, Op
->getValueType(0),
1942 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3),
1943 DAG
.getConstant(0, DL
, MVT::i32
));
1945 case Intrinsic::mips_ldi_b
:
1946 case Intrinsic::mips_ldi_h
:
1947 case Intrinsic::mips_ldi_w
:
1948 case Intrinsic::mips_ldi_d
:
1949 return lowerMSASplatImm(Op
, 1, DAG
, true);
1950 case Intrinsic::mips_lsa
:
1951 case Intrinsic::mips_dlsa
: {
1952 EVT ResTy
= Op
->getValueType(0);
1953 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1954 DAG
.getNode(ISD::SHL
, SDLoc(Op
), ResTy
,
1955 Op
->getOperand(2), Op
->getOperand(3)));
1957 case Intrinsic::mips_maddv_b
:
1958 case Intrinsic::mips_maddv_h
:
1959 case Intrinsic::mips_maddv_w
:
1960 case Intrinsic::mips_maddv_d
: {
1961 EVT ResTy
= Op
->getValueType(0);
1962 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1963 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
1964 Op
->getOperand(2), Op
->getOperand(3)));
1966 case Intrinsic::mips_max_s_b
:
1967 case Intrinsic::mips_max_s_h
:
1968 case Intrinsic::mips_max_s_w
:
1969 case Intrinsic::mips_max_s_d
:
1970 return DAG
.getNode(ISD::SMAX
, DL
, Op
->getValueType(0),
1971 Op
->getOperand(1), Op
->getOperand(2));
1972 case Intrinsic::mips_max_u_b
:
1973 case Intrinsic::mips_max_u_h
:
1974 case Intrinsic::mips_max_u_w
:
1975 case Intrinsic::mips_max_u_d
:
1976 return DAG
.getNode(ISD::UMAX
, DL
, Op
->getValueType(0),
1977 Op
->getOperand(1), Op
->getOperand(2));
1978 case Intrinsic::mips_maxi_s_b
:
1979 case Intrinsic::mips_maxi_s_h
:
1980 case Intrinsic::mips_maxi_s_w
:
1981 case Intrinsic::mips_maxi_s_d
:
1982 return DAG
.getNode(ISD::SMAX
, DL
, Op
->getValueType(0),
1983 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
, true));
1984 case Intrinsic::mips_maxi_u_b
:
1985 case Intrinsic::mips_maxi_u_h
:
1986 case Intrinsic::mips_maxi_u_w
:
1987 case Intrinsic::mips_maxi_u_d
:
1988 return DAG
.getNode(ISD::UMAX
, DL
, Op
->getValueType(0),
1989 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
1990 case Intrinsic::mips_min_s_b
:
1991 case Intrinsic::mips_min_s_h
:
1992 case Intrinsic::mips_min_s_w
:
1993 case Intrinsic::mips_min_s_d
:
1994 return DAG
.getNode(ISD::SMIN
, DL
, Op
->getValueType(0),
1995 Op
->getOperand(1), Op
->getOperand(2));
1996 case Intrinsic::mips_min_u_b
:
1997 case Intrinsic::mips_min_u_h
:
1998 case Intrinsic::mips_min_u_w
:
1999 case Intrinsic::mips_min_u_d
:
2000 return DAG
.getNode(ISD::UMIN
, DL
, Op
->getValueType(0),
2001 Op
->getOperand(1), Op
->getOperand(2));
2002 case Intrinsic::mips_mini_s_b
:
2003 case Intrinsic::mips_mini_s_h
:
2004 case Intrinsic::mips_mini_s_w
:
2005 case Intrinsic::mips_mini_s_d
:
2006 return DAG
.getNode(ISD::SMIN
, DL
, Op
->getValueType(0),
2007 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
, true));
2008 case Intrinsic::mips_mini_u_b
:
2009 case Intrinsic::mips_mini_u_h
:
2010 case Intrinsic::mips_mini_u_w
:
2011 case Intrinsic::mips_mini_u_d
:
2012 return DAG
.getNode(ISD::UMIN
, DL
, Op
->getValueType(0),
2013 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2014 case Intrinsic::mips_mod_s_b
:
2015 case Intrinsic::mips_mod_s_h
:
2016 case Intrinsic::mips_mod_s_w
:
2017 case Intrinsic::mips_mod_s_d
:
2018 return DAG
.getNode(ISD::SREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2020 case Intrinsic::mips_mod_u_b
:
2021 case Intrinsic::mips_mod_u_h
:
2022 case Intrinsic::mips_mod_u_w
:
2023 case Intrinsic::mips_mod_u_d
:
2024 return DAG
.getNode(ISD::UREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2026 case Intrinsic::mips_mulv_b
:
2027 case Intrinsic::mips_mulv_h
:
2028 case Intrinsic::mips_mulv_w
:
2029 case Intrinsic::mips_mulv_d
:
2030 return DAG
.getNode(ISD::MUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2032 case Intrinsic::mips_msubv_b
:
2033 case Intrinsic::mips_msubv_h
:
2034 case Intrinsic::mips_msubv_w
:
2035 case Intrinsic::mips_msubv_d
: {
2036 EVT ResTy
= Op
->getValueType(0);
2037 return DAG
.getNode(ISD::SUB
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
2038 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
2039 Op
->getOperand(2), Op
->getOperand(3)));
2041 case Intrinsic::mips_nlzc_b
:
2042 case Intrinsic::mips_nlzc_h
:
2043 case Intrinsic::mips_nlzc_w
:
2044 case Intrinsic::mips_nlzc_d
:
2045 return DAG
.getNode(ISD::CTLZ
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2046 case Intrinsic::mips_nor_v
: {
2047 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2048 Op
->getOperand(1), Op
->getOperand(2));
2049 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2051 case Intrinsic::mips_nori_b
: {
2052 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2054 lowerMSASplatImm(Op
, 2, DAG
));
2055 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2057 case Intrinsic::mips_or_v
:
2058 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2060 case Intrinsic::mips_ori_b
:
2061 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2062 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2063 case Intrinsic::mips_pckev_b
:
2064 case Intrinsic::mips_pckev_h
:
2065 case Intrinsic::mips_pckev_w
:
2066 case Intrinsic::mips_pckev_d
:
2067 return DAG
.getNode(MipsISD::PCKEV
, DL
, Op
->getValueType(0),
2068 Op
->getOperand(1), Op
->getOperand(2));
2069 case Intrinsic::mips_pckod_b
:
2070 case Intrinsic::mips_pckod_h
:
2071 case Intrinsic::mips_pckod_w
:
2072 case Intrinsic::mips_pckod_d
:
2073 return DAG
.getNode(MipsISD::PCKOD
, DL
, Op
->getValueType(0),
2074 Op
->getOperand(1), Op
->getOperand(2));
2075 case Intrinsic::mips_pcnt_b
:
2076 case Intrinsic::mips_pcnt_h
:
2077 case Intrinsic::mips_pcnt_w
:
2078 case Intrinsic::mips_pcnt_d
:
2079 return DAG
.getNode(ISD::CTPOP
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2080 case Intrinsic::mips_sat_s_b
:
2081 case Intrinsic::mips_sat_s_h
:
2082 case Intrinsic::mips_sat_s_w
:
2083 case Intrinsic::mips_sat_s_d
:
2084 case Intrinsic::mips_sat_u_b
:
2085 case Intrinsic::mips_sat_u_h
:
2086 case Intrinsic::mips_sat_u_w
:
2087 case Intrinsic::mips_sat_u_d
: {
2088 // Report an error for out of range values.
2090 switch (Intrinsic
) {
2091 case Intrinsic::mips_sat_s_b
:
2092 case Intrinsic::mips_sat_u_b
: Max
= 7; break;
2093 case Intrinsic::mips_sat_s_h
:
2094 case Intrinsic::mips_sat_u_h
: Max
= 15; break;
2095 case Intrinsic::mips_sat_s_w
:
2096 case Intrinsic::mips_sat_u_w
: Max
= 31; break;
2097 case Intrinsic::mips_sat_s_d
:
2098 case Intrinsic::mips_sat_u_d
: Max
= 63; break;
2099 default: llvm_unreachable("Unmatched intrinsic");
2101 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2102 if (Value
< 0 || Value
> Max
)
2103 report_fatal_error("Immediate out of range");
2106 case Intrinsic::mips_shf_b
:
2107 case Intrinsic::mips_shf_h
:
2108 case Intrinsic::mips_shf_w
: {
2109 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2110 if (Value
< 0 || Value
> 255)
2111 report_fatal_error("Immediate out of range");
2112 return DAG
.getNode(MipsISD::SHF
, DL
, Op
->getValueType(0),
2113 Op
->getOperand(2), Op
->getOperand(1));
2115 case Intrinsic::mips_sldi_b
:
2116 case Intrinsic::mips_sldi_h
:
2117 case Intrinsic::mips_sldi_w
:
2118 case Intrinsic::mips_sldi_d
: {
2119 // Report an error for out of range values.
2121 switch (Intrinsic
) {
2122 case Intrinsic::mips_sldi_b
: Max
= 15; break;
2123 case Intrinsic::mips_sldi_h
: Max
= 7; break;
2124 case Intrinsic::mips_sldi_w
: Max
= 3; break;
2125 case Intrinsic::mips_sldi_d
: Max
= 1; break;
2126 default: llvm_unreachable("Unmatched intrinsic");
2128 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(3))->getSExtValue();
2129 if (Value
< 0 || Value
> Max
)
2130 report_fatal_error("Immediate out of range");
2133 case Intrinsic::mips_sll_b
:
2134 case Intrinsic::mips_sll_h
:
2135 case Intrinsic::mips_sll_w
:
2136 case Intrinsic::mips_sll_d
:
2137 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2138 truncateVecElts(Op
, DAG
));
2139 case Intrinsic::mips_slli_b
:
2140 case Intrinsic::mips_slli_h
:
2141 case Intrinsic::mips_slli_w
:
2142 case Intrinsic::mips_slli_d
:
2143 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0),
2144 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2145 case Intrinsic::mips_splat_b
:
2146 case Intrinsic::mips_splat_h
:
2147 case Intrinsic::mips_splat_w
:
2148 case Intrinsic::mips_splat_d
:
2149 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2150 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2151 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2152 // Instead we lower to MipsISD::VSHF and match from there.
2153 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2154 lowerMSASplatZExt(Op
, 2, DAG
), Op
->getOperand(1),
2156 case Intrinsic::mips_splati_b
:
2157 case Intrinsic::mips_splati_h
:
2158 case Intrinsic::mips_splati_w
:
2159 case Intrinsic::mips_splati_d
:
2160 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2161 lowerMSASplatImm(Op
, 2, DAG
), Op
->getOperand(1),
2163 case Intrinsic::mips_sra_b
:
2164 case Intrinsic::mips_sra_h
:
2165 case Intrinsic::mips_sra_w
:
2166 case Intrinsic::mips_sra_d
:
2167 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2168 truncateVecElts(Op
, DAG
));
2169 case Intrinsic::mips_srai_b
:
2170 case Intrinsic::mips_srai_h
:
2171 case Intrinsic::mips_srai_w
:
2172 case Intrinsic::mips_srai_d
:
2173 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0),
2174 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2175 case Intrinsic::mips_srari_b
:
2176 case Intrinsic::mips_srari_h
:
2177 case Intrinsic::mips_srari_w
:
2178 case Intrinsic::mips_srari_d
: {
2179 // Report an error for out of range values.
2181 switch (Intrinsic
) {
2182 case Intrinsic::mips_srari_b
: Max
= 7; break;
2183 case Intrinsic::mips_srari_h
: Max
= 15; break;
2184 case Intrinsic::mips_srari_w
: Max
= 31; break;
2185 case Intrinsic::mips_srari_d
: Max
= 63; break;
2186 default: llvm_unreachable("Unmatched intrinsic");
2188 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2189 if (Value
< 0 || Value
> Max
)
2190 report_fatal_error("Immediate out of range");
2193 case Intrinsic::mips_srl_b
:
2194 case Intrinsic::mips_srl_h
:
2195 case Intrinsic::mips_srl_w
:
2196 case Intrinsic::mips_srl_d
:
2197 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2198 truncateVecElts(Op
, DAG
));
2199 case Intrinsic::mips_srli_b
:
2200 case Intrinsic::mips_srli_h
:
2201 case Intrinsic::mips_srli_w
:
2202 case Intrinsic::mips_srli_d
:
2203 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0),
2204 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2205 case Intrinsic::mips_srlri_b
:
2206 case Intrinsic::mips_srlri_h
:
2207 case Intrinsic::mips_srlri_w
:
2208 case Intrinsic::mips_srlri_d
: {
2209 // Report an error for out of range values.
2211 switch (Intrinsic
) {
2212 case Intrinsic::mips_srlri_b
: Max
= 7; break;
2213 case Intrinsic::mips_srlri_h
: Max
= 15; break;
2214 case Intrinsic::mips_srlri_w
: Max
= 31; break;
2215 case Intrinsic::mips_srlri_d
: Max
= 63; break;
2216 default: llvm_unreachable("Unmatched intrinsic");
2218 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2219 if (Value
< 0 || Value
> Max
)
2220 report_fatal_error("Immediate out of range");
2223 case Intrinsic::mips_subv_b
:
2224 case Intrinsic::mips_subv_h
:
2225 case Intrinsic::mips_subv_w
:
2226 case Intrinsic::mips_subv_d
:
2227 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2229 case Intrinsic::mips_subvi_b
:
2230 case Intrinsic::mips_subvi_h
:
2231 case Intrinsic::mips_subvi_w
:
2232 case Intrinsic::mips_subvi_d
:
2233 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0),
2234 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2235 case Intrinsic::mips_vshf_b
:
2236 case Intrinsic::mips_vshf_h
:
2237 case Intrinsic::mips_vshf_w
:
2238 case Intrinsic::mips_vshf_d
:
2239 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2240 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
2241 case Intrinsic::mips_xor_v
:
2242 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2244 case Intrinsic::mips_xori_b
:
2245 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0),
2246 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2247 case Intrinsic::thread_pointer
: {
2248 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2249 return DAG
.getNode(MipsISD::ThreadPointer
, DL
, PtrVT
);
2254 static SDValue
lowerMSALoadIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Intr
,
2255 const MipsSubtarget
&Subtarget
) {
2257 SDValue ChainIn
= Op
->getOperand(0);
2258 SDValue Address
= Op
->getOperand(2);
2259 SDValue Offset
= Op
->getOperand(3);
2260 EVT ResTy
= Op
->getValueType(0);
2261 EVT PtrTy
= Address
->getValueType(0);
2263 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2264 // however takes an i32 signed constant offset. The actual type of the
2265 // intrinsic is a scaled signed i10.
2266 if (Subtarget
.isABI_N64())
2267 Offset
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, PtrTy
, Offset
);
2269 Address
= DAG
.getNode(ISD::ADD
, DL
, PtrTy
, Address
, Offset
);
2270 return DAG
.getLoad(ResTy
, DL
, ChainIn
, Address
, MachinePointerInfo(),
2271 /* Alignment = */ 16);
2274 SDValue
MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op
,
2275 SelectionDAG
&DAG
) const {
2276 unsigned Intr
= cast
<ConstantSDNode
>(Op
->getOperand(1))->getZExtValue();
2280 case Intrinsic::mips_extp
:
2281 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTP
);
2282 case Intrinsic::mips_extpdp
:
2283 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTPDP
);
2284 case Intrinsic::mips_extr_w
:
2285 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_W
);
2286 case Intrinsic::mips_extr_r_w
:
2287 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_R_W
);
2288 case Intrinsic::mips_extr_rs_w
:
2289 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_RS_W
);
2290 case Intrinsic::mips_extr_s_h
:
2291 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_S_H
);
2292 case Intrinsic::mips_mthlip
:
2293 return lowerDSPIntr(Op
, DAG
, MipsISD::MTHLIP
);
2294 case Intrinsic::mips_mulsaq_s_w_ph
:
2295 return lowerDSPIntr(Op
, DAG
, MipsISD::MULSAQ_S_W_PH
);
2296 case Intrinsic::mips_maq_s_w_phl
:
2297 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_S_W_PHL
);
2298 case Intrinsic::mips_maq_s_w_phr
:
2299 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_S_W_PHR
);
2300 case Intrinsic::mips_maq_sa_w_phl
:
2301 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_SA_W_PHL
);
2302 case Intrinsic::mips_maq_sa_w_phr
:
2303 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_SA_W_PHR
);
2304 case Intrinsic::mips_dpaq_s_w_ph
:
2305 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQ_S_W_PH
);
2306 case Intrinsic::mips_dpsq_s_w_ph
:
2307 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQ_S_W_PH
);
2308 case Intrinsic::mips_dpaq_sa_l_w
:
2309 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQ_SA_L_W
);
2310 case Intrinsic::mips_dpsq_sa_l_w
:
2311 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQ_SA_L_W
);
2312 case Intrinsic::mips_dpaqx_s_w_ph
:
2313 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQX_S_W_PH
);
2314 case Intrinsic::mips_dpaqx_sa_w_ph
:
2315 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQX_SA_W_PH
);
2316 case Intrinsic::mips_dpsqx_s_w_ph
:
2317 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQX_S_W_PH
);
2318 case Intrinsic::mips_dpsqx_sa_w_ph
:
2319 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQX_SA_W_PH
);
2320 case Intrinsic::mips_ld_b
:
2321 case Intrinsic::mips_ld_h
:
2322 case Intrinsic::mips_ld_w
:
2323 case Intrinsic::mips_ld_d
:
2324 return lowerMSALoadIntr(Op
, DAG
, Intr
, Subtarget
);
2328 static SDValue
lowerMSAStoreIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Intr
,
2329 const MipsSubtarget
&Subtarget
) {
2331 SDValue ChainIn
= Op
->getOperand(0);
2332 SDValue Value
= Op
->getOperand(2);
2333 SDValue Address
= Op
->getOperand(3);
2334 SDValue Offset
= Op
->getOperand(4);
2335 EVT PtrTy
= Address
->getValueType(0);
2337 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2338 // however takes an i32 signed constant offset. The actual type of the
2339 // intrinsic is a scaled signed i10.
2340 if (Subtarget
.isABI_N64())
2341 Offset
= DAG
.getNode(ISD::SIGN_EXTEND
, DL
, PtrTy
, Offset
);
2343 Address
= DAG
.getNode(ISD::ADD
, DL
, PtrTy
, Address
, Offset
);
2345 return DAG
.getStore(ChainIn
, DL
, Value
, Address
, MachinePointerInfo(),
2346 /* Alignment = */ 16);
2349 SDValue
MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op
,
2350 SelectionDAG
&DAG
) const {
2351 unsigned Intr
= cast
<ConstantSDNode
>(Op
->getOperand(1))->getZExtValue();
2355 case Intrinsic::mips_st_b
:
2356 case Intrinsic::mips_st_h
:
2357 case Intrinsic::mips_st_w
:
2358 case Intrinsic::mips_st_d
:
2359 return lowerMSAStoreIntr(Op
, DAG
, Intr
, Subtarget
);
2363 /// Check if the given BuildVectorSDNode is a splat.
2364 /// This method currently relies on DAG nodes being reused when equivalent,
2365 /// so it's possible for this to return false even when isConstantSplat returns
2367 static bool isSplatVector(const BuildVectorSDNode
*N
) {
2368 unsigned int nOps
= N
->getNumOperands();
2369 assert(nOps
> 1 && "isSplatVector has 0 or 1 sized build vector");
2371 SDValue Operand0
= N
->getOperand(0);
2373 for (unsigned int i
= 1; i
< nOps
; ++i
) {
2374 if (N
->getOperand(i
) != Operand0
)
2381 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2383 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2384 // choose to sign-extend but we could have equally chosen zero-extend. The
2385 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2386 // result into this node later (possibly changing it to a zero-extend in the
2388 SDValue
MipsSETargetLowering::
2389 lowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const {
2391 EVT ResTy
= Op
->getValueType(0);
2392 SDValue Op0
= Op
->getOperand(0);
2393 EVT VecTy
= Op0
->getValueType(0);
2395 if (!VecTy
.is128BitVector())
2398 if (ResTy
.isInteger()) {
2399 SDValue Op1
= Op
->getOperand(1);
2400 EVT EltTy
= VecTy
.getVectorElementType();
2401 return DAG
.getNode(MipsISD::VEXTRACT_SEXT_ELT
, DL
, ResTy
, Op0
, Op1
,
2402 DAG
.getValueType(EltTy
));
2408 static bool isConstantOrUndef(const SDValue Op
) {
2411 if (isa
<ConstantSDNode
>(Op
))
2413 if (isa
<ConstantFPSDNode
>(Op
))
2418 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode
*Op
) {
2419 for (unsigned i
= 0; i
< Op
->getNumOperands(); ++i
)
2420 if (isConstantOrUndef(Op
->getOperand(i
)))
2425 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2428 // Lowers according to the following rules:
2429 // - Constant splats are legal as-is as long as the SplatBitSize is a power of
2430 // 2 less than or equal to 64 and the value fits into a signed 10-bit
2432 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2433 // is a power of 2 less than or equal to 64 and the value does not fit into a
2434 // signed 10-bit immediate
2435 // - Non-constant splats are legal as-is.
2436 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2437 // - All others are illegal and must be expanded.
2438 SDValue
MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op
,
2439 SelectionDAG
&DAG
) const {
2440 BuildVectorSDNode
*Node
= cast
<BuildVectorSDNode
>(Op
);
2441 EVT ResTy
= Op
->getValueType(0);
2443 APInt SplatValue
, SplatUndef
;
2444 unsigned SplatBitSize
;
2447 if (!Subtarget
.hasMSA() || !ResTy
.is128BitVector())
2450 if (Node
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
,
2452 !Subtarget
.isLittle()) && SplatBitSize
<= 64) {
2453 // We can only cope with 8, 16, 32, or 64-bit elements
2454 if (SplatBitSize
!= 8 && SplatBitSize
!= 16 && SplatBitSize
!= 32 &&
2458 // If the value isn't an integer type we will have to bitcast
2459 // from an integer type first. Also, if there are any undefs, we must
2460 // lower them to defined values first.
2461 if (ResTy
.isInteger() && !HasAnyUndefs
)
2466 switch (SplatBitSize
) {
2470 ViaVecTy
= MVT::v16i8
;
2473 ViaVecTy
= MVT::v8i16
;
2476 ViaVecTy
= MVT::v4i32
;
2479 // There's no fill.d to fall back on for 64-bit values
2483 // SelectionDAG::getConstant will promote SplatValue appropriately.
2484 SDValue Result
= DAG
.getConstant(SplatValue
, DL
, ViaVecTy
);
2486 // Bitcast to the type we originally wanted
2487 if (ViaVecTy
!= ResTy
)
2488 Result
= DAG
.getNode(ISD::BITCAST
, SDLoc(Node
), ResTy
, Result
);
2491 } else if (isSplatVector(Node
))
2493 else if (!isConstantOrUndefBUILD_VECTOR(Node
)) {
2494 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2495 // The resulting code is the same length as the expansion, but it doesn't
2496 // use memory operations
2497 EVT ResTy
= Node
->getValueType(0);
2499 assert(ResTy
.isVector());
2501 unsigned NumElts
= ResTy
.getVectorNumElements();
2502 SDValue Vector
= DAG
.getUNDEF(ResTy
);
2503 for (unsigned i
= 0; i
< NumElts
; ++i
) {
2504 Vector
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, ResTy
, Vector
,
2505 Node
->getOperand(i
),
2506 DAG
.getConstant(i
, DL
, MVT::i32
));
2514 // Lower VECTOR_SHUFFLE into SHF (if possible).
2516 // SHF splits the vector into blocks of four elements, then shuffles these
2517 // elements according to a <4 x i2> constant (encoded as an integer immediate).
2519 // It is therefore possible to lower into SHF when the mask takes the form:
2520 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2521 // When undef's appear they are treated as if they were whatever value is
2522 // necessary in order to fit the above forms.
2525 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2526 // <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2527 // i32 7, i32 6, i32 5, i32 4>
2529 // (SHF_H $w0, $w1, 27)
2530 // where the 27 comes from:
2531 // 3 + (2 << 2) + (1 << 4) + (0 << 6)
2532 static SDValue
lowerVECTOR_SHUFFLE_SHF(SDValue Op
, EVT ResTy
,
2533 SmallVector
<int, 16> Indices
,
2534 SelectionDAG
&DAG
) {
2535 int SHFIndices
[4] = { -1, -1, -1, -1 };
2537 if (Indices
.size() < 4)
2540 for (unsigned i
= 0; i
< 4; ++i
) {
2541 for (unsigned j
= i
; j
< Indices
.size(); j
+= 4) {
2542 int Idx
= Indices
[j
];
2544 // Convert from vector index to 4-element subvector index
2545 // If an index refers to an element outside of the subvector then give up
2548 if (Idx
< 0 || Idx
>= 4)
2552 // If the mask has an undef, replace it with the current index.
2553 // Note that it might still be undef if the current index is also undef
2554 if (SHFIndices
[i
] == -1)
2555 SHFIndices
[i
] = Idx
;
2557 // Check that non-undef values are the same as in the mask. If they
2558 // aren't then give up
2559 if (!(Idx
== -1 || Idx
== SHFIndices
[i
]))
2564 // Calculate the immediate. Replace any remaining undefs with zero
2566 for (int i
= 3; i
>= 0; --i
) {
2567 int Idx
= SHFIndices
[i
];
2577 return DAG
.getNode(MipsISD::SHF
, DL
, ResTy
,
2578 DAG
.getConstant(Imm
, DL
, MVT::i32
), Op
->getOperand(0));
2581 /// Determine whether a range fits a regular pattern of values.
2582 /// This function accounts for the possibility of jumping over the End iterator.
2583 template <typename ValType
>
2585 fitsRegularPattern(typename SmallVectorImpl
<ValType
>::const_iterator Begin
,
2586 unsigned CheckStride
,
2587 typename SmallVectorImpl
<ValType
>::const_iterator End
,
2588 ValType ExpectedIndex
, unsigned ExpectedIndexStride
) {
2592 if (*I
!= -1 && *I
!= ExpectedIndex
)
2594 ExpectedIndex
+= ExpectedIndexStride
;
2596 // Incrementing past End is undefined behaviour so we must increment one
2597 // step at a time and check for End at each step.
2598 for (unsigned n
= 0; n
< CheckStride
&& I
!= End
; ++n
, ++I
)
2599 ; // Empty loop body.
2604 // Determine whether VECTOR_SHUFFLE is a SPLATI.
2606 // It is a SPLATI when the mask is:
2608 // where x is any valid index.
2610 // When undef's appear in the mask they are treated as if they were whatever
2611 // value is necessary in order to fit the above form.
2612 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op
, EVT ResTy
,
2613 SmallVector
<int, 16> Indices
,
2614 SelectionDAG
&DAG
) {
2615 assert((Indices
.size() % 2) == 0);
2617 int SplatIndex
= -1;
2618 for (const auto &V
: Indices
) {
2625 return fitsRegularPattern
<int>(Indices
.begin(), 1, Indices
.end(), SplatIndex
,
2629 // Lower VECTOR_SHUFFLE into ILVEV (if possible).
2631 // ILVEV interleaves the even elements from each vector.
2633 // It is possible to lower into ILVEV when the mask consists of two of the
2634 // following forms interleaved:
2636 // <n, n+2, n+4, ...>
2637 // where n is the number of elements in the vector.
2639 // <0, 0, 2, 2, 4, 4, ...>
2640 // <0, n, 2, n+2, 4, n+4, ...>
2642 // When undef's appear in the mask they are treated as if they were whatever
2643 // value is necessary in order to fit the above forms.
2644 static SDValue
lowerVECTOR_SHUFFLE_ILVEV(SDValue Op
, EVT ResTy
,
2645 SmallVector
<int, 16> Indices
,
2646 SelectionDAG
&DAG
) {
2647 assert((Indices
.size() % 2) == 0);
2651 const auto &Begin
= Indices
.begin();
2652 const auto &End
= Indices
.end();
2654 // Check even elements are taken from the even elements of one half or the
2655 // other and pick an operand accordingly.
2656 if (fitsRegularPattern
<int>(Begin
, 2, End
, 0, 2))
2657 Wt
= Op
->getOperand(0);
2658 else if (fitsRegularPattern
<int>(Begin
, 2, End
, Indices
.size(), 2))
2659 Wt
= Op
->getOperand(1);
2663 // Check odd elements are taken from the even elements of one half or the
2664 // other and pick an operand accordingly.
2665 if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, 0, 2))
2666 Ws
= Op
->getOperand(0);
2667 else if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, Indices
.size(), 2))
2668 Ws
= Op
->getOperand(1);
2672 return DAG
.getNode(MipsISD::ILVEV
, SDLoc(Op
), ResTy
, Ws
, Wt
);
2675 // Lower VECTOR_SHUFFLE into ILVOD (if possible).
2677 // ILVOD interleaves the odd elements from each vector.
2679 // It is possible to lower into ILVOD when the mask consists of two of the
2680 // following forms interleaved:
2682 // <n+1, n+3, n+5, ...>
2683 // where n is the number of elements in the vector.
2685 // <1, 1, 3, 3, 5, 5, ...>
2686 // <1, n+1, 3, n+3, 5, n+5, ...>
2688 // When undef's appear in the mask they are treated as if they were whatever
2689 // value is necessary in order to fit the above forms.
// Tries to express the shuffle Op (result type ResTy, mask Indices) as a
// single MipsISD::ILVOD node created in DAG.
//
// Mask positions 0, 2, 4, ... pick Wt and positions 1, 3, 5, ... pick Ws.
// Each half may refer to operand 0 of Op (expected indices starting at 1) or
// operand 1 (expected indices starting at Indices.size() + 1), advancing by 2.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2690 static SDValue
lowerVECTOR_SHUFFLE_ILVOD(SDValue Op
, EVT ResTy
,
2691 SmallVector
<int, 16> Indices
,
2692 SelectionDAG
&DAG
) {
// The interleave patterns only make sense for an even number of elements.
2693 assert((Indices
.size() % 2) == 0);
2697 const auto &Begin
= Indices
.begin();
2698 const auto &End
= Indices
.end();
2700 // Check even elements are taken from the odd elements of one half or the
2701 // other and pick an operand accordingly.
2702 if (fitsRegularPattern
<int>(Begin
, 2, End
, 1, 2))
2703 Wt
= Op
->getOperand(0);
2704 else if (fitsRegularPattern
<int>(Begin
, 2, End
, Indices
.size() + 1, 2))
2705 Wt
= Op
->getOperand(1);
2709 // Check odd elements are taken from the odd elements of one half or the
2710 // other and pick an operand accordingly.
2711 if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, 1, 2))
2712 Ws
= Op
->getOperand(0);
2713 else if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, Indices
.size() + 1, 2))
2714 Ws
= Op
->getOperand(1);
// NOTE(review): the operands are passed as (Wt, Ws) here, whereas the ILVEV,
// ILVR, ILVL, PCKEV and PCKOD helpers in this file pass (Ws, Wt) -- confirm
// the asymmetry is intentional.
2718 return DAG
.getNode(MipsISD::ILVOD
, SDLoc(Op
), ResTy
, Wt
, Ws
);
2721 // Lower VECTOR_SHUFFLE into ILVR (if possible).
2723 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2726 // It is possible to lower into ILVR when the mask consists of two of the
2727 // following forms interleaved:
2729 // <n, n+1, n+2, ...>
2730 // where n is the number of elements in the vector.
2732 // <0, 0, 1, 1, 2, 2, ...>
2733 // <0, n, 1, n+1, 2, n+2, ...>
2735 // When undef's appear in the mask they are treated as if they were whatever
2736 // value is necessary in order to fit the above forms.
// Tries to express the shuffle Op (result type ResTy, mask Indices) as a
// single MipsISD::ILVR node created in DAG.
//
// Mask positions 0, 2, 4, ... pick Wt and positions 1, 3, 5, ... pick Ws.
// Each half may refer to operand 0 of Op (expected indices starting at 0) or
// operand 1 (expected indices starting at Indices.size()), advancing by 1.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2737 static SDValue
lowerVECTOR_SHUFFLE_ILVR(SDValue Op
, EVT ResTy
,
2738 SmallVector
<int, 16> Indices
,
2739 SelectionDAG
&DAG
) {
// The interleave patterns only make sense for an even number of elements.
2740 assert((Indices
.size() % 2) == 0);
2744 const auto &Begin
= Indices
.begin();
2745 const auto &End
= Indices
.end();
2747 // Check even elements are taken from the right (lowest-indexed) elements of
2748 // one half or the other and pick an operand accordingly.
2749 if (fitsRegularPattern
<int>(Begin
, 2, End
, 0, 1))
2750 Wt
= Op
->getOperand(0);
2751 else if (fitsRegularPattern
<int>(Begin
, 2, End
, Indices
.size(), 1))
2752 Wt
= Op
->getOperand(1);
2756 // Check odd elements are taken from the right (lowest-indexed) elements of
2757 // one half or the other and pick an operand accordingly.
2758 if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, 0, 1))
2759 Ws
= Op
->getOperand(0);
2760 else if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, Indices
.size(), 1))
2761 Ws
= Op
->getOperand(1);
// Both halves matched: emit the node with the operands ordered (Ws, Wt).
2765 return DAG
.getNode(MipsISD::ILVR
, SDLoc(Op
), ResTy
, Ws
, Wt
);
2768 // Lower VECTOR_SHUFFLE into ILVL (if possible).
2770 // ILVL interleaves consecutive elements from the left (highest-indexed) half
2773 // It is possible to lower into ILVL when the mask consists of two of the
2774 // following forms interleaved:
2775 // <x, x+1, x+2, ...>
2776 // <n+x, n+x+1, n+x+2, ...>
2777 // where n is the number of elements in the vector and x is half n.
2779 // <x, x, x+1, x+1, x+2, x+2, ...>
2780 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2782 // When undef's appear in the mask they are treated as if they were whatever
2783 // value is necessary in order to fit the above forms.
// Tries to express the shuffle Op (result type ResTy, mask Indices) as a
// single MipsISD::ILVL node created in DAG.
//
// Mask positions 0, 2, 4, ... pick Wt and positions 1, 3, 5, ... pick Ws.
// Each half may refer to operand 0 of Op (expected indices starting at
// HalfSize) or operand 1 (expected indices starting at
// Indices.size() + HalfSize), advancing by 1.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2784 static SDValue
lowerVECTOR_SHUFFLE_ILVL(SDValue Op
, EVT ResTy
,
2785 SmallVector
<int, 16> Indices
,
2786 SelectionDAG
&DAG
) {
// The interleave patterns only make sense for an even number of elements.
2787 assert((Indices
.size() % 2) == 0);
// Index of the first element of the upper half of a source vector.
2789 unsigned HalfSize
= Indices
.size() / 2;
2792 const auto &Begin
= Indices
.begin();
2793 const auto &End
= Indices
.end();
2795 // Check even elements are taken from the left (highest-indexed) elements of
2796 // one half or the other and pick an operand accordingly.
2797 if (fitsRegularPattern
<int>(Begin
, 2, End
, HalfSize
, 1))
2798 Wt
= Op
->getOperand(0);
2799 else if (fitsRegularPattern
<int>(Begin
, 2, End
, Indices
.size() + HalfSize
, 1))
2800 Wt
= Op
->getOperand(1);
2804 // Check odd elements are taken from the left (highest-indexed) elements of
2805 // one half or the other and pick an operand accordingly.
2806 if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, HalfSize
, 1))
2807 Ws
= Op
->getOperand(0);
2808 else if (fitsRegularPattern
<int>(Begin
+ 1, 2, End
, Indices
.size() + HalfSize
,
2810 Ws
= Op
->getOperand(1);
// Both halves matched: emit the node with the operands ordered (Ws, Wt).
2814 return DAG
.getNode(MipsISD::ILVL
, SDLoc(Op
), ResTy
, Ws
, Wt
);
2817 // Lower VECTOR_SHUFFLE into PCKEV (if possible).
2819 // PCKEV copies the even elements of each vector into the result vector.
2821 // It is possible to lower into PCKEV when the mask consists of two of the
2822 // following forms concatenated:
2824 // <n, n+2, n+4, ...>
2825 // where n is the number of elements in the vector.
2827 // <0, 2, 4, ..., 0, 2, 4, ...>
2828 // <0, 2, 4, ..., n, n+2, n+4, ...>
2830 // When undef's appear in the mask they are treated as if they were whatever
2831 // value is necessary in order to fit the above forms.
// Tries to express the shuffle Op (result type ResTy, mask Indices) as a
// single MipsISD::PCKEV node created in DAG.
//
// Unlike the interleave helpers, the mask is split into two contiguous halves
// at Mid: [Begin, Mid) picks Wt and [Mid, End) picks Ws. Each half may refer
// to operand 0 of Op (expected indices starting at 0) or operand 1 (expected
// indices starting at Indices.size()), advancing by 2.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2832 static SDValue
lowerVECTOR_SHUFFLE_PCKEV(SDValue Op
, EVT ResTy
,
2833 SmallVector
<int, 16> Indices
,
2834 SelectionDAG
&DAG
) {
// An even element count guarantees the two halves are the same length.
2835 assert((Indices
.size() % 2) == 0);
2839 const auto &Begin
= Indices
.begin();
2840 const auto &Mid
= Indices
.begin() + Indices
.size() / 2;
2841 const auto &End
= Indices
.end();
// First half of the mask: even elements of operand 0 or of operand 1.
2843 if (fitsRegularPattern
<int>(Begin
, 1, Mid
, 0, 2))
2844 Wt
= Op
->getOperand(0);
2845 else if (fitsRegularPattern
<int>(Begin
, 1, Mid
, Indices
.size(), 2))
2846 Wt
= Op
->getOperand(1);
// Second half of the mask: even elements of operand 0 or of operand 1.
2850 if (fitsRegularPattern
<int>(Mid
, 1, End
, 0, 2))
2851 Ws
= Op
->getOperand(0);
2852 else if (fitsRegularPattern
<int>(Mid
, 1, End
, Indices
.size(), 2))
2853 Ws
= Op
->getOperand(1);
// Both halves matched: emit the node with the operands ordered (Ws, Wt).
2857 return DAG
.getNode(MipsISD::PCKEV
, SDLoc(Op
), ResTy
, Ws
, Wt
);
2860 // Lower VECTOR_SHUFFLE into PCKOD (if possible).
2862 // PCKOD copies the odd elements of each vector into the result vector.
2864 // It is possible to lower into PCKOD when the mask consists of two of the
2865 // following forms concatenated:
2867 // <n+1, n+3, n+5, ...>
2868 // where n is the number of elements in the vector.
2870 // <1, 3, 5, ..., 1, 3, 5, ...>
2871 // <1, 3, 5, ..., n+1, n+3, n+5, ...>
2873 // When undef's appear in the mask they are treated as if they were whatever
2874 // value is necessary in order to fit the above forms.
// Tries to express the shuffle Op (result type ResTy, mask Indices) as a
// single MipsISD::PCKOD node created in DAG.
//
// The mask is split into two contiguous halves at Mid: [Begin, Mid) picks Wt
// and [Mid, End) picks Ws. Each half may refer to operand 0 of Op (expected
// indices starting at 1) or operand 1 (expected indices starting at
// Indices.size() + 1), advancing by 2.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2875 static SDValue
lowerVECTOR_SHUFFLE_PCKOD(SDValue Op
, EVT ResTy
,
2876 SmallVector
<int, 16> Indices
,
2877 SelectionDAG
&DAG
) {
// An even element count guarantees the two halves are the same length.
2878 assert((Indices
.size() % 2) == 0);
2882 const auto &Begin
= Indices
.begin();
2883 const auto &Mid
= Indices
.begin() + Indices
.size() / 2;
2884 const auto &End
= Indices
.end();
// First half of the mask: odd elements of operand 0 or of operand 1.
2886 if (fitsRegularPattern
<int>(Begin
, 1, Mid
, 1, 2))
2887 Wt
= Op
->getOperand(0);
2888 else if (fitsRegularPattern
<int>(Begin
, 1, Mid
, Indices
.size() + 1, 2))
2889 Wt
= Op
->getOperand(1);
// Second half of the mask: odd elements of operand 0 or of operand 1.
2893 if (fitsRegularPattern
<int>(Mid
, 1, End
, 1, 2))
2894 Ws
= Op
->getOperand(0);
2895 else if (fitsRegularPattern
<int>(Mid
, 1, End
, Indices
.size() + 1, 2))
2896 Ws
= Op
->getOperand(1);
// Both halves matched: emit the node with the operands ordered (Ws, Wt).
2900 return DAG
.getNode(MipsISD::PCKOD
, SDLoc(Op
), ResTy
, Ws
, Wt
);
2903 // Lower VECTOR_SHUFFLE into VSHF.
2905 // This mostly consists of converting the shuffle indices in Indices into a
2906 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2907 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2908 // if the type is v8i16 and all the indices are less than 8 then the second
2909 // operand is unused and can be replaced with anything. We choose to replace it
2910 // with the used operand since this reduces the number of instructions overall.
// Lowers the shuffle Op (result type ResTy, mask Indices) to a MipsISD::VSHF
// node created in DAG.
//
// The mask is turned into a constant BUILD_VECTOR whose element type is the
// integer equivalent of ResTy's element type, and the source operands are
// chosen according to which of the two input vectors the mask refers to.
// NOTE(review): Indices is received by value, so the mask is copied on every
// call; nothing visible here modifies it.
2911 static SDValue
lowerVECTOR_SHUFFLE_VSHF(SDValue Op
, EVT ResTy
,
2912 SmallVector
<int, 16> Indices
,
2913 SelectionDAG
&DAG
) {
// Collects one constant per mask element for the BUILD_VECTOR below.
2914 SmallVector
<SDValue
, 16> Ops
;
2917 EVT MaskVecTy
= ResTy
.changeVectorElementTypeToInteger();
2918 EVT MaskEltTy
= MaskVecTy
.getVectorElementType();
2919 bool Using1stVec
= false;
2920 bool Using2ndVec
= false;
2922 int ResTyNumElts
= ResTy
.getVectorNumElements();
// Classify every mask index: [0, N) refers to the first source vector and
// [N, 2N) to the second, where N is ResTyNumElts.
2924 for (int i
= 0; i
< ResTyNumElts
; ++i
) {
2925 // Idx == -1 means UNDEF
2926 int Idx
= Indices
[i
];
2928 if (0 <= Idx
&& Idx
< ResTyNumElts
)
2930 if (ResTyNumElts
<= Idx
&& Idx
< ResTyNumElts
* 2)
// Materialise each mask index as a target constant of the mask element type.
2934 for (SmallVector
<int, 16>::iterator I
= Indices
.begin(); I
!= Indices
.end();
2936 Ops
.push_back(DAG
.getTargetConstant(*I
, DL
, MaskEltTy
));
2938 SDValue MaskVec
= DAG
.getBuildVector(MaskVecTy
, DL
, Ops
);
// When only one source vector is referenced, use it for both operands so the
// unused one does not have to be kept alive.
2940 if (Using1stVec
&& Using2ndVec
) {
2941 Op0
= Op
->getOperand(0);
2942 Op1
= Op
->getOperand(1);
2943 } else if (Using1stVec
)
2944 Op0
= Op1
= Op
->getOperand(0);
2945 else if (Using2ndVec
)
2946 Op0
= Op1
= Op
->getOperand(1);
2948 llvm_unreachable("shuffle vector mask references neither vector operand?");
2950 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2951 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2952 // VSHF concatenates the vectors in a bitwise fashion:
2953 // <0b00, 0b01> + <0b10, 0b11> ->
2954 // 0b0100 + 0b1110 -> 0b01001110
2955 // <0b10, 0b11, 0b00, 0b01>
2956 // We must therefore swap the operands to get the correct result.
2957 return DAG
.getNode(MipsISD::VSHF
, DL
, ResTy
, MaskVec
, Op1
, Op0
);
2960 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2961 // indices in the shuffle.
// Entry point for lowering a VECTOR_SHUFFLE node Op in DAG.
//
// The mask is copied out of the ShuffleVectorSDNode and offered to each
// specialised helper in turn (ILVEV, ILVOD, ILVL, ILVR, PCKEV, PCKOD, SHF);
// lowerVECTOR_SHUFFLE_VSHF is the final fallback and is also used directly
// when the mask is recognised as a splat by isVECTOR_SHUFFLE_SPLATI.
2962 SDValue
MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op
,
2963 SelectionDAG
&DAG
) const {
2964 ShuffleVectorSDNode
*Node
= cast
<ShuffleVectorSDNode
>(Op
);
2965 EVT ResTy
= Op
->getValueType(0);
// Only 128-bit vector results are handled by the code below.
2967 if (!ResTy
.is128BitVector())
2970 int ResTyNumElts
= ResTy
.getVectorNumElements();
2971 SmallVector
<int, 16> Indices
;
// Copy the shuffle mask into a local vector for the helpers.
2973 for (int i
= 0; i
< ResTyNumElts
; ++i
)
2974 Indices
.push_back(Node
->getMaskElt(i
));
2976 // splati.[bhwd] is preferable to the others but is matched from
2978 if (isVECTOR_SHUFFLE_SPLATI(Op
, ResTy
, Indices
, DAG
))
2979 return lowerVECTOR_SHUFFLE_VSHF(Op
, ResTy
, Indices
, DAG
);
// Try the single-instruction forms in order; a helper that cannot match
// yields a null SDValue, which makes the condition false.
2981 if ((Result
= lowerVECTOR_SHUFFLE_ILVEV(Op
, ResTy
, Indices
, DAG
)))
2983 if ((Result
= lowerVECTOR_SHUFFLE_ILVOD(Op
, ResTy
, Indices
, DAG
)))
2985 if ((Result
= lowerVECTOR_SHUFFLE_ILVL(Op
, ResTy
, Indices
, DAG
)))
2987 if ((Result
= lowerVECTOR_SHUFFLE_ILVR(Op
, ResTy
, Indices
, DAG
)))
2989 if ((Result
= lowerVECTOR_SHUFFLE_PCKEV(Op
, ResTy
, Indices
, DAG
)))
2991 if ((Result
= lowerVECTOR_SHUFFLE_PCKOD(Op
, ResTy
, Indices
, DAG
)))
2993 if ((Result
= lowerVECTOR_SHUFFLE_SHF(Op
, ResTy
, Indices
, DAG
)))
// Nothing more specific matched: fall back to the general VSHF lowering.
2995 return lowerVECTOR_SHUFFLE_VSHF(Op
, ResTy
, Indices
, DAG
);
// Expands the bposge32 pseudo instruction MI, located in BB.
//
// Three new blocks are created (FBB, TBB, Sink). Everything after MI moves to
// Sink together with BB's successor edges; BB then gets FBB and TBB as
// successors, and both of those lead to Sink. FBB materialises 0 and TBB
// materialises 1 (ADDiu from $zero), and a PHI at the start of Sink defines
// the pseudo's result register (operand 0 of MI).
2999 MipsSETargetLowering::emitBPOSGE32(MachineInstr
&MI
,
3000 MachineBasicBlock
*BB
) const {
3002 // bposge32_pseudo $vr0
3012 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3014 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3015 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3016 const TargetRegisterClass
*RC
= &Mips::GPR32RegClass
;
3017 DebugLoc DL
= MI
.getDebugLoc();
3018 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
// Insertion point for the new blocks: immediately after BB in the function.
3019 MachineFunction::iterator It
= std::next(MachineFunction::iterator(BB
));
3020 MachineFunction
*F
= BB
->getParent();
3021 MachineBasicBlock
*FBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
3022 MachineBasicBlock
*TBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
3023 MachineBasicBlock
*Sink
= F
->CreateMachineBasicBlock(LLVM_BB
);
3026 F
->insert(It
, Sink
);
3028 // Transfer the remainder of BB and its successor edges to Sink.
3029 Sink
->splice(Sink
->begin(), BB
, std::next(MachineBasicBlock::iterator(MI
)),
3031 Sink
->transferSuccessorsAndUpdatePHIs(BB
);
// Wire up the diamond: BB -> {FBB, TBB} -> Sink.
3034 BB
->addSuccessor(FBB
);
3035 BB
->addSuccessor(TBB
);
3036 FBB
->addSuccessor(Sink
);
3037 TBB
->addSuccessor(Sink
);
3039 // Insert the real bposge32 instruction to $BB.
3040 BuildMI(BB
, DL
, TII
->get(Mips::BPOSGE32
)).addMBB(TBB
);
// NOTE(review): no guard is visible between the BPOSGE32 emitted above and
// the BPOSGE32C_MMR3 emitted below, so both branches appear to be appended to
// BB -- confirm whether the opcode should instead be selected according to
// whether the subtarget is in microMIPS mode.
3041 // Insert the real bposge32c instruction to $BB.
3042 BuildMI(BB
, DL
, TII
->get(Mips::BPOSGE32C_MMR3
)).addMBB(TBB
);
// False path: result value 0, then an unconditional branch to Sink.
3045 unsigned VR2
= RegInfo
.createVirtualRegister(RC
);
3046 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::ADDiu
), VR2
)
3047 .addReg(Mips::ZERO
).addImm(0);
3048 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::B
)).addMBB(Sink
);
// True path: result value 1.
3051 unsigned VR1
= RegInfo
.createVirtualRegister(RC
);
3052 BuildMI(*TBB
, TBB
->end(), DL
, TII
->get(Mips::ADDiu
), VR1
)
3053 .addReg(Mips::ZERO
).addImm(1);
3055 // Insert phi function to $Sink.
3056 BuildMI(*Sink
, Sink
->begin(), DL
, TII
->get(Mips::PHI
),
3057 MI
.getOperand(0).getReg())
3063 MI
.eraseFromParent(); // The pseudo instruction is gone now.
// Expands an MSA conditional-branch pseudo MI, located in BB, using the real
// branch opcode BranchOp.
//
// Three new blocks are created (FBB, TBB, Sink). Everything after MI moves to
// Sink together with BB's successor edges; BB then gets FBB and TBB as
// successors, and both of those lead to Sink. The branch tests operand 1 of
// MI; FBB materialises 0 and TBB materialises 1 (ADDiu from $zero), and a PHI
// at the start of Sink defines the pseudo's result register (operand 0).
3067 MachineBasicBlock
*MipsSETargetLowering::emitMSACBranchPseudo(
3068 MachineInstr
&MI
, MachineBasicBlock
*BB
, unsigned BranchOp
) const {
3070 // vany_nonzero $rd, $ws
3081 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3083 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3084 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3085 const TargetRegisterClass
*RC
= &Mips::GPR32RegClass
;
3086 DebugLoc DL
= MI
.getDebugLoc();
3087 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
// Insertion point for the new blocks: immediately after BB in the function.
3088 MachineFunction::iterator It
= std::next(MachineFunction::iterator(BB
));
3089 MachineFunction
*F
= BB
->getParent();
3090 MachineBasicBlock
*FBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
3091 MachineBasicBlock
*TBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
3092 MachineBasicBlock
*Sink
= F
->CreateMachineBasicBlock(LLVM_BB
);
3095 F
->insert(It
, Sink
);
3097 // Transfer the remainder of BB and its successor edges to Sink.
3098 Sink
->splice(Sink
->begin(), BB
, std::next(MachineBasicBlock::iterator(MI
)),
3100 Sink
->transferSuccessorsAndUpdatePHIs(BB
);
// Wire up the diamond: BB -> {FBB, TBB} -> Sink.
3103 BB
->addSuccessor(FBB
);
3104 BB
->addSuccessor(TBB
);
3105 FBB
->addSuccessor(Sink
);
3106 TBB
->addSuccessor(Sink
);
3108 // Insert the real bnz.b instruction to $BB.
3109 BuildMI(BB
, DL
, TII
->get(BranchOp
))
3110 .addReg(MI
.getOperand(1).getReg())
// False path: result value 0, then an unconditional branch to Sink.
3114 unsigned RD1
= RegInfo
.createVirtualRegister(RC
);
3115 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::ADDiu
), RD1
)
3116 .addReg(Mips::ZERO
).addImm(0);
3117 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::B
)).addMBB(Sink
);
// True path: result value 1.
3120 unsigned RD2
= RegInfo
.createVirtualRegister(RC
);
3121 BuildMI(*TBB
, TBB
->end(), DL
, TII
->get(Mips::ADDiu
), RD2
)
3122 .addReg(Mips::ZERO
).addImm(1);
3124 // Insert phi function to $Sink.
3125 BuildMI(*Sink
, Sink
->begin(), DL
, TII
->get(Mips::PHI
),
3126 MI
.getOperand(0).getReg())
3132 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3136 // Emit the COPY_FW pseudo instruction.
3138 // copy_fw_pseudo $fd, $ws, n
3140 // copy_u_w $rt, $ws, $n
3143 // When n is zero, the equivalent operation can be performed with (potentially)
3144 // zero instructions due to register overlaps. This optimization is never valid
3145 // for lane 1 because it would require FR=0 mode which isn't supported by MSA.
// Expands the COPY_FW pseudo MI in BB. Operand 0 is the destination FP
// register (Fd), operand 1 the source MSA register (Ws) and operand 2 the
// immediate lane number.
//
// Two expansions are visible: a plain sub-register copy of sub_lo (preceded
// by a copy into an even-numbered MSA register when odd single-precision
// registers are unavailable), and a SPLATI_W of the requested lane followed
// by a sub_lo copy.
// NOTE(review): the condition selecting between them is not visible in this
// chunk; per the header comment it is presumably Lane == 0 -- confirm.
3147 MipsSETargetLowering::emitCOPY_FW(MachineInstr
&MI
,
3148 MachineBasicBlock
*BB
) const {
3149 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3150 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3151 DebugLoc DL
= MI
.getDebugLoc();
3152 unsigned Fd
= MI
.getOperand(0).getReg();
3153 unsigned Ws
= MI
.getOperand(1).getReg();
3154 unsigned Lane
= MI
.getOperand(2).getImm();
3158 if (!Subtarget
.useOddSPReg()) {
3159 // We must copy to an even-numbered MSA register so that the
3160 // single-precision sub-register is also guaranteed to be even-numbered.
3161 Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128WEvensRegClass
);
3163 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Wt
).addReg(Ws
);
// Read the low 32-bit sub-register of the vector as the result.
3166 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Wt
, 0, Mips::sub_lo
);
// Splat path: the temporary is restricted to even-numbered registers when odd
// single-precision registers are unavailable.
3168 unsigned Wt
= RegInfo
.createVirtualRegister(
3169 Subtarget
.useOddSPReg() ? &Mips::MSA128WRegClass
:
3170 &Mips::MSA128WEvensRegClass
);
// Replicate the requested lane, then read it back through sub_lo.
3172 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_W
), Wt
).addReg(Ws
).addImm(Lane
);
3173 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Wt
, 0, Mips::sub_lo
);
3176 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3180 // Emit the COPY_FD pseudo instruction.
3182 // copy_fd_pseudo $fd, $ws, n
3184 // splati.d $wt, $ws, $n
3185 // copy $fd, $wt:sub_64
3187 // When n is zero, the equivalent operation can be performed with (potentially)
3188 // zero instructions due to register overlaps. This optimization is always
3189 // valid because FR=1 mode is the only supported mode in MSA.
// Expands the COPY_FD pseudo MI in BB. Operand 0 is the destination FP
// register (Fd), operand 1 the source MSA register (Ws) and operand 2 the
// immediate lane number. Requires a 64-bit FPU (asserted below).
//
// Two expansions are visible: a plain copy of the sub_64 sub-register of Ws,
// and a SPLATI_D of element 1 into a temporary followed by a sub_64 copy.
// NOTE(review): the condition selecting between them is not visible in this
// chunk; per the header comment it is presumably Lane == 0 -- confirm.
3191 MipsSETargetLowering::emitCOPY_FD(MachineInstr
&MI
,
3192 MachineBasicBlock
*BB
) const {
3193 assert(Subtarget
.isFP64bit());
3195 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3196 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3197 unsigned Fd
= MI
.getOperand(0).getReg();
3198 unsigned Ws
= MI
.getOperand(1).getReg();
// NOTE(review): the lane immediate is doubled here, but the visible SPLATI_D
// below uses the literal 1 rather than Lane -- confirm what the scaling is
// for.
3199 unsigned Lane
= MI
.getOperand(2).getImm() * 2;
3200 DebugLoc DL
= MI
.getDebugLoc();
// Direct read of the low 64 bits of the source vector.
3203 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Ws
, 0, Mips::sub_64
);
3205 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
// Replicate element 1 so that it can be read back through sub_64.
3207 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_D
), Wt
).addReg(Ws
).addImm(1);
3208 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Wt
, 0, Mips::sub_64
);
3211 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3215 // Emit the INSERT_FW pseudo instruction.
3217 // insert_fw_pseudo $wd, $wd_in, $n, $fs
3219 // subreg_to_reg $wt:sub_lo, $fs
3220 // insve_w $wd[$n], $wd_in, $wt[0]
// Expands the INSERT_FW pseudo MI in BB. Operand 0 is the destination vector
// (Wd), operand 1 the input vector (Wd_in), operand 2 the immediate lane and
// operand 3 the FP register holding the value to insert (Fs).
//
// The scalar is first placed in a vector register via SUBREG_TO_REG on
// sub_lo, then INSVE_W defines Wd.
// NOTE(review): the remaining operands of both instructions are not visible
// in this chunk.
3222 MipsSETargetLowering::emitINSERT_FW(MachineInstr
&MI
,
3223 MachineBasicBlock
*BB
) const {
3224 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3225 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3226 DebugLoc DL
= MI
.getDebugLoc();
3227 unsigned Wd
= MI
.getOperand(0).getReg();
3228 unsigned Wd_in
= MI
.getOperand(1).getReg();
3229 unsigned Lane
= MI
.getOperand(2).getImm();
3230 unsigned Fs
= MI
.getOperand(3).getReg();
// The temporary is restricted to even-numbered registers when odd
// single-precision registers are unavailable.
3231 unsigned Wt
= RegInfo
.createVirtualRegister(
3232 Subtarget
.useOddSPReg() ? &Mips::MSA128WRegClass
:
3233 &Mips::MSA128WEvensRegClass
);
3235 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
3238 .addImm(Mips::sub_lo
);
3239 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSVE_W
), Wd
)
3245 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3249 // Emit the INSERT_FD pseudo instruction.
3251 // insert_fd_pseudo $wd, $fs, n
3253 // subreg_to_reg $wt:sub_64, $fs
3254 // insve_d $wd[$n], $wd_in, $wt[0]
// Expands the INSERT_FD pseudo MI in BB. Operand 0 is the destination vector
// (Wd), operand 1 the input vector (Wd_in), operand 2 the immediate lane and
// operand 3 the FP register holding the value to insert (Fs). Requires a
// 64-bit FPU (asserted below).
//
// The scalar is first placed in a vector register via SUBREG_TO_REG on
// sub_64, then INSVE_D defines Wd.
// NOTE(review): the remaining operands of both instructions are not visible
// in this chunk.
3256 MipsSETargetLowering::emitINSERT_FD(MachineInstr
&MI
,
3257 MachineBasicBlock
*BB
) const {
3258 assert(Subtarget
.isFP64bit());
3260 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3261 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3262 DebugLoc DL
= MI
.getDebugLoc();
3263 unsigned Wd
= MI
.getOperand(0).getReg();
3264 unsigned Wd_in
= MI
.getOperand(1).getReg();
3265 unsigned Lane
= MI
.getOperand(2).getImm();
3266 unsigned Fs
= MI
.getOperand(3).getReg();
3267 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3269 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
3272 .addImm(Mips::sub_64
);
3273 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSVE_D
), Wd
)
3279 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3283 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3286 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3288 // (SLL $lanetmp1, $lane, <log2size)
3289 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3290 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3291 // (NEG $lanetmp2, $lanetmp1)
3292 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3294 // For floating point:
3295 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3297 // (SUBREG_TO_REG $wt, $fs, <subreg>)
3298 // (SLL $lanetmp1, $lane, <log2size)
3299 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3300 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3301 // (NEG $lanetmp2, $lanetmp1)
3302 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
// Expands an insert-element pseudo whose lane index is held in a register.
//
// Operand 0 of MI is the destination vector (Wd), operand 1 the source vector,
// operand 2 the register holding the lane index and operand 3 the value to
// insert. EltSizeInBytes selects the element width and with it the insert /
// insve opcodes and the vector register class.
//
// Strategy, as laid out in the comment preceding this function: scale the
// lane to a byte offset, rotate the vector with SLD_B so the target lane
// becomes element zero, insert there, then rotate by the negated offset to
// complete a full rotation.
3303 MachineBasicBlock
*MipsSETargetLowering::emitINSERT_DF_VIDX(
3304 MachineInstr
&MI
, MachineBasicBlock
*BB
, unsigned EltSizeInBytes
,
3306 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3307 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3308 DebugLoc DL
= MI
.getDebugLoc();
3309 unsigned Wd
= MI
.getOperand(0).getReg();
3310 unsigned SrcVecReg
= MI
.getOperand(1).getReg();
3311 unsigned LaneReg
= MI
.getOperand(2).getReg();
3312 unsigned SrcValReg
= MI
.getOperand(3).getReg();
3314 const TargetRegisterClass
*VecRC
= nullptr;
3315 // FIXME: This should be true for N32 too.
3316 const TargetRegisterClass
*GPRRC
=
3317 Subtarget
.isABI_N64() ? &Mips::GPR64RegClass
: &Mips::GPR32RegClass
;
// On N64 the lane lives in a 64-bit GPR, so SLD_B reads its sub_32 part and
// the shift is the 64-bit DSLL.
3318 unsigned SubRegIdx
= Subtarget
.isABI_N64() ? Mips::sub_32
: 0;
3319 unsigned ShiftOp
= Subtarget
.isABI_N64() ? Mips::DSLL
: Mips::SLL
;
3320 unsigned EltLog2Size
;
3321 unsigned InsertOp
= 0;
3322 unsigned InsveOp
= 0;
// Pick the opcodes and register class matching the element width.
3323 switch (EltSizeInBytes
) {
3325 llvm_unreachable("Unexpected size");
3328 InsertOp
= Mips::INSERT_B
;
3329 InsveOp
= Mips::INSVE_B
;
3330 VecRC
= &Mips::MSA128BRegClass
;
3334 InsertOp
= Mips::INSERT_H
;
3335 InsveOp
= Mips::INSVE_H
;
3336 VecRC
= &Mips::MSA128HRegClass
;
3340 InsertOp
= Mips::INSERT_W
;
3341 InsveOp
= Mips::INSVE_W
;
3342 VecRC
= &Mips::MSA128WRegClass
;
3346 InsertOp
= Mips::INSERT_D
;
3347 InsveOp
= Mips::INSVE_D
;
3348 VecRC
= &Mips::MSA128DRegClass
;
// Place the scalar in a vector register through the sub-register that matches
// its width (sub_64 for 8-byte elements, sub_lo otherwise).
3353 unsigned Wt
= RegInfo
.createVirtualRegister(VecRC
);
3354 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
3357 .addImm(EltSizeInBytes
== 8 ? Mips::sub_64
: Mips::sub_lo
);
3361 // Convert the lane index into a byte index
3362 if (EltSizeInBytes
!= 1) {
3363 unsigned LaneTmp1
= RegInfo
.createVirtualRegister(GPRRC
);
3364 BuildMI(*BB
, MI
, DL
, TII
->get(ShiftOp
), LaneTmp1
)
3366 .addImm(EltLog2Size
);
3370 // Rotate bytes around so that the desired lane is element zero
3371 unsigned WdTmp1
= RegInfo
.createVirtualRegister(VecRC
);
3372 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SLD_B
), WdTmp1
)
3375 .addReg(LaneReg
, 0, SubRegIdx
);
3377 unsigned WdTmp2
= RegInfo
.createVirtualRegister(VecRC
);
3379 // Use insve.df to insert to element zero
3380 BuildMI(*BB
, MI
, DL
, TII
->get(InsveOp
), WdTmp2
)
3386 // Use insert.df to insert to element zero
3387 BuildMI(*BB
, MI
, DL
, TII
->get(InsertOp
), WdTmp2
)
3393 // Rotate elements the rest of the way for a full rotation.
3394 // sld.df interprets $rt modulo the number of columns so we only need to negate
3395 // the lane index to do this.
3396 unsigned LaneTmp2
= RegInfo
.createVirtualRegister(GPRRC
);
3397 BuildMI(*BB
, MI
, DL
, TII
->get(Subtarget
.isABI_N64() ? Mips::DSUB
: Mips::SUB
),
3399 .addReg(Subtarget
.isABI_N64() ? Mips::ZERO_64
: Mips::ZERO
)
3401 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SLD_B
), Wd
)
3404 .addReg(LaneTmp2
, 0, SubRegIdx
);
3406 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3410 // Emit the FILL_FW pseudo instruction.
3412 // fill_fw_pseudo $wd, $fs
3414 // implicit_def $wt1
3415 // insert_subreg $wt2:subreg_lo, $wt1, $fs
3416 // splati.w $wd, $wt2[0]
// Expands the FILL_FW pseudo MI in BB. Operand 0 is the destination vector
// (Wd) and operand 1 the FP register holding the value to replicate (Fs).
//
// An undefined vector (IMPLICIT_DEF) receives the scalar through
// INSERT_SUBREG on sub_lo, and SPLATI_W then replicates element 0 into Wd.
3418 MipsSETargetLowering::emitFILL_FW(MachineInstr
&MI
,
3419 MachineBasicBlock
*BB
) const {
3420 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3421 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3422 DebugLoc DL
= MI
.getDebugLoc();
3423 unsigned Wd
= MI
.getOperand(0).getReg();
3424 unsigned Fs
= MI
.getOperand(1).getReg();
// Both temporaries are restricted to even-numbered registers when odd
// single-precision registers are unavailable.
3425 unsigned Wt1
= RegInfo
.createVirtualRegister(
3426 Subtarget
.useOddSPReg() ? &Mips::MSA128WRegClass
3427 : &Mips::MSA128WEvensRegClass
);
3428 unsigned Wt2
= RegInfo
.createVirtualRegister(
3429 Subtarget
.useOddSPReg() ? &Mips::MSA128WRegClass
3430 : &Mips::MSA128WEvensRegClass
);
3432 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::IMPLICIT_DEF
), Wt1
);
3433 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_SUBREG
), Wt2
)
3436 .addImm(Mips::sub_lo
);
// Broadcast element 0 of the temporary to every lane of the result.
3437 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_W
), Wd
).addReg(Wt2
).addImm(0);
3439 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3443 // Emit the FILL_FD pseudo instruction.
3445 // fill_fd_pseudo $wd, $fs
3447 // implicit_def $wt1
3448 // insert_subreg $wt2:subreg_64, $wt1, $fs
3449 // splati.d $wd, $wt2[0]
// Expands the FILL_FD pseudo MI in BB. Operand 0 is the destination vector
// (Wd) and operand 1 the FP register holding the value to replicate (Fs).
// Requires a 64-bit FPU (asserted below).
//
// An undefined vector (IMPLICIT_DEF) receives the scalar through
// INSERT_SUBREG on sub_64, and SPLATI_D then replicates element 0 into Wd.
3451 MipsSETargetLowering::emitFILL_FD(MachineInstr
&MI
,
3452 MachineBasicBlock
*BB
) const {
3453 assert(Subtarget
.isFP64bit());
3455 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3456 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3457 DebugLoc DL
= MI
.getDebugLoc();
3458 unsigned Wd
= MI
.getOperand(0).getReg();
3459 unsigned Fs
= MI
.getOperand(1).getReg();
3460 unsigned Wt1
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3461 unsigned Wt2
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3463 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::IMPLICIT_DEF
), Wt1
);
3464 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_SUBREG
), Wt2
)
3467 .addImm(Mips::sub_64
);
// Broadcast element 0 of the temporary to every lane of the result.
3468 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_D
), Wd
).addReg(Wt2
).addImm(0);
3470 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3474 // Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
3477 // STF16 MSA128F16:$wd, mem_simm10:$addr
3479 // copy_u.h $rtemp,$wd[0]
3482 // Safety: We can't use st.h & co as they would over write the memory after
3483 // the destination. It would require half floats be allocated 16 bytes(!) of
// Expands the f16 store pseudo MI in BB. Operand 0 is the MSA register
// holding the value (Ws); operand 1 is the address base.
//
// Element 0 of Ws is moved to a 32-bit GPR with COPY_U_H and stored with SH
// (or SH64 when the base register class is not GPR32); the original memory
// operand is re-attached to the new store.
3486 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr
&MI
,
3487 MachineBasicBlock
*BB
) const {
3489 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3490 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3491 DebugLoc DL
= MI
.getDebugLoc();
3492 unsigned Ws
= MI
.getOperand(0).getReg();
// NOTE(review): getReg() is called on operand 1 unconditionally here, yet the
// register-class selection below first checks isReg() on the same operand --
// confirm operand 1 is always a register at this point.
3493 unsigned Rt
= MI
.getOperand(1).getReg();
3494 const MachineMemOperand
&MMO
= **MI
.memoperands_begin();
3495 unsigned Imm
= MMO
.getOffset();
3497 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3498 // spill and reload can expand as a GPR64 operand. Examine the
3499 // operand in detail and default to ABI.
3500 const TargetRegisterClass
*RC
=
3501 MI
.getOperand(1).isReg() ? RegInfo
.getRegClass(MI
.getOperand(1).getReg())
3502 : (Subtarget
.isABI_O32() ? &Mips::GPR32RegClass
3503 : &Mips::GPR64RegClass
);
3504 const bool UsingMips32
= RC
== &Mips::GPR32RegClass
;
3505 unsigned Rs
= RegInfo
.createVirtualRegister(&Mips::GPR32RegClass
);
// Extract halfword element 0 of the vector into a GPR.
3507 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY_U_H
), Rs
).addReg(Ws
).addImm(0);
// Wrap the 32-bit value in a 64-bit register (via sub_32) for the 64-bit
// store form.
3509 unsigned Tmp
= RegInfo
.createVirtualRegister(&Mips::GPR64RegClass
);
3510 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Tmp
)
3513 .addImm(Mips::sub_32
);
// Halfword store; the memory operand is cloned with its original offset and
// size.
3516 BuildMI(*BB
, MI
, DL
, TII
->get(UsingMips32
? Mips::SH
: Mips::SH64
))
3520 .addMemOperand(BB
->getParent()->getMachineMemOperand(
3521 &MMO
, MMO
.getOffset(), MMO
.getSize()));
3523 MI
.eraseFromParent();
3527 // Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
3529 // LD_F16 MSA128F16:$wd, mem_simm10:$addr
3532 // fill.h $wd, $rtemp
3534 // Safety: We can't use ld.h & co as they over-read from the source.
3535 // Additionally, if the address is not modulo 16, 2 cases can occur:
3536 // a) Segmentation fault as the load instruction reads from a memory page
3537 // memory it's not supposed to.
3538 // b) The load crosses an implementation specific boundary, requiring OS
// Expands the f16 load pseudo MI in BB. Operand 0 is the destination MSA
// register (Wd); the remaining operands describe the address and are copied
// onto the real load.
//
// The halfword is loaded into a GPR with LH (or LH64 when the base register
// class is not GPR32) and then replicated into Wd with FILL_H.
3541 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr
&MI
,
3542 MachineBasicBlock
*BB
) const {
3544 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3545 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3546 DebugLoc DL
= MI
.getDebugLoc();
3547 unsigned Wd
= MI
.getOperand(0).getReg();
3549 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3550 // spill and reload can expand as a GPR64 operand. Examine the
3551 // operand in detail and default to ABI.
3552 const TargetRegisterClass
*RC
=
3553 MI
.getOperand(1).isReg() ? RegInfo
.getRegClass(MI
.getOperand(1).getReg())
3554 : (Subtarget
.isABI_O32() ? &Mips::GPR32RegClass
3555 : &Mips::GPR64RegClass
);
3557 const bool UsingMips32
= RC
== &Mips::GPR32RegClass
;
// The loaded value uses the same register class as the address base.
3558 unsigned Rt
= RegInfo
.createVirtualRegister(RC
);
3560 MachineInstrBuilder MIB
=
3561 BuildMI(*BB
, MI
, DL
, TII
->get(UsingMips32
? Mips::LH
: Mips::LH64
), Rt
);
// Forward every operand after the destination (index 1 onwards) to the load.
3562 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
3563 MIB
.add(MI
.getOperand(i
));
// Narrow a 64-bit load result to its low 32 bits through sub_32.
3566 unsigned Tmp
= RegInfo
.createVirtualRegister(&Mips::GPR32RegClass
);
3567 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Tmp
).addReg(Rt
, 0, Mips::sub_32
);
// Replicate the loaded halfword into the destination vector.
3571 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FILL_H
), Wd
).addReg(Rt
);
3573 MI
.eraseFromParent();
3577 // Emit the FPROUND_PSEUDO instruction.
3579 // Round an FGR64Opnd, FGR32Opnd to an f16.
3581 // Safety: Cycle the operand through the GPRs so the result always ends up
3582 // in the correct MSA register.
3584 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3585 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3586 // (which they can be, as the MSA registers are defined to alias the
3587 // FPU's 64 bit and 32 bit registers) the result can be accessed using
3588 // the correct register class. That requires operands be tie-able across
3589 // register classes which have a sub/super register class relationship.
3593 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3596 // fill.w $rtemp, $wtemp
3597 // fexdo.w $wd, $wtemp, $wtemp
3599 // For FGR64Opnd on mips32r2+:
3601 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3604 // fill.w $rtemp, $wtemp
3605 // mfhc1 $rtemp2, $fs
3606 // insert.w $wtemp[1], $rtemp2
3607 // insert.w $wtemp[3], $rtemp2
3608 // fexdo.w $wtemp2, $wtemp, $wtemp
3609 // fexdo.h $wd, $temp2, $temp2
3611 // For FGR64Opnd on mips64r2+:
3613 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3615 // dmfc1 $rtemp, $fs
3616 // fill.d $rtemp, $wtemp
3617 // fexdo.w $wtemp2, $wtemp, $wtemp
3618 // fexdo.h $wd, $wtemp2, $wtemp2
3620 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3621 // undef bits are "just right" and the exception enable bits are
3622 // set. By using fill.w to replicate $fs into all elements over
3623 // insert.w for one element, we avoid that potential case. If
3624 // fexdo.[hw] causes an exception, the exception is valid and it
3625 // occurs for all elements.
// Expands the FPROUND pseudo MI in BB. Operand 0 is the destination MSA
// register (Wd) and operand 1 the source FP register (Fs); IsFGR64 says
// whether Fs is a 64-bit FP register.
//
// The source is moved to a GPR and replicated into a vector with FILL_W or
// FILL_D. For a 64-bit source on a 32-bit target the high word is fetched
// separately (MFHC1_D64) and inserted with INSERT_W. A 64-bit value is first
// narrowed with FEXDO_W; the final FEXDO_H defines Wd.
3627 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr
&MI
,
3628 MachineBasicBlock
*BB
,
3629 bool IsFGR64
) const {
3631 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3632 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3634 assert(Subtarget
.hasMSA() && Subtarget
.hasMips32r2());
// Exactly one of these is true when IsFGR64 is set; both are false otherwise.
3636 bool IsFGR64onMips64
= Subtarget
.hasMips64() && IsFGR64
;
3637 bool IsFGR64onMips32
= !Subtarget
.hasMips64() && IsFGR64
;
3639 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3640 DebugLoc DL
= MI
.getDebugLoc();
3641 unsigned Wd
= MI
.getOperand(0).getReg();
3642 unsigned Fs
= MI
.getOperand(1).getReg();
3644 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3645 unsigned Wtemp
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
// A 64-bit GPR is only needed when the whole double fits in one register.
3646 const TargetRegisterClass
*GPRRC
=
3647 IsFGR64onMips64
? &Mips::GPR64RegClass
: &Mips::GPR32RegClass
;
3648 unsigned MFC1Opc
= IsFGR64onMips64
3650 : (IsFGR64onMips32
? Mips::MFC1_D64
: Mips::MFC1
);
3651 unsigned FILLOpc
= IsFGR64onMips64
? Mips::FILL_D
: Mips::FILL_W
;
3653 // Perform the register class copy as mentioned above.
3654 unsigned Rtemp
= RegInfo
.createVirtualRegister(GPRRC
);
3655 BuildMI(*BB
, MI
, DL
, TII
->get(MFC1Opc
), Rtemp
).addReg(Fs
);
3656 BuildMI(*BB
, MI
, DL
, TII
->get(FILLOpc
), Wtemp
).addReg(Rtemp
);
// WPHI tracks the vector register that feeds the next conversion step.
3657 unsigned WPHI
= Wtemp
;
// 64-bit source on a 32-bit target: fetch the high word separately and merge
// it into the vector with two INSERT_W instructions.
3659 if (IsFGR64onMips32
) {
3660 unsigned Rtemp2
= RegInfo
.createVirtualRegister(GPRRC
);
3661 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::MFHC1_D64
), Rtemp2
).addReg(Fs
);
3662 unsigned Wtemp2
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3663 unsigned Wtemp3
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3664 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_W
), Wtemp2
)
3668 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_W
), Wtemp3
)
// Narrowing step ahead of the final conversion; per the header comment it
// applies to FGR64 sources.
3676 unsigned Wtemp2
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3677 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXDO_W
), Wtemp2
)
// Final conversion into the destination register.
3683 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXDO_H
), Wd
).addReg(WPHI
).addReg(WPHI
);
3685 MI
.eraseFromParent();
3689 // Emit the FPEXTEND_PSEUDO instruction.
3691 // Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3693 // Safety: Cycle the result through the GPRs so the result always ends up in
3694 // the correct floating point register.
3696 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3697 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3698 // (which they can be, as the MSA registers are defined to alias the
3699 // FPU's 64 bit and 32 bit registers) the result can be accessed using
3700 // the correct register class. That requires operands be tie-able across
3701 // register classes which have a sub/super register class relationship. I
3706 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3708 // fexupr.w $wtemp, $ws
3709 // copy_s.w $rtemp, $wtemp[0]
3712 // For FGR64Opnd on Mips64:
3714 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3716 // fexupr.w $wtemp, $ws
3717 // fexupr.d $wtemp2, $wtemp
3718 // copy_s.d $rtemp, $wtemp2[0]
3719 // dmtc1 $rtemp, $fd
3721 // For FGR64Opnd on Mips32:
3723 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3725 // fexupr.w $wtemp, $ws
3726 // fexupr.d $wtemp2, $wtemp
3727 // copy_s.w $rtemp, $wtemp2[0]
3728 // mtc1 $rtemp, $ftemp
3729 // copy_s.w $rtemp2, $wtemp2[1]
3730 // $fd = mthc1 $rtemp2, $ftemp
3732 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr
&MI
,
3733 MachineBasicBlock
*BB
,
3734 bool IsFGR64
) const {
// Expands the FPEXTEND pseudo MI in place: every BuildMI below inserts into
// *BB at MI's position, and MI itself is erased at the end.
3736 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3737 // here. It's technically doable to support MIPS32 here, but the ISA forbids it.
3739 assert(Subtarget
.hasMSA() && Subtarget
.hasMips32r2());
// Split the IsFGR64 request by subtarget: at most one of these two flags is
// true, and both are false when IsFGR64 is false.
3741 bool IsFGR64onMips64
= Subtarget
.hasMips64() && IsFGR64
;
3742 bool IsFGR64onMips32
= !Subtarget
.hasMips64() && IsFGR64
;
3744 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3745 DebugLoc DL
= MI
.getDebugLoc();
// Operand 0 is read as the destination register (Fd) and operand 1 as the
// MSA source register (Ws).
3746 unsigned Fd
= MI
.getOperand(0).getReg();
3747 unsigned Ws
= MI
.getOperand(1).getReg();
3749 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
// The GPR temporaries use the 64-bit class only for FGR64 on MIPS64;
// every other configuration goes through 32-bit GPRs.
3750 const TargetRegisterClass
*GPRRC
=
3751 IsFGR64onMips64
? &Mips::GPR64RegClass
: &Mips::GPR32RegClass
;
// Opcode for the GPR -> FPR move.
// NOTE(review): the operand selected when IsFGR64onMips64 is true is not
// visible in this view; the pseudo-assembly in the comment block above this
// function shows `dmtc1` for that case -- confirm.
3752 unsigned MTC1Opc
= IsFGR64onMips64
3754 : (IsFGR64onMips32
? Mips::MTC1_D64
: Mips::MTC1
);
// Element-copy opcode: COPY_S_D for FGR64 on MIPS64, COPY_S_W otherwise.
3755 unsigned COPYOpc
= IsFGR64onMips64
? Mips::COPY_S_D
: Mips::COPY_S_W
;
// WPHI names the vector register the element copy below reads from; it starts
// out as Wtemp, the destination of the FEXUPR_W emitted next.
3757 unsigned Wtemp
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3758 unsigned WPHI
= Wtemp
;
3760 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXUPR_W
), Wtemp
).addReg(Ws
);
// WPHI is re-pointed at a fresh MSA128D register that receives FEXUPR_D of
// Wtemp.
// NOTE(review): the comment block above this function lists fexupr.d only for
// the FGR64Opnd forms; the condition guarding this step is not visible in this
// view -- confirm.
3762 WPHI
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3763 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXUPR_D
), WPHI
).addReg(Wtemp
);
3766 // Perform the safety regclass copy mentioned above.
3767 unsigned Rtemp
= RegInfo
.createVirtualRegister(GPRRC
);
// For FGR64 on a 32-bit subtarget the first move targets a temporary FGR64
// register rather than the final destination.
// NOTE(review): the alternative arm of this conditional is not visible in this
// view; presumably it is Fd -- confirm.
3768 unsigned FPRPHI
= IsFGR64onMips32
3769 ? RegInfo
.createVirtualRegister(&Mips::FGR64RegClass
)
// Copy element 0 of WPHI into the GPR temporary, then move that GPR into
// FPRPHI with the opcode chosen above.
3771 BuildMI(*BB
, MI
, DL
, TII
->get(COPYOpc
), Rtemp
).addReg(WPHI
).addImm(0);
3772 BuildMI(*BB
, MI
, DL
, TII
->get(MTC1Opc
), FPRPHI
).addReg(Rtemp
);
// FGR64 on a 32-bit subtarget needs a second 32-bit element copy (Rtemp2) and
// an MTHC1_D64 that defines Fd.
// NOTE(review): the operand lists of the two instructions below are not visible
// in this view; the comment block above this function shows element [1] and
// $ftemp being used -- confirm.
3774 if (IsFGR64onMips32
) {
3775 unsigned Rtemp2
= RegInfo
.createVirtualRegister(GPRRC
);
3776 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY_S_W
), Rtemp2
)
3779 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::MTHC1_D64
), Fd
)
// The expansion is complete; remove the pseudo instruction.
3784 MI
.eraseFromParent();
3788 // Emit the FEXP2_W_1 pseudo instructions.
3790 // fexp2_w_1_pseudo $wd, $wt
3793 // fexp2.w $wd, $ws, $wt
3795 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr
&MI
,
3796 MachineBasicBlock
*BB
) const {
// Expands the FEXP2_W_1 pseudo in place: the replacement instructions are
// built into *BB at MI's position and MI is erased afterwards.
3797 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3798 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
// Both temporaries live in the MSA128W register class.
3799 const TargetRegisterClass
*RC
= &Mips::MSA128WRegClass
;
3800 unsigned Ws1
= RegInfo
.createVirtualRegister(RC
);
3801 unsigned Ws2
= RegInfo
.createVirtualRegister(RC
);
3802 DebugLoc DL
= MI
.getDebugLoc();
3804 // Splat 1.0 into a vector: LDI_W loads the immediate 1 into Ws1, and
// FFINT_U_W then produces Ws2 from Ws1.
3805 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::LDI_W
), Ws1
).addImm(1);
3806 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FFINT_U_W
), Ws2
).addReg(Ws1
);
3808 // Emit 1.0 * fexp2(Wt)
// The result is written to the pseudo's operand 0; the pseudo's operand 1 is
// passed through as a source.
// NOTE(review): the header comment shows `fexp2.w $wd, $ws, $wt`, so Ws2 is
// presumably the other source operand; that part of the operand chain is not
// visible in this view -- confirm.
3809 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXP2_W
), MI
.getOperand(0).getReg())
3811 .addReg(MI
.getOperand(1).getReg());
3813 MI
.eraseFromParent(); // The pseudo instruction is gone now.
3817 // Emit the FEXP2_D_1 pseudo instructions.
3819 // fexp2_d_1_pseudo $wd, $wt
3822 // fexp2.d $wd, $ws, $wt
3824 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr
&MI
,
3825 MachineBasicBlock
*BB
) const {
// Expands the FEXP2_D_1 pseudo in place: the replacement instructions are
// built into *BB at MI's position and MI is erased afterwards.
3826 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
3827 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
// Both temporaries live in the MSA128D register class.
3828 const TargetRegisterClass
*RC
= &Mips::MSA128DRegClass
;
3829 unsigned Ws1
= RegInfo
.createVirtualRegister(RC
);
3830 unsigned Ws2
= RegInfo
.createVirtualRegister(RC
);
3831 DebugLoc DL
= MI
.getDebugLoc();
3833 // Splat 1.0 into a vector: LDI_D loads the immediate 1 into Ws1, and
// FFINT_U_D then produces Ws2 from Ws1.
3834 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::LDI_D
), Ws1
).addImm(1);
3835 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FFINT_U_D
), Ws2
).addReg(Ws1
);
3837 // Emit 1.0 * fexp2(Wt)
// The result is written to the pseudo's operand 0; the pseudo's operand 1 is
// passed through as a source.
// NOTE(review): the header comment shows `fexp2.d $wd, $ws, $wt`, so Ws2 is
// presumably the other source operand; that part of the operand chain is not
// visible in this view -- confirm.
3838 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXP2_D
), MI
.getOperand(0).getReg())
3840 .addReg(MI
.getOperand(1).getReg());
3842 MI
.eraseFromParent(); // The pseudo instruction is gone now.