//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
    UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);
  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }
  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}
const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}
// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
  setOperationAction(ISD::UNDEF, Ty, Legal);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}
// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}
SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
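  // For example (illustrative sketch of the DAG shape, not from the original
  // source): (select i1 %cc, f64 %t, f64 %f) becomes
  //   (FSELECT (MTC1_D64 %cc), %t, %f)
  // and the resulting sel.d only inspects bit 0 of the condition register.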
  SDValue Tmp =
      DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}
bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND node.
// - Removes redundant zero extensions performed by an ISD::AND.
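//
// For example (illustrative, not from the original source):
//   (and (MipsVExtractSExt $v, $idx, i16), 65535)
// has 65535 + 1 == 2^16 matching the width of the i16 extension, so the
// sign-extension is completely overwritten and the node can be rewritten as
//   (MipsVExtractZExt $v, $idx, i16)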
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getValueType(0),
                         makeArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
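//
// For example (illustrative, not from the original source): a BUILD_VECTOR
// that splats the i32 constant 5 across all four lanes sets Imm to 5 and
// returns true; a BUILD_VECTOR with differing lane values returns false.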
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}
// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}
// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}
// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
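//
// For example (illustrative, not from the original source), with v4i32
// operands:
//   (or (and $a, splat(0x0000FFFF)), (and $b, splat(0xFFFF0000)))
// has complementary constant masks, so it becomes
//   (vselect splat(0x0000FFFF), $a, $b)
// which can then be matched to MSA's bsel.v instruction.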
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnesValue())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider to perform this optimization after type legalization.
  //   That allows to remove a workaround for types not supported natively.
  // - Take in account `-Os, -Oz` flags because this optimization
  //   increases code size.
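  //
  // For example (illustrative, not from the original source): C == 36, i.e.
  // 32 + 4, decomposes into two powers of two within a few steps and is cheap
  // to rewrite, while a constant such as 0x55555555 needs far more add/sub
  // steps than MaxSteps allows and is left as an ordinary multiply.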
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << C.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;

  return Steps <= MaxSteps;
}
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
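  //
  // For example (illustrative, not from the original source): c == 10 gives
  // floor_c == 8 and c - floor_c == 2, producing (add (shl x, 3), (shl x, 1));
  // c == 15 is closer to 16, so the subtraction path below yields
  // (sub (shl x, 4), x).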
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   nodes.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
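//
// For example (illustrative, not from the original source):
//   (sra (shl (MipsVExtractZExt $v, $idx, i8), 24), 24)
// re-extends the 8-bit element across all 32 bits, so the pair of shifts is
// redundant and the whole expression folds to
//   (MipsVExtractSExt $v, $idx, i8)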
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getValueType(0),
                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
MipsSETargetLowering::lowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
1174 LoadSDNode
&Nd
= *cast
<LoadSDNode
>(Op
);
1176 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1177 return MipsTargetLowering::lowerLOAD(Op
, DAG
);
1179 // Replace a double precision load with two i32 loads and a buildpair64.
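  // For example (illustrative, not from the original source): on a
  // little-endian target, an f64 load from Ptr becomes an i32 load from Ptr
  // (low word), an i32 load from Ptr+4 (high word), and a BuildPairF64 that
  // reassembles the two words into an f64.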
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlignment(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain =
      DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
                   Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      std::min(Nd.getAlignment(), 4U),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, DL, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
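//
// For example (illustrative, not from the original source):
// i64 @llvm.mips.madd(i64 %acc, i32 %a, i32 %b) moves %acc into the HI/LO
// accumulator with MTLOHI, performs MipsISD::MAdd, then reassembles the i64
// result with MFLO/MFHI and BUILD_PAIR.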
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
  // here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, do a vector shift instead

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
  switch (Intrinsic) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bclr_b:
  case Intrinsic::mips_bclr_h:
  case Intrinsic::mips_bclr_w:
  case Intrinsic::mips_bclr_d:
    return lowerMSABitClear(Op, DAG);
  case Intrinsic::mips_bclri_b:
  case Intrinsic::mips_bclri_h:
  case Intrinsic::mips_bclri_w:
  case Intrinsic::mips_bclri_d:
    return lowerMSABitClearImm(Op, DAG);
  case Intrinsic::mips_binsli_b:
  case Intrinsic::mips_binsli_h:
  case Intrinsic::mips_binsli_w:
  case Intrinsic::mips_binsli_d: {
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
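    // For example (illustrative, not from the original source): binsli.w with
    // nbits == 7 builds a mask of the 8 high bits of each i32 element and
    // selects those bits from IfSet, leaving the remaining bits from IfClear.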
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_bmzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_bneg_b:
  case Intrinsic::mips_bneg_h:
  case Intrinsic::mips_bneg_w:
  case Intrinsic::mips_bneg_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bnegi_b:
  case Intrinsic::mips_bnegi_h:
  case Intrinsic::mips_bnegi_w:
  case Intrinsic::mips_bnegi_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3),
                       Op->getOperand(2));
  case Intrinsic::mips_bseli_b:
    // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
                       Op->getOperand(2));
  case Intrinsic::mips_bset_b:
  case Intrinsic::mips_bset_h:
  case Intrinsic::mips_bset_w:
  case Intrinsic::mips_bset_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bseti_b:
  case Intrinsic::mips_bseti_h:
  case Intrinsic::mips_bseti_w:
  case Intrinsic::mips_bseti_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
  case Intrinsic::mips_cle_u_b:
:
1738 case Intrinsic::mips_cle_u_h
:
1739 case Intrinsic::mips_cle_u_w
:
1740 case Intrinsic::mips_cle_u_d
:
1741 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1742 Op
->getOperand(2), ISD::SETULE
);
1743 case Intrinsic::mips_clei_u_b
:
1744 case Intrinsic::mips_clei_u_h
:
1745 case Intrinsic::mips_clei_u_w
:
1746 case Intrinsic::mips_clei_u_d
:
1747 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1748 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULE
);
1749 case Intrinsic::mips_clt_s_b
:
1750 case Intrinsic::mips_clt_s_h
:
1751 case Intrinsic::mips_clt_s_w
:
1752 case Intrinsic::mips_clt_s_d
:
1753 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1754 Op
->getOperand(2), ISD::SETLT
);
1755 case Intrinsic::mips_clti_s_b
:
1756 case Intrinsic::mips_clti_s_h
:
1757 case Intrinsic::mips_clti_s_w
:
1758 case Intrinsic::mips_clti_s_d
:
1759 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1760 lowerMSASplatImm(Op
, 2, DAG
, true), ISD::SETLT
);
1761 case Intrinsic::mips_clt_u_b
:
1762 case Intrinsic::mips_clt_u_h
:
1763 case Intrinsic::mips_clt_u_w
:
1764 case Intrinsic::mips_clt_u_d
:
1765 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1766 Op
->getOperand(2), ISD::SETULT
);
1767 case Intrinsic::mips_clti_u_b
:
1768 case Intrinsic::mips_clti_u_h
:
1769 case Intrinsic::mips_clti_u_w
:
1770 case Intrinsic::mips_clti_u_d
:
1771 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1772 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULT
);
1773 case Intrinsic::mips_copy_s_b
:
1774 case Intrinsic::mips_copy_s_h
:
1775 case Intrinsic::mips_copy_s_w
:
1776 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1777 case Intrinsic::mips_copy_s_d
:
1778 if (Subtarget
.hasMips64())
1779 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1780 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1782 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1783 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1784 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1785 Op
->getValueType(0), Op
->getOperand(1),
1788 case Intrinsic::mips_copy_u_b
:
1789 case Intrinsic::mips_copy_u_h
:
1790 case Intrinsic::mips_copy_u_w
:
1791 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1792 case Intrinsic::mips_copy_u_d
:
1793 if (Subtarget
.hasMips64())
1794 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1795 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1797 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1798 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1799 // Note: When i64 is illegal, this results in copy_s.w instructions
1800 // instead of copy_u.w instructions. This makes no difference to the
1801 // behaviour since i64 is only illegal when the register file is 32-bit.
1802 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1803 Op
->getValueType(0), Op
->getOperand(1),
1806 case Intrinsic::mips_div_s_b
:
1807 case Intrinsic::mips_div_s_h
:
1808 case Intrinsic::mips_div_s_w
:
1809 case Intrinsic::mips_div_s_d
:
1810 return DAG
.getNode(ISD::SDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1812 case Intrinsic::mips_div_u_b
:
1813 case Intrinsic::mips_div_u_h
:
1814 case Intrinsic::mips_div_u_w
:
1815 case Intrinsic::mips_div_u_d
:
1816 return DAG
.getNode(ISD::UDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1818 case Intrinsic::mips_fadd_w
:
1819 case Intrinsic::mips_fadd_d
:
1820 // TODO: If intrinsics have fast-math-flags, propagate them.
1821 return DAG
.getNode(ISD::FADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1823 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1824 case Intrinsic::mips_fceq_w
:
1825 case Intrinsic::mips_fceq_d
:
1826 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1827 Op
->getOperand(2), ISD::SETOEQ
);
1828 case Intrinsic::mips_fcle_w
:
1829 case Intrinsic::mips_fcle_d
:
1830 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1831 Op
->getOperand(2), ISD::SETOLE
);
1832 case Intrinsic::mips_fclt_w
:
1833 case Intrinsic::mips_fclt_d
:
1834 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1835 Op
->getOperand(2), ISD::SETOLT
);
1836 case Intrinsic::mips_fcne_w
:
1837 case Intrinsic::mips_fcne_d
:
1838 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1839 Op
->getOperand(2), ISD::SETONE
);
1840 case Intrinsic::mips_fcor_w
:
1841 case Intrinsic::mips_fcor_d
:
1842 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1843 Op
->getOperand(2), ISD::SETO
);
1844 case Intrinsic::mips_fcueq_w
:
1845 case Intrinsic::mips_fcueq_d
:
1846 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1847 Op
->getOperand(2), ISD::SETUEQ
);
1848 case Intrinsic::mips_fcule_w
:
1849 case Intrinsic::mips_fcule_d
:
1850 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1851 Op
->getOperand(2), ISD::SETULE
);
1852 case Intrinsic::mips_fcult_w
:
1853 case Intrinsic::mips_fcult_d
:
1854 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1855 Op
->getOperand(2), ISD::SETULT
);
1856 case Intrinsic::mips_fcun_w
:
1857 case Intrinsic::mips_fcun_d
:
1858 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1859 Op
->getOperand(2), ISD::SETUO
);
1860 case Intrinsic::mips_fcune_w
:
1861 case Intrinsic::mips_fcune_d
:
1862 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1863 Op
->getOperand(2), ISD::SETUNE
);
1864 case Intrinsic::mips_fdiv_w
:
1865 case Intrinsic::mips_fdiv_d
:
1866 // TODO: If intrinsics have fast-math-flags, propagate them.
1867 return DAG
.getNode(ISD::FDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1869 case Intrinsic::mips_ffint_u_w
:
1870 case Intrinsic::mips_ffint_u_d
:
1871 return DAG
.getNode(ISD::UINT_TO_FP
, DL
, Op
->getValueType(0),
1873 case Intrinsic::mips_ffint_s_w
:
1874 case Intrinsic::mips_ffint_s_d
:
1875 return DAG
.getNode(ISD::SINT_TO_FP
, DL
, Op
->getValueType(0),
1877 case Intrinsic::mips_fill_b
:
1878 case Intrinsic::mips_fill_h
:
1879 case Intrinsic::mips_fill_w
:
1880 case Intrinsic::mips_fill_d
: {
1881 EVT ResTy
= Op
->getValueType(0);
1882 SmallVector
<SDValue
, 16> Ops(ResTy
.getVectorNumElements(),
1885 // If ResTy is v2i64 then the type legalizer will break this node down into
1886 // an equivalent v4i32.
1887 return DAG
.getBuildVector(ResTy
, DL
, Ops
);
1889 case Intrinsic::mips_fexp2_w
:
1890 case Intrinsic::mips_fexp2_d
: {
1891 // TODO: If intrinsics have fast-math-flags, propagate them.
1892 EVT ResTy
= Op
->getValueType(0);
1894 ISD::FMUL
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1895 DAG
.getNode(ISD::FEXP2
, SDLoc(Op
), ResTy
, Op
->getOperand(2)));
1897 case Intrinsic::mips_flog2_w
:
1898 case Intrinsic::mips_flog2_d
:
1899 return DAG
.getNode(ISD::FLOG2
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1900 case Intrinsic::mips_fmadd_w
:
1901 case Intrinsic::mips_fmadd_d
:
1902 return DAG
.getNode(ISD::FMA
, SDLoc(Op
), Op
->getValueType(0),
1903 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
1904 case Intrinsic::mips_fmul_w
:
1905 case Intrinsic::mips_fmul_d
:
1906 // TODO: If intrinsics have fast-math-flags, propagate them.
1907 return DAG
.getNode(ISD::FMUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1909 case Intrinsic::mips_fmsub_w
:
1910 case Intrinsic::mips_fmsub_d
: {
1911 // TODO: If intrinsics have fast-math-flags, propagate them.
1912 return DAG
.getNode(MipsISD::FMS
, SDLoc(Op
), Op
->getValueType(0),
1913 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
1915 case Intrinsic::mips_frint_w
:
1916 case Intrinsic::mips_frint_d
:
1917 return DAG
.getNode(ISD::FRINT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1918 case Intrinsic::mips_fsqrt_w
:
1919 case Intrinsic::mips_fsqrt_d
:
1920 return DAG
.getNode(ISD::FSQRT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1921 case Intrinsic::mips_fsub_w
:
1922 case Intrinsic::mips_fsub_d
:
1923 // TODO: If intrinsics have fast-math-flags, propagate them.
1924 return DAG
.getNode(ISD::FSUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1926 case Intrinsic::mips_ftrunc_u_w
:
1927 case Intrinsic::mips_ftrunc_u_d
:
1928 return DAG
.getNode(ISD::FP_TO_UINT
, DL
, Op
->getValueType(0),
1930 case Intrinsic::mips_ftrunc_s_w
:
1931 case Intrinsic::mips_ftrunc_s_d
:
1932 return DAG
.getNode(ISD::FP_TO_SINT
, DL
, Op
->getValueType(0),
1934 case Intrinsic::mips_ilvev_b
:
1935 case Intrinsic::mips_ilvev_h
:
1936 case Intrinsic::mips_ilvev_w
:
1937 case Intrinsic::mips_ilvev_d
:
1938 return DAG
.getNode(MipsISD::ILVEV
, DL
, Op
->getValueType(0),
1939 Op
->getOperand(1), Op
->getOperand(2));
1940 case Intrinsic::mips_ilvl_b
:
1941 case Intrinsic::mips_ilvl_h
:
1942 case Intrinsic::mips_ilvl_w
:
1943 case Intrinsic::mips_ilvl_d
:
1944 return DAG
.getNode(MipsISD::ILVL
, DL
, Op
->getValueType(0),
1945 Op
->getOperand(1), Op
->getOperand(2));
1946 case Intrinsic::mips_ilvod_b
:
1947 case Intrinsic::mips_ilvod_h
:
1948 case Intrinsic::mips_ilvod_w
:
1949 case Intrinsic::mips_ilvod_d
:
1950 return DAG
.getNode(MipsISD::ILVOD
, DL
, Op
->getValueType(0),
1951 Op
->getOperand(1), Op
->getOperand(2));
1952 case Intrinsic::mips_ilvr_b
:
1953 case Intrinsic::mips_ilvr_h
:
1954 case Intrinsic::mips_ilvr_w
:
1955 case Intrinsic::mips_ilvr_d
:
1956 return DAG
.getNode(MipsISD::ILVR
, DL
, Op
->getValueType(0),
1957 Op
->getOperand(1), Op
->getOperand(2));
1958 case Intrinsic::mips_insert_b
:
1959 case Intrinsic::mips_insert_h
:
1960 case Intrinsic::mips_insert_w
:
1961 case Intrinsic::mips_insert_d
:
1962 return DAG
.getNode(ISD::INSERT_VECTOR_ELT
, SDLoc(Op
), Op
->getValueType(0),
1963 Op
->getOperand(1), Op
->getOperand(3), Op
->getOperand(2));
1964 case Intrinsic::mips_insve_b
:
1965 case Intrinsic::mips_insve_h
:
1966 case Intrinsic::mips_insve_w
:
1967 case Intrinsic::mips_insve_d
: {
1968 // Report an error for out of range values.
1970 switch (Intrinsic
) {
1971 case Intrinsic::mips_insve_b
: Max
= 15; break;
1972 case Intrinsic::mips_insve_h
: Max
= 7; break;
1973 case Intrinsic::mips_insve_w
: Max
= 3; break;
1974 case Intrinsic::mips_insve_d
: Max
= 1; break;
1975 default: llvm_unreachable("Unmatched intrinsic");
1977 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
1978 if (Value
< 0 || Value
> Max
)
1979 report_fatal_error("Immediate out of range");
1980 return DAG
.getNode(MipsISD::INSVE
, DL
, Op
->getValueType(0),
1981 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3),
1982 DAG
.getConstant(0, DL
, MVT::i32
));
1984 case Intrinsic::mips_ldi_b
:
1985 case Intrinsic::mips_ldi_h
:
1986 case Intrinsic::mips_ldi_w
:
1987 case Intrinsic::mips_ldi_d
:
1988 return lowerMSASplatImm(Op
, 1, DAG
, true);
1989 case Intrinsic::mips_lsa
:
1990 case Intrinsic::mips_dlsa
: {
1991 EVT ResTy
= Op
->getValueType(0);
1992 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1993 DAG
.getNode(ISD::SHL
, SDLoc(Op
), ResTy
,
1994 Op
->getOperand(2), Op
->getOperand(3)));
1996 case Intrinsic::mips_maddv_b
:
1997 case Intrinsic::mips_maddv_h
:
1998 case Intrinsic::mips_maddv_w
:
1999 case Intrinsic::mips_maddv_d
: {
2000 EVT ResTy
= Op
->getValueType(0);
2001 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
2002 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
2003 Op
->getOperand(2), Op
->getOperand(3)));
2005 case Intrinsic::mips_max_s_b
:
2006 case Intrinsic::mips_max_s_h
:
2007 case Intrinsic::mips_max_s_w
:
2008 case Intrinsic::mips_max_s_d
:
2009 return DAG
.getNode(ISD::SMAX
, DL
, Op
->getValueType(0),
2010 Op
->getOperand(1), Op
->getOperand(2));
2011 case Intrinsic::mips_max_u_b
:
2012 case Intrinsic::mips_max_u_h
:
2013 case Intrinsic::mips_max_u_w
:
2014 case Intrinsic::mips_max_u_d
:
2015 return DAG
.getNode(ISD::UMAX
, DL
, Op
->getValueType(0),
2016 Op
->getOperand(1), Op
->getOperand(2));
2017 case Intrinsic::mips_maxi_s_b
:
2018 case Intrinsic::mips_maxi_s_h
:
2019 case Intrinsic::mips_maxi_s_w
:
2020 case Intrinsic::mips_maxi_s_d
:
2021 return DAG
.getNode(ISD::SMAX
, DL
, Op
->getValueType(0),
2022 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
, true));
2023 case Intrinsic::mips_maxi_u_b
:
2024 case Intrinsic::mips_maxi_u_h
:
2025 case Intrinsic::mips_maxi_u_w
:
2026 case Intrinsic::mips_maxi_u_d
:
2027 return DAG
.getNode(ISD::UMAX
, DL
, Op
->getValueType(0),
2028 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2029 case Intrinsic::mips_min_s_b
:
2030 case Intrinsic::mips_min_s_h
:
2031 case Intrinsic::mips_min_s_w
:
2032 case Intrinsic::mips_min_s_d
:
2033 return DAG
.getNode(ISD::SMIN
, DL
, Op
->getValueType(0),
2034 Op
->getOperand(1), Op
->getOperand(2));
2035 case Intrinsic::mips_min_u_b
:
2036 case Intrinsic::mips_min_u_h
:
2037 case Intrinsic::mips_min_u_w
:
2038 case Intrinsic::mips_min_u_d
:
2039 return DAG
.getNode(ISD::UMIN
, DL
, Op
->getValueType(0),
2040 Op
->getOperand(1), Op
->getOperand(2));
2041 case Intrinsic::mips_mini_s_b
:
2042 case Intrinsic::mips_mini_s_h
:
2043 case Intrinsic::mips_mini_s_w
:
2044 case Intrinsic::mips_mini_s_d
:
2045 return DAG
.getNode(ISD::SMIN
, DL
, Op
->getValueType(0),
2046 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
, true));
2047 case Intrinsic::mips_mini_u_b
:
2048 case Intrinsic::mips_mini_u_h
:
2049 case Intrinsic::mips_mini_u_w
:
2050 case Intrinsic::mips_mini_u_d
:
2051 return DAG
.getNode(ISD::UMIN
, DL
, Op
->getValueType(0),
2052 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2053 case Intrinsic::mips_mod_s_b
:
2054 case Intrinsic::mips_mod_s_h
:
2055 case Intrinsic::mips_mod_s_w
:
2056 case Intrinsic::mips_mod_s_d
:
2057 return DAG
.getNode(ISD::SREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2059 case Intrinsic::mips_mod_u_b
:
2060 case Intrinsic::mips_mod_u_h
:
2061 case Intrinsic::mips_mod_u_w
:
2062 case Intrinsic::mips_mod_u_d
:
2063 return DAG
.getNode(ISD::UREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2065 case Intrinsic::mips_mulv_b
:
2066 case Intrinsic::mips_mulv_h
:
2067 case Intrinsic::mips_mulv_w
:
2068 case Intrinsic::mips_mulv_d
:
2069 return DAG
.getNode(ISD::MUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2071 case Intrinsic::mips_msubv_b
:
2072 case Intrinsic::mips_msubv_h
:
2073 case Intrinsic::mips_msubv_w
:
2074 case Intrinsic::mips_msubv_d
: {
2075 EVT ResTy
= Op
->getValueType(0);
2076 return DAG
.getNode(ISD::SUB
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
2077 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
2078 Op
->getOperand(2), Op
->getOperand(3)));
2080 case Intrinsic::mips_nlzc_b
:
2081 case Intrinsic::mips_nlzc_h
:
2082 case Intrinsic::mips_nlzc_w
:
2083 case Intrinsic::mips_nlzc_d
:
2084 return DAG
.getNode(ISD::CTLZ
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2085 case Intrinsic::mips_nor_v
: {
2086 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2087 Op
->getOperand(1), Op
->getOperand(2));
2088 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2090 case Intrinsic::mips_nori_b
: {
2091 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2093 lowerMSASplatImm(Op
, 2, DAG
));
2094 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2096 case Intrinsic::mips_or_v
:
2097 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2099 case Intrinsic::mips_ori_b
:
2100 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2101 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2102 case Intrinsic::mips_pckev_b
:
2103 case Intrinsic::mips_pckev_h
:
2104 case Intrinsic::mips_pckev_w
:
2105 case Intrinsic::mips_pckev_d
:
2106 return DAG
.getNode(MipsISD::PCKEV
, DL
, Op
->getValueType(0),
2107 Op
->getOperand(1), Op
->getOperand(2));
2108 case Intrinsic::mips_pckod_b
:
2109 case Intrinsic::mips_pckod_h
:
2110 case Intrinsic::mips_pckod_w
:
2111 case Intrinsic::mips_pckod_d
:
2112 return DAG
.getNode(MipsISD::PCKOD
, DL
, Op
->getValueType(0),
2113 Op
->getOperand(1), Op
->getOperand(2));
2114 case Intrinsic::mips_pcnt_b
:
2115 case Intrinsic::mips_pcnt_h
:
2116 case Intrinsic::mips_pcnt_w
:
2117 case Intrinsic::mips_pcnt_d
:
2118 return DAG
.getNode(ISD::CTPOP
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2119 case Intrinsic::mips_sat_s_b
:
2120 case Intrinsic::mips_sat_s_h
:
2121 case Intrinsic::mips_sat_s_w
:
2122 case Intrinsic::mips_sat_s_d
:
2123 case Intrinsic::mips_sat_u_b
:
2124 case Intrinsic::mips_sat_u_h
:
2125 case Intrinsic::mips_sat_u_w
:
2126 case Intrinsic::mips_sat_u_d
: {
2127 // Report an error for out of range values.
2129 switch (Intrinsic
) {
2130 case Intrinsic::mips_sat_s_b
:
2131 case Intrinsic::mips_sat_u_b
: Max
= 7; break;
2132 case Intrinsic::mips_sat_s_h
:
2133 case Intrinsic::mips_sat_u_h
: Max
= 15; break;
2134 case Intrinsic::mips_sat_s_w
:
2135 case Intrinsic::mips_sat_u_w
: Max
= 31; break;
2136 case Intrinsic::mips_sat_s_d
:
2137 case Intrinsic::mips_sat_u_d
: Max
= 63; break;
2138 default: llvm_unreachable("Unmatched intrinsic");
2140 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2141 if (Value
< 0 || Value
> Max
)
2142 report_fatal_error("Immediate out of range");
2145 case Intrinsic::mips_shf_b
:
2146 case Intrinsic::mips_shf_h
:
2147 case Intrinsic::mips_shf_w
: {
2148 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2149 if (Value
< 0 || Value
> 255)
2150 report_fatal_error("Immediate out of range");
2151 return DAG
.getNode(MipsISD::SHF
, DL
, Op
->getValueType(0),
2152 Op
->getOperand(2), Op
->getOperand(1));
2154 case Intrinsic::mips_sldi_b
:
2155 case Intrinsic::mips_sldi_h
:
2156 case Intrinsic::mips_sldi_w
:
2157 case Intrinsic::mips_sldi_d
: {
2158 // Report an error for out of range values.
2160 switch (Intrinsic
) {
2161 case Intrinsic::mips_sldi_b
: Max
= 15; break;
2162 case Intrinsic::mips_sldi_h
: Max
= 7; break;
2163 case Intrinsic::mips_sldi_w
: Max
= 3; break;
2164 case Intrinsic::mips_sldi_d
: Max
= 1; break;
2165 default: llvm_unreachable("Unmatched intrinsic");
2167 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(3))->getSExtValue();
2168 if (Value
< 0 || Value
> Max
)
2169 report_fatal_error("Immediate out of range");
2172 case Intrinsic::mips_sll_b
:
2173 case Intrinsic::mips_sll_h
:
2174 case Intrinsic::mips_sll_w
:
2175 case Intrinsic::mips_sll_d
:
2176 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2177 truncateVecElts(Op
, DAG
));
2178 case Intrinsic::mips_slli_b
:
2179 case Intrinsic::mips_slli_h
:
2180 case Intrinsic::mips_slli_w
:
2181 case Intrinsic::mips_slli_d
:
2182 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0),
2183 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2184 case Intrinsic::mips_splat_b
:
2185 case Intrinsic::mips_splat_h
:
2186 case Intrinsic::mips_splat_w
:
2187 case Intrinsic::mips_splat_d
:
2188 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2189 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2190 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2191 // Instead we lower to MipsISD::VSHF and match from there.
2192 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2193 lowerMSASplatZExt(Op
, 2, DAG
), Op
->getOperand(1),
2195 case Intrinsic::mips_splati_b
:
2196 case Intrinsic::mips_splati_h
:
2197 case Intrinsic::mips_splati_w
:
2198 case Intrinsic::mips_splati_d
:
2199 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2200 lowerMSASplatImm(Op
, 2, DAG
), Op
->getOperand(1),
2202 case Intrinsic::mips_sra_b
:
2203 case Intrinsic::mips_sra_h
:
2204 case Intrinsic::mips_sra_w
:
2205 case Intrinsic::mips_sra_d
:
2206 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2207 truncateVecElts(Op
, DAG
));
2208 case Intrinsic::mips_srai_b
:
2209 case Intrinsic::mips_srai_h
:
2210 case Intrinsic::mips_srai_w
:
2211 case Intrinsic::mips_srai_d
:
2212 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0),
2213 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2214 case Intrinsic::mips_srari_b
:
2215 case Intrinsic::mips_srari_h
:
2216 case Intrinsic::mips_srari_w
:
2217 case Intrinsic::mips_srari_d
: {
2218 // Report an error for out of range values.
2220 switch (Intrinsic
) {
2221 case Intrinsic::mips_srari_b
: Max
= 7; break;
2222 case Intrinsic::mips_srari_h
: Max
= 15; break;
2223 case Intrinsic::mips_srari_w
: Max
= 31; break;
2224 case Intrinsic::mips_srari_d
: Max
= 63; break;
2225 default: llvm_unreachable("Unmatched intrinsic");
2227 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2228 if (Value
< 0 || Value
> Max
)
2229 report_fatal_error("Immediate out of range");
2232 case Intrinsic::mips_srl_b
:
2233 case Intrinsic::mips_srl_h
:
2234 case Intrinsic::mips_srl_w
:
2235 case Intrinsic::mips_srl_d
:
2236 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2237 truncateVecElts(Op
, DAG
));
2238 case Intrinsic::mips_srli_b
:
2239 case Intrinsic::mips_srli_h
:
2240 case Intrinsic::mips_srli_w
:
2241 case Intrinsic::mips_srli_d
:
2242 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0),
2243 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2244 case Intrinsic::mips_srlri_b
:
2245 case Intrinsic::mips_srlri_h
:
2246 case Intrinsic::mips_srlri_w
:
2247 case Intrinsic::mips_srlri_d
: {
2248 // Report an error for out of range values.
2250 switch (Intrinsic
) {
2251 case Intrinsic::mips_srlri_b
: Max
= 7; break;
2252 case Intrinsic::mips_srlri_h
: Max
= 15; break;
2253 case Intrinsic::mips_srlri_w
: Max
= 31; break;
2254 case Intrinsic::mips_srlri_d
: Max
= 63; break;
2255 default: llvm_unreachable("Unmatched intrinsic");
2257 int64_t Value
= cast
<ConstantSDNode
>(Op
->getOperand(2))->getSExtValue();
2258 if (Value
< 0 || Value
> Max
)
2259 report_fatal_error("Immediate out of range");
2262 case Intrinsic::mips_subv_b
:
2263 case Intrinsic::mips_subv_h
:
2264 case Intrinsic::mips_subv_w
:
2265 case Intrinsic::mips_subv_d
:
2266 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2268 case Intrinsic::mips_subvi_b
:
2269 case Intrinsic::mips_subvi_h
:
2270 case Intrinsic::mips_subvi_w
:
2271 case Intrinsic::mips_subvi_d
:
2272 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0),
2273 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2274 case Intrinsic::mips_vshf_b
:
2275 case Intrinsic::mips_vshf_h
:
2276 case Intrinsic::mips_vshf_w
:
2277 case Intrinsic::mips_vshf_d
:
2278 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2279 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
2280 case Intrinsic::mips_xor_v
:
2281 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2283 case Intrinsic::mips_xori_b
:
2284 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0),
2285 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2286 case Intrinsic::thread_pointer
: {
2287 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2288 return DAG
.getNode(MipsISD::ThreadPointer
, DL
, PtrVT
);
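// Reviewer note (added; not in the original file): most MSA arithmetic
// intrinsics above are deliberately lowered to generic ISD opcodes rather
// than target-specific nodes, so the usual DAG combines still apply to them.
// For example, IR such as
//   %r = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %a, <4 x i32> %a)
// becomes (add %a, %a), which the combiner may simplify further (e.g. to a
// shift) before instruction selection.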
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset  = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // For the N64 ABI, addresses have the underlying type MVT::i64. This
  // intrinsic, however, takes an i32 signed constant offset. The actual type
  // of the intrinsic is a scaled signed i10.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
                     /* Alignment = */ 16);
}
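// Worked example (added for illustration): a call such as
//   %v = call <4 x i32> @llvm.mips.ld.w(i8* %ptr, i32 32)
// arrives here with Address = %ptr and Offset = 32. The offset is folded
// into the address and the intrinsic becomes a single 16-byte-aligned
// vector load from %ptr + 32.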
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
  }
}
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value   = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset  = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // For the N64 ABI, addresses have the underlying type MVT::i64. This
  // intrinsic, however, takes an i32 signed constant offset. The actual type
  // of the intrinsic is a scaled signed i10.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
                      /* Alignment = */ 16);
}
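// The store path mirrors the load path above. For illustration (not from the
// original file), a call such as
//   call void @llvm.mips.st.w(<4 x i32> %v, i8* %ptr, i32 -16)
// becomes a single 16-byte-aligned vector store to %ptr - 16.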
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
  }
}
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  return SDValue();
}
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}
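// Note (added): isConstantOrUndefBUILD_VECTOR returns true as soon as *any*
// operand is a constant or undef, so the !isConstantOrUndefBUILD_VECTOR(Node)
// test in lowerBUILD_VECTOR below selects BUILD_VECTORs whose operands are
// all non-constant, defined values.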
// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget.isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value isn't an integer type we will have to bitcast
    // from an integer type first. Also, if there are any undefs, we must
    // lower them to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs)
      return Op;

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, MVT::i32));
    }
    return Vector;
  }

  return SDValue();
}
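// Worked example (added for illustration): a v4f32 splat of 1.0 takes the
// bitcast path above. isConstantSplat reports SplatBitSize == 32 with the
// bit pattern 0x3f800000, which is materialized as a v4i32 constant splat
// and then bitcast back to v4f32.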
// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above forms.
//
// For example:
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
//                                 i32 7, i32 6, i32 5, i32 4>
// is lowered to:
//   (SHF_H $w0, $w1, 27)
// where the 27 comes from:
//   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) {
  int SHFIndices[4] = { -1, -1, -1, -1 };

  if (Indices.size() < 4)
    return SDValue();

  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Indices.size(); j += 4) {
      int Idx = Indices[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SHFIndices[i] == -1)
        SHFIndices[i] = Idx;

      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      if (!(Idx == -1 || Idx == SHFIndices[i]))
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(32, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SHFIndices[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  SDLoc DL(Op);
  return DAG.getNode(MipsISD::SHF, DL, ResTy,
                     DAG.getTargetConstant(Imm, DL, MVT::i32),
                     Op->getOperand(0));
}
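// Note (added): spelling out the immediate from the example in the comment
// above: 3 + (2 << 2) + (1 << 4) + (0 << 6) = 3 + 8 + 16 + 0 = 27, i.e.
// 0b00011011, with SHFIndices[0] occupying the two least significant bits.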
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}
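// Added example: for the v4i32 mask <0, 4, 1, 5>,
//   fitsRegularPattern<int>(Begin, 2, End, 0, 1)
// matches elements 0 and 2 against the expected values 0 and 1, and
//   fitsRegularPattern<int>(Begin + 1, 2, End, 4, 1)
// matches elements 1 and 3 against 4 and 5; together these identify the
// pattern recognised by the ILVR matcher below.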
// Determine whether VECTOR_SHUFFLE is a SPLATI.
//
// It is a SPLATI when the mask is:
//   <x, x, x, ...>
// where x is any valid index.
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above form.
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
                                    SmallVector<int, 16> Indices,
                                    SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  int SplatIndex = -1;
  for (const auto &V : Indices) {
    if (V != -1) {
      SplatIndex = V;
      break;
    }
  }

  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
                                 0);
}
// Lower VECTOR_SHUFFLE into ILVEV (if possible).
//
// ILVEV interleaves the even elements from each vector.
//
// It is possible to lower into ILVEV when the mask consists of two of the
// following forms interleaved:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
//
// For example:
//   <0, 0, 2, 2, 4, 4, ...>
//   <0, n, 2, n+2, 4, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
}
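// Added example: for v4i32, the mask <0, 4, 2, 6> takes the even elements of
// both operands (0 and 2 from the first, 4 and 6 from the second) and so
// lowers to a single ilvev.w.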
// Lower VECTOR_SHUFFLE into ILVOD (if possible).
//
// ILVOD interleaves the odd elements from each vector.
//
// It is possible to lower into ILVOD when the mask consists of two of the
// following forms interleaved:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
//
// For example:
//   <1, 1, 3, 3, 5, 5, ...>
//   <1, n+1, 3, n+3, 5, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
}
// Lower VECTOR_SHUFFLE into ILVR (if possible).
//
// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
// the vector.
//
// It is possible to lower into ILVR when the mask consists of two of the
// following forms interleaved:
//   <0, 1, 2, ...>
//   <n, n+1, n+2, ...>
// where n is the number of elements in the vector.
//
// For example:
//   <0, 0, 1, 1, 2, 2, ...>
//   <0, n, 1, n+1, 2, n+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into ILVL (if possible).
//
// ILVL interleaves consecutive elements from the left (highest-indexed) half
// of the vector.
//
// It is possible to lower into ILVL when the mask consists of two of the
// following forms interleaved:
//   <x, x+1, x+2, ...>
//   <n+x, n+x+1, n+x+2, ...>
// where n is the number of elements in the vector and x is half n.
//
// For example:
//   <x, x, x+1, x+1, x+2, x+2, ...>
//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  unsigned HalfSize = Indices.size() / 2;
  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
                                   1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
}
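// Added example: for v4i32 (n = 4, x = 2), the mask <2, 6, 3, 7> draws its
// even result elements from elements 2, 3 of the first operand and its odd
// result elements from 6, 7 (elements 2, 3 of the second operand), matching
// the ILVL pattern and lowering to ilvl.w.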
// Lower VECTOR_SHUFFLE into PCKEV (if possible).
//
// PCKEV copies the even elements of each vector into the result vector.
//
// It is possible to lower into PCKEV when the mask consists of two of the
// following forms concatenated:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
//
// For example:
//   <0, 2, 4, ..., 0, 2, 4, ...>
//   <0, 2, 4, ..., n, n+2, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
}
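// Added example: for v4i32, the mask <0, 2, 4, 6> concatenates the even
// elements of the first operand (0, 2) with the even elements of the second
// (4, 6) and so lowers to a single pckev.w.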
// Lower VECTOR_SHUFFLE into PCKOD (if possible).
//
// PCKOD copies the odd elements of each vector into the result vector.
//
// It is possible to lower into PCKOD when the mask consists of two of the
// following forms concatenated:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
//
// For example:
//   <1, 3, 5, ..., 1, 3, 5, ...>
//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into VSHF.
//
// This mostly consists of converting the shuffle indices in Indices into a
// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
// if the type is v8i16 and all the indices are less than 8 then the second
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }

  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
       ++I)
    Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}
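// Note (added): when every index lies in [0, ResTyNumElts), only operand 0 is
// referenced, so Op0 and Op1 are both set to it and the emitted VSHF reads a
// single source register; the final operand swap compensates for VSHF's
// bitwise concatenation order described above.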
// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
  SDValue Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
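// Note (added): the matchers above are tried from the most specific node to
// the most general one; the final lowerVECTOR_SHUFFLE_VSHF call is the
// catch-all for any 128-bit shuffle the dedicated nodes cannot express.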
MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  if (!Subtarget.inMicroMipsMode()) {
    // Insert the real bposge32 instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
  } else {
    // Insert the real bposge32c instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
  }

  // Fill $FBB.
  Register VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
      .addReg(MI.getOperand(1).getReg())
      .addMBB(TBB);

  // Fill $FBB.
  Register RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FBB)
      .addReg(RD2)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// copy_u_w $rt, $ws, $n
// mtc1     $rt, $fd
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();

  if (Lane == 0) {
    Register Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    Register Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                                : &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
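// Note (added): the Lane == 0 fast path above relies on the FPU registers
// overlapping the MSA registers: $fN aliases the low 32 bits (sub_lo) of
// $wN, so a plain COPY suffices and can often be coalesced away entirely.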
// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode is the only mode supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
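//
// Note: subreg_to_reg re-labels $fs as the sub_lo lane of a full 128-bit
// vector register (usually without emitting any real code), which lets
// insve_w read the scalar as lane 0 of a vector operand.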
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $fs, n
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
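//
// Worked example (v4i32 with a variable lane index): the index is shifted
// left by 2 to form a byte offset, the first sld.b rotates the vector so the
// target element becomes element 0, the value is inserted there, and the
// second sld.b rotates by the negated offset (sld.b interprets its GPR
// operand modulo 16 bytes) to restore the original element order.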
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index.
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero.
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero.
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
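//
// Note: implicit_def supplies the full-width input that insert_subreg
// requires; only element 0 is defined before splati.w replicates it across
// the whole vector.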
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
// copy_u.h $rtemp, $wd[0]
// sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
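//
// Note: if the address operand was expanded into a 64-bit GPR (e.g. by a
// spill and reload on a 64-bit ABI), the 32-bit copy_u.h result is widened
// with a subreg_to_reg and sh64 is used instead; see the register class
// check below.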
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Ws = MI.getOperand(0).getReg();
  Register Rt = MI.getOperand(1).getReg();
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }
  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));

  MI.eraseFromParent();
  return BB;
}

// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA
// register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
// lh $rtemp, $addr
// fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
// Additionally, if the address is not modulo 16, 2 cases can occur:
//  a) Segmentation fault as the load instruction reads from a memory page
//     it's not supposed to.
//  b) The load crosses an implementation specific boundary, requiring OS
//     intervention.
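//
// Note: this is the converse of the store case above: if the address operand
// was expanded into a 64-bit GPR, lh64 is used and its result is narrowed to
// a GPR32 with a sub_32 copy before fill.h, which expects a 32-bit operand.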
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rt = RegInfo.createVirtualRegister(RC);

  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    MIB.add(MI.getOperand(i));

  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
    Rt = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd or FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $rtemp, $wtemp
// fexdo.w $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $rtemp, $wtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $rtemp, $wtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
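//
// Note: in the FGR64-on-mips32 sequence, fill.w leaves the low word of $fs
// in all four lanes; inserting the high word into lanes 1 and 3 makes both
// 64-bit elements equal to $fs, so fexdo.w converts a fully defined pair.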
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}

// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either an FGR32Opnd or FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up
//         the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// copy_s.w $rtemp, $wtemp[0]
// mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.d $rtemp, $wtemp2[0]
// dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.w $rtemp, $wtemp2[0]
// mtc1 $rtemp, $ftemp
// copy_s.w $rtemp2, $wtemp2[1]
// $fd = mthc1 $rtemp2, $ftemp
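//
// Note: on Mips32 the two halves of the f64 result are moved separately:
// mtc1 defines the low 32 bits of a temporary FGR64 register and mthc1
// merges in the high 32 bits to produce the final $fd.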
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  Register WPHI = Wtemp;

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}

// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// ffint_u.w $ws, $ws
// fexp2.w $wd, $ws, $wt
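//
// Note: ldi.w materializes integer 1 in every lane and ffint_u.w converts it
// to 1.0f, giving the first (multiplicand) operand of fexp2.w.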
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// ffint_u.d $ws, $ws
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}