//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
13 #include "MipsSEISelLowering.h"
14 #include "MipsMachineFunction.h"
15 #include "MipsRegisterInfo.h"
16 #include "MipsSubtarget.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/ISDOpcodes.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAG.h"
31 #include "llvm/CodeGen/SelectionDAGNodes.h"
32 #include "llvm/CodeGen/TargetInstrInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/CodeGen/ValueTypes.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/IR/Intrinsics.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/MachineValueType.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/raw_ostream.h"
52 #define DEBUG_TYPE "mips-isel"
static cl::opt<bool>
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                 cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::vector_valuetypes()) {
      for (MVT VT1 : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }
  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }
  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
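    // Illustrative note (not from the original source): Promote means an
    // f16 operation such as (fadd f16:$a, f16:$b) is legalized by extending
    // both operands to f32, performing the f32 FADD, and rounding the
    // result back, so only loads/stores use the f16 storage format.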
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }
  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64())
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }
  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }
  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }
  computeRegisterProperties(Subtarget.getRegisterInfo());
}
const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}
const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}
// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}
// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}
SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}
bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND node.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
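  // Illustrative example (not in the original source): with an i8 extend,
  //   (and (VEXTRACT_SEXT_ELT $a, $b, i8), 255)
  // has Mask + 1 == 256 == 2^8, so Log2 (8) equals ExtendTySize (8) and the
  // pair is rewritten below as (VEXTRACT_ZEXT_ELT $a, $b, i8), making the
  // AND redundant.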
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         makeArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}
// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}
// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}
// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
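// Illustrative example (not in the original source), for v16i8:
//   (or (and $a, splat(0x0F)), (and $b, splat(0xF0)))
// matches with $mask == splat(0x0F) and becomes
//   (vselect splat(0x0F), $a, $b)
// which MSA can then select as a single bitwise-select instruction
// (the bsel.v/binsl/binsr family).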
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find it's inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnesValue())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider to perform this optimization after type legalization.
  //   That allows to remove a workaround for types not supported natively.
  // - Take in account `-Os, -Oz` flags because this optimization
  //   increases code size.
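  // Worked example of the heuristic (illustrative): C == 7 decomposes as
  // 8 - 1, i.e. two steps (the ceiling 8 is a power of two and the
  // remainder 1 is free), so `x * 7` becomes `(x << 3) - x` and is
  // accepted; a constant needing more than MaxSteps powers of two keeps
  // the real multiply.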
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
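  // Worked example (illustrative): C == 11 gives floor_c == 8 and
  // ceil_c == 16; |11 - 8| <= |16 - 11|, so the add branch below recurses
  // into (add (shl x, 3), (add (shl x, 1), x)), i.e. x*8 + x*2 + x.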
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   nodes.
//
// See performDSPShiftCombine for more information about the transformation
// performed for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
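    // Illustrative example (not in the original source): after extracting an
    // i8 lane, (sra (shl $x, 24), 24) merely redoes the 8-bit sign-extension
    // the extract already performed ($d == 24, sizeof($c) == 8, and
    // 24 + 8 == 32), so the shift pair folds into VEXTRACT_SEXT_ELT alone.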
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
MipsSETargetLowering::lowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
1173 LoadSDNode
&Nd
= *cast
<LoadSDNode
>(Op
);
1175 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1176 return MipsTargetLowering::lowerLOAD(Op
, DAG
);
1178 // Replace a double precision load with two i32 loads and a buildpair64.
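  // E.g. (illustrative) an f64 load at [Ptr] becomes i32 loads from [Ptr]
  // and [Ptr + 4]; the std::swap below puts the halves in the right order
  // on big-endian targets before BuildPairF64 recombines them.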
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlignment(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32
  // stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain =
      DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
                   Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      std::min(Nd.getAlignment(), 4U),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                             Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, DL, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}
static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
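//
// E.g. (illustrative, assuming the DSP madd mapping below):
// i64 @llvm.mips.madd(i64 %acc, i32 %a, i32 %b) becomes
//   ac  = MTLOHI (extract %acc, 0), (extract %acc, 1)
//   v   = MipsISD::MAdd %a, %b, ac      ; untyped accumulator result
//   out = build_pair (MFLO v), (MFHI v)
//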
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
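    // E.g. (illustrative): splatting the i64 constant 0x0000000100000002
    // yields the v4i32 build_vector {2, 1, 2, 1} (low word first; the swap
    // below handles big-endian), which is bitcast back to v2i64 at the end
    // of the function.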
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);

    // We currently hold the parts in little endian order. Swap them if
    // necessary.
    if (BigEndian)
      std::swap(SplatValueA, SplatValueB);
  }

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
  // here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, do a vector shift instead

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}
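// Note (illustrative, not in the original source): truncateVecElts above
// masks each shift amount with (element size - 1), so for a v4i32 result
// lowerMSABitClear below computes v & ~(1 << (n & 31)) per element, matching
// the modulo behaviour of the MSA bclr.w instruction.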
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
1539 SDValue
MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
1540 SelectionDAG
&DAG
) const {
1542 unsigned Intrinsic
= cast
<ConstantSDNode
>(Op
->getOperand(0))->getZExtValue();
1543 switch (Intrinsic
) {
1546 case Intrinsic::mips_shilo
:
1547 return lowerDSPIntr(Op
, DAG
, MipsISD::SHILO
);
1548 case Intrinsic::mips_dpau_h_qbl
:
1549 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBL
);
1550 case Intrinsic::mips_dpau_h_qbr
:
1551 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBR
);
1552 case Intrinsic::mips_dpsu_h_qbl
:
1553 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBL
);
1554 case Intrinsic::mips_dpsu_h_qbr
:
1555 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBR
);
1556 case Intrinsic::mips_dpa_w_ph
:
1557 return lowerDSPIntr(Op
, DAG
, MipsISD::DPA_W_PH
);
1558 case Intrinsic::mips_dps_w_ph
:
1559 return lowerDSPIntr(Op
, DAG
, MipsISD::DPS_W_PH
);
1560 case Intrinsic::mips_dpax_w_ph
:
1561 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAX_W_PH
);
1562 case Intrinsic::mips_dpsx_w_ph
:
1563 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSX_W_PH
);
1564 case Intrinsic::mips_mulsa_w_ph
:
1565 return lowerDSPIntr(Op
, DAG
, MipsISD::MULSA_W_PH
);
1566 case Intrinsic::mips_mult
:
1567 return lowerDSPIntr(Op
, DAG
, MipsISD::Mult
);
1568 case Intrinsic::mips_multu
:
1569 return lowerDSPIntr(Op
, DAG
, MipsISD::Multu
);
1570 case Intrinsic::mips_madd
:
1571 return lowerDSPIntr(Op
, DAG
, MipsISD::MAdd
);
1572 case Intrinsic::mips_maddu
:
1573 return lowerDSPIntr(Op
, DAG
, MipsISD::MAddu
);
1574 case Intrinsic::mips_msub
:
1575 return lowerDSPIntr(Op
, DAG
, MipsISD::MSub
);
1576 case Intrinsic::mips_msubu
:
1577 return lowerDSPIntr(Op
, DAG
, MipsISD::MSubu
);
1578 case Intrinsic::mips_addv_b
:
1579 case Intrinsic::mips_addv_h
:
1580 case Intrinsic::mips_addv_w
:
1581 case Intrinsic::mips_addv_d
:
1582 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1584 case Intrinsic::mips_addvi_b
:
1585 case Intrinsic::mips_addvi_h
:
1586 case Intrinsic::mips_addvi_w
:
1587 case Intrinsic::mips_addvi_d
:
1588 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1589 lowerMSASplatImm(Op
, 2, DAG
));
1590 case Intrinsic::mips_and_v
:
1591 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1593 case Intrinsic::mips_andi_b
:
1594 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1595 lowerMSASplatImm(Op
, 2, DAG
));
1596 case Intrinsic::mips_bclr_b
:
1597 case Intrinsic::mips_bclr_h
:
1598 case Intrinsic::mips_bclr_w
:
1599 case Intrinsic::mips_bclr_d
:
1600 return lowerMSABitClear(Op
, DAG
);
1601 case Intrinsic::mips_bclri_b
:
1602 case Intrinsic::mips_bclri_h
:
1603 case Intrinsic::mips_bclri_w
:
1604 case Intrinsic::mips_bclri_d
:
1605 return lowerMSABitClearImm(Op
, DAG
);
1606 case Intrinsic::mips_binsli_b
:
1607 case Intrinsic::mips_binsli_h
:
1608 case Intrinsic::mips_binsli_w
:
1609 case Intrinsic::mips_binsli_d
: {
1610 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1611 EVT VecTy
= Op
->getValueType(0);
1612 EVT EltTy
= VecTy
.getVectorElementType();
1613 if (Op
->getConstantOperandVal(3) >= EltTy
.getSizeInBits())
1614 report_fatal_error("Immediate out of range");
1615 APInt Mask
= APInt::getHighBitsSet(EltTy
.getSizeInBits(),
1616 Op
->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_bmzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_bneg_b:
  case Intrinsic::mips_bneg_h:
  case Intrinsic::mips_bneg_w:
  case Intrinsic::mips_bneg_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);
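
    // Note (illustrative): the expansion below is Op1 ^ (1 << Op2) per
    // element; truncateVecElts masks each shift amount to the element width
    // first, so bit positions are interpreted modulo the element size.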
    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bnegi_b:
  case Intrinsic::mips_bnegi_h:
  case Intrinsic::mips_bnegi_w:
  case Intrinsic::mips_bnegi_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3),
                       Op->getOperand(2));
  case Intrinsic::mips_bseli_b:
    // bseli_b(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
                       Op->getOperand(2));
  case Intrinsic::mips_bset_b:
  case Intrinsic::mips_bset_h:
  case Intrinsic::mips_bset_w:
  case Intrinsic::mips_bset_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bseti_b:
  case Intrinsic::mips_bseti_h:
  case Intrinsic::mips_bseti_w:
  case Intrinsic::mips_bseti_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
  case Intrinsic::mips_cle_u_b:
  case Intrinsic::mips_cle_u_h:
  case Intrinsic::mips_cle_u_w:
  case Intrinsic::mips_cle_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_clei_u_b:
  case Intrinsic::mips_clei_u_h:
  case Intrinsic::mips_clei_u_w:
  case Intrinsic::mips_clei_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
  case Intrinsic::mips_clt_s_b:
  case Intrinsic::mips_clt_s_h:
  case Intrinsic::mips_clt_s_w:
  case Intrinsic::mips_clt_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLT);
  case Intrinsic::mips_clti_s_b:
  case Intrinsic::mips_clti_s_h:
  case Intrinsic::mips_clti_s_w:
  case Intrinsic::mips_clti_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
  case Intrinsic::mips_clt_u_b:
  case Intrinsic::mips_clt_u_h:
  case Intrinsic::mips_clt_u_w:
  case Intrinsic::mips_clt_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_clti_u_b:
  case Intrinsic::mips_clti_u_h:
  case Intrinsic::mips_clti_u_w:
  case Intrinsic::mips_clti_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
  case Intrinsic::mips_copy_s_b:
  case Intrinsic::mips_copy_s_h:
  case Intrinsic::mips_copy_s_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
  case Intrinsic::mips_copy_s_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_copy_u_b:
  case Intrinsic::mips_copy_u_h:
  case Intrinsic::mips_copy_u_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
  case Intrinsic::mips_copy_u_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      // Note: When i64 is illegal, this results in copy_s.w instructions
      // instead of copy_u.w instructions. This makes no difference to the
      // behaviour since i64 is only illegal when the register file is 32-bit.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
  case Intrinsic::mips_fceq_w:
  case Intrinsic::mips_fceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOEQ);
  case Intrinsic::mips_fcle_w:
  case Intrinsic::mips_fcle_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLE);
  case Intrinsic::mips_fclt_w:
  case Intrinsic::mips_fclt_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLT);
  case Intrinsic::mips_fcne_w:
  case Intrinsic::mips_fcne_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETONE);
  case Intrinsic::mips_fcor_w:
  case Intrinsic::mips_fcor_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETO);
  case Intrinsic::mips_fcueq_w:
  case Intrinsic::mips_fcueq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUEQ);
  case Intrinsic::mips_fcule_w:
  case Intrinsic::mips_fcule_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_fcult_w:
  case Intrinsic::mips_fcult_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_fcun_w:
  case Intrinsic::mips_fcun_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUO);
  case Intrinsic::mips_fcune_w:
  case Intrinsic::mips_fcune_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUNE);
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ffint_u_w:
  case Intrinsic::mips_ffint_u_d:
    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ffint_s_w:
  case Intrinsic::mips_ffint_s_d:
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_fill_b:
  case Intrinsic::mips_fill_h:
  case Intrinsic::mips_fill_w:
  case Intrinsic::mips_fill_d: {
    EVT ResTy = Op->getValueType(0);
    SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
                                 Op->getOperand(1));

    // If ResTy is v2i64 then the type legalizer will break this node down into
    // an equivalent v4i32.
    return DAG.getBuildVector(ResTy, DL, Ops);
  }
  case Intrinsic::mips_fexp2_w:
  case Intrinsic::mips_fexp2_d: {
    // TODO: If intrinsics have fast-math-flags, propagate them.
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(
        ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
        DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
  }
  case Intrinsic::mips_flog2_w:
  case Intrinsic::mips_flog2_d:
    return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fmadd_w:
  case Intrinsic::mips_fmadd_d:
    return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_fmul_w:
  case Intrinsic::mips_fmul_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fmsub_w:
  case Intrinsic::mips_fmsub_d: {
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  }
  case Intrinsic::mips_frint_w:
  case Intrinsic::mips_frint_d:
    return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsqrt_w:
  case Intrinsic::mips_fsqrt_d:
    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsub_w:
  case Intrinsic::mips_fsub_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ftrunc_u_w:
  case Intrinsic::mips_ftrunc_u_d:
    return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ftrunc_s_w:
  case Intrinsic::mips_ftrunc_s_d:
    return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ilvev_b:
  case Intrinsic::mips_ilvev_h:
  case Intrinsic::mips_ilvev_w:
  case Intrinsic::mips_ilvev_d:
    return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvl_b:
  case Intrinsic::mips_ilvl_h:
  case Intrinsic::mips_ilvl_w:
  case Intrinsic::mips_ilvl_d:
    return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvod_b:
  case Intrinsic::mips_ilvod_h:
  case Intrinsic::mips_ilvod_w:
  case Intrinsic::mips_ilvod_d:
    return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvr_b:
  case Intrinsic::mips_ilvr_h:
  case Intrinsic::mips_ilvr_w:
  case Intrinsic::mips_ilvr_d:
    return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_insert_b:
  case Intrinsic::mips_insert_h:
  case Intrinsic::mips_insert_w:
  case Intrinsic::mips_insert_d:
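    // The intrinsic operand order is (vec, idx, val) while
    // ISD::INSERT_VECTOR_ELT expects (vec, val, idx), hence operands 1, 3, 2
    // below.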
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
  case Intrinsic::mips_insve_b:
  case Intrinsic::mips_insve_h:
  case Intrinsic::mips_insve_w:
  case Intrinsic::mips_insve_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_insve_b: Max = 15; break;
    case Intrinsic::mips_insve_h: Max = 7; break;
    case Intrinsic::mips_insve_w: Max = 3; break;
    case Intrinsic::mips_insve_d: Max = 1; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
                       DAG.getConstant(0, DL, MVT::i32));
  }
  case Intrinsic::mips_ldi_b:
  case Intrinsic::mips_ldi_h:
  case Intrinsic::mips_ldi_w:
  case Intrinsic::mips_ldi_d:
    return lowerMSASplatImm(Op, 1, DAG, true);
  case Intrinsic::mips_lsa:
  case Intrinsic::mips_dlsa: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_maddv_b:
  case Intrinsic::mips_maddv_h:
  case Intrinsic::mips_maddv_w:
  case Intrinsic::mips_maddv_d: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_max_s_b:
  case Intrinsic::mips_max_s_h:
  case Intrinsic::mips_max_s_w:
  case Intrinsic::mips_max_s_d:
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_max_u_b:
  case Intrinsic::mips_max_u_h:
  case Intrinsic::mips_max_u_w:
  case Intrinsic::mips_max_u_d:
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_maxi_s_b:
  case Intrinsic::mips_maxi_s_h:
  case Intrinsic::mips_maxi_s_w:
  case Intrinsic::mips_maxi_s_d:
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
  case Intrinsic::mips_maxi_u_b:
  case Intrinsic::mips_maxi_u_h:
  case Intrinsic::mips_maxi_u_w:
  case Intrinsic::mips_maxi_u_d:
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_min_s_b:
  case Intrinsic::mips_min_s_h:
  case Intrinsic::mips_min_s_w:
  case Intrinsic::mips_min_s_d:
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_min_u_b:
  case Intrinsic::mips_min_u_h:
  case Intrinsic::mips_min_u_w:
  case Intrinsic::mips_min_u_d:
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_mini_s_b:
  case Intrinsic::mips_mini_s_h:
  case Intrinsic::mips_mini_s_w:
  case Intrinsic::mips_mini_s_d:
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
  case Intrinsic::mips_mini_u_b:
  case Intrinsic::mips_mini_u_h:
  case Intrinsic::mips_mini_u_w:
  case Intrinsic::mips_mini_u_d:
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_mod_s_b:
  case Intrinsic::mips_mod_s_h:
  case Intrinsic::mips_mod_s_w:
  case Intrinsic::mips_mod_s_d:
    return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mod_u_b:
  case Intrinsic::mips_mod_u_h:
  case Intrinsic::mips_mod_u_w:
  case Intrinsic::mips_mod_u_d:
    return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mulv_b:
  case Intrinsic::mips_mulv_h:
  case Intrinsic::mips_mulv_w:
  case Intrinsic::mips_mulv_d:
    return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_msubv_b:
  case Intrinsic::mips_msubv_h:
  case Intrinsic::mips_msubv_w:
  case Intrinsic::mips_msubv_d: {
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
                                   Op->getOperand(2), Op->getOperand(3)));
  }
  case Intrinsic::mips_nlzc_b:
  case Intrinsic::mips_nlzc_h:
  case Intrinsic::mips_nlzc_w:
  case Intrinsic::mips_nlzc_d:
    return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_nor_v: {
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1), Op->getOperand(2));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::mips_nori_b: {
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1),
                              lowerMSASplatImm(Op, 2, DAG));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::mips_or_v:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ori_b:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_pckev_b:
  case Intrinsic::mips_pckev_h:
  case Intrinsic::mips_pckev_w:
  case Intrinsic::mips_pckev_d:
    return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pckod_b:
  case Intrinsic::mips_pckod_h:
  case Intrinsic::mips_pckod_w:
  case Intrinsic::mips_pckod_d:
    return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pcnt_b:
  case Intrinsic::mips_pcnt_h:
  case Intrinsic::mips_pcnt_w:
  case Intrinsic::mips_pcnt_d:
    return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_sat_s_b:
  case Intrinsic::mips_sat_s_h:
  case Intrinsic::mips_sat_s_w:
  case Intrinsic::mips_sat_s_d:
  case Intrinsic::mips_sat_u_b:
  case Intrinsic::mips_sat_u_h:
  case Intrinsic::mips_sat_u_w:
  case Intrinsic::mips_sat_u_d: {
    // Report an error for out of range values.
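    // The immediate selects a bit position: the element is saturated to fit
    // in (Value + 1) bits, so e.g. sat_s.w accepts 0..31 for its 32-bit
    // elements.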
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_sat_s_b:
    case Intrinsic::mips_sat_u_b: Max = 7; break;
    case Intrinsic::mips_sat_s_h:
    case Intrinsic::mips_sat_u_h: Max = 15; break;
    case Intrinsic::mips_sat_s_w:
    case Intrinsic::mips_sat_u_w: Max = 31; break;
    case Intrinsic::mips_sat_s_d:
    case Intrinsic::mips_sat_u_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_shf_b:
  case Intrinsic::mips_shf_h:
  case Intrinsic::mips_shf_w: {
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > 255)
      report_fatal_error("Immediate out of range");
    return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_sldi_b:
  case Intrinsic::mips_sldi_h:
  case Intrinsic::mips_sldi_w:
  case Intrinsic::mips_sldi_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_sldi_b: Max = 15; break;
    case Intrinsic::mips_sldi_h: Max = 7; break;
    case Intrinsic::mips_sldi_w: Max = 3; break;
    case Intrinsic::mips_sldi_d: Max = 1; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_sll_b:
  case Intrinsic::mips_sll_h:
  case Intrinsic::mips_sll_w:
  case Intrinsic::mips_sll_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_slli_b:
  case Intrinsic::mips_slli_h:
  case Intrinsic::mips_slli_w:
  case Intrinsic::mips_slli_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_splat_b:
  case Intrinsic::mips_splat_h:
  case Intrinsic::mips_splat_w:
  case Intrinsic::mips_splat_d:
    // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
    // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
    // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
    // Instead we lower to MipsISD::VSHF and match from there.
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
                       Op->getOperand(1));
  case Intrinsic::mips_splati_b:
  case Intrinsic::mips_splati_h:
  case Intrinsic::mips_splati_w:
  case Intrinsic::mips_splati_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
                       Op->getOperand(1));
  case Intrinsic::mips_sra_b:
  case Intrinsic::mips_sra_h:
  case Intrinsic::mips_sra_w:
  case Intrinsic::mips_sra_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_srai_b:
  case Intrinsic::mips_srai_h:
  case Intrinsic::mips_srai_w:
  case Intrinsic::mips_srai_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_srari_b:
  case Intrinsic::mips_srari_h:
  case Intrinsic::mips_srari_w:
  case Intrinsic::mips_srari_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_srari_b: Max = 7; break;
    case Intrinsic::mips_srari_h: Max = 15; break;
    case Intrinsic::mips_srari_w: Max = 31; break;
    case Intrinsic::mips_srari_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_srl_b:
  case Intrinsic::mips_srl_h:
  case Intrinsic::mips_srl_w:
  case Intrinsic::mips_srl_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
                       truncateVecElts(Op, DAG));
  case Intrinsic::mips_srli_b:
  case Intrinsic::mips_srli_h:
  case Intrinsic::mips_srli_w:
  case Intrinsic::mips_srli_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_srlri_b:
  case Intrinsic::mips_srlri_h:
  case Intrinsic::mips_srlri_w:
  case Intrinsic::mips_srlri_d: {
    // Report an error for out of range values.
    int64_t Max;
    switch (Intrinsic) {
    case Intrinsic::mips_srlri_b: Max = 7; break;
    case Intrinsic::mips_srlri_h: Max = 15; break;
    case Intrinsic::mips_srlri_w: Max = 31; break;
    case Intrinsic::mips_srlri_d: Max = 63; break;
    default: llvm_unreachable("Unmatched intrinsic");
    }
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
    if (Value < 0 || Value > Max)
      report_fatal_error("Immediate out of range");
    return SDValue();
  }
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_subvi_b:
  case Intrinsic::mips_subvi_h:
  case Intrinsic::mips_subvi_w:
  case Intrinsic::mips_subvi_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
  }
  }
}
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);
  // For N64, addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
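  // After scaling this means that, for example, ld.w accepts byte offsets
  // that are multiples of 4 in the range [-2048, 2044].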
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
                     /* Alignment = */ 16);
}
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
  }
}
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);
  // For N64, addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
                      /* Alignment = */ 16);
}
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
  }
}
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  return Op;
}
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}
// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
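//
// For example: a v8i16 splat of 42 fits a signed 10-bit immediate and is left
// legal (it is normally matched to ldi.h later), while a v8i16 splat of
// 0x1234 does not fit and is instead materialized through getConstant and
// bitcast back to the requested type if necessary.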
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget.isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value isn't an integer type we will have to bitcast
    // from an integer type first. Also, if there are any undefs, we must
    // lower them to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs)
      return Op;

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, MVT::i32));
    }
    return Vector;
  }

  return SDValue();
}
// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above forms.
//
// For example:
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
//                                 i32 7, i32 6, i32 5, i32 4>
// is lowered to:
//   (SHF_H $w0, $w1, 27)
// where the 27 comes from:
//   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) {
  int SHFIndices[4] = { -1, -1, -1, -1 };

  if (Indices.size() < 4)
    return SDValue();

  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Indices.size(); j += 4) {
      int Idx = Indices[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SHFIndices[i] == -1)
        SHFIndices[i] = Idx;

      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      if (!(Idx == -1 || Idx == SHFIndices[i]))
        return SDValue();
    }
  }
  // Calculate the immediate. Replace any remaining undefs with zero
  int Imm = 0;
  for (int i = 3; i >= 0; --i) {
    int Idx = SHFIndices[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }
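  // For the example in the comment above, SHFIndices is {3, 2, 1, 0} and this
  // loop accumulates Imm = (0 << 6) | (1 << 4) | (2 << 2) | 3 = 27.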
  SDLoc DL(Op);
  return DAG.getNode(MipsISD::SHF, DL, ResTy,
                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
}
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}
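
// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) accepts masks
// whose even positions hold <0, 2, 4, ...>, with -1 (undef) entries treated
// as matching any expected value.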
// Determine whether VECTOR_SHUFFLE is a SPLATI.
//
// It is a SPLATI when the mask is:
//   <x, x, x, ...>
// where x is any valid index.
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above form.
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
                                    SmallVector<int, 16> Indices,
                                    SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  int SplatIndex = -1;
  for (const auto &V : Indices) {
    if (V != -1) {
      SplatIndex = V;
      break;
    }
  }

  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
                                 0);
}
// Lower VECTOR_SHUFFLE into ILVEV (if possible).
//
// ILVEV interleaves the even elements from each vector.
//
// It is possible to lower into ILVEV when the mask consists of two of the
// following forms interleaved:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 2, 2, 4, 4, ...>
//   <0, n, 2, n+2, 4, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
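// As a concrete instance, for v4i32 (n = 4) the mask <0, 4, 2, 6> matches the
// second interleaved form above and is accepted below.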
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the even elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into ILVOD (if possible).
//
// ILVOD interleaves the odd elements from each vector.
//
// It is possible to lower into ILVOD when the mask consists of two of the
// following forms interleaved:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 1, 3, 3, 5, 5, ...>
//   <1, n+1, 3, n+3, 5, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the odd elements of one half or the
  // other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
}
// Lower VECTOR_SHUFFLE into ILVR (if possible).
//
// ILVR interleaves consecutive elements from the right (lowest-indexed) half
// of each vector.
//
// It is possible to lower into ILVR when the mask consists of two of the
// following forms interleaved:
//   <0, 1, 2, ...>
//   <n, n+1, n+2, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 1, 1, 2, 2, ...>
//   <0, n, 1, n+1, 2, n+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the right (lowest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into ILVL (if possible).
//
// ILVL interleaves consecutive elements from the left (highest-indexed) half
// of each vector.
//
// It is possible to lower into ILVL when the mask consists of two of the
// following forms interleaved:
//   <x, x+1, x+2, ...>
//   <n+x, n+x+1, n+x+2, ...>
// where n is the number of elements in the vector and x is half n.
// For example:
//   <x, x, x+1, x+1, x+2, x+2, ...>
//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  unsigned HalfSize = Indices.size() / 2;
  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &End = Indices.end();

  // Check even elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
                                   1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into PCKEV (if possible).
//
// PCKEV copies the even elements of each vector into the result vector.
//
// It is possible to lower into PCKEV when the mask consists of two of the
// following forms concatenated:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 2, 4, ..., 0, 2, 4, ...>
//   <0, 2, 4, ..., n, n+2, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
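// As a concrete instance, for v4i32 (n = 4) the mask <0, 2, 4, 6> matches the
// second concatenated form above and is accepted below.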
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into PCKOD (if possible).
//
// PCKOD copies the odd elements of each vector into the result vector.
//
// It is possible to lower into PCKOD when the mask consists of two of the
// following forms concatenated:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 3, 5, ..., 1, 3, 5, ...>
//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  const auto &Begin = Indices.begin();
  const auto &Mid = Indices.begin() + Indices.size() / 2;
  const auto &End = Indices.end();

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into VSHF.
//
// This mostly consists of converting the shuffle indices in Indices into a
// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
// if the type is v8i16 and all the indices are less than 8 then the second
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }

  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
       ++I)
    Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}
// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
  SDValue Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  if (!Subtarget.inMicroMipsMode()) {
    // Insert the real bposge32 instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
  } else {
    // Insert the real bposge32c instruction to $BB.
    BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
  }

  // Fill $FBB.
  Register VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
    .addReg(MI.getOperand(1).getReg())
    .addMBB(TBB);

  // Fill $FBB.
  Register RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
    .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// copy_u_w $rt, $ws, $n
// mtc1     $rt, $fd
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
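// (Lane 0 of an MSA register $w<N> occupies the same storage as the scalar FP
// register $f<N>, which is why the lane-0 case below can be reduced to a plain
// COPY that the register coalescer is often able to remove.)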
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();

  if (Lane == 0) {
    unsigned Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    Register Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                                : &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode is the only mode supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
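
// Illustrative expansion (a sketch; register numbers are invented): the lane
// operand is doubled above, so only lane 0 of a v2f64 takes the copy-only
// path; lane 1 becomes:
//
//   splati.d $w1, $w0[1]      # move lane 1 down to lane 0
//   $f4 = COPY $w1:sub_64     # read it through the sub_64 subregister
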
// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
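
// Illustrative expansion (a sketch; register numbers are invented):
// insert_fw_pseudo $w2, $w2_in, 1, $f4 becomes roughly:
//
//   $w1 = SUBREG_TO_REG 0, $f4, sub_lo   # view $f4 as lane 0 of a vector
//   insve.w $w2[1], $w1[0]               # insert into lane 1, other lanes
//                                        # taken from $w2_in
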
// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $fs, n
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Wd_in = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();
  Register Fs = MI.getOperand(3).getReg();
  Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
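
// Illustrative expansion (a sketch): the f64 case mirrors INSERT_FW at 64-bit
// granularity, e.g. insert_fd_pseudo $w2, $w2_in, 1, $f4:
//
//   $w1 = SUBREG_TO_REG 0, $f4, sub_64
//   insve.d $w2[1], $w1[0]
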
// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
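
// Worked example (a sketch; register names are invented): inserting GPR $5
// at runtime lane $6 of the v4i32 in $w0 (EltSizeInBytes == 4, so
// EltLog2Size == 2) follows the rotate/insert/rotate-back pattern:
//
//   sll      $t0, $6, 2        # lane index -> byte index
//   sld.b    $w1, $w0[$t0]     # rotate the target lane down to element 0
//   insert.w $w1[0], $5        # insert at the now-constant lane 0
//   sub      $t1, $zero, $t0   # sld.b takes $rt modulo 16, so negation
//   sld.b    $w0, $w1[$t1]     # rotates everything back into place
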
// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:sub_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
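
// Illustrative expansion (a sketch; register numbers are invented):
// fill_fw_pseudo $w0, $f2 becomes roughly:
//
//   $w1 = INSERT_SUBREG undef, $f2, sub_lo  # $f2 into lane 0, rest undef
//   splati.w $w0, $w1[0]                    # broadcast lane 0 to all lanes
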
// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:sub_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
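
// Illustrative expansion (a sketch): the f64 analogue of FILL_FW, e.g.
// fill_fd_pseudo $w0, $f2:
//
//   $w1 = INSERT_SUBREG undef, $f2, sub_64
//   splati.d $w0, $w1[0]
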
// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
// copy_u.h $rtemp,$wd[0]
// sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would overwrite the memory after
// the destination. It would require half floats to be allocated 16 bytes(!)
// of space.
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Ws = MI.getOperand(0).getReg();
  Register Rt = MI.getOperand(1).getReg();
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));

  MI.eraseFromParent();
  return BB;
}
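
// Illustrative expansion (a sketch; registers and offset are invented),
// O32 case with a GPR32 address operand:
//
//   copy_u.h $t0, $w0[0]      # extract the raw 16-bit value
//   sh       $t0, 0($4)       # store exactly two bytes; st.h would write 16
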
// Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
// lh $rtemp, $addr
// fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
//         Additionally, if the address is not modulo 16, 2 cases can occur:
//          a) Segmentation fault as the load instruction reads from a memory
//             page it's not supposed to.
//          b) The load crosses an implementation specific boundary, requiring
//             OS intervention.
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  Register Rt = RegInfo.createVirtualRegister(RC);

  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    MIB.add(MI.getOperand(i));

  if (!UsingMips32) {
    Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
    Rt = Tmp;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);

  MI.eraseFromParent();
  return BB;
}
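
// Illustrative expansion (a sketch; registers and offset are invented),
// O32 case:
//
//   lh     $t0, 0($4)         # load exactly two bytes (no vector over-read)
//   fill.h $w0, $t0           # replicate the value into every lane
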
// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd, FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         in the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// fexdo.h $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $wtemp, $rtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $wtemp, $rtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}
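
// Illustrative expansion (a sketch; register numbers are invented) of the
// simplest (FGR32Opnd) case:
//
//   mfc1    $t0, $f1          # cycle the operand through a GPR (see above)
//   fill.w  $w1, $t0          # replicate to all lanes; avoids spurious
//                             # exceptions from undef lanes
//   fexdo.h $w0, $w1, $w1     # round f32 -> f16
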
// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up
//         in the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// copy_s.w $rtemp, $wtemp[0]
// mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.d $rtemp, $wtemp2[0]
// dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
// fexupr.w $wtemp, $ws
// fexupr.d $wtemp2, $wtemp
// copy_s.w $rtemp, $wtemp2[0]
// mtc1 $rtemp, $ftemp
// copy_s.w $rtemp2, $wtemp2[1]
// $fd = mthc1 $rtemp2, $ftemp
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  Register WPHI = Wtemp;

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}
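
// Illustrative expansion (a sketch; register numbers are invented) of the
// FGR32Opnd case:
//
//   fexupr.w $w1, $w0         # widen f16 -> f32 in each lane
//   copy_s.w $t0, $w1[0]      # extract lane 0 into a GPR (safety copy)
//   mtc1     $t0, $f0         # move the result into the FPU register
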
// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// fexp2.w $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt)
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
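
// Illustrative expansion (a sketch; register numbers are invented):
// fexp2_w_1_pseudo $w0, $w1 emits:
//
//   ldi.w     $w2, 1          # splat integer 1
//   ffint_u.w $w3, $w2        # convert to 1.0f per lane
//   fexp2.w   $w0, $w3, $w1   # $w0 = 1.0 * 2^$w1, element-wise
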
// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  Register Ws1 = RegInfo.createVirtualRegister(RC);
  Register Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI.getDebugLoc();

  // Splat 1.0 into a vector
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * fexp2(Wt)
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.