1 //===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the VSX extension to the PowerPC instruction set.
11 //===----------------------------------------------------------------------===//
13 // *********************************** NOTE ***********************************
14 // ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
15 // ** which VMX and VSX instructions are lane-sensitive and which are not. **
16 // ** A lane-sensitive instruction relies, implicitly or explicitly, on **
17 // ** whether lanes are numbered from left to right. An instruction like **
18 // ** VADDFP is not lane-sensitive, because each lane of the result vector **
19 // ** relies only on the corresponding lane of the source vectors. However, **
20 // ** an instruction like VMULESB is lane-sensitive, because "even" and **
21 // ** "odd" lanes are different for big-endian and little-endian numbering. **
23 // ** When adding new VMX and VSX instructions, please consider whether they **
24 // ** are lane-sensitive. If so, they must be added to a switch statement **
25 // ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
26 // ****************************************************************************
28 // *********************************** NOTE ***********************************
29 // ** When adding new anonymous patterns to this file, please add them to **
30 // ** the section titled Anonymous Patterns. Chances are that the existing **
31 // ** predicate blocks already contain a combination of features that you **
32 // ** are after. There is a list of blocks at the top of the section. If **
33 // ** you definitely need a new combination of predicates, please add that **
34 // ** combination to the list. **
35 // ** File Structure: **
36 // ** - Custom PPCISD node definitions **
37 // ** - Predicate definitions: predicates to specify the subtargets for **
38 // ** which an instruction or pattern can be emitted. **
39 // ** - Instruction formats: classes instantiated by the instructions. **
40 // ** These generally correspond to instruction formats in section 1.6 of **
41 // ** the ISA document. **
42 // ** - Instruction definitions: the actual definitions of the instructions **
43 // ** often including input patterns that they match. **
44 // ** - Helper DAG definitions: We define a number of dag objects to use as **
45 // ** input or output patterns for conciseness of the code. **
46 // ** - Anonymous patterns: input patterns that an instruction matches can **
47 // ** often not be specified as part of the instruction definition, so an **
48 // ** anonymous pattern must be specified mapping an input pattern to an **
49 // ** output pattern. These are generally guarded by subtarget predicates. **
50 // ** - Instruction aliases: used to define extended mnemonics for assembly **
51 // ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). **
52 // ****************************************************************************
54 def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
55 SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
58 def SDT_PPCfpexth : SDTypeProfile<1, 2, [
59 SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2>
62 def SDT_PPCldsplat : SDTypeProfile<1, 1, [
63 SDTCisVec<0>, SDTCisPtrTy<1>
66 // Little-endian-specific nodes.
67 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
68 SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
70 def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [
71 SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
73 def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
76 def SDTVecConv : SDTypeProfile<1, 2, [
77 SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
79 def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
80 SDTCisVec<0>, SDTCisPtrTy<1>
82 def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
83 SDTCisVec<0>, SDTCisPtrTy<1>
86 def SDT_PPCxxperm : SDTypeProfile<1, 3, [
87 SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>,
88 SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>;
89 //--------------------------- Custom PPC nodes -------------------------------//
90 def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
91 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
92 def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
93 [SDNPHasChain, SDNPMayStore]>;
94 def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
95 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
96 def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
97 [SDNPHasChain, SDNPMayStore]>;
98 def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
99 def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
100 def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
101 def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
102 def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
103 def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
104 def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
106 def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>;
107 def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
108 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
109 def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
110 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
111 def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat,
112 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
113 def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat,
114 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
115 def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
116 SDTypeProfile<1, 1, []>, []>;
118 def PPCxxperm : SDNode<"PPCISD::XXPERM", SDT_PPCxxperm, []>;
119 //-------------------------- Predicate definitions ---------------------------//
120 def HasVSX : Predicate<"Subtarget->hasVSX()">;
121 def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
122 def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
123 def IsPPC64 : Predicate<"Subtarget->isPPC64()">;
124 def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
125 def NoP8Vector : Predicate<"!Subtarget->hasP8Vector()">;
126 def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
127 def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
128 def NoP9Vector : Predicate<"!Subtarget->hasP9Vector()">;
129 def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
130 def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
131 def NoP10Vector: Predicate<"!Subtarget->hasP10Vector()">;
132 def HasP10Vector: Predicate<"Subtarget->hasP10Vector()">;
134 def PPCldsplatAlign16 : PatFrag<(ops node:$ptr), (PPCldsplat node:$ptr), [{
135 return cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
136 isOffsetMultipleOf(N, 16);
139 //--------------------- VSX-specific instruction formats ---------------------//
140 // By default, all VSX instructions are to be selected over their Altivec
141 // counter parts and they do not have unmodeled sideeffects.
142 let AddedComplexity = 400, hasSideEffects = 0 in {
143 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
144 string asmstr, InstrItinClass itin, Intrinsic Int,
145 ValueType OutTy, ValueType InTy> {
146 let BaseName = asmbase in {
147 def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
148 !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
149 [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
151 def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
152 !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
154 (InTy (PPCvcmp_rec InTy:$XA, InTy:$XB, xo)))]>,
159 // Instruction form with a single input register for instructions such as
160 // XXPERMDI. The reason for defining this is that specifying multiple chained
161 // operands (such as loads) to an instruction will perform both chained
162 // operations rather than coalescing them into a single register - even though
163 // the source memory location is the same. This simply forces the instruction
164 // to use the same register for both inputs.
165 // For example, an output DAG such as this:
166 // (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src ), 0))
167 // would result in two load instructions emitted and used as separate inputs
168 // to the XXPERMDI instruction.
169 class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
170 InstrItinClass itin, list<dag> pattern>
171 : XX3Form_2<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
175 let Predicates = [HasVSX, HasP9Vector] in {
176 class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
178 : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$RST), (ins vrrc:$RB),
179 !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
181 // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
182 class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
184 : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
186 // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less),
187 // So we use different operand class for VRB
188 class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
189 RegisterOperand vbtype, list<dag> pattern>
190 : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$RST), (ins vbtype:$RB),
191 !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
193 // [PO VRT XO VRB XO /]
194 class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
196 : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$RST), (ins vrrc:$RB),
197 !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
199 // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
200 class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
202 : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
204 // [PO T XO B XO BX /]
205 class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
207 : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$RT), (ins vsfrc:$XB),
208 !strconcat(opc, " $RT, $XB"), IIC_VecFP, pattern>;
210 // [PO T XO B XO BX TX]
211 class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
212 RegisterOperand vtype, list<dag> pattern>
213 : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
214 !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
216 // [PO T A B XO AX BX TX], src and dest register use different operand class
217 class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
218 RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
219 InstrItinClass itin, list<dag> pattern>
220 : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
221 !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
223 // [PO VRT VRA VRB XO /]
224 class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
226 : XForm_1<opcode, xo, (outs vrrc:$RST), (ins vrrc:$RA, vrrc:$RB),
227 !strconcat(opc, " $RST, $RA, $RB"), IIC_VecFP, pattern>;
229 // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
230 class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
232 : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
234 // [PO VRT VRA VRB XO /]
235 class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
237 : XForm_1<opcode, xo, (outs vrrc:$RST), (ins vrrc:$RSTi, vrrc:$RA, vrrc:$RB),
238 !strconcat(opc, " $RST, $RA, $RB"), IIC_VecFP, pattern>,
239 RegConstraint<"$RSTi = $RST">, NoEncode<"$RSTi">;
241 // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
242 class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
244 : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
246 class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
248 : Z23Form_8<opcode, xo,
249 (outs vrrc:$VRT), (ins u1imm:$R, vrrc:$VRB, u2imm:$idx),
250 !strconcat(opc, " $R, $VRT, $VRB, $idx"), IIC_VecFP, pattern> {
254 // [PO BF // VRA VRB XO /]
255 class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
257 : XForm_17<opcode, xo, (outs crrc:$BF), (ins vrrc:$RA, vrrc:$RB),
258 !strconcat(opc, " $BF, $RA, $RB"), IIC_FPCompare> {
259 let Pattern = pattern;
262 // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
263 // "out" and "in" dag
264 class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
265 RegisterOperand vtype, list<dag> pattern>
266 : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins (memrr $RA, $RB):$addr),
267 !strconcat(opc, " $XT, $addr"), IIC_LdStLFD, pattern>;
269 // [PO S RA RB XO SX]
270 class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
271 RegisterOperand vtype, list<dag> pattern>
272 : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, (memrr $RA, $RB):$addr),
273 !strconcat(opc, " $XT, $addr"), IIC_LdStSTFD, pattern>;
274 } // Predicates = HasP9Vector
275 } // AddedComplexity = 400, hasSideEffects = 0
277 multiclass ScalToVecWPermute<ValueType Ty, dag In, dag NonPermOut, dag PermOut> {
278 def : Pat<(Ty (scalar_to_vector In)), (Ty NonPermOut)>;
279 def : Pat<(Ty (PPCSToV In)), (Ty PermOut)>;
282 //-------------------------- Instruction definitions -------------------------//
283 // VSX instructions require the VSX feature, they are to be selected over
284 // equivalent Altivec patterns (as they address a larger register set) and
285 // they do not have unmodeled side effects.
286 let Predicates = [HasVSX], AddedComplexity = 400 in {
287 let hasSideEffects = 0 in {
289 // Load indexed instructions
290 let mayLoad = 1, mayStore = 0 in {
292 def LXSDX : XX1Form_memOp<31, 588,
293 (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
294 "lxsdx $XT, $addr", IIC_LdStLFD,
297 // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
299 def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
301 [(set f64:$XT, (load XForm:$addr))]>;
303 let Predicates = [HasVSX, HasOnlySwappingMemOps] in
304 def LXVD2X : XX1Form_memOp<31, 844,
305 (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
306 "lxvd2x $XT, $addr", IIC_LdStLFD,
309 def LXVDSX : XX1Form_memOp<31, 332,
310 (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
311 "lxvdsx $XT, $addr", IIC_LdStLFD, []>;
313 let Predicates = [HasVSX, HasOnlySwappingMemOps] in
314 def LXVW4X : XX1Form_memOp<31, 780,
315 (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
316 "lxvw4x $XT, $addr", IIC_LdStLFD,
320 // Store indexed instructions
321 let mayStore = 1, mayLoad = 0 in {
323 def STXSDX : XX1Form_memOp<31, 716,
324 (outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
325 "stxsdx $XT, $addr", IIC_LdStSTFD,
328 // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
330 def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
332 [(store f64:$XT, XForm:$addr)]>;
334 let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
335 // The behaviour of this instruction is endianness-specific so we provide no
336 // pattern to match it without considering endianness.
337 def STXVD2X : XX1Form_memOp<31, 972,
338 (outs), (ins vsrc:$XT, (memrr $RA, $RB):$addr),
339 "stxvd2x $XT, $addr", IIC_LdStSTFD,
342 def STXVW4X : XX1Form_memOp<31, 908,
343 (outs), (ins vsrc:$XT, (memrr $RA, $RB):$addr),
344 "stxvw4x $XT, $addr", IIC_LdStSTFD,
349 let mayRaiseFPException = 1 in {
351 // Add/Mul Instructions
352 let isCommutable = 1 in {
353 def XSADDDP : XX3Form<60, 32,
354 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
355 "xsadddp $XT, $XA, $XB", IIC_VecFP,
356 [(set f64:$XT, (any_fadd f64:$XA, f64:$XB))]>;
357 def XSMULDP : XX3Form<60, 48,
358 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
359 "xsmuldp $XT, $XA, $XB", IIC_VecFP,
360 [(set f64:$XT, (any_fmul f64:$XA, f64:$XB))]>;
362 def XVADDDP : XX3Form<60, 96,
363 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
364 "xvadddp $XT, $XA, $XB", IIC_VecFP,
365 [(set v2f64:$XT, (any_fadd v2f64:$XA, v2f64:$XB))]>;
367 def XVADDSP : XX3Form<60, 64,
368 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
369 "xvaddsp $XT, $XA, $XB", IIC_VecFP,
370 [(set v4f32:$XT, (any_fadd v4f32:$XA, v4f32:$XB))]>;
372 def XVMULDP : XX3Form<60, 112,
373 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
374 "xvmuldp $XT, $XA, $XB", IIC_VecFP,
375 [(set v2f64:$XT, (any_fmul v2f64:$XA, v2f64:$XB))]>;
377 def XVMULSP : XX3Form<60, 80,
378 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
379 "xvmulsp $XT, $XA, $XB", IIC_VecFP,
380 [(set v4f32:$XT, (any_fmul v4f32:$XA, v4f32:$XB))]>;
383 // Subtract Instructions
384 def XSSUBDP : XX3Form<60, 40,
385 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
386 "xssubdp $XT, $XA, $XB", IIC_VecFP,
387 [(set f64:$XT, (any_fsub f64:$XA, f64:$XB))]>;
389 def XVSUBDP : XX3Form<60, 104,
390 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
391 "xvsubdp $XT, $XA, $XB", IIC_VecFP,
392 [(set v2f64:$XT, (any_fsub v2f64:$XA, v2f64:$XB))]>;
393 def XVSUBSP : XX3Form<60, 72,
394 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
395 "xvsubsp $XT, $XA, $XB", IIC_VecFP,
396 [(set v4f32:$XT, (any_fsub v4f32:$XA, v4f32:$XB))]>;
399 let BaseName = "XSMADDADP" in {
400 let isCommutable = 1 in
401 def XSMADDADP : XX3Form<60, 33,
402 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
403 "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
404 [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
405 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
407 let IsVSXFMAAlt = 1 in
408 def XSMADDMDP : XX3Form<60, 41,
409 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
410 "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
411 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
415 let BaseName = "XSMSUBADP" in {
416 let isCommutable = 1 in
417 def XSMSUBADP : XX3Form<60, 49,
418 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
419 "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
420 [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
421 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
423 let IsVSXFMAAlt = 1 in
424 def XSMSUBMDP : XX3Form<60, 57,
425 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
426 "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
427 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
431 let BaseName = "XSNMADDADP" in {
432 let isCommutable = 1 in
433 def XSNMADDADP : XX3Form<60, 161,
434 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
435 "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
436 [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
437 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
439 let IsVSXFMAAlt = 1 in
440 def XSNMADDMDP : XX3Form<60, 169,
441 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
442 "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
443 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
447 let BaseName = "XSNMSUBADP" in {
448 let isCommutable = 1 in
449 def XSNMSUBADP : XX3Form<60, 177,
450 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
451 "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
452 [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
453 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
455 let IsVSXFMAAlt = 1 in
456 def XSNMSUBMDP : XX3Form<60, 185,
457 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
458 "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
459 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
// Vector FMA family: same A-form/M-form pairing as the scalar group above
// (A-form carries the pattern; the IsVSXFMAAlt M-form has none).
// Fix: XVNMADDASP previously used plain `fma` while every sibling uses
// `any_fma`; use `any_fma` so the strict-FP (constrained) form also selects.
// NOTE(review): the "AltVSXFMARel;" lines and closing braces of these groups
// are missing from this excerpt.
463 let BaseName = "XVMADDADP" in {
464 let isCommutable = 1 in
465 def XVMADDADP : XX3Form<60, 97,
466 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
467 "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
468 [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
469 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
471 let IsVSXFMAAlt = 1 in
472 def XVMADDMDP : XX3Form<60, 105,
473 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
474 "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
475 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
479 let BaseName = "XVMADDASP" in {
480 let isCommutable = 1 in
481 def XVMADDASP : XX3Form<60, 65,
482 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
483 "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
484 [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
485 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
487 let IsVSXFMAAlt = 1 in
488 def XVMADDMSP : XX3Form<60, 73,
489 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
490 "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
491 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
495 let BaseName = "XVMSUBADP" in {
496 let isCommutable = 1 in
497 def XVMSUBADP : XX3Form<60, 113,
498 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
499 "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
500 [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
501 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
503 let IsVSXFMAAlt = 1 in
504 def XVMSUBMDP : XX3Form<60, 121,
505 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
506 "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
507 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
511 let BaseName = "XVMSUBASP" in {
512 let isCommutable = 1 in
513 def XVMSUBASP : XX3Form<60, 81,
514 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
515 "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
516 [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
517 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
519 let IsVSXFMAAlt = 1 in
520 def XVMSUBMSP : XX3Form<60, 89,
521 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
522 "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
523 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
527 let BaseName = "XVNMADDADP" in {
528 let isCommutable = 1 in
529 def XVNMADDADP : XX3Form<60, 225,
530 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
531 "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
532 [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
533 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
535 let IsVSXFMAAlt = 1 in
536 def XVNMADDMDP : XX3Form<60, 233,
537 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
538 "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
539 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
543 let BaseName = "XVNMADDASP" in {
544 let isCommutable = 1 in
545 def XVNMADDASP : XX3Form<60, 193,
546 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
547 "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
548 [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
549 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
551 let IsVSXFMAAlt = 1 in
552 def XVNMADDMSP : XX3Form<60, 201,
553 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
554 "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
555 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
559 let BaseName = "XVNMSUBADP" in {
560 let isCommutable = 1 in
561 def XVNMSUBADP : XX3Form<60, 241,
562 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
563 "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
564 [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
565 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
567 let IsVSXFMAAlt = 1 in
568 def XVNMSUBMDP : XX3Form<60, 249,
569 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
570 "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
571 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
575 let BaseName = "XVNMSUBASP" in {
576 let isCommutable = 1 in
577 def XVNMSUBASP : XX3Form<60, 209,
578 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
579 "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
580 [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
581 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
583 let IsVSXFMAAlt = 1 in
584 def XVNMSUBMSP : XX3Form<60, 217,
585 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
586 "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
587 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
591 // Division Instructions
592 def XSDIVDP : XX3Form<60, 56,
593 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
594 "xsdivdp $XT, $XA, $XB", IIC_FPDivD,
595 [(set f64:$XT, (any_fdiv f64:$XA, f64:$XB))]>;
596 def XSSQRTDP : XX2Form<60, 75,
597 (outs vsfrc:$XT), (ins vsfrc:$XB),
598 "xssqrtdp $XT, $XB", IIC_FPSqrtD,
599 [(set f64:$XT, (any_fsqrt f64:$XB))]>;
601 def XSREDP : XX2Form<60, 90,
602 (outs vsfrc:$XT), (ins vsfrc:$XB),
603 "xsredp $XT, $XB", IIC_VecFP,
604 [(set f64:$XT, (PPCfre f64:$XB))]>;
605 def XSRSQRTEDP : XX2Form<60, 74,
606 (outs vsfrc:$XT), (ins vsfrc:$XB),
607 "xsrsqrtedp $XT, $XB", IIC_VecFP,
608 [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
610 let mayRaiseFPException = 0 in {
611 def XSTDIVDP : XX3Form_1<60, 61,
612 (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
613 "xstdivdp $CR, $XA, $XB", IIC_FPCompare, []>;
614 def XSTSQRTDP : XX2Form_1<60, 106,
615 (outs crrc:$CR), (ins vsfrc:$XB),
616 "xstsqrtdp $CR, $XB", IIC_FPCompare,
617 [(set i32:$CR, (PPCftsqrt f64:$XB))]>;
618 def XVTDIVDP : XX3Form_1<60, 125,
619 (outs crrc:$CR), (ins vsrc:$XA, vsrc:$XB),
620 "xvtdivdp $CR, $XA, $XB", IIC_FPCompare, []>;
621 def XVTDIVSP : XX3Form_1<60, 93,
622 (outs crrc:$CR), (ins vsrc:$XA, vsrc:$XB),
623 "xvtdivsp $CR, $XA, $XB", IIC_FPCompare, []>;
625 def XVTSQRTDP : XX2Form_1<60, 234,
626 (outs crrc:$CR), (ins vsrc:$XB),
627 "xvtsqrtdp $CR, $XB", IIC_FPCompare,
628 [(set i32:$CR, (PPCftsqrt v2f64:$XB))]>;
629 def XVTSQRTSP : XX2Form_1<60, 170,
630 (outs crrc:$CR), (ins vsrc:$XB),
631 "xvtsqrtsp $CR, $XB", IIC_FPCompare,
632 [(set i32:$CR, (PPCftsqrt v4f32:$XB))]>;
635 def XVDIVDP : XX3Form<60, 120,
636 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
637 "xvdivdp $XT, $XA, $XB", IIC_FPDivD,
638 [(set v2f64:$XT, (any_fdiv v2f64:$XA, v2f64:$XB))]>;
639 def XVDIVSP : XX3Form<60, 88,
640 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
641 "xvdivsp $XT, $XA, $XB", IIC_FPDivS,
642 [(set v4f32:$XT, (any_fdiv v4f32:$XA, v4f32:$XB))]>;
644 def XVSQRTDP : XX2Form<60, 203,
645 (outs vsrc:$XT), (ins vsrc:$XB),
646 "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
647 [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;
648 def XVSQRTSP : XX2Form<60, 139,
649 (outs vsrc:$XT), (ins vsrc:$XB),
650 "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
651 [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
653 def XVREDP : XX2Form<60, 218,
654 (outs vsrc:$XT), (ins vsrc:$XB),
655 "xvredp $XT, $XB", IIC_VecFP,
656 [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
657 def XVRESP : XX2Form<60, 154,
658 (outs vsrc:$XT), (ins vsrc:$XB),
659 "xvresp $XT, $XB", IIC_VecFP,
660 [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
662 def XVRSQRTEDP : XX2Form<60, 202,
663 (outs vsrc:$XT), (ins vsrc:$XB),
664 "xvrsqrtedp $XT, $XB", IIC_VecFP,
665 [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
666 def XVRSQRTESP : XX2Form<60, 138,
667 (outs vsrc:$XT), (ins vsrc:$XB),
668 "xvrsqrtesp $XT, $XB", IIC_VecFP,
669 [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
671 // Compare Instructions
672 def XSCMPODP : XX3Form_1<60, 43,
673 (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
674 "xscmpodp $CR, $XA, $XB", IIC_FPCompare, []>;
675 def XSCMPUDP : XX3Form_1<60, 35,
676 (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
677 "xscmpudp $CR, $XA, $XB", IIC_FPCompare, []>;
679 defm XVCMPEQDP : XX3Form_Rcr<60, 99,
680 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
681 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
682 defm XVCMPEQSP : XX3Form_Rcr<60, 67,
683 "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
684 int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>;
685 defm XVCMPGEDP : XX3Form_Rcr<60, 115,
686 "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
687 int_ppc_vsx_xvcmpgedp, v2i64, v2f64>;
688 defm XVCMPGESP : XX3Form_Rcr<60, 83,
689 "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
690 int_ppc_vsx_xvcmpgesp, v4i32, v4f32>;
691 defm XVCMPGTDP : XX3Form_Rcr<60, 107,
692 "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
693 int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>;
694 defm XVCMPGTSP : XX3Form_Rcr<60, 75,
695 "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
696 int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
699 let mayRaiseFPException = 0 in {
700 def XSABSDP : XX2Form<60, 345,
701 (outs vsfrc:$XT), (ins vsfrc:$XB),
702 "xsabsdp $XT, $XB", IIC_VecFP,
703 [(set f64:$XT, (fabs f64:$XB))]>;
704 def XSNABSDP : XX2Form<60, 361,
705 (outs vsfrc:$XT), (ins vsfrc:$XB),
706 "xsnabsdp $XT, $XB", IIC_VecFP,
707 [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
708 let isCodeGenOnly = 1 in
709 def XSNABSDPs : XX2Form<60, 361,
710 (outs vssrc:$XT), (ins vssrc:$XB),
711 "xsnabsdp $XT, $XB", IIC_VecFP,
712 [(set f32:$XT, (fneg (fabs f32:$XB)))]>;
713 def XSNEGDP : XX2Form<60, 377,
714 (outs vsfrc:$XT), (ins vsfrc:$XB),
715 "xsnegdp $XT, $XB", IIC_VecFP,
716 [(set f64:$XT, (fneg f64:$XB))]>;
717 def XSCPSGNDP : XX3Form<60, 176,
718 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
719 "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
720 [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
722 def XVABSDP : XX2Form<60, 473,
723 (outs vsrc:$XT), (ins vsrc:$XB),
724 "xvabsdp $XT, $XB", IIC_VecFP,
725 [(set v2f64:$XT, (fabs v2f64:$XB))]>;
727 def XVABSSP : XX2Form<60, 409,
728 (outs vsrc:$XT), (ins vsrc:$XB),
729 "xvabssp $XT, $XB", IIC_VecFP,
730 [(set v4f32:$XT, (fabs v4f32:$XB))]>;
732 def XVCPSGNDP : XX3Form<60, 240,
733 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
734 "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
735 [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
736 def XVCPSGNSP : XX3Form<60, 208,
737 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
738 "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
739 [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
741 def XVNABSDP : XX2Form<60, 489,
742 (outs vsrc:$XT), (ins vsrc:$XB),
743 "xvnabsdp $XT, $XB", IIC_VecFP,
744 [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
745 def XVNABSSP : XX2Form<60, 425,
746 (outs vsrc:$XT), (ins vsrc:$XB),
747 "xvnabssp $XT, $XB", IIC_VecFP,
748 [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
750 def XVNEGDP : XX2Form<60, 505,
751 (outs vsrc:$XT), (ins vsrc:$XB),
752 "xvnegdp $XT, $XB", IIC_VecFP,
753 [(set v2f64:$XT, (fneg v2f64:$XB))]>;
754 def XVNEGSP : XX2Form<60, 441,
755 (outs vsrc:$XT), (ins vsrc:$XB),
756 "xvnegsp $XT, $XB", IIC_VecFP,
757 [(set v4f32:$XT, (fneg v4f32:$XB))]>;
760 // Conversion Instructions
761 def XSCVDPSP : XX2Form<60, 265,
762 (outs vsfrc:$XT), (ins vsfrc:$XB),
763 "xscvdpsp $XT, $XB", IIC_VecFP, []>;
764 def XSCVDPSXDS : XX2Form<60, 344,
765 (outs vsfrc:$XT), (ins vsfrc:$XB),
766 "xscvdpsxds $XT, $XB", IIC_VecFP,
767 [(set f64:$XT, (PPCany_fctidz f64:$XB))]>;
768 let isCodeGenOnly = 1 in
769 def XSCVDPSXDSs : XX2Form<60, 344,
770 (outs vssrc:$XT), (ins vssrc:$XB),
771 "xscvdpsxds $XT, $XB", IIC_VecFP,
772 [(set f32:$XT, (PPCany_fctidz f32:$XB))]>;
773 def XSCVDPSXWS : XX2Form<60, 88,
774 (outs vsfrc:$XT), (ins vsfrc:$XB),
775 "xscvdpsxws $XT, $XB", IIC_VecFP,
776 [(set f64:$XT, (PPCany_fctiwz f64:$XB))]>;
777 let isCodeGenOnly = 1 in
778 def XSCVDPSXWSs : XX2Form<60, 88,
779 (outs vssrc:$XT), (ins vssrc:$XB),
780 "xscvdpsxws $XT, $XB", IIC_VecFP,
781 [(set f32:$XT, (PPCany_fctiwz f32:$XB))]>;
782 def XSCVDPUXDS : XX2Form<60, 328,
783 (outs vsfrc:$XT), (ins vsfrc:$XB),
784 "xscvdpuxds $XT, $XB", IIC_VecFP,
785 [(set f64:$XT, (PPCany_fctiduz f64:$XB))]>;
786 let isCodeGenOnly = 1 in
787 def XSCVDPUXDSs : XX2Form<60, 328,
788 (outs vssrc:$XT), (ins vssrc:$XB),
789 "xscvdpuxds $XT, $XB", IIC_VecFP,
790 [(set f32:$XT, (PPCany_fctiduz f32:$XB))]>;
791 def XSCVDPUXWS : XX2Form<60, 72,
792 (outs vsfrc:$XT), (ins vsfrc:$XB),
793 "xscvdpuxws $XT, $XB", IIC_VecFP,
794 [(set f64:$XT, (PPCany_fctiwuz f64:$XB))]>;
795 let isCodeGenOnly = 1 in
796 def XSCVDPUXWSs : XX2Form<60, 72,
797 (outs vssrc:$XT), (ins vssrc:$XB),
798 "xscvdpuxws $XT, $XB", IIC_VecFP,
799 [(set f32:$XT, (PPCany_fctiwuz f32:$XB))]>;
800 def XSCVSPDP : XX2Form<60, 329,
801 (outs vsfrc:$XT), (ins vsfrc:$XB),
802 "xscvspdp $XT, $XB", IIC_VecFP, []>;
803 def XSCVSXDDP : XX2Form<60, 376,
804 (outs vsfrc:$XT), (ins vsfrc:$XB),
805 "xscvsxddp $XT, $XB", IIC_VecFP,
806 [(set f64:$XT, (PPCany_fcfid f64:$XB))]>;
807 def XSCVUXDDP : XX2Form<60, 360,
808 (outs vsfrc:$XT), (ins vsfrc:$XB),
809 "xscvuxddp $XT, $XB", IIC_VecFP,
810 [(set f64:$XT, (PPCany_fcfidu f64:$XB))]>;
// Vector FP <-> integer conversions.  Same-width conversions are selected
// from the generic strict-capable nodes (any_fp_to_sint, any_uint_to_fp,
// etc.); conversions whose PPC lane semantics are not expressible generically
// go through target intrinsics (int_ppc_vsx_xvcv*) instead.
812 def XVCVDPSP : XX2Form<60, 393,
813 (outs vsrc:$XT), (ins vsrc:$XB),
814 "xvcvdpsp $XT, $XB", IIC_VecFP,
815 [(set v4f32:$XT, (int_ppc_vsx_xvcvdpsp v2f64:$XB))]>;
816 def XVCVDPSXDS : XX2Form<60, 472,
817 (outs vsrc:$XT), (ins vsrc:$XB),
818 "xvcvdpsxds $XT, $XB", IIC_VecFP,
819 [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>;
820 def XVCVDPSXWS : XX2Form<60, 216,
821 (outs vsrc:$XT), (ins vsrc:$XB),
822 "xvcvdpsxws $XT, $XB", IIC_VecFP,
823 [(set v4i32:$XT, (int_ppc_vsx_xvcvdpsxws v2f64:$XB))]>;
824 def XVCVDPUXDS : XX2Form<60, 456,
825 (outs vsrc:$XT), (ins vsrc:$XB),
826 "xvcvdpuxds $XT, $XB", IIC_VecFP,
827 [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>;
828 def XVCVDPUXWS : XX2Form<60, 200,
829 (outs vsrc:$XT), (ins vsrc:$XB),
830 "xvcvdpuxws $XT, $XB", IIC_VecFP,
831 [(set v4i32:$XT, (int_ppc_vsx_xvcvdpuxws v2f64:$XB))]>;
833 def XVCVSPDP : XX2Form<60, 457,
834 (outs vsrc:$XT), (ins vsrc:$XB),
835 "xvcvspdp $XT, $XB", IIC_VecFP,
836 [(set v2f64:$XT, (int_ppc_vsx_xvcvspdp v4f32:$XB))]>;
837 def XVCVSPSXDS : XX2Form<60, 408,
838 (outs vsrc:$XT), (ins vsrc:$XB),
839 "xvcvspsxds $XT, $XB", IIC_VecFP,
840 [(set v2i64:$XT, (int_ppc_vsx_xvcvspsxds v4f32:$XB))]>;
841 def XVCVSPSXWS : XX2Form<60, 152,
842 (outs vsrc:$XT), (ins vsrc:$XB),
843 "xvcvspsxws $XT, $XB", IIC_VecFP,
844 [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>;
845 def XVCVSPUXDS : XX2Form<60, 392,
846 (outs vsrc:$XT), (ins vsrc:$XB),
847 "xvcvspuxds $XT, $XB", IIC_VecFP,
848 [(set v2i64:$XT, (int_ppc_vsx_xvcvspuxds v4f32:$XB))]>;
849 def XVCVSPUXWS : XX2Form<60, 136,
850 (outs vsrc:$XT), (ins vsrc:$XB),
851 "xvcvspuxws $XT, $XB", IIC_VecFP,
852 [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>;
853 def XVCVSXDDP : XX2Form<60, 504,
854 (outs vsrc:$XT), (ins vsrc:$XB),
855 "xvcvsxddp $XT, $XB", IIC_VecFP,
856 [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>;
857 def XVCVSXDSP : XX2Form<60, 440,
858 (outs vsrc:$XT), (ins vsrc:$XB),
859 "xvcvsxdsp $XT, $XB", IIC_VecFP,
860 [(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>;
861 def XVCVSXWSP : XX2Form<60, 184,
862 (outs vsrc:$XT), (ins vsrc:$XB),
863 "xvcvsxwsp $XT, $XB", IIC_VecFP,
864 [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>;
865 def XVCVUXDDP : XX2Form<60, 488,
866 (outs vsrc:$XT), (ins vsrc:$XB),
867 "xvcvuxddp $XT, $XB", IIC_VecFP,
868 [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>;
869 def XVCVUXDSP : XX2Form<60, 424,
870 (outs vsrc:$XT), (ins vsrc:$XB),
871 "xvcvuxdsp $XT, $XB", IIC_VecFP,
872 [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>;
873 def XVCVUXWSP : XX2Form<60, 168,
874 (outs vsrc:$XT), (ins vsrc:$XB),
875 "xvcvuxwsp $XT, $XB", IIC_VecFP,
876 [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>;
// A 32-bit integer converts to f64 exactly, so these two conversions can
// never raise a floating-point exception.
878 let mayRaiseFPException = 0 in {
879 def XVCVSXWDP : XX2Form<60, 248,
880 (outs vsrc:$XT), (ins vsrc:$XB),
881 "xvcvsxwdp $XT, $XB", IIC_VecFP,
882 [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
883 def XVCVUXWDP : XX2Form<60, 232,
884 (outs vsrc:$XT), (ins vsrc:$XB),
885 "xvcvuxwdp $XT, $XB", IIC_VecFP,
886 [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>;
889 // Rounding Instructions respecting current rounding mode
// The *IC forms round using the mode currently held in FPSCR[RN]; no ISel
// pattern is attached here.
890 def XSRDPIC : XX2Form<60, 107,
891 (outs vsfrc:$XT), (ins vsfrc:$XB),
892 "xsrdpic $XT, $XB", IIC_VecFP, []>;
893 def XVRDPIC : XX2Form<60, 235,
894 (outs vsrc:$XT), (ins vsrc:$XB),
895 "xvrdpic $XT, $XB", IIC_VecFP, []>;
896 def XVRSPIC : XX2Form<60, 171,
897 (outs vsrc:$XT), (ins vsrc:$XB),
898 "xvrspic $XT, $XB", IIC_VecFP, []>;
899 // Max/Min Instructions
// All max/min forms are selected via target intrinsics (the PPC semantics
// for NaN/zero handling differ from the generic fmaxnum/fminnum nodes).
900 let isCommutable = 1 in {
901 def XSMAXDP : XX3Form<60, 160,
902 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
903 "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
905 (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
906 def XSMINDP : XX3Form<60, 168,
907 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
908 "xsmindp $XT, $XA, $XB", IIC_VecFP,
910 (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
912 def XVMAXDP : XX3Form<60, 224,
913 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
914 "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
916 (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
917 def XVMINDP : XX3Form<60, 232,
918 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
919 "xvmindp $XT, $XA, $XB", IIC_VecFP,
921 (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
923 def XVMAXSP : XX3Form<60, 192,
924 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
925 "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
927 (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
928 def XVMINSP : XX3Form<60, 200,
929 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
930 "xvminsp $XT, $XA, $XB", IIC_VecFP,
932 (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
936 // Rounding Instructions with static direction.
// Suffix encodes the direction (per the attached patterns):
//   (none) -> round to nearest (any_fround), M -> floor (any_ffloor),
//   P -> ceil (any_fceil), Z -> truncate toward zero (any_ftrunc).
937 def XSRDPI : XX2Form<60, 73,
938 (outs vsfrc:$XT), (ins vsfrc:$XB),
939 "xsrdpi $XT, $XB", IIC_VecFP,
940 [(set f64:$XT, (any_fround f64:$XB))]>;
941 def XSRDPIM : XX2Form<60, 121,
942 (outs vsfrc:$XT), (ins vsfrc:$XB),
943 "xsrdpim $XT, $XB", IIC_VecFP,
944 [(set f64:$XT, (any_ffloor f64:$XB))]>;
945 def XSRDPIP : XX2Form<60, 105,
946 (outs vsfrc:$XT), (ins vsfrc:$XB),
947 "xsrdpip $XT, $XB", IIC_VecFP,
948 [(set f64:$XT, (any_fceil f64:$XB))]>;
949 def XSRDPIZ : XX2Form<60, 89,
950 (outs vsfrc:$XT), (ins vsfrc:$XB),
951 "xsrdpiz $XT, $XB", IIC_VecFP,
952 [(set f64:$XT, (any_ftrunc f64:$XB))]>;
954 def XVRDPI : XX2Form<60, 201,
955 (outs vsrc:$XT), (ins vsrc:$XB),
956 "xvrdpi $XT, $XB", IIC_VecFP,
957 [(set v2f64:$XT, (any_fround v2f64:$XB))]>;
958 def XVRDPIM : XX2Form<60, 249,
959 (outs vsrc:$XT), (ins vsrc:$XB),
960 "xvrdpim $XT, $XB", IIC_VecFP,
961 [(set v2f64:$XT, (any_ffloor v2f64:$XB))]>;
962 def XVRDPIP : XX2Form<60, 233,
963 (outs vsrc:$XT), (ins vsrc:$XB),
964 "xvrdpip $XT, $XB", IIC_VecFP,
965 [(set v2f64:$XT, (any_fceil v2f64:$XB))]>;
966 def XVRDPIZ : XX2Form<60, 217,
967 (outs vsrc:$XT), (ins vsrc:$XB),
968 "xvrdpiz $XT, $XB", IIC_VecFP,
969 [(set v2f64:$XT, (any_ftrunc v2f64:$XB))]>;
971 def XVRSPI : XX2Form<60, 137,
972 (outs vsrc:$XT), (ins vsrc:$XB),
973 "xvrspi $XT, $XB", IIC_VecFP,
974 [(set v4f32:$XT, (any_fround v4f32:$XB))]>;
975 def XVRSPIM : XX2Form<60, 185,
976 (outs vsrc:$XT), (ins vsrc:$XB),
977 "xvrspim $XT, $XB", IIC_VecFP,
978 [(set v4f32:$XT, (any_ffloor v4f32:$XB))]>;
979 def XVRSPIP : XX2Form<60, 169,
980 (outs vsrc:$XT), (ins vsrc:$XB),
981 "xvrspip $XT, $XB", IIC_VecFP,
982 [(set v4f32:$XT, (any_fceil v4f32:$XB))]>;
983 def XVRSPIZ : XX2Form<60, 153,
984 (outs vsrc:$XT), (ins vsrc:$XB),
985 "xvrspiz $XT, $XB", IIC_VecFP,
986 [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;
987 } // mayRaiseFPException
989 // Logical Instructions
// Full 128-bit bitwise operations; patterns are written on v4i32 (other
// vector types are handled by bitcast patterns elsewhere).
990 let isCommutable = 1 in
991 def XXLAND : XX3Form<60, 130,
992 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
993 "xxland $XT, $XA, $XB", IIC_VecGeneral,
994 [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
// AND-with-complement (XA & ~XB) is intentionally not marked commutable.
995 def XXLANDC : XX3Form<60, 138,
996 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
997 "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
998 [(set v4i32:$XT, (and v4i32:$XA,
999 (vnot v4i32:$XB)))]>;
1000 let isCommutable = 1 in {
1001 def XXLNOR : XX3Form<60, 162,
1002 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1003 "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
1004 [(set v4i32:$XT, (vnot (or v4i32:$XA,
1006 def XXLOR : XX3Form<60, 146,
1007 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1008 "xxlor $XT, $XA, $XB", IIC_VecGeneral,
1009 [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
// vsfrc variant of XXLOR with no pattern (codegen-only helper).
1010 let isCodeGenOnly = 1 in
1011 def XXLORf: XX3Form<60, 146,
1012 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
1013 "xxlor $XT, $XA, $XB", IIC_VecGeneral,
1014 def XXLXOR : XX3Form<60, 154,
1015 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1016 "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
1017 [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
// xxlxor of a register with itself materializes zero; marked as a cheap,
// rematerializable immediate move for the register allocator.  The dpz/spz
// variants produce the f64/f32 +0.0 in the scalar register classes.
1020 let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
1021 isReMaterializable = 1 in {
1022 def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins),
1023 "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
1024 [(set v4i32:$XT, (v4i32 immAllZerosV))]>;
1025 def XXLXORdpz : XX3Form_SameOp<60, 154,
1026 (outs vsfrc:$XT), (ins),
1027 "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
1028 [(set f64:$XT, (fpimm0))]>;
1029 def XXLXORspz : XX3Form_SameOp<60, 154,
1030 (outs vssrc:$XT), (ins),
1031 "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
1032 [(set f32:$XT, (fpimm0))]>;
1035 // Permutation Instructions
// NOTE(review): all of these shuffle full 128-bit lanes and are
// lane-sensitive; see the swap-optimization note at the top of this file.
1036 def XXMRGHW : XX3Form<60, 18,
1037 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1038 "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
1039 def XXMRGLW : XX3Form<60, 50,
1040 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1041 "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
1043 def XXPERMDI : XX3Form_2<60, 10,
1044 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$D),
1045 "xxpermdi $XT, $XA, $XB, $D", IIC_VecPerm,
1046 [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
1048 let isCodeGenOnly = 1 in
1049 // Note that the input register class for `$XA` of XXPERMDIs is `vsfrc`, which
1050 // is not the same as the input register class (`vsrc`) of the XXPERMDI
1051 // instruction. We did this on purpose because:
1052 // 1: The input is primarily for loads that load a partial vector (LFIWZX,
1053 // etc.), so there is no need for SUBREG_TO_REG.
1054 // 2: With the `vsfrc` register class, the final assembly uses float registers
1055 // like `f0` instead of vector-scalar registers like `vs0`. This
1056 // helps readability.
1057 def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$D),
1058 "xxpermdi $XT, $XA, $XA, $D", IIC_VecPerm, []>;
1059 def XXSEL : XX4Form<60, 3,
1060 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
1061 "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
1063 def XXSLDWI : XX3Form_2<60, 2,
1064 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$D),
1065 "xxsldwi $XT, $XA, $XB, $D", IIC_VecPerm,
1066 [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
// Single-input forms ("s" suffix, codegen-only) repeat the one operand in
// both source slots of the encoding.
1069 let isCodeGenOnly = 1 in
1070 def XXSLDWIs : XX3Form_2s<60, 2,
1071 (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$D),
1072 "xxsldwi $XT, $XA, $XA, $D", IIC_VecPerm, []>;
1074 def XXSPLTW : XX2Form_2<60, 164,
1075 (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$D),
1076 "xxspltw $XT, $XB, $D", IIC_VecPerm,
1078 (PPCxxsplt v4i32:$XB, imm32SExt16:$D))]>;
1079 let isCodeGenOnly = 1 in
1080 def XXSPLTWs : XX2Form_2<60, 164,
1081 (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$D),
1082 "xxspltw $XT, $XB, $D", IIC_VecPerm, []>;
1084 // The following VSX instructions were introduced in Power ISA 2.07
1085 let Predicates = [HasVSX, HasP8Vector] in {
1086 let isCommutable = 1 in {
1087 def XXLEQV : XX3Form<60, 186,
1088 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1089 "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
1090 [(set v4i32:$XT, (vnot (xor v4i32:$XA, v4i32:$XB)))]>;
1091 def XXLNAND : XX3Form<60, 178,
1092 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1093 "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
1094 [(set v4i32:$XT, (vnot (and v4i32:$XA, v4i32:$XB)))]>;
// xxleqv of a register with itself yields all-ones; treated as a cheap,
// rematerializable immediate move (parallel to XXLXORz above).
1097 let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
1098 isReMaterializable = 1 in {
1099 def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
1100 "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
1101 [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
1104 def XXLORC : XX3Form<60, 170,
1105 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
1106 "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
1107 [(set v4i32:$XT, (or v4i32:$XA, (vnot v4i32:$XB)))]>;
1109 // VSX scalar loads introduced in ISA 2.07
1110 let mayLoad = 1, mayStore = 0 in {
1112 def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins (memrr $RA, $RB):$addr),
1113 "lxsspx $XT, $addr", IIC_LdStLFD, []>;
1114 def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
1115 "lxsiwax $XT, $addr", IIC_LdStLFD, []>;
1116 def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
1117 "lxsiwzx $XT, $addr", IIC_LdStLFD, []>;
// The pseudos below carry the ISel patterns; the register-class choice at
// allocation time decides which real instruction they expand to.
1119 // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
1121 def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
1123 [(set f32:$XT, (load XForm:$src))]>;
1124 // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
1125 def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
1127 [(set f64:$XT, (PPClfiwax ForceXForm:$src))]>;
1128 // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
1129 def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
1131 [(set f64:$XT, (PPClfiwzx ForceXForm:$src))]>;
1134 // VSX scalar stores introduced in ISA 2.07
1135 let mayStore = 1, mayLoad = 0 in {
1137 def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, (memrr $RA, $RB):$addr),
1138 "stxsspx $XT, $addr", IIC_LdStSTFD, []>;
1139 def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
1140 "stxsiwx $XT, $addr", IIC_LdStSTFD, []>;
1142 // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
1144 def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
1146 [(store f32:$XT, XForm:$dst)]>;
1147 // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
1148 def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
1150 [(PPCstfiwx f64:$XT, ForceXForm:$dst)]>;
1153 // VSX Elementary Scalar FP arithmetic (SP)
// Single-precision scalar arithmetic on the vssrc class.  The FMA family
// comes in A-type (accumulator form, carries the ISel pattern) and M-type
// (multiplicand form, IsVSXFMAAlt, no pattern) pairs that share a BaseName;
// both tie $XTi to $XT via RegConstraint so the target register is also a
// source.
1154 let mayRaiseFPException = 1 in {
1155 let isCommutable = 1 in {
1156 def XSADDSP : XX3Form<60, 0,
1157 (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
1158 "xsaddsp $XT, $XA, $XB", IIC_VecFP,
1159 [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>;
1160 def XSMULSP : XX3Form<60, 16,
1161 (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
1162 "xsmulsp $XT, $XA, $XB", IIC_VecFP,
1163 [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>;
1166 def XSSUBSP : XX3Form<60, 8,
1167 (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
1168 "xssubsp $XT, $XA, $XB", IIC_VecFP,
1169 [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>;
1170 def XSDIVSP : XX3Form<60, 24,
1171 (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
1172 "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
1173 [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
// Reciprocal / reciprocal-sqrt estimates (used by fdiv/fsqrt expansion).
1175 def XSRESP : XX2Form<60, 26,
1176 (outs vssrc:$XT), (ins vssrc:$XB),
1177 "xsresp $XT, $XB", IIC_VecFP,
1178 [(set f32:$XT, (PPCfre f32:$XB))]>;
1179 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1180 let hasSideEffects = 1 in
1181 def XSRSP : XX2Form<60, 281,
1182 (outs vssrc:$XT), (ins vsfrc:$XB),
1183 "xsrsp $XT, $XB", IIC_VecFP,
1184 [(set f32:$XT, (any_fpround f64:$XB))]>;
1185 def XSSQRTSP : XX2Form<60, 11,
1186 (outs vssrc:$XT), (ins vssrc:$XB),
1187 "xssqrtsp $XT, $XB", IIC_FPSqrtS,
1188 [(set f32:$XT, (any_fsqrt f32:$XB))]>;
1189 def XSRSQRTESP : XX2Form<60, 10,
1190 (outs vssrc:$XT), (ins vssrc:$XB),
1191 "xsrsqrtesp $XT, $XB", IIC_VecFP,
1192 [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
1195 let BaseName = "XSMADDASP" in {
1196 let isCommutable = 1 in
1197 def XSMADDASP : XX3Form<60, 1,
1199 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1200 "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
1201 [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
1202 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1204 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1205 let IsVSXFMAAlt = 1, hasSideEffects = 1 in
1206 def XSMADDMSP : XX3Form<60, 9,
1208 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1209 "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
1210 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1214 let BaseName = "XSMSUBASP" in {
1215 let isCommutable = 1 in
1216 def XSMSUBASP : XX3Form<60, 17,
1218 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1219 "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
1220 [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
1221 (fneg f32:$XTi)))]>,
1222 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1224 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1225 let IsVSXFMAAlt = 1, hasSideEffects = 1 in
1226 def XSMSUBMSP : XX3Form<60, 25,
1228 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1229 "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
1230 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1234 let BaseName = "XSNMADDASP" in {
1235 let isCommutable = 1 in
1236 def XSNMADDASP : XX3Form<60, 129,
1238 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1239 "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
1240 [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
1242 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1244 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1245 let IsVSXFMAAlt = 1, hasSideEffects = 1 in
1246 def XSNMADDMSP : XX3Form<60, 137,
1248 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1249 "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
1250 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1254 let BaseName = "XSNMSUBASP" in {
1255 let isCommutable = 1 in
1256 def XSNMSUBASP : XX3Form<60, 145,
1258 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1259 "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
1260 [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
1261 (fneg f32:$XTi))))]>,
1262 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1264 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1265 let IsVSXFMAAlt = 1, hasSideEffects = 1 in
1266 def XSNMSUBMSP : XX3Form<60, 153,
1268 (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
1269 "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
1270 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
1274 // Single Precision Conversions (FP <-> INT)
1275 def XSCVSXDSP : XX2Form<60, 312,
1276 (outs vssrc:$XT), (ins vsfrc:$XB),
1277 "xscvsxdsp $XT, $XB", IIC_VecFP,
1278 [(set f32:$XT, (PPCany_fcfids f64:$XB))]>;
1279 def XSCVUXDSP : XX2Form<60, 296,
1280 (outs vssrc:$XT), (ins vsfrc:$XB),
1281 "xscvuxdsp $XT, $XB", IIC_VecFP,
1282 [(set f32:$XT, (PPCany_fcfidus f64:$XB))]>;
1283 } // mayRaiseFPException
1285 // Conversions between vector and scalar single precision
1286 def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
1287 "xscvdpspn $XT, $XB", IIC_VecFP, []>;
1288 def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
1289 "xscvspdpn $XT, $XB", IIC_VecFP, []>;
1291 let Predicates = [HasVSX, HasDirectMove] in {
1292 // VSX direct move instructions
// GPR <-> VSR moves.  Each MFVSR*/MTVSR* has an isCodeGenOnly MFVR*/MTVR*
// twin with identical opcode/mnemonic that merely uses the full vsrc class.
// The 64-bit forms require In64BitMode.
1293 def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$RA), (ins vsfrc:$XT),
1294 "mfvsrd $RA, $XT", IIC_VecGeneral,
1295 [(set i64:$RA, (PPCmfvsr f64:$XT))]>,
1296 Requires<[In64BitMode]>;
1297 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1298 let isCodeGenOnly = 1, hasSideEffects = 1 in
1299 def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$RA), (ins vsrc:$XT),
1300 "mfvsrd $RA, $XT", IIC_VecGeneral,
1302 Requires<[In64BitMode]>;
1303 def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$RA), (ins vsfrc:$XT),
1304 "mfvsrwz $RA, $XT", IIC_VecGeneral,
1305 [(set i32:$RA, (PPCmfvsr f64:$XT))]>, ZExt32To64;
1306 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1307 let isCodeGenOnly = 1, hasSideEffects = 1 in
1308 def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$RA), (ins vsrc:$XT),
1309 "mfvsrwz $RA, $XT", IIC_VecGeneral,
1311 def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$RA),
1312 "mtvsrd $XT, $RA", IIC_VecGeneral,
1313 [(set f64:$XT, (PPCmtvsra i64:$RA))]>,
1314 Requires<[In64BitMode]>;
1315 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1316 let isCodeGenOnly = 1, hasSideEffects = 1 in
1317 def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$RA),
1318 "mtvsrd $XT, $RA", IIC_VecGeneral,
1320 Requires<[In64BitMode]>;
1321 def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$RA),
1322 "mtvsrwa $XT, $RA", IIC_VecGeneral,
1323 [(set f64:$XT, (PPCmtvsra i32:$RA))]>;
1324 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1325 let isCodeGenOnly = 1, hasSideEffects = 1 in
1326 def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$RA),
1327 "mtvsrwa $XT, $RA", IIC_VecGeneral,
1329 def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$RA),
1330 "mtvsrwz $XT, $RA", IIC_VecGeneral,
1331 [(set f64:$XT, (PPCmtvsrz i32:$RA))]>;
1332 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1333 let isCodeGenOnly = 1, hasSideEffects = 1 in
1334 def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$RA),
1335 "mtvsrwz $XT, $RA", IIC_VecGeneral,
1339 } // HasVSX, HasP8Vector
// ISA 3.0 additions: splat word to VSR, move a GPR pair into a VSR, and
// move the low doubleword of a VSR out.  No ISel patterns attached here.
1341 let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in {
1342 def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$RA),
1343 "mtvsrws $XT, $RA", IIC_VecGeneral, []>;
1345 def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$RA, g8rc:$RB),
1346 "mtvsrdd $XT, $RA, $RB", IIC_VecGeneral,
1347 []>, Requires<[In64BitMode]>;
1349 def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$RA), (ins vsrc:$XT),
1350 "mfvsrld $RA, $XT", IIC_VecGeneral,
1351 []>, Requires<[In64BitMode]>;
1353 } // HasVSX, IsISA3_0, HasDirectMove
1355 let Predicates = [HasVSX, HasP9Vector] in {
1356 // Quad-Precision Scalar Move Instructions:
// f128 (IEEE quad) scalar operations, introduced with POWER9.
1358 def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
1360 (fcopysign f128:$RB, f128:$RA))]>;
1362 // Absolute/Negative-Absolute/Negate
// Pure sign-bit manipulations; these cannot raise FP exceptions, hence
// the non-strict fabs/fneg patterns.
1363 def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
1364 [(set f128:$RST, (fabs f128:$RB))]>;
1365 def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
1366 [(set f128:$RST, (fneg (fabs f128:$RB)))]>;
1367 def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
1368 [(set f128:$RST, (fneg f128:$RB))]>;
1370 //===--------------------------------------------------------------------===//
1371 // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
1373 // Add/Divide/Multiply/Subtract
1374 let mayRaiseFPException = 1 in {
1375 let isCommutable = 1 in {
1376 def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
1377 [(set f128:$RST, (any_fadd f128:$RA, f128:$RB))]>;
1378 def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
1379 [(set f128:$RST, (any_fmul f128:$RA, f128:$RB))]>;
1381 def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
1382 [(set f128:$RST, (any_fsub f128:$RA, f128:$RB))]>;
1383 def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
1384 [(set f128:$RST, (any_fdiv f128:$RA, f128:$RB))]>;
1386 def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
1387 [(set f128:$RST, (any_fsqrt f128:$RB))]>;
1388 // (Negative) Multiply-{Add/Subtract}
1389 def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
1391 (any_fma f128:$RA, f128:$RB, f128:$RSTi))]>;
1392 def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
1394 (any_fma f128:$RA, f128:$RB,
1395 (fneg f128:$RSTi)))]>;
1396 def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
1398 (fneg (any_fma f128:$RA, f128:$RB,
1400 def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
1402 (fneg (any_fma f128:$RA, f128:$RB,
1403 (fneg f128:$RSTi))))]>;
// Round-to-odd ("O"-suffixed) variants of the same operations; selected
// only through the *_round_to_odd intrinsics.
1405 let isCommutable = 1 in {
1406 def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
1408 (int_ppc_addf128_round_to_odd
1409 f128:$RA, f128:$RB))]>;
1410 def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
1412 (int_ppc_mulf128_round_to_odd
1413 f128:$RA, f128:$RB))]>;
1415 def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
1417 (int_ppc_subf128_round_to_odd
1418 f128:$RA, f128:$RB))]>;
1419 def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
1421 (int_ppc_divf128_round_to_odd
1422 f128:$RA, f128:$RB))]>;
1423 def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
1425 (int_ppc_sqrtf128_round_to_odd f128:$RB))]>;
1428 def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
1430 (int_ppc_fmaf128_round_to_odd
1431 f128:$RA,f128:$RB,f128:$RSTi))]>;
1433 def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
1435 (int_ppc_fmaf128_round_to_odd
1436 f128:$RA, f128:$RB, (fneg f128:$RSTi)))]>;
1437 def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
1439 (fneg (int_ppc_fmaf128_round_to_odd
1440 f128:$RA, f128:$RB, f128:$RSTi)))]>;
1441 def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
1443 (fneg (int_ppc_fmaf128_round_to_odd
1444 f128:$RA, f128:$RB, (fneg f128:$RSTi))))]>;
1445 } // mayRaiseFPException
1447 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1448 // QP Compare Ordered/Unordered
1449 let hasSideEffects = 1 in {
1450 // DP/QP Compare Exponents
1451 def XSCMPEXPDP : XX3Form_1<60, 59,
1452 (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
1453 "xscmpexpdp $CR, $XA, $XB", IIC_FPCompare, []>;
1454 def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
1456 let mayRaiseFPException = 1 in {
1457 def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
1458 def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
1460 // DP Compare ==, >=, >, !=
1461 // Use vsrc for XT, because the entire register of XT is set.
1462 // XT.dword[1] = 0x0000_0000_0000_0000
1463 def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
1465 def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
1467 def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
1472 //===--------------------------------------------------------------------===//
1473 // Quad-Precision Floating-Point Conversion Instructions:
1475 let mayRaiseFPException = 1 in {
// DP -> QP widening is exact in value but still strict-capable here.
1477 def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
1478 [(set f128:$RST, (any_fpextend f64:$RB))]>;
1480 // Round & Convert QP -> DP (dword[1] is set to zero)
1481 def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
1482 def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
1484 (int_ppc_truncf128_round_to_odd
1488 // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
1489 let mayRaiseFPException = 1 in {
1490 def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz",
1491 [(set f128:$RST, (PPCany_fctidz f128:$RB))]>;
1492 def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz",
1493 [(set f128:$RST, (PPCany_fctiwz f128:$RB))]>;
1494 def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz",
1495 [(set f128:$RST, (PPCany_fctiduz f128:$RB))]>;
1496 def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz",
1497 [(set f128:$RST, (PPCany_fctiwuz f128:$RB))]>;
1500 // Convert (Un)Signed DWord -> QP.
1501 def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
1502 def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
1504 // (Round &) Convert DP <-> HP
1505 // Note! xscvdphp's src and dest registers both use the left 64 bits, so we use
1506 // vsfrc for the src and dest registers. xscvhpdp's src only uses the left 16
1507 // bits, but we still use vsfrc for it.
1508 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1509 let hasSideEffects = 1, mayRaiseFPException = 1 in {
1510 def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
1511 def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
1514 let mayRaiseFPException = 1 in {
1516 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1517 let hasSideEffects = 1 in
1518 def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
1519 def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
1521 (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
1523 // Round to Quad-Precision Integer [with Inexact]
// XSRQPIX is the EX=1 variant of XSRQPI (same opcode otherwise).
1524 def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
1525 def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
1527 // Round Quad-Precision to Double-Extended Precision (fp80)
1528 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1529 let hasSideEffects = 1 in
1530 def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
1533 //===--------------------------------------------------------------------===//
1534 // Insert/Extract Instructions
1536 // Insert Exponent DP/QP
1537 // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
1538 def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$RA, g8rc:$RB),
1539 "xsiexpdp $XT, $RA, $RB", IIC_VecFP, []>;
1540 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1541 let hasSideEffects = 1 in {
1542 // vB NOTE: only vB.dword[0] is used, that's why we don't use
1543 // X_VT5_VA5_VB5 form
1544 def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$FRT), (ins vrrc:$FRA, vsfrc:$FRB),
1545 "xsiexpqp $FRT, $FRA, $FRB", IIC_VecFP, []>;
1548 // Extract Exponent/Significand DP/QP
1549 def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
1550 def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
1552 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1553 let hasSideEffects = 1 in {
1554 def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
1555 def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
1558 // Vector Insert Word
1559 // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
// $XTi is tied to $XT, so the insert merges into the prior register value.
1561 XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
1562 (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM5),
1563 "xxinsertw $XT, $XB, $UIM5", IIC_VecFP,
1564 [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
1565 imm32SExt16:$UIM5))]>,
1566 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
1568 // Vector Extract Unsigned Word
1569 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1570 let hasSideEffects = 1 in
1571 def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
1572 (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIM5),
1573 "xxextractuw $XT, $XB, $UIM5", IIC_VecFP, []>;
1575 // Vector Insert Exponent DP/SP
1576 def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
1577 IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
1578 def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
1579 IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
1581 // Vector Extract Exponent/Significand DP/SP
1582 def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
1584 (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
1585 def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
1587 (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
1588 def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
1590 (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
1591 def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
1593 (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
1595 // Test Data Class SP/DP/QP
// Scalar forms set a CR field from the $DCMX data-class mask; the vector
// forms produce a per-lane mask via the xvtstdc* intrinsics.
1596 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1597 let hasSideEffects = 1 in {
1598 def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
1599 (outs crrc:$BF), (ins u7imm:$DCMX, vssrc:$XB),
1600 "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
1601 def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
1602 (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
1603 "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
1604 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
1605 (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$VB),
1606 "xststdcqp $BF, $VB, $DCMX", IIC_VecFP, []>;
1609 // Vector Test Data Class SP/DP
1610 def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
1611 (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
1612 "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
1614 (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
1615 def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
1616 (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
1617 "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
1619 (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
1621 // Maximum/Minimum Type-C/Type-J DP
1622 let mayRaiseFPException = 1 in {
1623 def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
1625 [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
1626 def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
1628 [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
1630 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1631 let hasSideEffects = 1 in {
1632 def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
1634 def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
1639 // Vector Byte-Reverse H/W/D/Q Word
// Only the W and D forms have generic bswap patterns; H and Q are kept
// pattern-less (and conservatively flagged with side effects).
1640 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1641 let hasSideEffects = 1 in
1642 def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
1643 def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
1644 [(set v4i32:$XT, (bswap v4i32:$XB))]>;
1645 def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
1646 [(set v2i64:$XT, (bswap v2i64:$XB))]>;
1647 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1648 let hasSideEffects = 1 in
1649 def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
// Permute: $XTi is tied to $XT, so the prior target value participates.
1652 def XXPERM : XX3Form<60, 26, (outs vsrc:$XT),
1653 (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB),
1654 "xxperm $XT, $XA, $XB", IIC_VecPerm, []>,
1655 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
1656 def XXPERMR : XX3Form<60, 58, (outs vsrc:$XT),
1657 (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB),
1658 "xxpermr $XT, $XA, $XB", IIC_VecPerm, []>,
1659 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
1661 // Vector Splat Immediate Byte
1662 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
1663 let hasSideEffects = 1 in
1664 def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
1665 "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
1667 // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
1668 // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
1669 let mayLoad = 1, mayStore = 0 in {
// DQ-form vector load: 16-byte (memrix16) displacement addressing.
1671 def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins (memrix16 $DQ, $RA):$addr),
1672 "lxv $XT, $addr", IIC_LdStLFD, []>;
// DS-form scalar loads into the lower half of a VSR (vfrc).
1674 def LXSD : DSForm_1<57, 2, (outs vfrc:$RST), (ins (memrix $D, $RA):$addr),
1675 "lxsd $RST, $addr", IIC_LdStLFD, []>;
1676 // Load SP from src, convert it to DP, and place in dword[0]
1677 def LXSSP : DSForm_1<57, 3, (outs vfrc:$RST), (ins (memrix $D, $RA):$addr),
1678 "lxssp $RST, $addr", IIC_LdStLFD, []>;
1680 // Load as Integer Byte/Halfword & Zero Indexed
// The trailing constant in PPClxsizx is the access width in bytes
// (1 = byte, 2 = halfword).
1681 def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
1682 [(set f64:$XT, (PPClxsizx ForceXForm:$addr, 1))]>;
1683 def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
1684 [(set f64:$XT, (PPClxsizx ForceXForm:$addr, 2))]>;
1686 // Load Vector Halfword*8/Byte*16 Indexed
1687 def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
1688 def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
1690 // Load Vector Indexed
1691 def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
1692 [(set v2f64:$XT, (load XForm:$addr))]>;
1693 // Load Vector (Left-justified) with Length
// The byte count lives in the upper bits of $RB (per the lxvl/lxvll
// intrinsics, which take it as an i64).
1694 def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB),
1695 "lxvl $XT, $addr, $RB", IIC_LdStLoad,
1696 [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$addr, i64:$RB))]>;
1697 def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB),
1698 "lxvll $XT, $addr, $RB", IIC_LdStLoad,
1699 [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$addr, i64:$RB))]>;
1701 // Load Vector Word & Splat Indexed
1702 def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
1705 // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
1706 // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
// Store-side counterparts of the load region above.
1707 let mayStore = 1, mayLoad = 0 in {
1709 def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, (memrix16 $DQ, $RA):$addr),
1710 "stxv $XT, $addr", IIC_LdStSTFD, []>;
1712 def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$RST, (memrix $D, $RA):$addr),
1713 "stxsd $RST, $addr", IIC_LdStSTFD, []>;
1714 // Convert DP of dword[0] to SP, and Store to dst
1715 def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$RST, (memrix $D, $RA):$addr),
1716 "stxssp $RST, $addr", IIC_LdStSTFD, []>;
1718 // Store as Integer Byte/Halfword Indexed
// Trailing constant in PPCstxsix is the store width in bytes (1 or 2).
1719 def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
1720 [(PPCstxsix f64:$XT, ForceXForm:$addr, 1)]>;
1721 def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
1722 [(PPCstxsix f64:$XT, ForceXForm:$addr, 2)]>;
// vsrc-class variants of the same encodings, for codegen use only.
1723 let isCodeGenOnly = 1 in {
1724 def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
1725 def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
1728 // Store Vector Halfword*8/Byte*16 Indexed
1729 def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
1730 def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
1732 // Store Vector Indexed
1733 def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
1734 [(store v2f64:$XT, XForm:$addr)]>;
1736 // Store Vector (Left-justified) with Length
1737 def STXVL : XX1Form_memOp<31, 397, (outs),
1738 (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB),
1739 "stxvl $XT, $addr, $RB", IIC_LdStLoad,
1740 [(int_ppc_vsx_stxvl v4i32:$XT, addr:$addr,
1742 def STXVLL : XX1Form_memOp<31, 429, (outs),
1743 (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB),
1744 "stxvll $XT, $addr, $RB", IIC_LdStLoad,
1745 [(int_ppc_vsx_stxvll v4i32:$XT, addr:$addr,
// D-Form FP load/store pseudos, expanded after register allocation
// (PPCPostRAExpPseudo) into real DS-form instructions.
1749 def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
1751 [(set f32:$XT, (load DSForm:$src))]>;
1752 def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
1754 [(set f64:$XT, (load DSForm:$src))]>;
1755 def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
1757 [(store f32:$XT, DSForm:$dst)]>;
1758 def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
1760 [(store f64:$XT, DSForm:$dst)]>;
// Spill/reload pseudos for the spilltovsrrc class (X-form indexed and
// D-form variants); also expanded post-RA.
1762 let mayStore = 1 in {
1763 def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
1764 (ins spilltovsrrc:$XT, memrr:$dst),
1765 "#SPILLTOVSR_STX", []>;
1766 def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
1767 "#SPILLTOVSR_ST", []>;
1769 let mayLoad = 1 in {
1770 def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
1772 "#SPILLTOVSR_LDX", []>;
1773 def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
1774 "#SPILLTOVSR_LD", []>;
1778 } // hasSideEffects = 0
// Select pseudos lowered by the custom inserter: the SELECT_CC_* forms take a
// full CR field (crrc:$cond) plus a branch-opcode immediate ($BROPC), while
// the SELECT_* forms take a single CR bit (crbitrc:$cond) and carry generic
// `select` ISel patterns.
1780 let PPC970_Single = 1, AddedComplexity = 400 in {
1782 def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
1783 (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
1786 def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
1787 (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
1790 (select i1:$cond, v2f64:$T, v2f64:$F))]>;
1791 def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
1792 (ins crrc:$cond, f8rc:$T, f8rc:$F,
1793 i32imm:$BROPC), "#SELECT_CC_VSFRC",
1795 def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
1796 (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
1799 (select i1:$cond, f64:$T, f64:$F))]>;
1800 def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
1801 (ins crrc:$cond, f4rc:$T, f4rc:$F,
1802 i32imm:$BROPC), "#SELECT_CC_VSSRC",
1804 def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
1805 (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
1808 (select i1:$cond, f32:$T, f32:$F))]>;
1812 //----------------------------- DAG Definitions ------------------------------//
1814 // Output dag used to bitcast f32 to i32 and f64 to i64
1816 dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XSCVDPSPN $A), sub_64)));
1817 dag DblToLong = (i64 (MFVSRD $A));
// f32 min/max computed with the DP scalar min/max after moving both operands
// into VSFRC; the result is copied back to the original class.
1821 dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
1822 (COPY_TO_REGCLASS $B, VSFRC)),
1824 dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
1825 (COPY_TO_REGCLASS $B, VSFRC)),
// Integer-load shorthands: L = any-extend, ZE = zero-extend, SE = sign-extend
// (via sext_inreg for i8, sextload for i16); the i64 variants widen to 64-bit.
1830 dag Li8 = (i32 (extloadi8 ForceXForm:$src));
1831 dag ZELi8 = (i32 (zextloadi8 ForceXForm:$src));
1832 dag ZELi8i64 = (i64 (zextloadi8 ForceXForm:$src));
1833 dag SELi8 = (i32 (sext_inreg (extloadi8 ForceXForm:$src), i8));
1834 dag SELi8i64 = (i64 (sext_inreg (extloadi8 ForceXForm:$src), i8));
1836 dag Li16 = (i32 (extloadi16 ForceXForm:$src));
1837 dag ZELi16 = (i32 (zextloadi16 ForceXForm:$src));
1838 dag ZELi16i64 = (i64 (zextloadi16 ForceXForm:$src));
1839 dag SELi16 = (i32 (sextloadi16 ForceXForm:$src));
1840 dag SELi16i64 = (i64 (sextloadi16 ForceXForm:$src));
1842 dag Li32 = (i32 (load ForceXForm:$src));
// Per-element v2i64 -> f32 conversions for two source vectors $S1/$S2:
// ElkU*/ElkS* = element k converted unsigned/signed, then gathered into a
// v4f32 build_vector (BVU unsigned, BVS signed).
1845 def DWToSPExtractConv {
1846 dag El0US1 = (f32 (PPCfcfidus
1847 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
1848 dag El1US1 = (f32 (PPCfcfidus
1849 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
1850 dag El0US2 = (f32 (PPCfcfidus
1851 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
1852 dag El1US2 = (f32 (PPCfcfidus
1853 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
1854 dag El0SS1 = (f32 (PPCfcfids
1855 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
1856 dag El1SS1 = (f32 (PPCfcfids
1857 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
1858 dag El0SS2 = (f32 (PPCfcfids
1859 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
1860 dag El1SS2 = (f32 (PPCfcfids
1861 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
1862 dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
1863 dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
// Per-element v4i32 -> f64 conversions (signed ElkS, unsigned ElkU), paired
// into v2f64 vectors of even (BV02*) and odd (BV13*) elements.
1866 def WToDPExtractConv {
1867 dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
1868 dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
1869 dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
1870 dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
1871 dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
1872 dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
1873 dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
1874 dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
1875 dag BV02S = (v2f64 (build_vector El0S, El2S));
1876 dag BV13S = (v2f64 (build_vector El1S, El3S));
1877 dag BV02U = (v2f64 (build_vector El0U, El2U));
1878 dag BV13U = (v2f64 (build_vector El1U, El3U));
1881 /* Direct moves of various widths from GPR's into VSR's. Each move lines
1882 the value up into element 0 (both BE and LE). Namely, entities smaller than
1883 a doubleword are shifted left and moved for BE. For LE, they're moved, then
1884 swapped to go into the least significant element of the VSR.
1890 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
1894 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
1898 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
1899 dag BE_DWORD_0 = (MTVSRD $A);
// LE versions: move, then swap halves with XXPERMDI (selector 2) so the value
// lands in the least significant doubleword.
1901 dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
1902 dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
1903 LE_MTVSRW, sub_64));
1904 dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
1905 dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
1906 BE_DWORD_0, sub_64));
1907 dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
1910 /* Patterns for extracting elements out of vectors. Integer elements are
1911 extracted using direct move operations. Patterns for extracting elements
1912 whose indices are not available at compile time are also provided with
1913 various _VARIABLE_ patterns.
1914 The numbering for the DAG's is for LE, but when used on BE, the correct
1915 LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
// Throughout this def, $S is the source vector and $Idx is the run-time
// element index used by the _VARIABLE_ dags.
1917 def VectorExtractions {
1918 // Doubleword extraction
1922 (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
1923 (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
1924 dag LE_DWORD_1 = (MFVSRD
1926 (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
// Word extraction: position the desired word in the dword MFVSRWZ reads,
// either by doubleword swap (XXPERMDI) or word rotate (XXSLDWI).
1929 dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
1930 dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
1931 dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
1932 (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
1933 dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
1935 // Halfword extraction
// Rotate the containing doubleword so the halfword is right-justified,
// clearing the upper 48 bits (RLDICL mask), then take the low 32 bits.
1936 dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
1937 dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
1938 dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
1939 dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
1940 dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
1941 dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
1942 dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
1943 dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
// Byte extraction follows the same rotate-and-mask scheme with a 56-bit mask.
1946 dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
1947 dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
1948 dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
1949 dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
1950 dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
1951 dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
1952 dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
1953 dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
1954 dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
1955 dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
1956 dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
1957 dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
1958 dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
1959 dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
1960 dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
1961 dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
1963 /* Variable element number (BE and LE patterns must be specified separately)
1964 This is a rather involved process.
1966 Conceptually, this is how the move is accomplished:
1967 1. Identify which doubleword contains the element
1968 2. Shift in the VMX register so that the correct doubleword is correctly
1969 lined up for the MFVSRD
1970 3. Perform the move so that the element (along with some extra stuff)
1972 4. Right shift within the GPR so that the element is right-justified
1974 Of course, the index is an element number which has a different meaning
1975 on LE/BE so the patterns have to be specified separately.
1977 Note: The final result will be the element right-justified with high
1978 order bits being arbitrarily defined (namely, whatever was in the
1979 vector register to the left of the value originally).
1984 - For elements 0-7, we shift left by 8 bytes since they're on the right
1985 - For elements 8-15, we need not shift (shift left by zero bytes)
1986 This is accomplished by inverting the bits of the index and AND-ing
1987 with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
1989 dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)));
1992 // - Now that we set up the shift amount, we shift in the VMX register
1993 dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC));
1996 // - The doubleword containing our element is moved to a GPR
1997 dag LE_MV_VBYTE = (MFVSRD
1999 (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
2003 - Truncate the element number to the range 0-7 (8-15 are symmetrical
2004 and out of range values are truncated accordingly)
2005 - Multiply by 8 as we need to shift right by the number of bits, not bytes
2006 - Shift right in the GPR by the calculated value
2008 dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
2010 dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
2013 /* LE variable halfword
2015 - For elements 0-3, we shift left by 8 since they're on the right
2016 - For elements 4-7, we need not shift (shift left by zero bytes)
2017 Similarly to the byte pattern, we invert the bits of the index, but we
2018 AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
2019 Of course, the shift is still by 8 bytes, so we must multiply by 2.
2021 dag LE_VHALF_PERM_VEC =
2022 (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)));
2025 // - Now that we set up the shift amount, we shift in the VMX register
2026 dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC));
2029 // - The doubleword containing our element is moved to a GPR
2030 dag LE_MV_VHALF = (MFVSRD
2032 (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
2036 - Truncate the element number to the range 0-3 (4-7 are symmetrical
2037 and out of range values are truncated accordingly)
2038 - Multiply by 16 as we need to shift right by the number of bits
2039 - Shift right in the GPR by the calculated value
2041 dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
2043 dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
2048 - For elements 0-1, we shift left by 8 since they're on the right
2049 - For elements 2-3, we need not shift
2051 dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
2052 (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)));
2055 // - Now that we set up the shift amount, we shift in the VMX register
2056 dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC));
2059 // - The doubleword containing our element is moved to a GPR
2060 dag LE_MV_VWORD = (MFVSRD
2062 (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
2066 - Truncate the element number to the range 0-1 (2-3 are symmetrical
2067 and out of range values are truncated accordingly)
2068 - Multiply by 32 as we need to shift right by the number of bits
2069 - Shift right in the GPR by the calculated value
2071 dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
2073 dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
2076 /* LE variable doubleword
2078 - For element 0, we shift left by 8 since it's on the right
2079 - For element 1, we need not shift
2081 dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
2082 (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)));
2085 // - Now that we set up the shift amount, we shift in the VMX register
2086 dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC));
2089 // - The doubleword containing our element is moved to a GPR
2090 // - Number 4. is not needed for the doubleword as the value is 64-bits
2091 dag LE_VARIABLE_DWORD =
2092 (MFVSRD (EXTRACT_SUBREG
2093 (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
2096 /* LE variable float
2097 - Shift the vector to line up the desired element to BE Word 0
2098 - Convert 32-bit float to a 64-bit single precision float
2100 dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8,
2101 (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)));
2102 dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
2103 dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
2105 /* LE variable double
2106 Same as the LE doubleword except there is no move.
2108 dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2109 (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2110 LE_VDWORD_PERM_VEC));
2111 dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
2114 The algorithm here is the same as the LE variable byte except:
2115 - The shift in the VMX register is by 0/8 for opposite element numbers so
2116 we simply AND the element number with 0x8
2117 - The order of elements after the move to GPR is reversed, so we invert
2118 the bits of the index prior to truncating to the range 0-7
2120 dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)))
2121 dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
2122 dag BE_MV_VBYTE = (MFVSRD
2124 (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
2126 dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
2128 dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
2131 /* BE variable halfword
2132 The algorithm here is the same as the LE variable halfword except:
2133 - The shift in the VMX register is by 0/8 for opposite element numbers so
2134 we simply AND the element number with 0x4 and multiply by 2
2135 - The order of elements after the move to GPR is reversed, so we invert
2136 the bits of the index prior to truncating to the range 0-3
2138 dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
2139 (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
2140 dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
2141 dag BE_MV_VHALF = (MFVSRD
2143 (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
2145 dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
2147 dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
2151 The algorithm is the same as the LE variable word except:
2152 - The shift in the VMX register happens for opposite element numbers
2153 - The order of elements after the move to GPR is reversed, so we invert
2154 the bits of the index prior to truncating to the range 0-1
2156 dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
2157 (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
2158 dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
2159 dag BE_MV_VWORD = (MFVSRD
2161 (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
2163 dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
2165 dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
2168 /* BE variable doubleword
2169 Same as the LE doubleword except we shift in the VMX register for opposite
2172 dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
2173 (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
2174 dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
2175 dag BE_VARIABLE_DWORD =
2176 (MFVSRD (EXTRACT_SUBREG
2177 (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
2180 /* BE variable float
2181 - Shift the vector to line up the desired element to BE Word 0
2182 - Convert 32-bit float to a 64-bit single precision float
2184 dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61)));
2185 dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
2186 dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
2188 // BE variable float 32-bit version
// 32-bit variants use the 32-bit zero register and RLWINM instead of the
// 64-bit ZERO8/RLDICR forms.
2189 dag BE_32B_VFLOAT_PERM_VEC = (v16i8 (LVSL (i32 ZERO), (RLWINM $Idx, 2, 0, 29)));
2190 dag BE_32B_VFLOAT_PERMUTE = (VPERM $S, $S, BE_32B_VFLOAT_PERM_VEC);
2191 dag BE_32B_VARIABLE_FLOAT = (XSCVSPDPN BE_32B_VFLOAT_PERMUTE);
2193 /* BE variable double
2194 Same as the BE doubleword except there is no move.
2196 dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2197 (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2198 BE_VDWORD_PERM_VEC));
2199 dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
2201 // BE variable double 32-bit version
2202 dag BE_32B_VDWORD_PERM_VEC = (v16i8 (LVSL (i32 ZERO),
2203 (RLWINM (ANDI_rec $Idx, 1), 3, 0, 28)));
2204 dag BE_32B_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2205 (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
2206 BE_32B_VDWORD_PERM_VEC));
2207 dag BE_32B_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_32B_VDOUBLE_PERMUTE, VSRC);
// Place a scalar into BE word 1 of a vector register: XSCVDPSPN for f32,
// MTVSRWZ + SUBREG_TO_REG for i32.
2211 dag F32_TO_BE_WORD1 = (v4f32 (XSCVDPSPN $B));
2212 dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64);
2215 // Integer extend helper dags 32 -> 64
// Widen each 32-bit operand to 64 bits by inserting it into the low 32 bits
// of an undefined i64 (no extension semantics implied by INSERT_SUBREG).
2217 dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
2218 dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32);
2219 dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32);
2220 dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32);
// f64 -> f32 rounding of both elements of two v2f64 sources.
2224 dag A0 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 0))));
2225 dag A1 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 1))));
2226 dag B0 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 0))));
2227 dag B1 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 1))));
// f64 -> i32 conversions of v2f64 elements: *S = signed (fctiwz),
// *U = unsigned (fctiwuz), each moved to a GPR via PPCmfvsr.
2231 dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
2232 dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
2233 dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
2234 dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
2235 dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
2236 dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
2237 dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
2238 dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
// Sign-extended byte extracts from v16i8 into i32: LE reads lanes 0/4/8/12,
// BE reads lanes 3/7/11/15 (the low byte of each word for that endianness).
2242 dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
2243 dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
2244 dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
2245 dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
2246 dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
2247 dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
2248 dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
2249 dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
// Sign-extended byte extracts widened to i64 (one per doubleword).
2253 dag LE_A0 = (i64 (sext_inreg
2254 (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
2255 dag LE_A1 = (i64 (sext_inreg
2256 (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
2257 dag BE_A0 = (i64 (sext_inreg
2258 (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
2259 dag BE_A1 = (i64 (sext_inreg
2260 (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
// Sign-extended halfword extracts from v8i16 into i32.
2264 dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
2265 dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
2266 dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
2267 dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
2268 dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
2269 dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
2270 dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
2271 dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
// Sign-extended halfword extracts widened to i64.
2275 dag LE_A0 = (i64 (sext_inreg
2276 (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
2277 dag LE_A1 = (i64 (sext_inreg
2278 (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
2279 dag BE_A0 = (i64 (sext_inreg
2280 (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
2281 dag BE_A1 = (i64 (sext_inreg
2282 (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
// Sign-extended word extracts from v4i32 into i64.
2286 dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
2287 dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
2288 dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
2289 dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
// Load-and-convert helper dags. Naming convention visible below:
// Flt/Dbl = source FP width, Int/UInt/Long/ULong = destination integer,
// the *P9 defs use DSForm addressing where the non-P9 ones use ForceXForm.
2293 dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 ForceXForm:$A)))));
2296 dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 ForceXForm:$A)))));
2299 dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ForceXForm:$A)))));
2301 def FltToLongLoadP9 {
2302 dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 DSForm:$A)))));
2304 def FltToULongLoad {
2305 dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ForceXForm:$A)))));
2307 def FltToULongLoadP9 {
2308 dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 DSForm:$A)))));
// Register (non-load) f32 -> i64 conversions via fpextend.
2311 dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
2314 dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz (fpextend f32:$A)))));
// f64 -> i32 conversions of four scalar operands (signed then unsigned).
2317 dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
2318 dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
2319 dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
2320 dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
2323 dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
2324 dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
2325 dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
2326 dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
2329 dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
2332 dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A))));
// f64 loads converted to i32/i64 (again: plain = ForceXForm, P9 = DSForm).
2335 dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ForceXForm:$A)))));
2337 def DblToIntLoadP9 {
2338 dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load DSForm:$A)))));
2341 dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ForceXForm:$A)))));
2343 def DblToUIntLoadP9 {
2344 dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load DSForm:$A)))));
2347 dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load ForceXForm:$A)))));
2349 def DblToULongLoad {
2350 dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load ForceXForm:$A)))));
2353 // FP load dags (for f32 -> v4f32)
2355 dag A = (f32 (load ForceXForm:$A));
2356 dag B = (f32 (load ForceXForm:$B));
2357 dag C = (f32 (load ForceXForm:$C));
2358 dag D = (f32 (load ForceXForm:$D));
2361 // FP merge dags (for f32 -> v4f32)
// LD32x = 32-bit integer load zero-placed into the low dword of a VSR.
2363 dag LD32A = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64);
2364 dag LD32B = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$B), sub_64);
2365 dag LD32C = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$C), sub_64);
2366 dag LD32D = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$D), sub_64);
// Pair scalars into a v2f64 with XXPERMDI, then narrow to SP with XVCVDPSP.
2367 dag AC = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
2368 (SUBREG_TO_REG (i64 1), $C, sub_64), 0));
2369 dag BD = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64),
2370 (SUBREG_TO_REG (i64 1), $D, sub_64), 0));
2371 dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
2372 dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
2373 dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
2374 dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
2377 // Word-element merge dags - conversions from f64 to i32 merged into vectors.
2379 // For big endian, we merge low and hi doublewords (A, B).
2380 dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
2381 dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
2382 dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
2383 dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
2384 dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
2385 dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
2387 // For little endian, we merge low and hi doublewords (B, A).
2388 dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
2389 dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
2390 dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
2391 dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
2392 dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
2393 dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
2395 // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
2397 dag AC = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$A, sub_64),
2398 (SUBREG_TO_REG (i64 1), f64:$C, sub_64), 0));
2399 dag BD = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$B, sub_64),
2400 (SUBREG_TO_REG (i64 1), f64:$D, sub_64), 0));
2401 dag CVACS = (v4i32 (XVCVDPSXWS AC));
2402 dag CVBDS = (v4i32 (XVCVDPSXWS BD));
2403 dag CVACU = (v4i32 (XVCVDPUXWS AC));
2404 dag CVBDU = (v4i32 (XVCVDPUXWS BD));
2406 // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
2408 dag DB = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$D, sub_64),
2409 (SUBREG_TO_REG (i64 1), f64:$B, sub_64), 0));
2410 dag CA = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$C, sub_64),
2411 (SUBREG_TO_REG (i64 1), f64:$A, sub_64), 0));
2412 dag CVDBS = (v4i32 (XVCVDPSXWS DB));
2413 dag CVCAS = (v4i32 (XVCVDPSXWS CA));
2414 dag CVDBU = (v4i32 (XVCVDPUXWS DB));
2415 dag CVCAU = (v4i32 (XVCVDPUXWS CA));
// Doubleword comparison built from word comparisons: combine the word-wise
// greater-than (SGTW/UGTW) and equality (EQW) results, using the
// shifted-and-ANDed word results to account for the low words.
2419 dag SGTW = (v2i64 (v2i64 (VCMPGTSW v2i64:$vA, v2i64:$vB)));
2420 dag UGTW = (v2i64 (v2i64 (VCMPGTUW v2i64:$vA, v2i64:$vB)));
2421 dag EQW = (v2i64 (v2i64 (VCMPEQUW v2i64:$vA, v2i64:$vB)));
2422 dag UGTWSHAND = (v2i64 (XXLAND (v2i64 (XXSLDWI UGTW, UGTW, 1)), EQW));
2423 dag EQWSHAND = (v2i64 (XXLAND (v2i64 (XXSLDWI EQW, EQW, 1)), EQW));
2424 dag SGTWOR = (v2i64 (XXLOR SGTW, UGTWSHAND));
2425 dag UGTWOR = (v2i64 (XXLOR UGTW, UGTWSHAND));
2426 dag MRGSGT = (v2i64 (XXPERMDI (v2i64 (XXSPLTW SGTWOR, 0)),
2427 (v2i64 (XXSPLTW SGTWOR, 2)), 0));
2428 dag MRGUGT = (v2i64 (XXPERMDI (v2i64 (XXSPLTW UGTWOR, 0)),
2429 (v2i64 (XXSPLTW UGTWOR, 2)), 0));
2430 dag MRGEQ = (v2i64 (XXPERMDI (v2i64 (XXSPLTW EQWSHAND, 0)),
2431 (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
2434 //---------------------------- Anonymous Patterns ----------------------------//
2435 // Predicate combinations are kept in roughly chronological order in terms of
2436 // instruction availability in the architecture. For example, VSX came in with
2437 // ISA 2.06 (Power7). There have since been additions in ISA 2.07 (Power8) and
2438 // ISA 3.0 (Power9). However, the granularity of features on later subtargets
2439 // is finer for various reasons. For example, we have Power8Vector,
2440 // Power8Altivec, DirectMove that all came in with ISA 2.07. The situation is
2441 // similar with ISA 3.0 with Power9Vector, Power9Altivec, IsISA3_0. Then there
2442 // are orthogonal predicates such as endianness for which the order was
2443 // arbitrarily chosen to be Big, Little.
2445 // Predicate combinations available:
2446 // [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
2447 // [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
2449 // [HasVSX, IsBigEndian]
2450 // [HasVSX, IsLittleEndian]
2451 // [HasVSX, NoP9Vector]
2452 // [HasVSX, NoP9Vector, IsLittleEndian]
2453 // [HasVSX, NoP9Vector, IsBigEndian]
2454 // [HasVSX, HasOnlySwappingMemOps]
2455 // [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
2456 // [HasVSX, NoP8Vector]
2457 // [HasVSX, HasP8Vector]
2458 // [HasVSX, HasP8Vector, IsBigEndian]
2459 // [HasVSX, HasP8Vector, IsBigEndian, IsPPC64]
2460 // [HasVSX, HasP8Vector, IsLittleEndian]
2461 // [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
2462 // [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
2463 // [HasVSX, HasP8Altivec]
2464 // [HasVSX, HasDirectMove]
2465 // [HasVSX, HasDirectMove, IsBigEndian]
2466 // [HasVSX, HasDirectMove, IsLittleEndian]
2467 // [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64]
2468 // [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64]
2469 // [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
2470 // [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
2471 // [HasVSX, HasP9Vector]
2472 // [HasVSX, HasP9Vector, NoP10Vector]
2473 // [HasVSX, HasP9Vector, IsBigEndian]
2474 // [HasVSX, HasP9Vector, IsBigEndian, IsPPC64]
2475 // [HasVSX, HasP9Vector, IsLittleEndian]
2476 // [HasVSX, HasP9Altivec]
2477 // [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64]
2478 // [HasVSX, HasP9Altivec, IsLittleEndian]
2479 // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
2480 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
2482 // These Altivec patterns are here because we need a VSX instruction to match
2483 // the intrinsic (but only for little-endian systems).
2484 let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
// On little-endian subtargets the permute-control vector must be
// complemented before VPERMXOR: XXLNOR of a value with itself computes
// the bitwise NOT of $c (after bouncing it into the VSX class).
2485 def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
2486 v16i8:$b, v16i8:$c)),
2487 (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
2488 (COPY_TO_REGCLASS $c, VSRC))))>;
2489 let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
// Big endian: the control vector is used unmodified.
2490 def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
2491 v16i8:$b, v16i8:$c)),
2492 (v16i8 (VPERMXOR $a, $b, $c))>;
// The _be form of the intrinsic maps directly to VPERMXOR; note the
// predicate list carries no endianness condition, so this applies on both.
2493 let Predicates = [HasVSX, HasP8Altivec] in
2494 def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a,
2495 v16i8:$b, v16i8:$c)),
2496 (v16i8 (VPERMXOR $a, $b, $c))>;
2498 let AddedComplexity = 400 in {
2499 // Valid for any VSX subtarget, regardless of endianness.
2500 let Predicates = [HasVSX] in {
2501 def : Pat<(v4i32 (vnot v4i32:$A)),
2502 (v4i32 (XXLNOR $A, $A))>;
2503 def : Pat<(v4i32 (or (and (vnot v4i32:$C), v4i32:$A),
2504 (and v4i32:$B, v4i32:$C))),
2505 (v4i32 (XXSEL $A, $B, $C))>;
2507 def : Pat<(f64 (fpimm0neg)),
2508 (f64 (XSNEGDP (XXLXORdpz)))>;
2510 def : Pat<(f64 (nzFPImmExactInti5:$A)),
2511 (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
2512 (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), VSFRC)>;
2514 // Additional fnmsub pattern for PPC specific ISD opcode
// An fneg wrapped around PPCfnmsub, or folded into its addend, selects the
// corresponding msub/madd FMA form so no separate negate is emitted.
// Scalar f64 forms:
2515 def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
2516 (XSNMSUBADP $C, $A, $B)>;
2517 def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
2518 (XSMSUBADP $C, $A, $B)>;
2519 def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
2520 (XSNMADDADP $C, $A, $B)>;
// Same folds for the v2f64 vector forms:
2522 def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C),
2523 (XVNMSUBADP $C, $A, $B)>;
2524 def : Pat<(fneg (PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C)),
2525 (XVMSUBADP $C, $A, $B)>;
2526 def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, (fneg v2f64:$C)),
2527 (XVNMADDADP $C, $A, $B)>;
// And for the v4f32 vector forms:
2529 def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
2530 (XVNMSUBASP $C, $A, $B)>;
2531 def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
2532 (XVMSUBASP $C, $A, $B)>;
2533 def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
2534 (XVNMADDASP $C, $A, $B)>;
2536 def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
2537 def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
2538 def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
2540 def : Pat<(v2f64 (bitconvert v4f32:$A)),
2541 (COPY_TO_REGCLASS $A, VSRC)>;
2542 def : Pat<(v2f64 (bitconvert v4i32:$A)),
2543 (COPY_TO_REGCLASS $A, VSRC)>;
2544 def : Pat<(v2f64 (bitconvert v8i16:$A)),
2545 (COPY_TO_REGCLASS $A, VSRC)>;
2546 def : Pat<(v2f64 (bitconvert v16i8:$A)),
2547 (COPY_TO_REGCLASS $A, VSRC)>;
2549 def : Pat<(v4f32 (bitconvert v2f64:$A)),
2550 (COPY_TO_REGCLASS $A, VRRC)>;
2551 def : Pat<(v4i32 (bitconvert v2f64:$A)),
2552 (COPY_TO_REGCLASS $A, VRRC)>;
2553 def : Pat<(v8i16 (bitconvert v2f64:$A)),
2554 (COPY_TO_REGCLASS $A, VRRC)>;
2555 def : Pat<(v16i8 (bitconvert v2f64:$A)),
2556 (COPY_TO_REGCLASS $A, VRRC)>;
2558 def : Pat<(v2i64 (bitconvert v4f32:$A)),
2559 (COPY_TO_REGCLASS $A, VSRC)>;
2560 def : Pat<(v2i64 (bitconvert v4i32:$A)),
2561 (COPY_TO_REGCLASS $A, VSRC)>;
2562 def : Pat<(v2i64 (bitconvert v8i16:$A)),
2563 (COPY_TO_REGCLASS $A, VSRC)>;
2564 def : Pat<(v2i64 (bitconvert v16i8:$A)),
2565 (COPY_TO_REGCLASS $A, VSRC)>;
2567 def : Pat<(v4f32 (bitconvert v2i64:$A)),
2568 (COPY_TO_REGCLASS $A, VRRC)>;
2569 def : Pat<(v4i32 (bitconvert v2i64:$A)),
2570 (COPY_TO_REGCLASS $A, VRRC)>;
2571 def : Pat<(v8i16 (bitconvert v2i64:$A)),
2572 (COPY_TO_REGCLASS $A, VRRC)>;
2573 def : Pat<(v16i8 (bitconvert v2i64:$A)),
2574 (COPY_TO_REGCLASS $A, VRRC)>;
2576 def : Pat<(v2f64 (bitconvert v2i64:$A)),
2577 (COPY_TO_REGCLASS $A, VRRC)>;
2578 def : Pat<(v2i64 (bitconvert v2f64:$A)),
2579 (COPY_TO_REGCLASS $A, VRRC)>;
2581 def : Pat<(v2f64 (bitconvert v1i128:$A)),
2582 (COPY_TO_REGCLASS $A, VRRC)>;
2583 def : Pat<(v1i128 (bitconvert v2f64:$A)),
2584 (COPY_TO_REGCLASS $A, VRRC)>;
2586 def : Pat<(v2i64 (bitconvert f128:$A)),
2587 (COPY_TO_REGCLASS $A, VRRC)>;
2588 def : Pat<(v4i32 (bitconvert f128:$A)),
2589 (COPY_TO_REGCLASS $A, VRRC)>;
2590 def : Pat<(v8i16 (bitconvert f128:$A)),
2591 (COPY_TO_REGCLASS $A, VRRC)>;
2592 def : Pat<(v16i8 (bitconvert f128:$A)),
2593 (COPY_TO_REGCLASS $A, VRRC)>;
2595 def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
2596 (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
2597 def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
2598 (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
2600 def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
2601 (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
2602 def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
2603 (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
2605 def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
2606 def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
2609 def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
2610 def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
2611 def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
2612 def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
2613 def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
2615 // PPCvecshl XT, XA, XA, 2 can be selected to either XXSLDWI XT,XA,XA,2 or
2616 // XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
2617 def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)),
2618 (XXPERMDI $src, $src, 2)>;
// select_cc with i1 (CR-bit) operands: each condition code is realized by
// combining the two condition bits with a single CR logical operation
// (CRANDC / CRORC / CREQV / CRXOR) whose result drives a SELECT pseudo on
// the appropriate VSX register class.  First the v2f64 forms (SELECT_VSRC):
2621 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
2622 (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
2623 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
2624 (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
2625 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
2626 (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
2627 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
2628 (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
2629 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
2630 (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
2631 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
2632 (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
2633 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
2634 (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
2635 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
2636 (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
2637 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
2638 (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
2639 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
2640 (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
// Identical condition mapping for scalar f64, selecting into VSFRC:
2642 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
2643 (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
2644 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
2645 (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
2646 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
2647 (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
2648 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
2649 (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
2650 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
2651 (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
2652 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
2653 (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
2654 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
2655 (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
2656 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
2657 (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
2658 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
2659 (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
2660 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
2661 (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
2664 def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
2666 def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
2669 // Vector test for software divide and sqrt.
2670 def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
2671 (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
2672 def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
2673 (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
2674 def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
2675 (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
2676 def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
2677 (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
2679 // Reciprocal estimate
2680 def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
2682 def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
2685 // Recip. square root estimate
2686 def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
2688 def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
2692 def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
2694 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
2695 (COPY_TO_REGCLASS $vB, VSRC),
2696 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2697 def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
2699 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
2700 (COPY_TO_REGCLASS $vB, VSRC),
2701 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2702 def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
2703 (XXSEL $vC, $vB, $vA)>;
2704 def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
2705 (XXSEL $vC, $vB, $vA)>;
2706 def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
2707 (XXSEL $vC, $vB, $vA)>;
2708 def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
2709 (XXSEL $vC, $vB, $vA)>;
2710 def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)),
2712 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
2713 (COPY_TO_REGCLASS $vB, VSRC),
2714 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2716 def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
2717 (v4f32 (XVMAXSP $src1, $src2))>;
2718 def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
2719 (v4f32 (XVMINSP $src1, $src2))>;
2720 def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
2721 (v2f64 (XVMAXDP $src1, $src2))>;
2722 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
2723 (v2f64 (XVMINDP $src1, $src2))>;
2726 def : Pat<(f32 (fabs f32:$S)),
2727 (f32 (COPY_TO_REGCLASS (XSABSDP
2728 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2731 def : Pat<(f32 (fneg (fabs f32:$S))),
2732 (f32 (COPY_TO_REGCLASS (XSNABSDP
2733 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2736 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
2737 (f32 FpMinMax.F32Min)>;
2738 def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
2739 (f32 FpMinMax.F32Min)>;
2740 def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
2741 (f32 FpMinMax.F32Min)>;
2742 def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
2743 (f32 FpMinMax.F32Min)>;
2745 def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
2746 (f32 FpMinMax.F32Max)>;
2747 def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
2748 (f32 FpMinMax.F32Max)>;
2749 def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
2750 (f32 FpMinMax.F32Max)>;
2751 def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
2752 (f32 FpMinMax.F32Max)>;
2755 def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
2756 (f64 (XSMINDP $A, $B))>;
2757 def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
2758 (f64 (XSMINDP $A, $B))>;
2759 def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
2760 (f64 (XSMINDP $A, $B))>;
2761 def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
2762 (f64 (XSMINDP $A, $B))>;
2764 def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
2765 (f64 (XSMAXDP $A, $B))>;
2766 def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
2767 (f64 (XSMAXDP $A, $B))>;
2768 def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
2769 (f64 (XSMAXDP $A, $B))>;
2770 def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
2771 (f64 (XSMAXDP $A, $B))>;
2773 def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst),
2774 (STXVD2X $rS, ForceXForm:$dst)>;
2775 def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, ForceXForm:$dst),
2776 (STXVW4X $rS, ForceXForm:$dst)>;
2777 def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>;
2778 def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
2780 // Rounding for single precision.
// The f32 input is moved into the 64-bit VSX class (VSFRC), rounded with the
// double-precision round-to-integral instructions, and copied back to the
// single-precision class (VSSRC):
//   XSRDPI  - round to nearest      (fround)
//   XSRDPIM - round toward -inf     (ffloor)
//   XSRDPIP - round toward +inf     (fceil)
//   XSRDPIZ - round toward zero     (ftrunc)
//   XSRDPIC - use current rounding  (frint)
2781 def : Pat<(f32 (any_fround f32:$S)),
2782 (f32 (COPY_TO_REGCLASS (XSRDPI
2783 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2784 def : Pat<(f32 (any_ffloor f32:$S)),
2785 (f32 (COPY_TO_REGCLASS (XSRDPIM
2786 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2787 def : Pat<(f32 (any_fceil f32:$S)),
2788 (f32 (COPY_TO_REGCLASS (XSRDPIP
2789 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2790 def : Pat<(f32 (any_ftrunc f32:$S)),
2791 (f32 (COPY_TO_REGCLASS (XSRDPIZ
2792 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2793 def : Pat<(f32 (any_frint f32:$S)),
2794 (f32 (COPY_TO_REGCLASS (XSRDPIC
2795 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
// Vector single precision rounds in place, no class bounce needed.
2796 def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
2798 // Rounding for double precision.
2799 def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
2800 def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
2802 // Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
2803 // these need to be defined after the any_frint versions so ISEL will correctly
2804 // add the chain to the strict versions.
2805 def : Pat<(f32 (fnearbyint f32:$S)),
2806 (f32 (COPY_TO_REGCLASS (XSRDPIC
2807 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2808 def : Pat<(f64 (fnearbyint f64:$S)),
2809 (f64 (XSRDPIC $S))>;
2810 def : Pat<(v2f64 (fnearbyint v2f64:$S)),
2811 (v2f64 (XVRDPIC $S))>;
2812 def : Pat<(v4f32 (fnearbyint v4f32:$S)),
2813 (v4f32 (XVRSPIC $S))>;
2815 // Materialize a zero-vector of long long
2816 def : Pat<(v2i64 immAllZerosV),
2819 // Build vectors of floating point converted to i32.
2820 def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
2821 DblToInt.A, DblToInt.A)),
2822 (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS $A), sub_64), 1))>;
2823 def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
2824 DblToUInt.A, DblToUInt.A)),
2825 (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS $A), sub_64), 1))>;
2826 def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
2827 (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64),
2828 (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64), 0))>;
2829 def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
2830 (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
2831 (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
2832 def : Pat<(v4i32 (PPCSToV DblToInt.A)),
2833 (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
2834 def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
2835 (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>;
2836 defm : ScalToVecWPermute<
2837 v4i32, FltToIntLoad.A,
2838 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
2839 (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>;
2840 defm : ScalToVecWPermute<
2841 v4i32, FltToUIntLoad.A,
2842 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
2843 (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>;
2844 def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
2845 (f32 (fpround f64:$A)), (f32 (fpround f64:$A)))),
2846 (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$A), sub_64), 0))>;
2848 def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
2849 (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
2852 def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
2853 (v2f64 (LXVDSX ForceXForm:$A))>;
2854 def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
2855 (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
2856 def : Pat<(v2i64 (PPCldsplat ForceXForm:$A)),
2857 (v2i64 (LXVDSX ForceXForm:$A))>;
2858 def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
2859 (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
2860 def : Pat<(v2i64 (PPCzextldsplat ForceXForm:$A)),
2861 (v2i64 (XXPERMDIs (LFIWZX ForceXForm:$A), 0))>;
2862 def : Pat<(v2i64 (PPCsextldsplat ForceXForm:$A)),
2863 (v2i64 (XXPERMDIs (LFIWAX ForceXForm:$A), 0))>;
2865 // Build vectors of floating point converted to i64.
2866 def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
2868 (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
2869 def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
2871 (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
2872 defm : ScalToVecWPermute<
2873 v2i64, DblToLongLoad.A,
2874 (XVCVDPSXDS (LXVDSX ForceXForm:$A)), (XVCVDPSXDS (LXVDSX ForceXForm:$A))>;
2875 defm : ScalToVecWPermute<
2876 v2i64, DblToULongLoad.A,
2877 (XVCVDPUXDS (LXVDSX ForceXForm:$A)), (XVCVDPUXDS (LXVDSX ForceXForm:$A))>;
2879 // Doubleword vector predicate comparisons without Power8.
// Built from the word-compare helpers in DblwdCmp; the merged per-doubleword
// mask is compared against zero (XXLXORz) with a record-form compare
// (VCMPGTUB_rec) so the condition result is available to PPCvcmp_rec.
// NOTE(review): the immediates 967/711/199 appear to select the
// signed-gt / unsigned-gt / eq predicates respectively -- confirm against
// the PPCvcmp_rec users in PPCISelLowering.
2880 let AddedComplexity = 0 in {
2881 def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 967)),
2882 (VCMPGTUB_rec DblwdCmp.MRGSGT, (v2i64 (XXLXORz)))>;
2883 def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 711)),
2884 (VCMPGTUB_rec DblwdCmp.MRGUGT, (v2i64 (XXLXORz)))>;
2885 def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 199)),
2886 (VCMPGTUB_rec DblwdCmp.MRGEQ, (v2i64 (XXLXORz)))>;
2887 } // AddedComplexity = 0
2889 // XL Compat builtins.
2890 def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>;
2891 def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>;
2892 def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>;
2893 def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>;
2894 def : Pat<(int_ppc_fnabs f64:$A), (XSNABSDP $A)>;
2895 def : Pat<(int_ppc_fnabss f32:$A), (XSNABSDPs $A)>;
2897 // XXMRG[LH]W is a direct replacement for VMRG[LH]W respectively.
2898 // Prefer the VSX form for greater register range.
// The COPY_TO_REGCLASS bounces move the v16i8 operands from the Altivec
// class (VRRC) into the VSX class (VSRC) and the result back again.
2899 def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
2900 (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
2901 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2902 def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
2903 (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
2904 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2905 def:Pat<(vmrglw_shuffle v16i8:$vA, v16i8:$vB),
2906 (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
2907 (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
2908 def:Pat<(vmrghw_shuffle v16i8:$vA, v16i8:$vB),
2909 (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
2910 (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
// Swapped-operand shuffle variants feed the operands to XXMRG[LH]W in
// reverse order.
2911 def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
2912 (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vB, VSRC),
2913 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
2914 def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
2915 (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vB, VSRC),
2916 (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
// 8-byte integer stores straight from a VSX scalar register (X-Form); the
// f128 source is first narrowed to the VSFRC class.
2917 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, XForm:$dst, 8),
2918 (STXSDX $src, XForm:$dst)>;
2919 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, XForm:$dst, 8),
2920 (STXSDX (COPY_TO_REGCLASS $src, VSFRC), XForm:$dst)>;
2923 // Any big endian VSX subtarget.
2924 let Predicates = [HasVSX, IsBigEndian] in {
2925 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
2926 (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
2928 def : Pat<(f64 (extractelt v2f64:$S, 0)),
2929 (f64 (EXTRACT_SUBREG $S, sub_64))>;
2930 def : Pat<(f64 (extractelt v2f64:$S, 1)),
2931 (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
2932 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
2933 (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
2934 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
2935 (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
2936 def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
2937 (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
2938 def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
2939 (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
2941 def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
2942 (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
2944 def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
2946 (SUBREG_TO_REG (i64 1), $A, sub_64),
2947 (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
2948 // Using VMRGEW to assemble the final vector would be a lower latency
2949 // solution. However, we choose to go with the slightly higher latency
2950 // XXPERMDI for 2 reasons:
2951 // 1. This is likely to occur in unrolled loops where regpressure is high,
2952 // so we want to use the latter as it has access to all 64 VSX registers.
2953 // 2. Using Altivec instructions in this sequence would likely cause the
2954 // allocation of Altivec registers even for the loads which in turn would
2955 // force the use of LXSIWZX for the loads, adding a cycle of latency to
2956 // each of the loads which would otherwise be able to use LFIWZX.
2957 def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
2958 (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
2959 (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
2960 def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
2961 (VMRGEW MrgFP.AC, MrgFP.BD)>;
2962 def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
2963 DblToFlt.B0, DblToFlt.B1)),
2964 (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
2966 // Convert 4 doubles to a vector of ints.
2967 def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
2968 DblToInt.C, DblToInt.D)),
2969 (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
2970 def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
2971 DblToUInt.C, DblToUInt.D)),
2972 (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
2973 def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
2974 ExtDbl.B0S, ExtDbl.B1S)),
2975 (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
2976 def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
2977 ExtDbl.B0U, ExtDbl.B1U)),
2978 (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
2979 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
2980 (f64 (fpextend (extractelt v4f32:$A, 1))))),
2981 (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
2982 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
2983 (f64 (fpextend (extractelt v4f32:$A, 0))))),
2984 (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
2985 (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
2986 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
2987 (f64 (fpextend (extractelt v4f32:$A, 2))))),
2988 (v2f64 (XVCVSPDP $A))>;
2989 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
2990 (f64 (fpextend (extractelt v4f32:$A, 3))))),
2991 (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
2992 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
2993 (f64 (fpextend (extractelt v4f32:$A, 3))))),
2994 (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
2995 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
2996 (f64 (fpextend (extractelt v4f32:$A, 2))))),
2997 (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
2998 (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
2999 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3000 (f64 (fpextend (extractelt v4f32:$B, 0))))),
3001 (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
3002 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
3003 (f64 (fpextend (extractelt v4f32:$B, 3))))),
3004 (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
3005 (XXPERMDI $A, $B, 3), 1)))>;
3006 def : Pat<(v2i64 (fp_to_sint
3007 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3008 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
3009 (v2i64 (XVCVSPSXDS $A))>;
3010 def : Pat<(v2i64 (fp_to_uint
3011 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3012 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
3013 (v2i64 (XVCVSPUXDS $A))>;
3014 def : Pat<(v2i64 (fp_to_sint
3015 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3016 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
3017 (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
3018 def : Pat<(v2i64 (fp_to_uint
3019 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3020 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
3021 (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
3022 def : Pat<WToDPExtractConv.BV02S,
3023 (v2f64 (XVCVSXWDP $A))>;
3024 def : Pat<WToDPExtractConv.BV13S,
3025 (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
3026 def : Pat<WToDPExtractConv.BV02U,
3027 (v2f64 (XVCVUXWDP $A))>;
3028 def : Pat<WToDPExtractConv.BV13U,
3029 (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
3030 def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
3031 (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
3032 def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
3033 (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
3034 } // HasVSX, IsBigEndian
3036 // Any little endian VSX subtarget.
3037 let Predicates = [HasVSX, IsLittleEndian] in {
3038 defm : ScalToVecWPermute<v2f64, (f64 f64:$A),
3039 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
3040 (SUBREG_TO_REG (i64 1), $A, sub_64), 0),
3041 (SUBREG_TO_REG (i64 1), $A, sub_64)>;
3043 def : Pat<(f64 (extractelt v2f64:$S, 0)),
3044 (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
3045 def : Pat<(f64 (extractelt v2f64:$S, 1)),
3046 (f64 (EXTRACT_SUBREG $S, sub_64))>;
3048 def : Pat<(v2f64 (PPCld_vec_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
3049 def : Pat<(PPCst_vec_be v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>;
3050 def : Pat<(v4f32 (PPCld_vec_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>;
3051 def : Pat<(PPCst_vec_be v4f32:$rS, ForceXForm:$dst), (STXVW4X $rS, ForceXForm:$dst)>;
3052 def : Pat<(v2i64 (PPCld_vec_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
3053 def : Pat<(PPCst_vec_be v2i64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>;
3054 def : Pat<(v4i32 (PPCld_vec_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>;
3055 def : Pat<(PPCst_vec_be v4i32:$rS, ForceXForm:$dst), (STXVW4X $rS, ForceXForm:$dst)>;
3056 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
3057 (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
3058 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
3059 (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
3060 def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
3061 (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
3062 def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
3063 (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
3065 def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
3066 (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
3068 // Little endian, available on all targets with VSX
3069 def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
3071 (SUBREG_TO_REG (i64 1), $B, sub_64),
3072 (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
3073 // Using VMRGEW to assemble the final vector would be a lower latency
3074 // solution. However, we choose to go with the slightly higher latency
3075 // XXPERMDI for 2 reasons:
3076 // 1. This is likely to occur in unrolled loops where regpressure is high,
3077 // so we want to use the latter as it has access to all 64 VSX registers.
3078 // 2. Using Altivec instructions in this sequence would likely cause the
3079 // allocation of Altivec registers even for the loads which in turn would
3080 // force the use of LXSIWZX for the loads, adding a cycle of latency to
3081 // each of the loads which would otherwise be able to use LFIWZX.
3082 def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
3083 (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
3084 (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
3085 def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
3086 (VMRGEW MrgFP.AC, MrgFP.BD)>;
3087 def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
3088 DblToFlt.B0, DblToFlt.B1)),
3089 (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
3091 // Convert 4 doubles to a vector of ints.
3092 def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
3093 DblToInt.C, DblToInt.D)),
3094 (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
3095 def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
3096 DblToUInt.C, DblToUInt.D)),
3097 (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
3098 def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
3099 ExtDbl.B0S, ExtDbl.B1S)),
3100 (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
3101 def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
3102 ExtDbl.B0U, ExtDbl.B1U)),
3103 (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
3104 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3105 (f64 (fpextend (extractelt v4f32:$A, 1))))),
3106 (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
3107 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3108 (f64 (fpextend (extractelt v4f32:$A, 0))))),
3109 (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
3110 (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
3111 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3112 (f64 (fpextend (extractelt v4f32:$A, 2))))),
3113 (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
3114 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3115 (f64 (fpextend (extractelt v4f32:$A, 3))))),
3116 (v2f64 (XVCVSPDP $A))>;
3117 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
3118 (f64 (fpextend (extractelt v4f32:$A, 3))))),
3119 (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
3120 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
3121 (f64 (fpextend (extractelt v4f32:$A, 2))))),
3122 (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
3123 (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
3124 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3125 (f64 (fpextend (extractelt v4f32:$B, 0))))),
3126 (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
3127 (XXPERMDI $B, $A, 3), 1)))>;
3128 def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
3129 (f64 (fpextend (extractelt v4f32:$B, 3))))),
3130 (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
3131 def : Pat<(v2i64 (fp_to_sint
3132 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3133 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
3134 (v2i64 (XVCVSPSXDS $A))>;
3135 def : Pat<(v2i64 (fp_to_uint
3136 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
3137 (f64 (fpextend (extractelt v4f32:$A, 3)))))),
3138 (v2i64 (XVCVSPUXDS $A))>;
3139 def : Pat<(v2i64 (fp_to_sint
3140 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3141 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
3142 (v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
3143 def : Pat<(v2i64 (fp_to_uint
3144 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
3145 (f64 (fpextend (extractelt v4f32:$A, 2)))))),
3146 (v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
3147 def : Pat<WToDPExtractConv.BV02S,
3148 (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
3149 def : Pat<WToDPExtractConv.BV13S,
3150 (v2f64 (XVCVSXWDP $A))>;
3151 def : Pat<WToDPExtractConv.BV02U,
3152 (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
3153 def : Pat<WToDPExtractConv.BV13U,
3154 (v2f64 (XVCVUXWDP $A))>;
3155 def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
3156 (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
3157 def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
3158 (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
3159 } // HasVSX, IsLittleEndian
3161 // Any pre-Power9 VSX subtarget.
3162 let Predicates = [HasVSX, NoP9Vector] in {
// Store the integer bit pattern held in a VSR scalar as 8 bytes using the
// X-Form STXSDX; the f128 form first copies the value into VSFRC.
3163 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 8),
3164 (STXSDX $src, ForceXForm:$dst)>;
3165 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 8),
3166 (STXSDX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
3168 // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
// Each ScalToVecWPermute supplies two result forms: a full splat (XXSPLTW)
// and a scalar-in-one-lane form (SUBREG_TO_REG) -- see the multiclass
// definition earlier in the file (not visible in this chunk).
3169 defm : ScalToVecWPermute<
3170 v4i32, DblToIntLoad.A,
3171 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 ForceXForm:$A)), sub_64), 1),
3172 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 ForceXForm:$A)), sub_64)>;
3173 defm : ScalToVecWPermute<
3174 v4i32, DblToUIntLoad.A,
3175 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 ForceXForm:$A)), sub_64), 1),
3176 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 ForceXForm:$A)), sub_64)>;
// f32 loads feeding i64 conversions: load with XFLOADf32, then convert with
// XSCVDPSXDS/XSCVDPUXDS. NOTE(review): the closing lines of these two defm
// records appear truncated in this extract -- verify against the full file.
3177 defm : ScalToVecWPermute<
3178 v2i64, FltToLongLoad.A,
3179 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), 0),
3180 (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A),
3182 defm : ScalToVecWPermute<
3183 v2i64, FltToULongLoad.A,
3184 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), 0),
3185 (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A),
3187 } // HasVSX, NoP9Vector
3189 // Any little endian pre-Power9 VSX subtarget.
3190 let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in {
3191 // Load-and-splat using only X-Form VSX loads.
// On LE, XFLOADf64 leaves the value in the wrong doubleword for a splat, so
// the splat form swaps with XXPERMDIs 2; the scalar-in-lane form only wraps
// the load in SUBREG_TO_REG.
3192 defm : ScalToVecWPermute<
3193 v2i64, (i64 (load ForceXForm:$src)),
3194 (XXPERMDIs (XFLOADf64 ForceXForm:$src), 2),
3195 (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
3196 defm : ScalToVecWPermute<
3197 v2f64, (f64 (load ForceXForm:$src)),
3198 (XXPERMDIs (XFLOADf64 ForceXForm:$src), 2),
3199 (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
// Splat from a 16-byte-aligned load via LVX. The splat indices (7 for
// halfwords, 15 for bytes) mirror the 0 used in the BE block below,
// adjusted for little-endian element numbering.
3202 def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
3203 (v8i16 (VSPLTH 7, (LVX ForceXForm:$A)))>;
3204 def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
3205 (v16i8 (VSPLTB 15, (LVX ForceXForm:$A)))>;
3206 } // HasVSX, NoP9Vector, IsLittleEndian
3208 let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in {
// Map the lxvd2x/stxvd2x intrinsics straight onto the pre-P9 instructions;
// no swap is needed on big-endian subtargets.
3209 def : Pat<(v2f64 (int_ppc_vsx_lxvd2x ForceXForm:$src)),
3210 (LXVD2X ForceXForm:$src)>;
3211 def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst),
3212 (STXVD2X $rS, ForceXForm:$dst)>;
// Splat from a 16-byte-aligned load via LVX; BE element numbering makes
// index 0 the element to splat (the LE block uses 7/15 instead).
3215 def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
3216 (v8i16 (VSPLTH 0, (LVX ForceXForm:$A)))>;
3217 def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
3218 (v16i8 (VSPLTB 0, (LVX ForceXForm:$A)))>;
3219 } // HasVSX, NoP9Vector, IsBigEndian
3221 // Any VSX subtarget that only has loads and stores that load in big endian
3222 // order regardless of endianness. This is really pre-Power9 subtargets.
3223 let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// The swap-aware PPClxvd2x/PPCstxvd2x nodes map directly to the BE-order
// LXVD2X/STXVD2X instructions; any needed swap is represented separately
// in the DAG (see the endianness notes at the top of this file).
3224 def : Pat<(v2f64 (PPClxvd2x ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
3227 def : Pat<(PPCstxvd2x v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>;
3228 } // HasVSX, HasOnlySwappingMemOps
3230 // Big endian VSX subtarget that only has loads and stores that always
3231 // load in big endian order. Really big endian pre-Power9 subtargets.
3232 let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
// Plain vector loads/stores select the BE-order LXV*/STXV* directly since
// memory order and register order agree on big-endian subtargets.
3233 def : Pat<(v2f64 (load ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
3234 def : Pat<(v2i64 (load ForceXForm:$src)), (LXVD2X ForceXForm:$src)>;
3235 def : Pat<(v4i32 (load ForceXForm:$src)), (LXVW4X ForceXForm:$src)>;
3236 def : Pat<(v4i32 (int_ppc_vsx_lxvw4x ForceXForm:$src)), (LXVW4X ForceXForm:$src)>;
3237 def : Pat<(store v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>;
3238 def : Pat<(store v2i64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>;
3239 def : Pat<(store v4i32:$XT, ForceXForm:$dst), (STXVW4X $XT, ForceXForm:$dst)>;
3240 def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, ForceXForm:$dst),
3241 (STXVW4X $rS, ForceXForm:$dst)>;
// scalar_to_vector of a loaded i64: load the doubleword with XFLOADf64 and
// place it in the register via SUBREG_TO_REG.
3242 def : Pat<(v2i64 (scalar_to_vector (i64 (load ForceXForm:$src)))),
3243 (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
3244 } // HasVSX, HasOnlySwappingMemOps, IsBigEndian
3246 // Target before Power8 with VSX.
3247 let Predicates = [HasVSX, NoP8Vector] in {
// Materialize -0.0f: xor a register with itself (XXLXORdpz) then negate.
// Pre-P8 the result goes to F4RC; the HasP8Vector block below uses VSSRC.
3248 def : Pat<(f32 (fpimm0neg)),
3249 (f32 (COPY_TO_REGCLASS (XSNEGDP (XXLXORdpz)), F4RC))>;
// Materialize f32 immediates whose value is exactly a signed-5-bit integer:
// splat the integer with VSPLTISW, then convert with XVCVSXWDP.
3251 def : Pat<(f32 (nzFPImmExactInti5:$A)),
3252 (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
3253 (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), F4RC)>;
3255 } // HasVSX, NoP8Vector
3257 // Any Power8 VSX subtarget.
3258 let Predicates = [HasVSX, HasP8Vector] in {
// NOTE(review): the result line of this xxleqv pattern appears truncated in
// this extract -- verify against the full file.
3259 def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
3261 def : Pat<(f64 (extloadf32 XForm:$src)),
3262 (COPY_TO_REGCLASS (XFLOADf32 XForm:$src), VSFRC)>;
3263 def : Pat<(f32 (fpround (f64 (extloadf32 ForceXForm:$src)))),
3264 (f32 (XFLOADf32 ForceXForm:$src))>;
3265 def : Pat<(f64 (any_fpextend f32:$src)),
3266 (COPY_TO_REGCLASS $src, VSFRC)>;
3268 def : Pat<(f32 (fpimm0neg)),
3269 (f32 (COPY_TO_REGCLASS (XSNEGDP (XXLXORdpz)), VSSRC))>;
// f32 immediates that are exact signed-5-bit integers: splat + convert,
// same scheme as the NoP8Vector block but the result stays in VSSRC.
3271 def : Pat<(f32 (nzFPImmExactInti5:$A)),
3272 (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
3273 (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), VSSRC)>;
// f32 selectcc on i1 operands: compute the condition with the matching CR
// logical op (CRANDC/CRORC/CREQV/CRXOR) and feed SELECT_VSSRC.
3275 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
3276 (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
3277 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
3278 (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
3279 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
3280 (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
3281 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
3282 (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
3283 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
3284 (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
3285 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
3286 (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
3287 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
3288 (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
3289 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
3290 (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
3291 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
3292 (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
3293 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
3294 (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
3296 // Additional fnmsub pattern for PPC specific ISD opcode
3297 def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
3298 (XSNMSUBASP $C, $A, $B)>;
3299 def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
3300 (XSMSUBASP $C, $A, $B)>;
3301 def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
3302 (XSNMADDASP $C, $A, $B)>;
3305 // Although XSNEGDP is available in P7, we want to select it starting from P8,
3306 // so that FNMSUBS can be selected for fneg-fmsub pattern on P7. (VSX version,
3307 // XSNMSUBASP, is available since P8)
3308 def : Pat<(f32 (fneg f32:$S)),
3309 (f32 (COPY_TO_REGCLASS (XSNEGDP
3310 (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
3312 // Instructions for converting float to i32 feeding a store.
3313 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 4),
3314 (STIWX $src, ForceXForm:$dst)>;
3315 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 4),
3316 (STIWX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
3318 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 4),
3319 (STXSIWX $src, ForceXForm:$dst)>;
3320 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 4),
3321 (STXSIWX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
// 64-bit element min/max are done with the Altivec VMAX*/VMIN* instructions,
// so both operands are copied into VRRC first.
3323 def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
3324 (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
3325 (COPY_TO_REGCLASS $src2, VRRC)))>;
3326 def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
3327 (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
3328 (COPY_TO_REGCLASS $src2, VRRC)))>;
3329 def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
3330 (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
3331 (COPY_TO_REGCLASS $src2, VRRC)))>;
3332 def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
3333 (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
3334 (COPY_TO_REGCLASS $src2, VRRC)))>;
// All-ones constants of every vector type come from the XXLEQVOnes idiom.
3336 def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
3337 (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
3338 def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
3339 (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
3340 def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
3341 (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
3342 def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
3343 (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
3345 // XL Compat builtins.
3346 def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>;
3347 def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>;
3348 def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>;
3349 def : Pat<(i32 (int_ppc_extract_exp f64:$A)),
3350 (EXTRACT_SUBREG (XSXEXPDP (COPY_TO_REGCLASS $A, VSFRC)), sub_32)>;
3351 def : Pat<(int_ppc_extract_sig f64:$A),
3352 (XSXSIGDP (COPY_TO_REGCLASS $A, VSFRC))>;
3353 def : Pat<(f64 (int_ppc_insert_exp f64:$A, i64:$B)),
3354 (COPY_TO_REGCLASS (XSIEXPDP (COPY_TO_REGCLASS $A, G8RC), $B), F8RC)>;
3356 def : Pat<(int_ppc_stfiw ForceXForm:$dst, f64:$XT),
3357 (STXSIWX f64:$XT, ForceXForm:$dst)>;
3358 def : Pat<(int_ppc_frsqrtes vssrc:$XB), (XSRSQRTESP $XB)>;
3359 } // HasVSX, HasP8Vector
3361 // Any big endian Power8 VSX subtarget.
3362 let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
// Extracted doubleword -> single precision: element 0 converts in place,
// element 1 is swapped into position first with XXPERMDI 2.
3363 def : Pat<DWToSPExtractConv.El0SS1,
3364 (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
3365 def : Pat<DWToSPExtractConv.El1SS1,
3366 (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
3367 def : Pat<DWToSPExtractConv.El0US1,
3368 (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
3369 def : Pat<DWToSPExtractConv.El1US1,
3370 (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
3372 // v4f32 scalar <-> vector conversions (BE)
3373 defm : ScalToVecWPermute<v4f32, (f32 f32:$A), (XSCVDPSPN $A), (XSCVDPSPN $A)>;
// Constant-index f32 extraction: rotate the wanted word into position with
// XXSLDWI/XXPERMDI, then convert with XSCVSPDPN.
3374 def : Pat<(f32 (vector_extract v4f32:$S, 0)),
3375 (f32 (XSCVSPDPN $S))>;
3376 def : Pat<(f32 (vector_extract v4f32:$S, 1)),
3377 (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
3378 def : Pat<(f32 (vector_extract v4f32:$S, 2)),
3379 (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
3380 def : Pat<(f32 (vector_extract v4f32:$S, 3)),
3381 (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
// sint-to-fp of an extracted word: splat the word with XXSPLTW and convert
// in the vector domain rather than moving through a GPR.
3383 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
3384 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
3385 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
3386 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
3387 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
3388 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
3389 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
3390 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
3391 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
3392 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
3393 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
3394 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
3395 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
3396 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
3397 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
3398 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
// Variable-index extractions expand via the VectorExtractions helpers.
3400 def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
3401 (f32 VectorExtractions.BE_32B_VARIABLE_FLOAT)>;
3403 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
3404 (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
// Word load to vector: LIWZX, then either shift into splat position or keep
// the scalar in a lane.
3406 defm : ScalToVecWPermute<
3407 v4i32, (i32 (load ForceXForm:$src)),
3408 (XXSLDWIs (LIWZX ForceXForm:$src), 1),
3409 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3410 defm : ScalToVecWPermute<
3411 v4f32, (f32 (load ForceXForm:$src)),
3412 (XXSLDWIs (LIWZX ForceXForm:$src), 1),
3413 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3414 } // HasVSX, HasP8Vector, IsBigEndian
3416 // Big endian Power8 64Bit VSX subtarget.
3417 let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in {
3418 def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
3419 (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
3421 // LIWAX - This instruction is used for sign extending i32 -> i64.
3422 // LIWZX - This instruction will be emitted for i32, f32, and when
3423 // zero-extending i32 to i64 (zext i32 -> i64).
3424 def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))),
3425 (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>;
3426 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))),
3427 (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>;
// Pack two converted doubleword vectors into one v4f32 with VPKUDUM.
3429 def : Pat<DWToSPExtractConv.BVU,
3430 (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
3431 (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
3432 def : Pat<DWToSPExtractConv.BVS,
3433 (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
3434 (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
// Element 1 is already the second word from the left (see the note below),
// so it can be stored with STIWX without any shift.
3435 def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
3436 (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3437 def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
3438 (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3440 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
3441 // The store instructions store the second word from the left.
3442 // So to align element zero, we need to modulo-left-shift by 3 words.
3443 // Similar logic applies for elements 2 and 3.
3444 foreach Idx = [ [0,3], [2,1], [3,2] ] in {
3445 def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), ForceXForm:$src),
3446 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
3447 sub_64), ForceXForm:$src)>;
3448 def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), ForceXForm:$src),
3449 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
3450 sub_64), ForceXForm:$src)>;
3452 } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
3454 // Little endian Power8 VSX subtarget.
3455 let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
// Extracted doubleword -> single precision (LE): element numbering is the
// mirror of the BE block, so element 0 needs the XXPERMDI swap here.
3456 def : Pat<DWToSPExtractConv.El0SS1,
3457 (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
3458 def : Pat<DWToSPExtractConv.El1SS1,
3459 (f32 (XSCVSXDSP (COPY_TO_REGCLASS
3460 (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
3461 def : Pat<DWToSPExtractConv.El0US1,
3462 (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
3463 def : Pat<DWToSPExtractConv.El1US1,
3464 (f32 (XSCVUXDSP (COPY_TO_REGCLASS
3465 (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
3467 // v4f32 scalar <-> vector conversions (LE)
// NOTE(review): the last line of this defm appears truncated in this
// extract -- verify against the full file.
3468 defm : ScalToVecWPermute<v4f32, (f32 f32:$A),
3469 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1),
// Constant-index f32 extraction (LE): shift counts are mirrored relative to
// the BE block (index 3 converts in place, index 0 needs XXSLDWI 3).
3471 def : Pat<(f32 (vector_extract v4f32:$S, 0)),
3472 (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
3473 def : Pat<(f32 (vector_extract v4f32:$S, 1)),
3474 (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
3475 def : Pat<(f32 (vector_extract v4f32:$S, 2)),
3476 (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
3477 def : Pat<(f32 (vector_extract v4f32:$S, 3)),
3478 (f32 (XSCVSPDPN $S))>;
3479 def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
3480 (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
// sint-to-fp of an extracted word: splat + vector-domain convert; the splat
// index is 3 - element index because of LE word numbering.
3482 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
3483 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
3484 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
3485 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
3486 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
3487 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
3488 def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
3489 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
3490 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
3491 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
3492 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
3493 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
3494 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
3495 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
3496 def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
3497 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
3499 // LIWAX - This instruction is used for sign extending i32 -> i64.
3500 // LIWZX - This instruction will be emitted for i32, f32, and when
3501 // zero-extending i32 to i64 (zext i32 -> i64).
3502 defm : ScalToVecWPermute<
3503 v2i64, (i64 (sextloadi32 ForceXForm:$src)),
3504 (XXPERMDIs (LIWAX ForceXForm:$src), 2),
3505 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64)>;
3507 defm : ScalToVecWPermute<
3508 v2i64, (i64 (zextloadi32 ForceXForm:$src)),
3509 (XXPERMDIs (LIWZX ForceXForm:$src), 2),
3510 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3512 defm : ScalToVecWPermute<
3513 v4i32, (i32 (load ForceXForm:$src)),
3514 (XXPERMDIs (LIWZX ForceXForm:$src), 2),
3515 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3517 defm : ScalToVecWPermute<
3518 v4f32, (f32 (load ForceXForm:$src)),
3519 (XXPERMDIs (LIWZX ForceXForm:$src), 2),
3520 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
// Pack two converted doubleword vectors into one v4f32; operand order is
// swapped relative to the BE block.
3522 def : Pat<DWToSPExtractConv.BVU,
3523 (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
3524 (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
3525 def : Pat<DWToSPExtractConv.BVS,
3526 (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
3527 (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
// On LE, element 2 is the second word from the left (see the note below),
// so it stores directly with STIWX.
3528 def : Pat<(store (i32 (extractelt v4i32:$A, 2)), ForceXForm:$src),
3529 (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3530 def : Pat<(store (f32 (extractelt v4f32:$A, 2)), ForceXForm:$src),
3531 (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3533 // Elements in a register on a LE system are in order <3, 2, 1, 0>.
3534 // The store instructions store the second word from the left.
3535 // So to align element 3, we need to modulo-left-shift by 3 words.
3536 // Similar logic applies for elements 0 and 1.
3537 foreach Idx = [ [0,2], [1,1], [3,3] ] in {
3538 def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), ForceXForm:$src),
3539 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
3540 sub_64), ForceXForm:$src)>;
3541 def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), ForceXForm:$src),
3542 (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
3543 sub_64), ForceXForm:$src)>;
3545 } // HasVSX, HasP8Vector, IsLittleEndian
3547 // Big endian pre-Power9 VSX subtarget.
3548 let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in {
// Store an extracted doubleword element. On BE, element 0 is stored from
// sub_64 directly; element 1 needs an XXPERMDI swap first.
3549 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
3550 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3551 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ForceXForm:$src),
3552 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
// NOTE(review): the trailing address-operand lines of the two patterns
// below appear truncated in this extract -- verify against the full file.
3553 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
3554 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
3556 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ForceXForm:$src),
3557 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
3559 } // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64
3561 // Little endian pre-Power9 VSX subtarget.
3562 let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
// Store an extracted doubleword element. On LE the mapping is mirrored:
// element 1 stores from sub_64 directly, element 0 needs the XXPERMDI swap.
// NOTE(review): the trailing address-operand lines of the two element-0
// patterns appear truncated in this extract -- verify against the full file.
3563 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
3564 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
3566 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ForceXForm:$src),
3567 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
3569 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
3570 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3571 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ForceXForm:$src),
3572 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
3573 } // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian
3575 // Any VSX target with direct moves.
3576 let Predicates = [HasVSX, HasDirectMove] in {
3577 // bitconvert f32 -> i32
3578 // (convert to 32-bit fp single, shift right 1 word, move to GPR)
3579 def : Pat<(i32 (bitconvert f32:$A)), Bitcast.FltToInt>;
3581 // bitconvert i32 -> f32
3582 // (move to FPR, shift left 1 word, convert to 64-bit fp single)
// NOTE(review): a line of this pattern's result appears truncated in this
// extract -- verify against the full file.
3583 def : Pat<(f32 (bitconvert i32:$A)),
3585 (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
3587 // bitconvert f64 -> i64
3588 // (move to GPR, nothing else needed)
3589 def : Pat<(i64 (bitconvert f64:$A)), Bitcast.DblToLong>;
3591 // bitconvert i64 -> f64
3592 // (move to FPR, nothing else needed)
// NOTE(review): this pattern's result line appears truncated in this
// extract -- verify against the full file.
3593 def : Pat<(f64 (bitconvert i64:$S)),
3596 // Rounding to integer.
// lrint/llrint convert with FCTID directly; lround/llround round to the
// nearest integer first with XSRDPI, then convert and move with MFVSRD.
3597 def : Pat<(i64 (lrint f64:$S)),
3598 (i64 (MFVSRD (FCTID $S)))>;
3599 def : Pat<(i64 (lrint f32:$S)),
3600 (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
3601 def : Pat<(i64 (llrint f64:$S)),
3602 (i64 (MFVSRD (FCTID $S)))>;
3603 def : Pat<(i64 (llrint f32:$S)),
3604 (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
3605 def : Pat<(i64 (lround f64:$S)),
3606 (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
3607 def : Pat<(i64 (lround f32:$S)),
3608 (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
3609 def : Pat<(i64 (llround f64:$S)),
3610 (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
3611 def : Pat<(i64 (llround f32:$S)),
3612 (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
3614 // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
3616 def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
3617 (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
3618 def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
3619 (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
3621 // Endianness-neutral constant splat on P8 and newer targets. The reason
3622 // for this pattern is that on targets with direct moves, we don't expand
3623 // BUILD_VECTOR nodes for v4i32.
3624 def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
3625 immSExt5NonZero:$A, immSExt5NonZero:$A)),
3626 (v4i32 (VSPLTISW imm:$A))>;
// Load-and-splat of a halfword/byte: GPR load, direct move with MTVSRWZ,
// then splat with VSPLTHs/VSPLTBs.
3629 def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
3630 (v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>;
3631 def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
3632 (v16i8 (VSPLTBs 7, (MTVSRWZ (LBZX ForceXForm:$A))))>;
3633 } // HasVSX, HasDirectMove
3635 // Big endian VSX subtarget with direct moves.
3636 let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in {
3637 // v16i8 scalar <-> vector conversions (BE)
// Splat form uses the MovesToVSR.BE_* helpers; scalar-in-lane form is a
// plain direct move (MTVSRWZ) wrapped in SUBREG_TO_REG.
3638 defm : ScalToVecWPermute<
3639 v16i8, (i32 i32:$A),
3640 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64),
3641 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
3642 defm : ScalToVecWPermute<
3643 v8i16, (i32 i32:$A),
3644 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64),
3645 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
3646 defm : ScalToVecWPermute<
3647 v4i32, (i32 i32:$A),
3648 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64),
3649 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
3650 def : Pat<(v2i64 (scalar_to_vector i64:$A)),
3651 (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
3653 // v2i64 scalar <-> vector conversions (BE)
// BE element i maps to the LE_DWORD_(1-i) extraction helper.
3654 def : Pat<(i64 (vector_extract v2i64:$S, 0)),
3655 (i64 VectorExtractions.LE_DWORD_1)>;
3656 def : Pat<(i64 (vector_extract v2i64:$S, 1)),
3657 (i64 VectorExtractions.LE_DWORD_0)>;
3658 def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
3659 (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
3660 } // HasVSX, HasDirectMove, IsBigEndian
3662 // Little endian VSX subtarget with direct moves.
3663 let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in {
3664 // v16i8 scalar <-> vector conversions (LE)
// Splat form uses MovesToVSR.LE_WORD_0; scalar-in-lane form uses LE_WORD_1
// (both defined earlier in the file, outside this chunk).
3665 defm : ScalToVecWPermute<v16i8, (i32 i32:$A),
3666 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
3667 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
3668 defm : ScalToVecWPermute<v8i16, (i32 i32:$A),
3669 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
3670 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
3671 defm : ScalToVecWPermute<v4i32, (i32 i32:$A), MovesToVSR.LE_WORD_0,
3672 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
3673 defm : ScalToVecWPermute<v2i64, (i64 i64:$A), MovesToVSR.LE_DWORD_0,
3674 MovesToVSR.LE_DWORD_1>;
3676 // v2i64 scalar <-> vector conversions (LE)
// LE element i maps to the LE_DWORD_i extraction helper directly.
3677 def : Pat<(i64 (vector_extract v2i64:$S, 0)),
3678 (i64 VectorExtractions.LE_DWORD_0)>;
3679 def : Pat<(i64 (vector_extract v2i64:$S, 1)),
3680 (i64 VectorExtractions.LE_DWORD_1)>;
3681 def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
3682 (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
3683 } // HasVSX, HasDirectMove, IsLittleEndian
3685 // Big endian pre-P9 VSX subtarget with direct moves.
3686 let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] in {
// Constant-index extractions: BE element i maps to the LE_BYTE_(15-i),
// LE_HALF_(7-i), LE_WORD_(3-i) helpers; variable indices use BE_VARIABLE_*.
3687 def : Pat<(i32 (vector_extract v16i8:$S, 0)),
3688 (i32 VectorExtractions.LE_BYTE_15)>;
3689 def : Pat<(i32 (vector_extract v16i8:$S, 1)),
3690 (i32 VectorExtractions.LE_BYTE_14)>;
3691 def : Pat<(i32 (vector_extract v16i8:$S, 2)),
3692 (i32 VectorExtractions.LE_BYTE_13)>;
3693 def : Pat<(i32 (vector_extract v16i8:$S, 3)),
3694 (i32 VectorExtractions.LE_BYTE_12)>;
3695 def : Pat<(i32 (vector_extract v16i8:$S, 4)),
3696 (i32 VectorExtractions.LE_BYTE_11)>;
3697 def : Pat<(i32 (vector_extract v16i8:$S, 5)),
3698 (i32 VectorExtractions.LE_BYTE_10)>;
3699 def : Pat<(i32 (vector_extract v16i8:$S, 6)),
3700 (i32 VectorExtractions.LE_BYTE_9)>;
3701 def : Pat<(i32 (vector_extract v16i8:$S, 7)),
3702 (i32 VectorExtractions.LE_BYTE_8)>;
3703 def : Pat<(i32 (vector_extract v16i8:$S, 8)),
3704 (i32 VectorExtractions.LE_BYTE_7)>;
3705 def : Pat<(i32 (vector_extract v16i8:$S, 9)),
3706 (i32 VectorExtractions.LE_BYTE_6)>;
3707 def : Pat<(i32 (vector_extract v16i8:$S, 10)),
3708 (i32 VectorExtractions.LE_BYTE_5)>;
3709 def : Pat<(i32 (vector_extract v16i8:$S, 11)),
3710 (i32 VectorExtractions.LE_BYTE_4)>;
3711 def : Pat<(i32 (vector_extract v16i8:$S, 12)),
3712 (i32 VectorExtractions.LE_BYTE_3)>;
3713 def : Pat<(i32 (vector_extract v16i8:$S, 13)),
3714 (i32 VectorExtractions.LE_BYTE_2)>;
3715 def : Pat<(i32 (vector_extract v16i8:$S, 14)),
3716 (i32 VectorExtractions.LE_BYTE_1)>;
3717 def : Pat<(i32 (vector_extract v16i8:$S, 15)),
3718 (i32 VectorExtractions.LE_BYTE_0)>;
3719 def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
3720 (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
3722 // v8i16 scalar <-> vector conversions (BE)
3723 def : Pat<(i32 (vector_extract v8i16:$S, 0)),
3724 (i32 VectorExtractions.LE_HALF_7)>;
3725 def : Pat<(i32 (vector_extract v8i16:$S, 1)),
3726 (i32 VectorExtractions.LE_HALF_6)>;
3727 def : Pat<(i32 (vector_extract v8i16:$S, 2)),
3728 (i32 VectorExtractions.LE_HALF_5)>;
3729 def : Pat<(i32 (vector_extract v8i16:$S, 3)),
3730 (i32 VectorExtractions.LE_HALF_4)>;
3731 def : Pat<(i32 (vector_extract v8i16:$S, 4)),
3732 (i32 VectorExtractions.LE_HALF_3)>;
3733 def : Pat<(i32 (vector_extract v8i16:$S, 5)),
3734 (i32 VectorExtractions.LE_HALF_2)>;
3735 def : Pat<(i32 (vector_extract v8i16:$S, 6)),
3736 (i32 VectorExtractions.LE_HALF_1)>;
3737 def : Pat<(i32 (vector_extract v8i16:$S, 7)),
3738 (i32 VectorExtractions.LE_HALF_0)>;
3739 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
3740 (i32 VectorExtractions.BE_VARIABLE_HALF)>;
3742 // v4i32 scalar <-> vector conversions (BE)
3743 def : Pat<(i32 (vector_extract v4i32:$S, 0)),
3744 (i32 VectorExtractions.LE_WORD_3)>;
3745 def : Pat<(i32 (vector_extract v4i32:$S, 1)),
3746 (i32 VectorExtractions.LE_WORD_2)>;
3747 def : Pat<(i32 (vector_extract v4i32:$S, 2)),
3748 (i32 VectorExtractions.LE_WORD_1)>;
3749 def : Pat<(i32 (vector_extract v4i32:$S, 3)),
3750 (i32 VectorExtractions.LE_WORD_0)>;
3751 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
3752 (i32 VectorExtractions.BE_VARIABLE_WORD)>;
3753 } // HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian
3755 // Little endian pre-P9 VSX subtarget with direct moves.
3756 let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] in {
// Constant-index extractions: LE element i maps to the LE_BYTE_i /
// LE_HALF_i / LE_WORD_i helpers directly; variable indices use
// LE_VARIABLE_*.
3757 def : Pat<(i32 (vector_extract v16i8:$S, 0)),
3758 (i32 VectorExtractions.LE_BYTE_0)>;
3759 def : Pat<(i32 (vector_extract v16i8:$S, 1)),
3760 (i32 VectorExtractions.LE_BYTE_1)>;
3761 def : Pat<(i32 (vector_extract v16i8:$S, 2)),
3762 (i32 VectorExtractions.LE_BYTE_2)>;
3763 def : Pat<(i32 (vector_extract v16i8:$S, 3)),
3764 (i32 VectorExtractions.LE_BYTE_3)>;
3765 def : Pat<(i32 (vector_extract v16i8:$S, 4)),
3766 (i32 VectorExtractions.LE_BYTE_4)>;
3767 def : Pat<(i32 (vector_extract v16i8:$S, 5)),
3768 (i32 VectorExtractions.LE_BYTE_5)>;
3769 def : Pat<(i32 (vector_extract v16i8:$S, 6)),
3770 (i32 VectorExtractions.LE_BYTE_6)>;
3771 def : Pat<(i32 (vector_extract v16i8:$S, 7)),
3772 (i32 VectorExtractions.LE_BYTE_7)>;
3773 def : Pat<(i32 (vector_extract v16i8:$S, 8)),
3774 (i32 VectorExtractions.LE_BYTE_8)>;
3775 def : Pat<(i32 (vector_extract v16i8:$S, 9)),
3776 (i32 VectorExtractions.LE_BYTE_9)>;
3777 def : Pat<(i32 (vector_extract v16i8:$S, 10)),
3778 (i32 VectorExtractions.LE_BYTE_10)>;
3779 def : Pat<(i32 (vector_extract v16i8:$S, 11)),
3780 (i32 VectorExtractions.LE_BYTE_11)>;
3781 def : Pat<(i32 (vector_extract v16i8:$S, 12)),
3782 (i32 VectorExtractions.LE_BYTE_12)>;
3783 def : Pat<(i32 (vector_extract v16i8:$S, 13)),
3784 (i32 VectorExtractions.LE_BYTE_13)>;
3785 def : Pat<(i32 (vector_extract v16i8:$S, 14)),
3786 (i32 VectorExtractions.LE_BYTE_14)>;
3787 def : Pat<(i32 (vector_extract v16i8:$S, 15)),
3788 (i32 VectorExtractions.LE_BYTE_15)>;
3789 def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
3790 (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
3792 // v8i16 scalar <-> vector conversions (LE)
3793 def : Pat<(i32 (vector_extract v8i16:$S, 0)),
3794 (i32 VectorExtractions.LE_HALF_0)>;
3795 def : Pat<(i32 (vector_extract v8i16:$S, 1)),
3796 (i32 VectorExtractions.LE_HALF_1)>;
3797 def : Pat<(i32 (vector_extract v8i16:$S, 2)),
3798 (i32 VectorExtractions.LE_HALF_2)>;
3799 def : Pat<(i32 (vector_extract v8i16:$S, 3)),
3800 (i32 VectorExtractions.LE_HALF_3)>;
3801 def : Pat<(i32 (vector_extract v8i16:$S, 4)),
3802 (i32 VectorExtractions.LE_HALF_4)>;
3803 def : Pat<(i32 (vector_extract v8i16:$S, 5)),
3804 (i32 VectorExtractions.LE_HALF_5)>;
3805 def : Pat<(i32 (vector_extract v8i16:$S, 6)),
3806 (i32 VectorExtractions.LE_HALF_6)>;
3807 def : Pat<(i32 (vector_extract v8i16:$S, 7)),
3808 (i32 VectorExtractions.LE_HALF_7)>;
3809 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
3810 (i32 VectorExtractions.LE_VARIABLE_HALF)>;
3812 // v4i32 scalar <-> vector conversions (LE)
3813 def : Pat<(i32 (vector_extract v4i32:$S, 0)),
3814 (i32 VectorExtractions.LE_WORD_0)>;
3815 def : Pat<(i32 (vector_extract v4i32:$S, 1)),
3816 (i32 VectorExtractions.LE_WORD_1)>;
3817 def : Pat<(i32 (vector_extract v4i32:$S, 2)),
3818 (i32 VectorExtractions.LE_WORD_2)>;
3819 def : Pat<(i32 (vector_extract v4i32:$S, 3)),
3820 (i32 VectorExtractions.LE_WORD_3)>;
3821 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
3822 (i32 VectorExtractions.LE_VARIABLE_WORD)>;
3823 } // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
3825 // Big endian pre-Power9 64Bit VSX subtarget that has direct moves.
// NOTE(review): this excerpt has lost some continuation lines (the embedded
// original numbering skips 3829 and 3833), so the result side of the two
// build_vector patterns below is incomplete here — confirm against the
// pristine file before editing.
3826 let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in {
3827 // Big endian integer vectors using direct moves.
// v2i64 (A, B): move each 64-bit GPR into a VSR via MTVSRD, then combine.
3828 def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
3830 (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64),
3831 (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64), 0))>;
// v4i32 (A, B, C, D): pack two words per GPR with RLDIMI (A/C inserted into
// the high word for big-endian order), direct-move each pair, then combine.
3832 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
3834 (SUBREG_TO_REG (i64 1),
3835 (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), sub_64),
3836 (SUBREG_TO_REG (i64 1),
3837 (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), sub_64), 0)>;
// Splat of a single i32: one zero-extending direct move plus XXSPLTW.
3838 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
3839 (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>;
3840 } // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64
3842 // Little endian pre-Power9 VSX subtarget that has direct moves.
// NOTE(review): mirrors the big-endian direct-move block with the operand
// order swapped for little-endian lane numbering. Some continuation lines
// and this `let` block's closing brace are missing from this excerpt (the
// embedded numbering jumps 3856 -> 3859) — verify against the pristine file.
3843 let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
3844 // Little endian integer vectors using direct moves.
// v2i64 (A, B): same as BE but with $B/$A swapped in the combine.
3845 def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
3847 (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64),
3848 (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64), 0))>;
// v4i32: word pairs packed in reversed order (D/C and B/A) relative to BE.
3849 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
3851 (SUBREG_TO_REG (i64 1),
3852 (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), sub_64),
3853 (SUBREG_TO_REG (i64 1),
3854 (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), sub_64), 0)>;
// Splatting a single word is lane-insensitive, so this matches the BE form.
3855 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
3856 (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>;
3859 // Any Power9 VSX subtarget.
// NOTE(review): the embedded original numbering has gaps throughout this
// block (e.g. 3868, 3934, 4080, 4084, 4119-4120 are absent), so a few blank
// and continuation lines were dropped by the extraction; the code below is
// kept byte-for-byte as found.
3860 let Predicates = [HasVSX, HasP9Vector] in {
3861 // Additional fnmsub pattern for PPC specific ISD opcode
3862 def : Pat<(PPCfnmsub f128:$A, f128:$B, f128:$C),
3863 (XSNMSUBQP $C, $A, $B)>;
3864 def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
3865 (XSMSUBQP $C, $A, $B)>;
3866 def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
3867 (XSNMADDQP $C, $A, $B)>;
// Convert (un)signed DWord (in a GPR or already in a VSR) to quad precision.
3869 def : Pat<(f128 (any_sint_to_fp i64:$src)),
3870 (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
3871 def : Pat<(f128 (any_sint_to_fp (i64 (PPCmfvsr f64:$src)))),
3872 (f128 (XSCVSDQP $src))>;
3873 def : Pat<(f128 (any_sint_to_fp (i32 (PPCmfvsr f64:$src)))),
3874 (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
3875 def : Pat<(f128 (any_uint_to_fp i64:$src)),
3876 (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
3877 def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))),
3878 (f128 (XSCVUDQP $src))>;
3880 // Convert (Un)Signed Word -> QP.
3881 def : Pat<(f128 (any_sint_to_fp i32:$src)),
3882 (f128 (XSCVSDQP (MTVSRWA $src)))>;
3883 def : Pat<(f128 (any_sint_to_fp (i32 (load ForceXForm:$src)))),
3884 (f128 (XSCVSDQP (LIWAX ForceXForm:$src)))>;
3885 def : Pat<(f128 (any_uint_to_fp i32:$src)),
3886 (f128 (XSCVUDQP (MTVSRWZ $src)))>;
3887 def : Pat<(f128 (any_uint_to_fp (i32 (load ForceXForm:$src)))),
3888 (f128 (XSCVUDQP (LIWZX ForceXForm:$src)))>;
3890 // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
3891 // separate pattern so that it can convert the input register class from
3892 // VRRC(v8i16) to VSRC.
3893 def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
3894 (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
// f128 rounding via XSRQPI/XSRQPIX; the two immediates select the rounding
// behavior as described in the per-pattern comments below.
3896 // Use current rounding mode
3897 def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
3898 // Round to nearest, ties away from zero
3899 def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
3900 // Round towards Zero
3901 def : Pat<(f128 (any_ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
3902 // Round towards +Inf
3903 def : Pat<(f128 (any_fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
3904 // Round towards -Inf
3905 def : Pat<(f128 (any_ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
3906 // Use current rounding mode, [with Inexact]
3907 def : Pat<(f128 (any_frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
// Insert/extract the biased exponent of a quad-precision value.
3909 def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
3910 (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
3912 def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
3913 (i64 (MFVSRD (EXTRACT_SUBREG
3914 (v2i64 (XSXEXPQP $vA)), sub_64)))>;
3916 // Extra patterns expanding to vector Extract Word/Insert Word
3917 def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
3918 (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
3919 def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
3920 (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
// Byte-reversal (bswap) through the VSX vector byte-reverse instructions.
3923 def : Pat<(v8i16 (bswap v8i16 :$A)),
3924 (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
3925 def : Pat<(v1i128 (bswap v1i128 :$A)),
3926 (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
3928 // D-Form Load/Store
// NOTE(review): the foreach's closing brace (original line 3934) is not
// visible in this excerpt.
3929 foreach Ty = [v4i32, v4f32, v2i64, v2f64] in {
3930 def : Pat<(Ty (load DQForm:$src)), (LXV memrix16:$src)>;
3931 def : Pat<(Ty (load XForm:$src)), (LXVX XForm:$src)>;
3932 def : Pat<(store Ty:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>;
3933 def : Pat<(store Ty:$rS, XForm:$dst), (STXVX $rS, XForm:$dst)>;
// f128 loads/stores go through VSRC/VRRC register-class copies.
3936 def : Pat<(f128 (load DQForm:$src)),
3937 (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
3938 def : Pat<(f128 (load XForm:$src)),
3939 (COPY_TO_REGCLASS (LXVX XForm:$src), VRRC)>;
3940 def : Pat<(v4i32 (int_ppc_vsx_lxvw4x DQForm:$src)), (LXV memrix16:$src)>;
3941 def : Pat<(v2f64 (int_ppc_vsx_lxvd2x DQForm:$src)), (LXV memrix16:$src)>;
3942 def : Pat<(v4i32 (int_ppc_vsx_lxvw4x XForm:$src)), (LXVX XForm:$src)>;
3943 def : Pat<(v2f64 (int_ppc_vsx_lxvd2x XForm:$src)), (LXVX XForm:$src)>;
3945 def : Pat<(store f128:$rS, DQForm:$dst),
3946 (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
3947 def : Pat<(store f128:$rS, XForm:$dst),
3948 (STXVX (COPY_TO_REGCLASS $rS, VSRC), XForm:$dst)>;
3949 def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, DQForm:$dst),
3950 (STXV $rS, memrix16:$dst)>;
3951 def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, DQForm:$dst),
3952 (STXV $rS, memrix16:$dst)>;
3953 def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, XForm:$dst),
3954 (STXVX $rS, XForm:$dst)>;
3955 def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, XForm:$dst),
3956 (STXVX $rS, XForm:$dst)>;
3958 // Build vectors from i8 loads
// Each ScalToVecWPermute supplies a splat-style and a direct-move form.
3959 defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
3960 (VSPLTHs 3, (LXSIBZX ForceXForm:$src)),
3961 (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>;
3962 defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi8,
3963 (XXSPLTWs (LXSIBZX ForceXForm:$src), 1),
3964 (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>;
3965 defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi8i64,
3966 (XXPERMDIs (LXSIBZX ForceXForm:$src), 0),
3967 (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>;
3968 defm : ScalToVecWPermute<
3969 v4i32, ScalarLoads.SELi8,
3970 (XXSPLTWs (VEXTSB2Ws (LXSIBZX ForceXForm:$src)), 1),
3971 (SUBREG_TO_REG (i64 1), (VEXTSB2Ws (LXSIBZX ForceXForm:$src)), sub_64)>;
3972 defm : ScalToVecWPermute<
3973 v2i64, ScalarLoads.SELi8i64,
3974 (XXPERMDIs (VEXTSB2Ds (LXSIBZX ForceXForm:$src)), 0),
3975 (SUBREG_TO_REG (i64 1), (VEXTSB2Ds (LXSIBZX ForceXForm:$src)), sub_64)>;
3977 // Build vectors from i16 loads
3978 defm : ScalToVecWPermute<
3979 v4i32, ScalarLoads.ZELi16,
3980 (XXSPLTWs (LXSIHZX ForceXForm:$src), 1),
3981 (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>;
3982 defm : ScalToVecWPermute<
3983 v2i64, ScalarLoads.ZELi16i64,
3984 (XXPERMDIs (LXSIHZX ForceXForm:$src), 0),
3985 (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>;
3986 defm : ScalToVecWPermute<
3987 v4i32, ScalarLoads.SELi16,
3988 (XXSPLTWs (VEXTSH2Ws (LXSIHZX ForceXForm:$src)), 1),
3989 (SUBREG_TO_REG (i64 1), (VEXTSH2Ws (LXSIHZX ForceXForm:$src)), sub_64)>;
3990 defm : ScalToVecWPermute<
3991 v2i64, ScalarLoads.SELi16i64,
3992 (XXPERMDIs (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), 0),
3993 (SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), sub_64)>;
3995 // Load/convert and convert/store patterns for f16.
3996 def : Pat<(f64 (extloadf16 ForceXForm:$src)),
3997 (f64 (XSCVHPDP (LXSIHZX ForceXForm:$src)))>;
3998 def : Pat<(truncstoref16 f64:$src, ForceXForm:$dst),
3999 (STXSIHX (XSCVDPHP $src), ForceXForm:$dst)>;
4000 def : Pat<(f32 (extloadf16 ForceXForm:$src)),
4001 (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX ForceXForm:$src)), VSSRC))>;
4002 def : Pat<(truncstoref16 f32:$src, ForceXForm:$dst),
4003 (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), ForceXForm:$dst)>;
4004 def : Pat<(f64 (f16_to_fp i32:$A)),
4005 (f64 (XSCVHPDP (MTVSRWZ $A)))>;
4006 def : Pat<(f32 (f16_to_fp i32:$A)),
4007 (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
4008 def : Pat<(i32 (fp_to_f16 f32:$A)),
4009 (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
4010 def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
4012 // Vector sign extensions
4013 def : Pat<(f64 (PPCVexts f64:$A, 1)),
4014 (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
4015 def : Pat<(f64 (PPCVexts f64:$A, 2)),
4016 (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
// f32 extending loads through the D-form single-precision load.
4018 def : Pat<(f64 (extloadf32 DSForm:$src)),
4019 (COPY_TO_REGCLASS (DFLOADf32 DSForm:$src), VSFRC)>;
4020 def : Pat<(f32 (fpround (f64 (extloadf32 DSForm:$src)))),
4021 (f32 (DFLOADf32 DSForm:$src))>;
4023 def : Pat<(v4f32 (PPCldvsxlh XForm:$src)),
4024 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>;
4025 def : Pat<(v4f32 (PPCldvsxlh DSForm:$src)),
4026 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>;
4028 // Convert (Un)Signed DWord in memory -> QP
4029 def : Pat<(f128 (sint_to_fp (i64 (load XForm:$src)))),
4030 (f128 (XSCVSDQP (LXSDX XForm:$src)))>;
4031 def : Pat<(f128 (sint_to_fp (i64 (load DSForm:$src)))),
4032 (f128 (XSCVSDQP (LXSD DSForm:$src)))>;
4033 def : Pat<(f128 (uint_to_fp (i64 (load XForm:$src)))),
4034 (f128 (XSCVUDQP (LXSDX XForm:$src)))>;
4035 def : Pat<(f128 (uint_to_fp (i64 (load DSForm:$src)))),
4036 (f128 (XSCVUDQP (LXSD DSForm:$src)))>;
4038 // Convert Unsigned HWord in memory -> QP
4039 def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
4040 (f128 (XSCVUDQP (LXSIHZX XForm:$src)))>;
4042 // Convert Unsigned Byte in memory -> QP
4043 def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
4044 (f128 (XSCVUDQP (LXSIBZX ForceXForm:$src)))>;
4046 // Truncate & Convert QP -> (Un)Signed (D)Word.
4047 def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
4048 def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
4049 def : Pat<(i32 (any_fp_to_sint f128:$src)),
4050 (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
4051 def : Pat<(i32 (any_fp_to_uint f128:$src)),
4052 (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
4054 // Instructions for store(fptosi).
// The trailing immediate (8/2/1) is the store width in bytes.
4055 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, DSForm:$dst, 8),
4056 (STXSD $src, DSForm:$dst)>;
4057 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 2),
4058 (STXSIHX $src, ForceXForm:$dst)>;
4059 def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 1),
4060 (STXSIBX $src, ForceXForm:$dst)>;
4062 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, DSForm:$dst, 8),
4063 (STXSD (COPY_TO_REGCLASS $src, VFRC), DSForm:$dst)>;
4064 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 2),
4065 (STXSIHX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
4066 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 1),
4067 (STXSIBX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
4069 // Round & Convert QP -> DP/SP
4070 def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>;
4071 def : Pat<(f32 (any_fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
4074 def : Pat<(f128 (any_fpextend f32:$src)),
4075 (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
// NOTE(review): the trailing `VSSRC))>;` continuation lines of the next two
// patterns (original lines 4080 and 4084) are missing from this excerpt.
4077 def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
4078 (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
4079 (COPY_TO_REGCLASS $XB, VSSRC)),
4081 def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
4082 (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
4083 (COPY_TO_REGCLASS $XB, VSSRC)),
4086 // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
4087 defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A),
4088 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
4089 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
4090 (v4i32 (MTVSRWS $A))>;
4091 def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4092 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4093 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4094 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4095 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4096 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4097 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
4098 immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
4099 (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
4100 defm : ScalToVecWPermute<
4101 v4i32, FltToIntLoad.A,
4102 (XVCVSPSXWS (LXVWSX ForceXForm:$A)),
4103 (XVCVSPSXWS (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64))>;
4104 defm : ScalToVecWPermute<
4105 v4i32, FltToUIntLoad.A,
4106 (XVCVSPUXWS (LXVWSX ForceXForm:$A)),
4107 (XVCVSPUXWS (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64))>;
4108 defm : ScalToVecWPermute<
4109 v4i32, DblToIntLoadP9.A,
4110 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 DSForm:$A)), sub_64), 1),
4111 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 DSForm:$A)), sub_64)>;
4112 defm : ScalToVecWPermute<
4113 v4i32, DblToUIntLoadP9.A,
4114 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 DSForm:$A)), sub_64), 1),
4115 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 DSForm:$A)), sub_64)>;
// NOTE(review): the next two defm bodies are missing intermediate lines
// (originals 4119-4120 and 4125-4126) in this excerpt.
4116 defm : ScalToVecWPermute<
4117 v2i64, FltToLongLoadP9.A,
4118 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), 0),
4121 (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), sub_64)>;
4122 defm : ScalToVecWPermute<
4123 v2i64, FltToULongLoadP9.A,
4124 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), 0),
4127 (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), sub_64)>;
// Load-and-splat patterns for all element widths.
4128 def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
4129 (v4f32 (LXVWSX ForceXForm:$A))>;
4130 def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
4131 (v4i32 (LXVWSX ForceXForm:$A))>;
4132 def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
4133 (v8i16 (VSPLTHs 3, (LXSIHZX ForceXForm:$A)))>;
4134 def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
4135 (v16i8 (VSPLTBs 7, (LXSIBZX ForceXForm:$A)))>;
4136 def : Pat<(v2f64 (PPCxxperm v2f64:$XT, v2f64:$XB, v4i32:$C)),
4137 (XXPERM v2f64:$XT, v2f64:$XB, v4i32:$C)>;
4138 } // HasVSX, HasP9Vector
4140 // Any Power9 VSX subtarget with equivalent length but better Power10 VSX
// patterns. (The continuation of this comment, original line 4141, is
// missing from this excerpt.)
4142 // Two identical blocks are required due to the slightly different predicates:
4143 // One without P10 instructions, the other is BigEndian only with P10 instructions.
4144 let Predicates = [HasVSX, HasP9Vector, NoP10Vector] in {
4145 // Little endian Power10 subtargets produce a shorter pattern but require a
4146 // COPY_TO_REGCLASS. The COPY_TO_REGCLASS makes it appear to need two instructions
4147 // to perform the operation, when only one instruction is produced in practice.
4148 // The NoP10Vector predicate excludes these patterns from Power10 VSX subtargets.
// Build vectors from i8 loads.
4149 defm : ScalToVecWPermute<
4150 v16i8, ScalarLoads.Li8,
4151 (VSPLTBs 7, (LXSIBZX ForceXForm:$src)),
4152 (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>;
4153 // Build vectors from i16 loads
4154 defm : ScalToVecWPermute<
4155 v8i16, ScalarLoads.Li16,
4156 (VSPLTHs 3, (LXSIHZX ForceXForm:$src)),
4157 (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>;
4158 } // HasVSX, HasP9Vector, NoP10Vector
4160 // Any big endian Power9 VSX subtarget
// NOTE(review): several closing continuation lines of the insertelt
// patterns (e.g. originals 4197, 4202, 4209, 4214, 4221, 4226, 4233, 4238)
// are absent from this excerpt; the code is kept byte-for-byte as found.
4161 let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
4162 // Power10 VSX subtargets produce a shorter pattern for little endian targets
4163 // but this is still the best pattern for Power9 and Power10 VSX big endian
4164 // Build vectors from i8 loads
4165 defm : ScalToVecWPermute<
4166 v16i8, ScalarLoads.Li8,
4167 (VSPLTBs 7, (LXSIBZX ForceXForm:$src)),
4168 (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>;
4169 // Build vectors from i16 loads
4170 defm : ScalToVecWPermute<
4171 v8i16, ScalarLoads.Li16,
4172 (VSPLTHs 3, (LXSIHZX ForceXForm:$src)),
4173 (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>;
// Unsigned word element N -> f32/f64: XXEXTRACTUW uses a big-endian byte
// offset of 4*N, then convert in place.
4175 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
4176 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
4177 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
4178 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
4179 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
4180 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
4181 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
4182 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
4183 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
4184 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
4185 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
4186 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
4187 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
4188 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
4189 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
4190 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
// Word insertion (plain, double->sint, double->uint) at each BE offset.
4191 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
4192 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
4193 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
4194 (v4i32 (XXINSERTW v4i32:$A,
4195 (SUBREG_TO_REG (i64 1),
4196 (XSCVDPSXWS f64:$B), sub_64),
4198 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
4199 (v4i32 (XXINSERTW v4i32:$A,
4200 (SUBREG_TO_REG (i64 1),
4201 (XSCVDPUXWS f64:$B), sub_64),
4203 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
4204 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
4205 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
4206 (v4i32 (XXINSERTW v4i32:$A,
4207 (SUBREG_TO_REG (i64 1),
4208 (XSCVDPSXWS f64:$B), sub_64),
4210 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
4211 (v4i32 (XXINSERTW v4i32:$A,
4212 (SUBREG_TO_REG (i64 1),
4213 (XSCVDPUXWS f64:$B), sub_64),
4215 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
4216 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
4217 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
4218 (v4i32 (XXINSERTW v4i32:$A,
4219 (SUBREG_TO_REG (i64 1),
4220 (XSCVDPSXWS f64:$B), sub_64),
4222 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
4223 (v4i32 (XXINSERTW v4i32:$A,
4224 (SUBREG_TO_REG (i64 1),
4225 (XSCVDPUXWS f64:$B), sub_64),
4227 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
4228 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
4229 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
4230 (v4i32 (XXINSERTW v4i32:$A,
4231 (SUBREG_TO_REG (i64 1),
4232 (XSCVDPSXWS f64:$B), sub_64),
4234 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
4235 (v4i32 (XXINSERTW v4i32:$A,
4236 (SUBREG_TO_REG (i64 1),
4237 (XSCVDPUXWS f64:$B), sub_64),
// Float word insertion at each BE offset.
4239 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
4240 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
4241 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
4242 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
4243 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
4244 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
4245 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
4246 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
// Inserting an f64 rounded to f32: convert with XSCVDPSP and insert.
4248 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
4249 (v4f32 (XXINSERTW v4f32:$A,
4250 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
4251 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
4252 (v4f32 (XXINSERTW v4f32:$A,
4253 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
4254 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
4255 (v4f32 (XXINSERTW v4f32:$A,
4256 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
4257 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
4258 (v4f32 (XXINSERTW v4f32:$A,
4259 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
4261 // Scalar stores of i8
// Each element is rotated into the store position with VSLDOI, then stored
// with STXSIBXv; element 7 needs no rotate.
4262 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst),
4263 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>;
4264 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), ForceXForm:$dst),
4265 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
4266 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), ForceXForm:$dst),
4267 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), ForceXForm:$dst)>;
4268 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), ForceXForm:$dst),
4269 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
4270 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), ForceXForm:$dst),
4271 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), ForceXForm:$dst)>;
4272 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), ForceXForm:$dst),
4273 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
4274 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), ForceXForm:$dst),
4275 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), ForceXForm:$dst)>;
4276 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), ForceXForm:$dst),
4277 (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
4278 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), ForceXForm:$dst),
4279 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), ForceXForm:$dst)>;
4280 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), ForceXForm:$dst),
4281 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
4282 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), ForceXForm:$dst),
4283 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), ForceXForm:$dst)>;
4284 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), ForceXForm:$dst),
4285 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
4286 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), ForceXForm:$dst),
4287 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), ForceXForm:$dst)>;
4288 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), ForceXForm:$dst),
4289 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
4290 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), ForceXForm:$dst),
4291 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), ForceXForm:$dst)>;
4292 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst),
4293 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>;
4295 // Scalar stores of i16
// Same rotate-then-store scheme with halfword granularity (shift by 2*N);
// element 3 needs no rotate.
4296 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst),
4297 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
4298 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), ForceXForm:$dst),
4299 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
4300 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), ForceXForm:$dst),
4301 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
4302 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), ForceXForm:$dst),
4303 (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
4304 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), ForceXForm:$dst),
4305 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
4306 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), ForceXForm:$dst),
4307 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
4308 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), ForceXForm:$dst),
4309 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
4310 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst),
4311 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>;
4312 } // HasVSX, HasP9Vector, IsBigEndian
4314 // Big endian 64Bit Power9 subtarget.
// NOTE(review): inside this block the embedded original numbering skips
// several lines (e.g. 4350, 4355, 4365, 4369-4370, and various foreach
// closing braces), so some continuation lines are missing from this
// excerpt; the code is kept byte-for-byte as found.
4315 let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
// Scalar 64-bit loads placed into the most-significant doubleword.
4316 def : Pat<(v2i64 (scalar_to_vector (i64 (load DSForm:$src)))),
4317 (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64))>;
4318 def : Pat<(v2i64 (scalar_to_vector (i64 (load XForm:$src)))),
4319 (v2i64 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64))>;
4321 def : Pat<(v2f64 (scalar_to_vector (f64 (load DSForm:$src)))),
4322 (v2f64 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64))>;
4323 def : Pat<(v2f64 (scalar_to_vector (f64 (load XForm:$src)))),
4324 (v2f64 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64))>;
// Element 1 is swapped into position with XXPERMDI before the store;
// element 0 is already in sub_64 on big-endian targets.
4325 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), XForm:$src),
4326 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4327 sub_64), XForm:$src)>;
4328 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), XForm:$src),
4329 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4330 sub_64), XForm:$src)>;
4331 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), XForm:$src),
4332 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
4333 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), XForm:$src),
4334 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
4335 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), DSForm:$src),
4336 (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4337 sub_64), DSForm:$src)>;
4338 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), DSForm:$src),
4339 (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4340 sub_64), DSForm:$src)>;
4341 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), DSForm:$src),
4342 (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
4343 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), DSForm:$src),
4344 (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
4346 // (Un)Signed DWord vector extract -> QP
4347 def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
4348 (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
4349 def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
4351 (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
4352 def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
4353 (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
4354 def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
4356 (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
4358 // (Un)Signed Word vector extract -> QP
// Element 1 is already the sign-extend source; others are splatted first.
4359 def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
4360 (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
4361 foreach Idx = [0,2,3] in {
4362 def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
4363 (f128 (XSCVSDQP (EXTRACT_SUBREG
4364 (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
4366 foreach Idx = 0-3 in {
4367 def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
4368 (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
4371 // (Un)Signed HWord vector extract -> QP/DP/SP
// VEXTRACTUH takes a byte index, hence the `!add(Idx, Idx)` (2*Idx).
4372 foreach Idx = 0-7 in {
4373 def : Pat<(f128 (sint_to_fp
4375 (vector_extract v8i16:$src, Idx), i16)))),
4376 (f128 (XSCVSDQP (EXTRACT_SUBREG
4377 (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
4379 // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
4380 def : Pat<(f128 (uint_to_fp
4381 (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
4382 (f128 (XSCVUDQP (EXTRACT_SUBREG
4383 (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
4384 def : Pat<(f32 (PPCfcfidus
4385 (f64 (PPCmtvsrz (and (i32 (vector_extract v8i16:$src, Idx)),
4387 (f32 (XSCVUXDSP (EXTRACT_SUBREG
4388 (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
4389 def : Pat<(f32 (PPCfcfids
4391 (i32 (sext_inreg (vector_extract v8i16:$src, Idx),
4393 (f32 (XSCVSXDSP (EXTRACT_SUBREG
4394 (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
4396 def : Pat<(f64 (PPCfcfidu
4398 (and (i32 (vector_extract v8i16:$src, Idx)),
4400 (f64 (XSCVUXDDP (EXTRACT_SUBREG
4401 (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
4402 def : Pat<(f64 (PPCfcfid
4404 (i32 (sext_inreg (vector_extract v8i16:$src, Idx),
4406 (f64 (XSCVSXDDP (EXTRACT_SUBREG
4407 (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
4411 // (Un)Signed Byte vector extract -> QP
// NOTE(review): the first two byte patterns index VEXTRACTUB with `Idx`
// while the later ones use `!add(Idx, Idx)` and VEXTSH2D — looks
// inconsistent for byte elements; confirm against the pristine upstream
// file before changing anything.
4412 foreach Idx = 0-15 in {
4413 def : Pat<(f128 (sint_to_fp
4414 (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
4416 (f128 (XSCVSDQP (EXTRACT_SUBREG
4417 (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
4418 def : Pat<(f128 (uint_to_fp
4419 (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
4421 (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
4423 def : Pat<(f32 (PPCfcfidus
4425 (and (i32 (vector_extract v16i8:$src, Idx)),
4427 (f32 (XSCVUXDSP (EXTRACT_SUBREG
4428 (VEXTRACTUB !add(Idx, Idx), $src), sub_64)))>;
4429 def : Pat<(f32 (PPCfcfids
4431 (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
4433 (f32 (XSCVSXDSP (EXTRACT_SUBREG
4434 (VEXTSH2D (VEXTRACTUB !add(Idx, Idx), $src)),
4436 def : Pat<(f64 (PPCfcfidu
4438 (and (i32 (vector_extract v16i8:$src, Idx)),
4440 (f64 (XSCVUXDDP (EXTRACT_SUBREG
4441 (VEXTRACTUB !add(Idx, Idx), $src), sub_64)))>;
4442 def : Pat<(f64 (PPCfcfid
4444 (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
4446 (f64 (XSCVSXDDP (EXTRACT_SUBREG
4447 (VEXTSH2D (VEXTRACTUB !add(Idx, Idx), $src)),
4451 // Unsigned int in vsx register -> QP
4452 def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
4454 (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
4455 } // HasVSX, HasP9Vector, IsBigEndian, IsPPC64
4457 // Little endian Power9 subtarget.
4458 let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
4459 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
4460 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
4461 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
4462 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
4463 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
4464 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
4465 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
4466 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
4467 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
4468 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
4469 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
4470 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
4471 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
4472 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
4473 def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
4474 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
4475 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
4476 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
4477 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
4478 (v4i32 (XXINSERTW v4i32:$A,
4479 (SUBREG_TO_REG (i64 1),
4480 (XSCVDPSXWS f64:$B), sub_64),
4482 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
4483 (v4i32 (XXINSERTW v4i32:$A,
4484 (SUBREG_TO_REG (i64 1),
4485 (XSCVDPUXWS f64:$B), sub_64),
4487 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
4488 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
4489 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
4490 (v4i32 (XXINSERTW v4i32:$A,
4491 (SUBREG_TO_REG (i64 1),
4492 (XSCVDPSXWS f64:$B), sub_64),
4494 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
4495 (v4i32 (XXINSERTW v4i32:$A,
4496 (SUBREG_TO_REG (i64 1),
4497 (XSCVDPUXWS f64:$B), sub_64),
4499 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
4500 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
4501 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
4502 (v4i32 (XXINSERTW v4i32:$A,
4503 (SUBREG_TO_REG (i64 1),
4504 (XSCVDPSXWS f64:$B), sub_64),
4506 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
4507 (v4i32 (XXINSERTW v4i32:$A,
4508 (SUBREG_TO_REG (i64 1),
4509 (XSCVDPUXWS f64:$B), sub_64),
4511 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
4512 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
4513 def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
4514 (v4i32 (XXINSERTW v4i32:$A,
4515 (SUBREG_TO_REG (i64 1),
4516 (XSCVDPSXWS f64:$B), sub_64),
4518 def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
4519 (v4i32 (XXINSERTW v4i32:$A,
4520 (SUBREG_TO_REG (i64 1),
4521 (XSCVDPUXWS f64:$B), sub_64),
4523 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
4524 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
4525 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
4526 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
4527 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
4528 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
4529 def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
4530 (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
4532 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
4533 (v4f32 (XXINSERTW v4f32:$A,
4534 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
4535 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
4536 (v4f32 (XXINSERTW v4f32:$A,
4537 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
4538 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
4539 (v4f32 (XXINSERTW v4f32:$A,
4540 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
4541 def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
4542 (v4f32 (XXINSERTW v4f32:$A,
4543 (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
4545 def : Pat<(v8i16 (PPCld_vec_be ForceXForm:$src)),
4546 (COPY_TO_REGCLASS (LXVH8X ForceXForm:$src), VRRC)>;
4547 def : Pat<(PPCst_vec_be v8i16:$rS, ForceXForm:$dst),
4548 (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), ForceXForm:$dst)>;
4550 def : Pat<(v16i8 (PPCld_vec_be ForceXForm:$src)),
4551 (COPY_TO_REGCLASS (LXVB16X ForceXForm:$src), VRRC)>;
4552 def : Pat<(PPCst_vec_be v16i8:$rS, ForceXForm:$dst),
4553 (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), ForceXForm:$dst)>;
4555 // Scalar stores of i8: VSLDOI rotates the requested element into the byte
// position STXSIBXv stores from; element 8 is already in place (no rotate).
4556 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst),
4557 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>;
4558 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), ForceXForm:$dst),
4559 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), ForceXForm:$dst)>;
4560 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), ForceXForm:$dst),
4561 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
4562 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), ForceXForm:$dst),
4563 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), ForceXForm:$dst)>;
4564 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), ForceXForm:$dst),
4565 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
4566 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), ForceXForm:$dst),
4567 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), ForceXForm:$dst)>;
4568 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), ForceXForm:$dst),
4569 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
4570 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), ForceXForm:$dst),
4571 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), ForceXForm:$dst)>;
4572 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), ForceXForm:$dst),
4573 (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
4574 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), ForceXForm:$dst),
4575 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), ForceXForm:$dst)>;
4576 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), ForceXForm:$dst),
4577 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
4578 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), ForceXForm:$dst),
4579 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), ForceXForm:$dst)>;
4580 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), ForceXForm:$dst),
4581 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
4582 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), ForceXForm:$dst),
4583 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), ForceXForm:$dst)>;
4584 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), ForceXForm:$dst),
4585 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
4586 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst),
4587 (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>;
4589 // Scalar stores of i16: VSLDOI rotates the requested halfword into the
// position STXSIHXv stores from; element 4 is already in place (no rotate).
4590 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst),
4591 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>;
4592 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), ForceXForm:$dst),
4593 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
4594 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), ForceXForm:$dst),
4595 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
4596 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), ForceXForm:$dst),
4597 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
4598 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), ForceXForm:$dst),
4599 (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
4600 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), ForceXForm:$dst),
4601 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
4602 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), ForceXForm:$dst),
4603 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
4604 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst),
4605 (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
4607 defm : ScalToVecWPermute<
4608 v2i64, (i64 (load DSForm:$src)),
4609 (XXPERMDIs (DFLOADf64 DSForm:$src), 2),
4610 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>;
4611 defm : ScalToVecWPermute<
4612 v2i64, (i64 (load XForm:$src)),
4613 (XXPERMDIs (XFLOADf64 XForm:$src), 2),
4614 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>;
4615 defm : ScalToVecWPermute<
4616 v2f64, (f64 (load DSForm:$src)),
4617 (XXPERMDIs (DFLOADf64 DSForm:$src), 2),
4618 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>;
4619 defm : ScalToVecWPermute<
4620 v2f64, (f64 (load XForm:$src)),
4621 (XXPERMDIs (XFLOADf64 XForm:$src), 2),
4622 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>;
4624 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), XForm:$src),
4625 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4626 sub_64), XForm:$src)>;
4627 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), XForm:$src),
4628 (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4629 sub_64), XForm:$src)>;
4630 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), XForm:$src),
4631 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
4632 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), XForm:$src),
4633 (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
4634 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), DSForm:$src),
4635 (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
4636 sub_64), DSForm:$src)>;
4637 def : Pat<(store (f64 (extractelt v2f64:$A, 0)), DSForm:$src),
4638 (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
4640 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), DSForm:$src),
4641 (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
4642 def : Pat<(store (f64 (extractelt v2f64:$A, 1)), DSForm:$src),
4643 (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
4645 // (Un)Signed DWord vector extract -> QP
4646 def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
4648 (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
4649 def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
4650 (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
4651 def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
4653 (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
4654 def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
4655 (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
4657 // (Un)Signed Word vector extract -> QP
4658 foreach Idx = [[0,3],[1,2],[3,0]] in {
4659 def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
4660 (f128 (XSCVSDQP (EXTRACT_SUBREG
4661 (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
4664 def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
4665 (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
4667 foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
4668 def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
4669 (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
4672 // (Un)Signed HWord vector extract -> QP/DP/SP
4673 // The nested foreach lists identify the vector element and corresponding
4674 // register byte location.
4675 foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
4676 def : Pat<(f128 (sint_to_fp
4678 (vector_extract v8i16:$src, !head(Idx)), i16)))),
4680 (EXTRACT_SUBREG (VEXTSH2D
4681 (VEXTRACTUH !head(!tail(Idx)), $src)),
4683 def : Pat<(f128 (uint_to_fp
4684 (and (i32 (vector_extract v8i16:$src, !head(Idx))),
4686 (f128 (XSCVUDQP (EXTRACT_SUBREG
4687 (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
4688 def : Pat<(f32 (PPCfcfidus
4690 (and (i32 (vector_extract v8i16:$src, !head(Idx))),
4692 (f32 (XSCVUXDSP (EXTRACT_SUBREG
4693 (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
4694 def : Pat<(f32 (PPCfcfids
4696 (i32 (sext_inreg (vector_extract v8i16:$src,
4697 !head(Idx)), i16)))))),
4700 (VEXTSH2D (VEXTRACTUH !head(!tail(Idx)), $src)),
4702 def : Pat<(f64 (PPCfcfidu
4704 (and (i32 (vector_extract v8i16:$src, !head(Idx))),
4706 (f64 (XSCVUXDDP (EXTRACT_SUBREG
4707 (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
4708 def : Pat<(f64 (PPCfcfid
4711 (vector_extract v8i16:$src, !head(Idx)), i16)))))),
4713 (EXTRACT_SUBREG (VEXTSH2D
4714 (VEXTRACTUH !head(!tail(Idx)), $src)),
4718 // (Un)Signed Byte vector extract -> QP/DP/SP
4719 foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
4720 [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
4721 def : Pat<(f128 (sint_to_fp
4723 (vector_extract v16i8:$src, !head(Idx)), i8)))),
4726 (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
4728 def : Pat<(f128 (uint_to_fp
4729 (and (i32 (vector_extract v16i8:$src, !head(Idx))),
4733 (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
4735 def : Pat<(f32 (PPCfcfidus
4737 (and (i32 (vector_extract v16i8:$src, !head(Idx))),
4739 (f32 (XSCVUXDSP (EXTRACT_SUBREG
4740 (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
4741 def : Pat<(f32 (PPCfcfids
4744 (vector_extract v16i8:$src, !head(Idx)), i8)))))),
4746 (EXTRACT_SUBREG (VEXTSH2D
4747 (VEXTRACTUB !head(!tail(Idx)), $src)),
4749 def : Pat<(f64 (PPCfcfidu
4752 (vector_extract v16i8:$src, !head(Idx))), 255))))),
4753 (f64 (XSCVUXDDP (EXTRACT_SUBREG
4754 (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
4755 def : Pat<(f64 (PPCfcfidu
4758 (vector_extract v16i8:$src, !head(Idx)), i8)))))),
4760 (EXTRACT_SUBREG (VEXTSH2D
4761 (VEXTRACTUB !head(!tail(Idx)), $src)),
// Byte (v16i8) element extract: must use the byte-extract instruction
// VEXTRACTUB, matching every sibling pattern in this foreach; VEXTRACTUH
// extracts a halfword and would read the wrong lane.
4764 def : Pat<(f64 (PPCfcfid
4767 (vector_extract v16i8:$src, !head(Idx)), i8)))))),
4769 (EXTRACT_SUBREG (VEXTSH2D
4770 (VEXTRACTUB !head(!tail(Idx)), $src)),
4774 // Unsigned int in vsx register -> QP
4775 def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
4777 (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
4778 } // HasVSX, HasP9Vector, IsLittleEndian
4780 // Any Power9 VSX subtarget that supports Power9 Altivec.
4781 let Predicates = [HasVSX, HasP9Altivec] in {
4782 // Unsigned absolute-difference.
4783 def : Pat<(v4i32 (abdu v4i32:$A, v4i32:$B)),
4784 (v4i32 (VABSDUW $A, $B))>;
4786 def : Pat<(v8i16 (abdu v8i16:$A, v8i16:$B)),
4787 (v8i16 (VABSDUH $A, $B))>;
4789 def : Pat<(v16i8 (abdu v16i8:$A, v16i8:$B)),
4790 (v16i8 (VABSDUB $A, $B))>;
4792 // Signed absolute-difference.
4793 // Power9 VABSD* instructions are designed to support unsigned integer
4794 // vectors (byte/halfword/word), if we want to make use of them for signed
4795 // integer vectors, we have to flip their sign bits first. To flip sign bit
4796 // for a byte/halfword integer vector would be inefficient, but for a word
4797 // integer vector we can leverage XVNEGSP to do it efficiently.
4798 def : Pat<(v4i32 (abds v4i32:$A, v4i32:$B)),
4799 (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
4800 } // HasVSX, HasP9Altivec
4802 // Big endian Power9 64Bit VSX subtargets with P9 Altivec support.
4803 let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in {
4804 def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
4805 (VEXTUBLX $Idx, $S)>;
4807 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
4808 (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
4809 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
4810 (VEXTUHLX (LI8 0), $S)>;
4811 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
4812 (VEXTUHLX (LI8 2), $S)>;
4813 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
4814 (VEXTUHLX (LI8 4), $S)>;
4815 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
4816 (VEXTUHLX (LI8 6), $S)>;
4817 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
4818 (VEXTUHLX (LI8 8), $S)>;
4819 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
4820 (VEXTUHLX (LI8 10), $S)>;
4821 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
4822 (VEXTUHLX (LI8 12), $S)>;
4823 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
4824 (VEXTUHLX (LI8 14), $S)>;
4826 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
4827 (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
4828 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
4829 (VEXTUWLX (LI8 0), $S)>;
4831 // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
4832 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
4833 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4834 (i32 VectorExtractions.LE_WORD_2), sub_32)>;
4835 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
4836 (VEXTUWLX (LI8 8), $S)>;
4837 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
4838 (VEXTUWLX (LI8 12), $S)>;
4840 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
4841 (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
4842 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
4843 (EXTSW (VEXTUWLX (LI8 0), $S))>;
4844 // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
4845 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
4846 (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4847 (i32 VectorExtractions.LE_WORD_2), sub_32))>;
4848 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
4849 (EXTSW (VEXTUWLX (LI8 8), $S))>;
4850 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
4851 (EXTSW (VEXTUWLX (LI8 12), $S))>;
4853 def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
4854 (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
4855 def : Pat<(i32 (vector_extract v16i8:$S, 0)),
4856 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
4857 def : Pat<(i32 (vector_extract v16i8:$S, 1)),
4858 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
4859 def : Pat<(i32 (vector_extract v16i8:$S, 2)),
4860 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
4861 def : Pat<(i32 (vector_extract v16i8:$S, 3)),
4862 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
4863 def : Pat<(i32 (vector_extract v16i8:$S, 4)),
4864 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
4865 def : Pat<(i32 (vector_extract v16i8:$S, 5)),
4866 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
4867 def : Pat<(i32 (vector_extract v16i8:$S, 6)),
4868 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
4869 def : Pat<(i32 (vector_extract v16i8:$S, 7)),
4870 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
4871 def : Pat<(i32 (vector_extract v16i8:$S, 8)),
4872 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
4873 def : Pat<(i32 (vector_extract v16i8:$S, 9)),
4874 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
4875 def : Pat<(i32 (vector_extract v16i8:$S, 10)),
4876 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
4877 def : Pat<(i32 (vector_extract v16i8:$S, 11)),
4878 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
4879 def : Pat<(i32 (vector_extract v16i8:$S, 12)),
4880 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
4881 def : Pat<(i32 (vector_extract v16i8:$S, 13)),
4882 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
4883 def : Pat<(i32 (vector_extract v16i8:$S, 14)),
4884 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
4885 def : Pat<(i32 (vector_extract v16i8:$S, 15)),
4886 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
4888 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
4889 (i32 (EXTRACT_SUBREG (VEXTUHLX
4890 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
4891 def : Pat<(i32 (vector_extract v8i16:$S, 0)),
4892 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
4893 def : Pat<(i32 (vector_extract v8i16:$S, 1)),
4894 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
4895 def : Pat<(i32 (vector_extract v8i16:$S, 2)),
4896 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
4897 def : Pat<(i32 (vector_extract v8i16:$S, 3)),
4898 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
4899 def : Pat<(i32 (vector_extract v8i16:$S, 4)),
4900 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
4901 def : Pat<(i32 (vector_extract v8i16:$S, 5)),
4902 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
4903 def : Pat<(i32 (vector_extract v8i16:$S, 6)),
4904 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
// BE halfword element 7 lives at byte offset 14; the index here must be 7
// (it previously duplicated element 6, leaving element 7 unmatched).
4905 def : Pat<(i32 (vector_extract v8i16:$S, 7)),
4906 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
4908 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
4909 (i32 (EXTRACT_SUBREG (VEXTUWLX
4910 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
4911 def : Pat<(i32 (vector_extract v4i32:$S, 0)),
4912 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
4913 // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
4914 def : Pat<(i32 (vector_extract v4i32:$S, 1)),
4915 (i32 VectorExtractions.LE_WORD_2)>;
4916 def : Pat<(i32 (vector_extract v4i32:$S, 2)),
4917 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
4918 def : Pat<(i32 (vector_extract v4i32:$S, 3)),
4919 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
4921 // P9 Altivec instructions that can be used to build vectors.
4922 // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
4923 // with complexities of existing build vector patterns in this file.
4924 def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
4925 (v2i64 (VEXTSW2D $A))>;
4926 def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
4927 (v2i64 (VEXTSH2D $A))>;
4928 def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
4929 HWordToWord.BE_A2, HWordToWord.BE_A3)),
4930 (v4i32 (VEXTSH2W $A))>;
4931 def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
4932 ByteToWord.BE_A2, ByteToWord.BE_A3)),
4933 (v4i32 (VEXTSB2W $A))>;
4934 def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
4935 (v2i64 (VEXTSB2D $A))>;
4936 } // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64
4938 // Little endian Power9 VSX subtargets with P9 Altivec support.
4939 let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
4940 def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
4941 (VEXTUBRX $Idx, $S)>;
4943 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
4944 (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
4945 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
4946 (VEXTUHRX (LI8 0), $S)>;
4947 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
4948 (VEXTUHRX (LI8 2), $S)>;
4949 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
4950 (VEXTUHRX (LI8 4), $S)>;
4951 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
4952 (VEXTUHRX (LI8 6), $S)>;
4953 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
4954 (VEXTUHRX (LI8 8), $S)>;
4955 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
4956 (VEXTUHRX (LI8 10), $S)>;
4957 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
4958 (VEXTUHRX (LI8 12), $S)>;
4959 def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
4960 (VEXTUHRX (LI8 14), $S)>;
4962 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
4963 (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
4964 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
4965 (VEXTUWRX (LI8 0), $S)>;
4966 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
4967 (VEXTUWRX (LI8 4), $S)>;
4968 // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
4969 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
4970 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4971 (i32 VectorExtractions.LE_WORD_2), sub_32)>;
4972 def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
4973 (VEXTUWRX (LI8 12), $S)>;
4975 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
4976 (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
4977 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
4978 (EXTSW (VEXTUWRX (LI8 0), $S))>;
4979 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
4980 (EXTSW (VEXTUWRX (LI8 4), $S))>;
4981 // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
4982 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
4983 (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
4984 (i32 VectorExtractions.LE_WORD_2), sub_32))>;
4985 def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
4986 (EXTSW (VEXTUWRX (LI8 12), $S))>;
4988 def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
4989 (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
4990 def : Pat<(i32 (vector_extract v16i8:$S, 0)),
4991 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
4992 def : Pat<(i32 (vector_extract v16i8:$S, 1)),
4993 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
4994 def : Pat<(i32 (vector_extract v16i8:$S, 2)),
4995 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
4996 def : Pat<(i32 (vector_extract v16i8:$S, 3)),
4997 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
4998 def : Pat<(i32 (vector_extract v16i8:$S, 4)),
4999 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
5000 def : Pat<(i32 (vector_extract v16i8:$S, 5)),
5001 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
5002 def : Pat<(i32 (vector_extract v16i8:$S, 6)),
5003 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
5004 def : Pat<(i32 (vector_extract v16i8:$S, 7)),
5005 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
5006 def : Pat<(i32 (vector_extract v16i8:$S, 8)),
5007 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
5008 def : Pat<(i32 (vector_extract v16i8:$S, 9)),
5009 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
5010 def : Pat<(i32 (vector_extract v16i8:$S, 10)),
5011 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
5012 def : Pat<(i32 (vector_extract v16i8:$S, 11)),
5013 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
5014 def : Pat<(i32 (vector_extract v16i8:$S, 12)),
5015 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
5016 def : Pat<(i32 (vector_extract v16i8:$S, 13)),
5017 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
5018 def : Pat<(i32 (vector_extract v16i8:$S, 14)),
5019 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
5020 def : Pat<(i32 (vector_extract v16i8:$S, 15)),
5021 (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
5023 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
5024 (i32 (EXTRACT_SUBREG (VEXTUHRX
5025 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
5026 def : Pat<(i32 (vector_extract v8i16:$S, 0)),
5027 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
5028 def : Pat<(i32 (vector_extract v8i16:$S, 1)),
5029 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
5030 def : Pat<(i32 (vector_extract v8i16:$S, 2)),
5031 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
5032 def : Pat<(i32 (vector_extract v8i16:$S, 3)),
5033 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
5034 def : Pat<(i32 (vector_extract v8i16:$S, 4)),
5035 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
5036 def : Pat<(i32 (vector_extract v8i16:$S, 5)),
5037 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
5038 def : Pat<(i32 (vector_extract v8i16:$S, 6)),
5039 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
// LE halfword element 7 corresponds to register byte offset 14; the index
// must be 7 (it previously duplicated element 6, leaving element 7 unmatched).
5040 def : Pat<(i32 (vector_extract v8i16:$S, 7)),
5041 (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
5043 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
5044 (i32 (EXTRACT_SUBREG (VEXTUWRX
5045 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
5046 def : Pat<(i32 (vector_extract v4i32:$S, 0)),
5047 (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
5048 def : Pat<(i32 (vector_extract v4i32:$S, 1)),
5049 (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
5050 // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
5051 def : Pat<(i32 (vector_extract v4i32:$S, 2)),
5052 (i32 VectorExtractions.LE_WORD_2)>;
5053 def : Pat<(i32 (vector_extract v4i32:$S, 3)),
5054 (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
5056 // P9 Altivec instructions that can be used to build vectors.
5057 // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
5058 // with complexities of existing build vector patterns in this file.
5059 def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
5060 (v2i64 (VEXTSW2D $A))>;
5061 def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
5062 (v2i64 (VEXTSH2D $A))>;
5063 def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
5064 HWordToWord.LE_A2, HWordToWord.LE_A3)),
5065 (v4i32 (VEXTSH2W $A))>;
5066 def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
5067 ByteToWord.LE_A2, ByteToWord.LE_A3)),
5068 (v4i32 (VEXTSB2W $A))>;
5069 def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
5070 (v2i64 (VEXTSB2D $A))>;
5071 } // HasVSX, HasP9Altivec, IsLittleEndian
5073 // Big endian 64Bit VSX subtarget that supports additional direct moves from
5075 let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in {
5076 def : Pat<(i64 (extractelt v2i64:$A, 1)),
5077 (i64 (MFVSRLD $A))>;
5078 // Better way to build integer vectors if we have MTVSRDD. Big endian.
5079 def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
5080 (v2i64 (MTVSRDD $rB, $rA))>;
5081 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
5083 (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
5084 (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
5086 def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
5087 (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
5088 } // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64
5090 // Little endian VSX subtarget that supports direct moves from ISA3.0.
5091 let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
5092 def : Pat<(i64 (extractelt v2i64:$A, 0)),
5093 (i64 (MFVSRLD $A))>;
5094 // Better way to build integer vectors if we have MTVSRDD. Little endian.
5095 def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
5096 (v2i64 (MTVSRDD $rB, $rA))>;
5097 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
5099 (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
5100 (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
5102 def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
5103 (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
5104 } // HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian
5105 } // AddedComplexity = 400
5107 //---------------------------- Instruction aliases ---------------------------//
5108 def : InstAlias<"xvmovdp $XT, $XB",
5109 (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
5110 def : InstAlias<"xvmovsp $XT, $XB",
5111 (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
5113 // Certain versions of the AIX assembler may misassemble these mnemonics.
5114 let Predicates = [ModernAs] in {
5115 def : InstAlias<"xxspltd $XT, $XB, 0",
5116 (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
5117 def : InstAlias<"xxspltd $XT, $XB, 1",
5118 (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
5119 def : InstAlias<"xxspltd $XT, $XB, 0",
5120 (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
5121 def : InstAlias<"xxspltd $XT, $XB, 1",
5122 (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
5125 def : InstAlias<"xxmrghd $XT, $XA, $XB",
5126 (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
5127 def : InstAlias<"xxmrgld $XT, $XA, $XB",
5128 (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
5129 def : InstAlias<"xxswapd $XT, $XB",
5130 (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
5131 def : InstAlias<"xxswapd $XT, $XB",
5132 (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
5133 def : InstAlias<"mfvrd $rA, $XT",
5134 (MFVRD g8rc:$rA, vrrc:$XT), 0>;
5135 def : InstAlias<"mffprd $rA, $src",
5136 (MFVSRD g8rc:$rA, f8rc:$src)>;
5137 def : InstAlias<"mtvrd $XT, $rA",
5138 (MTVRD vrrc:$XT, g8rc:$rA), 0>;
5139 def : InstAlias<"mtfprd $dst, $rA",
5140 (MTVSRD f8rc:$dst, g8rc:$rA)>;
5141 def : InstAlias<"mfvrwz $rA, $XT",
5142 (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
5143 def : InstAlias<"mffprwz $rA, $src",
5144 (MFVSRWZ gprc:$rA, f8rc:$src)>;
5145 def : InstAlias<"mtvrwa $XT, $rA",
5146 (MTVRWA vrrc:$XT, gprc:$rA), 0>;
5147 def : InstAlias<"mtfprwa $dst, $rA",
5148 (MTVSRWA f8rc:$dst, gprc:$rA)>;
5149 def : InstAlias<"mtvrwz $XT, $rA",
5150 (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
5151 def : InstAlias<"mtfprwz $dst, $rA",
5152 (MTVSRWZ f8rc:$dst, gprc:$rA)>;