1 //===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
3 TODO: Instructions That Need Intrinsics Implemented or Mapped to LLVM IR
6 - Vector Compare Not Equal (Zero):
7 vcmpneb(.) vcmpneh(.) vcmpnew(.)
8 vcmpnezb(.) vcmpnezh(.) vcmpnezw(.)
9 . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic)
11 - Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd
12 . Don't use llvm extractelement because they have different semantics
14 (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM))
15 (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM))
16 (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM))
17 (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM))
19 - Vector Extract Unsigned Byte Left/Right-Indexed:
20 vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
23 (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB))
24 (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB))
25 (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB))
28 (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB))
29 (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB))
30 (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB))
32 - Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw
33 (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM))
34 (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM))
35 (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM))
36 (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM))
38 - Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]:
41 (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB))
42 (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB))
44 - Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd
46 (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb
47 (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh
48 (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw
49 (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd
51 - Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
53 (set v4i32:$vD, (sext v4i8:$vB))
57 VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3])
61 (set v4i32:$vD, (sext v4i16:$vB))
65 VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1])
69 (set v2i64:$vD, (sext v2i8:$vB))
73 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7])
77 (set v2i64:$vD, (sext v2i16:$vB))
81 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3])
85 (set v2i64:$vD, (sext v2i32:$vB))
89 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1])
92 - Vector Integer Negate: vnegw vnegd
94 (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw
95 (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd
97 - Vector Parity Byte: vprtybw vprtybd vprtybq
99 (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB))
100 (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB))
101 (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB))
103 - Vector (Bit) Permute (Right-indexed):
104 . vbpermd: Same as "vbpermq", use VX1_Int_Ty2:
105 VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>;
107 . vpermr: use VA1a_Int_Ty3
108 VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>;
110 - Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi
112 VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>;
113 VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>;
114 VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>;
115 VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>;
117 - Vector Shift Left/Right: vslv vsrv
118 . Use intrinsic, don't map to llvm shl and lshr, because they have different
119 semantics, e.g. vslv:
122 sh ← VR[VRB].byte[i].bit[5:7]
123 VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7]
126 VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1]
128 . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>;
129 VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
131 - Vector Multiply-by-10 (& Write Carry) Unsigned Quadword:
134 VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>;
135 VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>;
137 - Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword:
140 VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>;
141 VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
143 - Decimal Convert From/to National/Zoned/Signed-QWord:
144 bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq.
146 (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS))
147 (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS))
148 (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB))
149 (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS))
150 (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS))
151 (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB))
153 - Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn.
155 (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB))
156 (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS))
158 - Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr.
160 (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
161 (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
162 (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS))
164 . Note! Only 1 byte of their VA is accessed, i.e. VA.byte[7]
166 - Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc.
168 (set v1i128:$vD, (int_ppc_altivec_bcdtrunco v1i128:$vA, v1i128:$vB, i1:$PS))
169 (set v1i128:$vD, (int_ppc_altivec_bcdutrunco v1i128:$vA, v1i128:$vB))
171 . Note! Only 2 bytes of their VA are accessed, i.e. VA.hword[3] (VA.bit[48:63])
174 - QP Copy Sign: xscpsgnqp
175 . Similar to xscpsgndp
176 . (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
178 - QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
179 . Similar to xsabsdp/xsnabsdp/xsnegdp
180 . (set f128:$vT, (fabs f128:$vB)) // xsabsqp
181 (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp
182 (set f128:$vT, (fneg f128:$vB)) // xsnegqp
184 - QP Add/Divide/Multiply/Subtract/Square-Root:
185 xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
188 (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp
189 (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp
192 (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp
193 (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp
194 (set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp
196 - Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
197 xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
198 . Similar to xsrsqrtedp??
199 def XSRSQRTEDP : XX2Form<60, 74,
200 (outs vsfrc:$XT), (ins vsfrc:$XB),
201 "xsrsqrtedp $XT, $XB", IIC_VecFP,
202 [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
204 . Define DAG Node in PPCInstrInfo.td:
205 def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
206 def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
207 def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
208 def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
209 def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
211 DAG patterns of each instruction (PPCInstrVSX.td):
213 (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo
214 (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo
217 (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo
218 (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo
219 (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo
221 - QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
222 . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
226 [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
227 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
231 [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
232 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
236 [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
237 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
241 [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
242 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
245 - Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
246 xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
247 . Similar to xsrsqrtedp??
249 . Define DAG Node in PPCInstrInfo.td:
250 def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
252 It looks like we only need to define "PPCfmarto" for these instructions,
253 because according to PowerISA_V3.0, these instructions perform RTO on
256 v ← bfp_MULTIPLY_ADD(src1, src3, src2)
257 rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
258 result ← bfp_CONVERT_TO_BFP128(rnd)
261 v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
262 rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
263 result ← bfp_CONVERT_TO_BFP128(rnd)
266 v ← bfp_MULTIPLY_ADD(src1,src3,src2)
267 rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
268 result ← bfp_CONVERT_TO_BFP128(rnd)
271 v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
272 rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
273 result ← bfp_CONVERT_TO_BFP128(rnd)
275 DAG patterns of each instruction (PPCInstrVSX.td):
278 [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
279 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
283 [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
284 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
288 [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
289 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
293 [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
294 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
297 - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
299 def XSCMPUDP : XX3Form_1<60, 35,
300 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
301 "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
303 . No SDAG, intrinsic, or builtin required??
304 Or map to llvm fcmp ordered/unordered compares??
306 - DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
307 . No SDAG, intrinsic, builtin are required?
309 - DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
310 . I checked the existing instruction "XSCMPUDP". They differ in the target
311 register: "XSCMPUDP" writes to a CR field, xscmp*dp write to a VSX register
314 (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
315 (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
316 (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
317 (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
319 - Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
320 . Similar to xvcmpeqdp:
321 defm XVCMPEQDP : XX3Form_Rcr<60, 99,
322 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
323 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
325 . So we should use "XX3Form_Rcr" to implement intrinsic
327 - Convert DP -> QP: xscvdpqp
328 . Similar to XSCVDPSP:
329 def XSCVDPSP : XX2Form<60, 265,
330 (outs vsfrc:$XT), (ins vsfrc:$XB),
331 "xscvdpsp $XT, $XB", IIC_VecFP, []>;
332 . So, No SDAG, intrinsic, builtin are required??
334 - Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
335 . Similar to XSCVDPSP
336 . No SDAG, intrinsic, builtin are required??
338 - Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
339 xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
340 . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
341 "XSCVDPUXDS", "XSCVDPUXWS"
344 (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz
345 (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz
346 (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz
347 (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz
349 - Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
350 . Similar to XSCVSXDSP
351 . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp
352 (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp
354 - (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
355 . Similar to XSCVDPSP
356 . No SDAG, intrinsic, builtin are required??
358 - Vector Convert HP <-> SP: xvcvhpsp xvcvsphp
359 . Similar to XVCVDPSP:
360 def XVCVDPSP : XX2Form<60, 393,
361 (outs vsrc:$XT), (ins vsrc:$XB),
362 "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
363 . No SDAG, intrinsic, builtin are required??
365 - Round to Quad-Precision Integer: xsrqpi xsrqpix
366 . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", ..., because the
367 rounding mode needs to be specified in the instruction
369 (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
370 (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
372 - Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
374 (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
376 Fixed Point Facility:
378 - Exploit cmprb and cmpeqb (perhaps for something like
379 isalpha/isdigit/isupper/islower and isspace respectively). This can
380 perhaps be done through a builtin.
382 - Provide testing for cnttz[dw]
383 - Insert Exponent DP/QP: xsiexpdp xsiexpqp
386 // Note: rA and rB are unsigned integer values.
387 (set f64:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB))
390 (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB))
392 - Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp
394 . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB)) // xsxexpdp
395 (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB)) // xsxsigdp
396 (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB)) // xsxexpqp
397 (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB)) // xsxsigqp
399 - Vector Insert Word: xxinsertw
400 - Useful for inserting f32/i32 elements into vectors (the element to be
401 inserted needs to be prepared)
402 . Note: llvm has insertelem in "Vector Operations"
404 <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
406 But how to map to it??
407 [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
408 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
411 (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
413 - Vector Extract Unsigned Word: xxextractuw
414 - Not useful for extraction of f32 from v4f32 (the current pattern is better -
416 - It is useful for (uint_to_fp (vector_extract v4i32, N))
417 - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
418 . Note: llvm has extractelement in "Vector Operations"
420 <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
423 [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
426 (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
428 - Vector Insert Exponent DP/SP: xviexpdp xviexpsp
430 (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
431 (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
433 - Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
435 (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
436 (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
437 (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
438 (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
440 - Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
441 . No SDAG, intrinsic, builtin are required?
442 Because it seems that we have no way to map BF field?
444 Instruction Form: [PO T XO B XO BX TX]
445 Asm: xststd* BF,XB,DCMX
447 BF is an index to CR register field.
449 - Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
451 (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
452 (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
454 - Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
456 "xsmaxcdp can be used to implement the C/C++/Java conditional operation
457 (x>y)?x:y for single-precision and double-precision arguments."
459 Note! c type and j type have different behavior when:
460 1. Either input is NaN
461 2. Both inputs are +-Infinity or +-Zero
463 . c type maps to llvm fmaxnum/fminnum
467 (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
468 (set f64:$XT, (fminnum f64:$XA, f64:$XB))
471 (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
472 (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
474 - Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
476 (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB))
477 (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB))
478 (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB))
479 (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
481 - Vector Permute: xxperm xxpermr
482 . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different
484 (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB))
485 (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
487 - Vector Splat Immediate Byte: xxspltib
488 . Similar to XXSPLTW:
489 def XXSPLTW : XX2Form_2<60, 164,
490 (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
491 "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
493 . No SDAG, intrinsic, builtin are required?
495 - Load/Store Vector: lxv stxv
496 . Has likely SDAG match:
497 (set v?:$XT, (load ix16addr:$src))
498 (store v?:$XT, ix16addr:$dst)
500 . Need to define ix16addr in PPCInstrInfo.td
501 ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
503 - Load/Store Vector Indexed: lxvx stxvx
504 . Has likely SDAG match:
505 (set v?:$XT, (load xoaddr:$src))
506 (store v?:$XT, xoaddr:$dst)
508 - Load/Store DWord: lxsd stxsd
509 . Similar to lxsdx/stxsdx:
510 def LXSDX : XX1Form<31, 588,
511 (outs vsfrc:$XT), (ins memrr:$src),
512 "lxsdx $XT, $src", IIC_LdStLFD,
513 [(set f64:$XT, (load xoaddr:$src))]>;
515 . (set f64:$XT, (load iaddrX4:$src))
516 (store f64:$XT, iaddrX4:$dst)
518 - Load/Store SP, with conversion from/to DP: lxssp stxssp
519 . Similar to lxsspx/stxsspx:
520 def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
521 "lxsspx $XT, $src", IIC_LdStLFD,
522 [(set f32:$XT, (load xoaddr:$src))]>;
524 . (set f32:$XT, (load iaddrX4:$src))
525 (store f32:$XT, iaddrX4:$dst)
527 - Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
528 . Similar to lxsiwzx:
529 def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
530 "lxsiwzx $XT, $src", IIC_LdStLFD,
531 [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
533 . (set f64:$XT, (PPClfiwzx xoaddr:$src))
535 - Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
536 . Similar to stxsiwx:
537 def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
538 "stxsiwx $XT, $dst", IIC_LdStSTFD,
539 [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
541 . (PPCstfiwx f64:$XT, xoaddr:$dst)
543 - Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
544 . Similar to lxvd2x/lxvw4x:
545 def LXVD2X : XX1Form<31, 844,
546 (outs vsrc:$XT), (ins memrr:$src),
547 "lxvd2x $XT, $src", IIC_LdStLFD,
548 [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
550 . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src))
551 (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
553 - Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
554 . Similar to stxvd2x/stxvw4x:
555 def STXVD2X : XX1Form<31, 972,
556 (outs), (ins vsrc:$XT, memrr:$dst),
557 "stxvd2x $XT, $dst", IIC_LdStSTFD,
558 [(store v2f64:$XT, xoaddr:$dst)]>;
560 . (store v8i16:$XT, xoaddr:$dst)
561 (store v16i8:$XT, xoaddr:$dst)
563 - Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
564 . Likely needs an intrinsic
565 . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src))
566 (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
568 . (int_ppc_vsx_stxvl xoaddr:$dst)
569 (int_ppc_vsx_stxvll xoaddr:$dst)
571 - Load Vector Word & Splat Indexed: lxvwsx
572 . Likely needs an intrinsic
573 . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
575 Atomic operations (l[dw]at, st[dw]at):
576 - Provide custom lowering for common atomic operations to use these
577 instructions with the correct Function Code
578 - Ensure the operands are in the correct register (i.e. RT+1, RT+2)
579 - Provide builtins since not all FC's necessarily have an existing LLVM
582 Move to CR from XER Extended (mcrxrx):
583 - Is there a use for this in LLVM?
585 Fixed Point Facility:
587 - Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
589 (int_ppc_copy_first i32:$rA, i32:$rB)
590 (int_ppc_copy i32:$rA, i32:$rB)
592 (int_ppc_paste i32:$rA, i32:$rB)
593 (int_ppc_paste_last i32:$rA, i32:$rB)
597 - Message Synchronize: msgsync
598 - SLB*: slbieg slbsync