1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s
5 ; SimplifyDemandedVectorEltsForTargetNode Handling
; Demanded-elements test for the XOP vpshab intrinsic.
; The final shuffle (mask zeroinitializer) reads only lane 0 of the shift
; result, and lane 0 of %shuffle is lane 0 of %a0. The CHECK lines therefore
; expect vpshab to be applied straight to the incoming registers, with no
; shuffle emitted ahead of it, followed by a byte-0 broadcast (vpxor+vpshufb).
8 define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
9 ; CHECK-LABEL: demandedelts_vpshab:
11 ; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
12 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
13 ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
15 %shuffle = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
16 %shift = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %shuffle, <16 x i8> %a1)
; The returned value is named %result, as in the other tests of this file.
17 %result = shufflevector <16 x i8> %shift, <16 x i8> undef, <16 x i32> zeroinitializer
; Demanded-elements test for the XOP vpshld intrinsic, this time on the
; shift-amount operand: %a1 is splatted from lane 0 before the call, and the
; final shuffle (mask zeroinitializer) reads only lane 0 of the shift result.
; The CHECK lines expect vpshld on the incoming registers with no shuffle
; ahead of it, followed by a single vpshufd splat of element 0.
21 define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
22 ; CHECK-LABEL: demandedelts_vpshld:
24 ; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
25 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
27 %shuffle = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> zeroinitializer
28 %shift = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %shuffle)
29 %result = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> zeroinitializer
; Binop-shuffle test for the XOP vpshaw intrinsic.
; Both operands are permuted with the same mask (low four lanes reversed),
; shifted, and the result is permuted with that mask again. Applying the mask
; twice restores the original lane order, so the CHECK lines expect a single
; vpshaw on the incoming registers and no shuffles.
; The intrinsic called and the instruction checked are vpshaw, matching the
; function name.
37 define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
38 ; CHECK-LABEL: binop_shuffle_vpshaw:
40 ; CHECK-NEXT: vpshaw %xmm1, %xmm0, %xmm0
42 %shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
43 %shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
44 %shift = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %shuffle0, <8 x i16> %shuffle1)
45 %result = shufflevector <8 x i16> %shift, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
49 ; TODO - canonicalizeShuffleWithBinOps - handle scaled shuffle masks.
; Binop-shuffle test for the XOP vpshlq intrinsic on 64-bit lanes.
; Both operands are lane-swapped with the same mask <1,0>, shifted, and the
; result is swapped back with that mask, so the three shuffles cancel out.
; The CHECK lines record the current codegen, in which all three swaps are
; still emitted as vpshufd [2,3,0,1] around the vpshlq (see the TODO above).
50 define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
51 ; CHECK-LABEL: binop_shuffle_vpshlq:
53 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
54 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
55 ; CHECK-NEXT: vpshlq %xmm1, %xmm0, %xmm0
56 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
58 %shuffle0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
59 %shuffle1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
60 %shift = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %shuffle0, <2 x i64> %shuffle1)
61 %result = shufflevector <2 x i64> %shift, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; Declarations of the XOP vpsha* intrinsics, one per vector type
; (<16 x i8>, <4 x i32>, <2 x i64>, <8 x i16>). Each takes two vectors of the
; same type and returns a vector of that type.
65 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
66 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
67 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
68 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
; Declarations of the XOP vpshl* intrinsics, with the same four vector types
; and the same two-operand signature.
70 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
71 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
72 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
73 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone