; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; Declarations of the SSE2 byte/word rounded-averaging intrinsics exercised by
; the combine tests below: pavg computes (a + b + 1) >> 1 per unsigned element.
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
; pavgb(x, x) = (x + x + 1) >> 1 = x, so the intrinsic call must fold away
; completely and the function lowers to a bare return.
define <16 x i8> @combine_pavgb_self(<16 x i8> %a0) {
; SSE-LABEL: combine_pavgb_self:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pavgb_self:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a0)
  ret <16 x i8> %1
}
; pavgb(0, x) = (x + 1) >> 1, which is NOT x, so no fold is legal here and the
; zero materialization plus the pavgb instruction must both survive.
define <16 x i8> @combine_pavgb_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_pavgb_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pavgb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pavgb_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> %a0)
  ret <16 x i8> %1
}
; All four inputs are masked to 5 bits, so each pavgw result is known to fit in
; a byte; known-bits analysis should let the i16->i8 truncating shuffle lower
; to a single packuswb instead of a mask-and-pack sequence.
define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE-LABEL: combine_pavgw_knownbits:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pavgw %xmm2, %xmm3
; SSE-NEXT:    packuswb %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_pavgw_knownbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm2
; AVX1-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_pavgw_knownbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; AVX2-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm4, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm4, %xmm3, %xmm2
; AVX2-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %m0 = and <8 x i16> %a0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m1 = and <8 x i16> %a1, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m2 = and <8 x i16> %a2, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m3 = and <8 x i16> %a3, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %avg01 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m0, <8 x i16> %m1)
  %avg23 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m2, <8 x i16> %m3)
  %shuffle = shufflevector <8 x i16> %avg01, <8 x i16> %avg23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %trunc = trunc <16 x i16> %shuffle to <16 x i8>
  ret <16 x i8> %trunc
}
; The final splat shuffle only demands lane 0 of the pavgw result, and lane 0
; of %s0 equals lane 0 of %a0, so the even-lane splat feeding the pavgw is a
; demanded-elements no-op and must be dropped; only the result splat remains.
define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pavgw_demandedelts:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_pavgw_demandedelts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_pavgw_demandedelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
  %shuffle = shufflevector <8 x i16> %avg, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %shuffle
}