; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX

; Test multiplies of various narrow types.
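
; There is no vector byte multiply instruction, so the i8 cases below are
; unpacked into 16-bit lanes (punpcklbw on SSE2, pmovzxbw on SSE4.1/AVX),
; multiplied with pmullw, and the low byte of each product is packed or
; shuffled back down.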

define <2 x i8> @mul_v2i8(<2 x i8> %x, <2 x i8> %y) {
; SSE2-LABEL: mul_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <2 x i8> %x, %y
  ret <2 x i8> %res
}

define <4 x i8> @mul_v4i8(<4 x i8> %x, <4 x i8> %y) {
; SSE2-LABEL: mul_v4i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v4i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <4 x i8> %x, %y
  ret <4 x i8> %res
}

define <8 x i8> @mul_v8i8(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: mul_v8i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v8i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <8 x i8> %x, %y
  ret <8 x i8> %res
}
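
; The i16 cases map directly onto the 16-bit multiply, (v)pmullw.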
define <2 x i16> @mul_v2i16(<2 x i16> %x, <2 x i16> %y) {
; SSE-LABEL: mul_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i16> %x, %y
  ret <2 x i16> %res
}

define <4 x i16> @mul_v4i16(<4 x i16> %x, <4 x i16> %y) {
; SSE-LABEL: mul_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <4 x i16> %x, %y
  ret <4 x i16> %res
}
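
; SSE2 has no pmulld, so the i32 multiply is built from pmuludq on the even and
; odd elements plus shuffles to recombine the results; SSE4.1 and AVX use
; (v)pmulld directly.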
define <2 x i32> @mul_v2i32(<2 x i32> %x, <2 x i32> %y) {
; SSE2-LABEL: mul_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i32> %x, %y