1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
4 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX
5 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
6 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
7 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX
9 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c
define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsb %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsb %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <16 x i8>
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %arg, i1 false)
  %res = bitcast <16 x i8> %abs to <2 x i64>
  ret <2 x i64> %res
}

declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone
define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsw %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsw %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <8 x i16>
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
  %res = bitcast <8 x i16> %abs to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone
define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_abs_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pabsd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_abs_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpabsd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg = bitcast <2 x i64> %a0 to <4 x i32>
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
  %res = bitcast <4 x i32> %abs to <2 x i64>
  ret <2 x i64> %res
}

declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone
define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}
define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test2_mm_alignr_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_mm_alignr_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}
define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadd_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hadds_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phaddsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hadds_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsub_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsub_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_hsubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    phsubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_hsubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_maddubs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaddubsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_maddubs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhrs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhrsw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mulhrs_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_shuffle_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_shuffle_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    psignb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psignw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %res = bitcast <8 x i16> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sign_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psignd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sign_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %res = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %res
}

declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone