1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
9 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c
; @test_mm_abs_epi8 - views %a0 as <16 x i8> and builds an abs-style pattern
; without an intrinsic call: %sub is 0 - %arg, %cmp is a signed %arg > 0, and
; %sel keeps %arg where the compare is true and %sub otherwise.
; The checks with the SSE prefix expect a single pabsb; the checks with the
; AVX prefix expect a single vpabsb.
11 define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
12 ; SSE-LABEL: test_mm_abs_epi8:
14 ; SSE-NEXT: pabsb %xmm0, %xmm0
15 ; SSE-NEXT: ret{{[l|q]}}
17 ; AVX-LABEL: test_mm_abs_epi8:
19 ; AVX-NEXT: vpabsb %xmm0, %xmm0
20 ; AVX-NEXT: ret{{[l|q]}}
21 %arg = bitcast <2 x i64> %a0 to <16 x i8>
22 %sub = sub <16 x i8> zeroinitializer, %arg
23 %cmp = icmp sgt <16 x i8> %arg, zeroinitializer
24 %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub
25 %res = bitcast <16 x i8> %sel to <2 x i64>
; NOTE(review) - no call to this intrinsic is visible in this test; the body
; above uses the sub/icmp/select pattern instead. Confirm whether the
; declaration is still needed before touching it.
28 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
; @test_mm_abs_epi16 - same abs-style pattern as the i8 variant, on
; <8 x i16> lanes: %sel keeps %arg where a signed %arg > 0 holds and
; 0 - %arg otherwise.
; The checks with the SSE prefix expect a single pabsw; the checks with the
; AVX prefix expect a single vpabsw.
30 define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
31 ; SSE-LABEL: test_mm_abs_epi16:
33 ; SSE-NEXT: pabsw %xmm0, %xmm0
34 ; SSE-NEXT: ret{{[l|q]}}
36 ; AVX-LABEL: test_mm_abs_epi16:
38 ; AVX-NEXT: vpabsw %xmm0, %xmm0
39 ; AVX-NEXT: ret{{[l|q]}}
40 %arg = bitcast <2 x i64> %a0 to <8 x i16>
41 %sub = sub <8 x i16> zeroinitializer, %arg
42 %cmp = icmp sgt <8 x i16> %arg, zeroinitializer
43 %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub
44 %res = bitcast <8 x i16> %sel to <2 x i64>
; NOTE(review) - no call to this intrinsic is visible in this test; the body
; above uses the sub/icmp/select pattern instead. Confirm whether the
; declaration is still needed before touching it.
47 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
; @test_mm_abs_epi32 - same abs-style pattern as the i8/i16 variants, on
; <4 x i32> lanes: %sel keeps %arg where a signed %arg > 0 holds and
; 0 - %arg otherwise.
; The checks with the SSE prefix expect a single pabsd; the checks with the
; AVX prefix expect a single vpabsd.
49 define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
50 ; SSE-LABEL: test_mm_abs_epi32:
52 ; SSE-NEXT: pabsd %xmm0, %xmm0
53 ; SSE-NEXT: ret{{[l|q]}}
55 ; AVX-LABEL: test_mm_abs_epi32:
57 ; AVX-NEXT: vpabsd %xmm0, %xmm0
58 ; AVX-NEXT: ret{{[l|q]}}
59 %arg = bitcast <2 x i64> %a0 to <4 x i32>
60 %sub = sub <4 x i32> zeroinitializer, %arg
61 %cmp = icmp sgt <4 x i32> %arg, zeroinitializer
62 %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub
63 %res = bitcast <4 x i32> %sel to <2 x i64>
; NOTE(review) - no call to this intrinsic is visible in this test; the body
; above uses the sub/icmp/select pattern instead. Confirm whether the
; declaration is still needed before touching it.
66 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
; @test_mm_alignr_epi8 - expresses a byte alignment as a plain shufflevector.
; The mask 2..17 takes bytes 2-15 of %arg0 followed by bytes 0-1 of %arg1.
; The checks expect this to become one palignr (plus a movdqa to move the
; result into xmm0) under the SSE prefix and one vpalignr under the AVX prefix.
68 define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
69 ; SSE-LABEL: test_mm_alignr_epi8:
71 ; SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
72 ; SSE-NEXT: movdqa %xmm1, %xmm0
73 ; SSE-NEXT: ret{{[l|q]}}
75 ; AVX-LABEL: test_mm_alignr_epi8:
77 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
78 ; AVX-NEXT: ret{{[l|q]}}
79 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
80 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
81 %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
82 %res = bitcast <16 x i8> %shuf to <2 x i64>
; @test2_mm_alignr_epi8 - second alignment case with a one-byte offset.
; The mask 1..16 takes bytes 1-15 of %arg0 followed by byte 0 of %arg1.
; The checks expect one palignr (plus a movdqa to move the result into xmm0)
; under the SSE prefix and one vpalignr under the AVX prefix.
86 define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
87 ; SSE-LABEL: test2_mm_alignr_epi8:
89 ; SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
90 ; SSE-NEXT: movdqa %xmm1, %xmm0
91 ; SSE-NEXT: ret{{[l|q]}}
93 ; AVX-LABEL: test2_mm_alignr_epi8:
95 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
96 ; AVX-NEXT: ret{{[l|q]}}
97 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
98 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
99 %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
100 %res = bitcast <16 x i8> %shuf to <2 x i64>
; @test_mm_hadd_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.phadd.w.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phaddw under the SSE prefix and a
; single vphaddw under the AVX prefix.
104 define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
105 ; SSE-LABEL: test_mm_hadd_epi16:
107 ; SSE-NEXT: phaddw %xmm1, %xmm0
108 ; SSE-NEXT: ret{{[l|q]}}
110 ; AVX-LABEL: test_mm_hadd_epi16:
112 ; AVX-NEXT: vphaddw %xmm1, %xmm0, %xmm0
113 ; AVX-NEXT: ret{{[l|q]}}
114 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
115 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
116 %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
117 %res = bitcast <8 x i16> %call to <2 x i64>
120 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_hadd_epi32 - views both operands as <4 x i32>, calls
; @llvm.x86.ssse3.phadd.d.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phaddd under the SSE prefix and a
; single vphaddd under the AVX prefix.
122 define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
123 ; SSE-LABEL: test_mm_hadd_epi32:
125 ; SSE-NEXT: phaddd %xmm1, %xmm0
126 ; SSE-NEXT: ret{{[l|q]}}
128 ; AVX-LABEL: test_mm_hadd_epi32:
130 ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
131 ; AVX-NEXT: ret{{[l|q]}}
132 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
133 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
134 %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
135 %res = bitcast <4 x i32> %call to <2 x i64>
138 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; @test_mm_hadds_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.phadd.sw.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phaddsw under the SSE prefix and a
; single vphaddsw under the AVX prefix.
140 define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
141 ; SSE-LABEL: test_mm_hadds_epi16:
143 ; SSE-NEXT: phaddsw %xmm1, %xmm0
144 ; SSE-NEXT: ret{{[l|q]}}
146 ; AVX-LABEL: test_mm_hadds_epi16:
148 ; AVX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
149 ; AVX-NEXT: ret{{[l|q]}}
150 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
151 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
152 %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
153 %res = bitcast <8 x i16> %call to <2 x i64>
156 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_hsub_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.phsub.w.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phsubw under the SSE prefix and a
; single vphsubw under the AVX prefix.
158 define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
159 ; SSE-LABEL: test_mm_hsub_epi16:
161 ; SSE-NEXT: phsubw %xmm1, %xmm0
162 ; SSE-NEXT: ret{{[l|q]}}
164 ; AVX-LABEL: test_mm_hsub_epi16:
166 ; AVX-NEXT: vphsubw %xmm1, %xmm0, %xmm0
167 ; AVX-NEXT: ret{{[l|q]}}
168 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
169 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
170 %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
171 %res = bitcast <8 x i16> %call to <2 x i64>
174 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_hsub_epi32 - views both operands as <4 x i32>, calls
; @llvm.x86.ssse3.phsub.d.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phsubd under the SSE prefix and a
; single vphsubd under the AVX prefix.
176 define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
177 ; SSE-LABEL: test_mm_hsub_epi32:
179 ; SSE-NEXT: phsubd %xmm1, %xmm0
180 ; SSE-NEXT: ret{{[l|q]}}
182 ; AVX-LABEL: test_mm_hsub_epi32:
184 ; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
185 ; AVX-NEXT: ret{{[l|q]}}
186 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
187 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
188 %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
189 %res = bitcast <4 x i32> %call to <2 x i64>
192 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; @test_mm_hsubs_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.phsub.sw.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single phsubsw under the SSE prefix and a
; single vphsubsw under the AVX prefix.
194 define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
195 ; SSE-LABEL: test_mm_hsubs_epi16:
197 ; SSE-NEXT: phsubsw %xmm1, %xmm0
198 ; SSE-NEXT: ret{{[l|q]}}
200 ; AVX-LABEL: test_mm_hsubs_epi16:
202 ; AVX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
203 ; AVX-NEXT: ret{{[l|q]}}
204 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
205 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
206 %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
207 %res = bitcast <8 x i16> %call to <2 x i64>
210 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_maddubs_epi16 - views both operands as <16 x i8> and calls
; @llvm.x86.ssse3.pmadd.ub.sw.128, which takes two <16 x i8> inputs and
; yields <8 x i16>; the result is bitcast back to <2 x i64>. The checks
; expect a single pmaddubsw under the SSE prefix and a single vpmaddubsw
; under the AVX prefix.
212 define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
213 ; SSE-LABEL: test_mm_maddubs_epi16:
215 ; SSE-NEXT: pmaddubsw %xmm1, %xmm0
216 ; SSE-NEXT: ret{{[l|q]}}
218 ; AVX-LABEL: test_mm_maddubs_epi16:
220 ; AVX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
221 ; AVX-NEXT: ret{{[l|q]}}
222 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
223 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
224 %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
225 %res = bitcast <8 x i16> %call to <2 x i64>
228 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
; @test_mm_mulhrs_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.pmul.hr.sw.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single pmulhrsw under the SSE prefix and a
; single vpmulhrsw under the AVX prefix.
230 define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
231 ; SSE-LABEL: test_mm_mulhrs_epi16:
233 ; SSE-NEXT: pmulhrsw %xmm1, %xmm0
234 ; SSE-NEXT: ret{{[l|q]}}
236 ; AVX-LABEL: test_mm_mulhrs_epi16:
238 ; AVX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
239 ; AVX-NEXT: ret{{[l|q]}}
240 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
241 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
242 %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
243 %res = bitcast <8 x i16> %call to <2 x i64>
246 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_shuffle_epi8 - views both operands as <16 x i8>, calls
; @llvm.x86.ssse3.pshuf.b.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single pshufb under the SSE prefix and a
; single vpshufb under the AVX prefix.
248 define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
249 ; SSE-LABEL: test_mm_shuffle_epi8:
251 ; SSE-NEXT: pshufb %xmm1, %xmm0
252 ; SSE-NEXT: ret{{[l|q]}}
254 ; AVX-LABEL: test_mm_shuffle_epi8:
256 ; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
257 ; AVX-NEXT: ret{{[l|q]}}
258 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
259 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
260 %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
261 %res = bitcast <16 x i8> %call to <2 x i64>
264 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; @test_mm_sign_epi8 - views both operands as <16 x i8>, calls
; @llvm.x86.ssse3.psign.b.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single psignb under the SSE prefix and a
; single vpsignb under the AVX prefix.
266 define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
267 ; SSE-LABEL: test_mm_sign_epi8:
269 ; SSE-NEXT: psignb %xmm1, %xmm0
270 ; SSE-NEXT: ret{{[l|q]}}
272 ; AVX-LABEL: test_mm_sign_epi8:
274 ; AVX-NEXT: vpsignb %xmm1, %xmm0, %xmm0
275 ; AVX-NEXT: ret{{[l|q]}}
276 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
277 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
278 %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
279 %res = bitcast <16 x i8> %call to <2 x i64>
282 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; @test_mm_sign_epi16 - views both operands as <8 x i16>, calls
; @llvm.x86.ssse3.psign.w.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single psignw under the SSE prefix and a
; single vpsignw under the AVX prefix.
284 define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
285 ; SSE-LABEL: test_mm_sign_epi16:
287 ; SSE-NEXT: psignw %xmm1, %xmm0
288 ; SSE-NEXT: ret{{[l|q]}}
290 ; AVX-LABEL: test_mm_sign_epi16:
292 ; AVX-NEXT: vpsignw %xmm1, %xmm0, %xmm0
293 ; AVX-NEXT: ret{{[l|q]}}
294 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
295 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
296 %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
297 %res = bitcast <8 x i16> %call to <2 x i64>
300 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; @test_mm_sign_epi32 - views both operands as <4 x i32>, calls
; @llvm.x86.ssse3.psign.d.128 on them and bitcasts the result back to
; <2 x i64>. The checks expect a single psignd under the SSE prefix and a
; single vpsignd under the AVX prefix.
302 define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
303 ; SSE-LABEL: test_mm_sign_epi32:
305 ; SSE-NEXT: psignd %xmm1, %xmm0
306 ; SSE-NEXT: ret{{[l|q]}}
308 ; AVX-LABEL: test_mm_sign_epi32:
310 ; AVX-NEXT: vpsignd %xmm1, %xmm0, %xmm0
311 ; AVX-NEXT: ret{{[l|q]}}
312 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
313 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
314 %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
315 %res = bitcast <4 x i32> %call to <2 x i64>
318 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone