Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / xop-builtins.c
blob113af58a69339de17d5f84b56914a7ec30346d18
1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
3 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
4 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
7 #include <x86intrin.h>
9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
// Codegen test for _mm_maccs_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacssww with three <8 x i16> operands, yielding <8 x i16>.
11 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
12 // CHECK-LABEL: test_mm_maccs_epi16
13 // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
14 return _mm_maccs_epi16(a, b, c);
// Codegen test for _mm_macc_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacsww with three <8 x i16> operands, yielding <8 x i16>.
17 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
18 // CHECK-LABEL: test_mm_macc_epi16
19 // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
20 return _mm_macc_epi16(a, b, c);
// Codegen test for _mm_maccsd_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacsswd with two <8 x i16> operands and a <4 x i32>
// third operand, yielding <4 x i32>.
23 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
24 // CHECK-LABEL: test_mm_maccsd_epi16
25 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
26 return _mm_maccsd_epi16(a, b, c);
// Codegen test for _mm_maccd_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacswd with two <8 x i16> operands and a <4 x i32>
// third operand, yielding <4 x i32>.
29 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
30 // CHECK-LABEL: test_mm_maccd_epi16
31 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
32 return _mm_maccd_epi16(a, b, c);
// Codegen test for _mm_maccs_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacssdd with three <4 x i32> operands, yielding <4 x i32>.
35 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
36 // CHECK-LABEL: test_mm_maccs_epi32
37 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
38 return _mm_maccs_epi32(a, b, c);
// Codegen test for _mm_macc_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacsdd with three <4 x i32> operands, yielding <4 x i32>.
41 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
42 // CHECK-LABEL: test_mm_macc_epi32
43 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
44 return _mm_macc_epi32(a, b, c);
// Codegen test for _mm_maccslo_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacssdql with two <4 x i32> operands and a <2 x i64>
// third operand, yielding <2 x i64>.
47 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
48 // CHECK-LABEL: test_mm_maccslo_epi32
49 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
50 return _mm_maccslo_epi32(a, b, c);
// Codegen test for _mm_macclo_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacsdql with two <4 x i32> operands and a <2 x i64>
// third operand, yielding <2 x i64>.
53 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
54 // CHECK-LABEL: test_mm_macclo_epi32
55 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
56 return _mm_macclo_epi32(a, b, c);
// Codegen test for _mm_maccshi_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacssdqh with two <4 x i32> operands and a <2 x i64>
// third operand, yielding <2 x i64>.
59 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
60 // CHECK-LABEL: test_mm_maccshi_epi32
61 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
62 return _mm_maccshi_epi32(a, b, c);
// Codegen test for _mm_macchi_epi32(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmacsdqh with two <4 x i32> operands and a <2 x i64>
// third operand, yielding <2 x i64>.
65 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
66 // CHECK-LABEL: test_mm_macchi_epi32
67 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
68 return _mm_macchi_epi32(a, b, c);
// Codegen test for _mm_maddsd_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmadcsswd with two <8 x i16> operands and a <4 x i32>
// third operand, yielding <4 x i32>.
71 __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
72 // CHECK-LABEL: test_mm_maddsd_epi16
73 // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
74 return _mm_maddsd_epi16(a, b, c);
// Codegen test for _mm_maddd_epi16(a, b, c): the emitted IR is expected to
// call @llvm.x86.xop.vpmadcswd with two <8 x i16> operands and a <4 x i32>
// third operand, yielding <4 x i32>.
77 __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
78 // CHECK-LABEL: test_mm_maddd_epi16
79 // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
80 return _mm_maddd_epi16(a, b, c);
// Codegen test for _mm_haddw_epi8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddbw on a <16 x i8> operand, yielding <8 x i16>.
83 __m128i test_mm_haddw_epi8(__m128i a) {
84 // CHECK-LABEL: test_mm_haddw_epi8
85 // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
86 return _mm_haddw_epi8(a);
// Codegen test for _mm_haddd_epi8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddbd on a <16 x i8> operand, yielding <4 x i32>.
89 __m128i test_mm_haddd_epi8(__m128i a) {
90 // CHECK-LABEL: test_mm_haddd_epi8
91 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
92 return _mm_haddd_epi8(a);
// Codegen test for _mm_haddq_epi8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddbq on a <16 x i8> operand, yielding <2 x i64>.
95 __m128i test_mm_haddq_epi8(__m128i a) {
96 // CHECK-LABEL: test_mm_haddq_epi8
97 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}})
98 return _mm_haddq_epi8(a);
// Codegen test for _mm_haddd_epi16(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddwd on an <8 x i16> operand, yielding <4 x i32>.
101 __m128i test_mm_haddd_epi16(__m128i a) {
102 // CHECK-LABEL: test_mm_haddd_epi16
103 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}})
104 return _mm_haddd_epi16(a);
// Codegen test for _mm_haddq_epi16(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddwq on an <8 x i16> operand, yielding <2 x i64>.
107 __m128i test_mm_haddq_epi16(__m128i a) {
108 // CHECK-LABEL: test_mm_haddq_epi16
109 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}})
110 return _mm_haddq_epi16(a);
// Codegen test for _mm_haddq_epi32(a): the emitted IR is expected to call
// @llvm.x86.xop.vphadddq on a <4 x i32> operand, yielding <2 x i64>.
113 __m128i test_mm_haddq_epi32(__m128i a) {
114 // CHECK-LABEL: test_mm_haddq_epi32
115 // CHECK: call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}})
116 return _mm_haddq_epi32(a);
// Codegen test for _mm_haddw_epu8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddubw on a <16 x i8> operand, yielding <8 x i16>.
119 __m128i test_mm_haddw_epu8(__m128i a) {
120 // CHECK-LABEL: test_mm_haddw_epu8
121 // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}})
122 return _mm_haddw_epu8(a);
// Codegen test for _mm_haddd_epu8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddubd on a <16 x i8> operand, yielding <4 x i32>.
125 __m128i test_mm_haddd_epu8(__m128i a) {
126 // CHECK-LABEL: test_mm_haddd_epu8
127 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}})
128 return _mm_haddd_epu8(a);
// Codegen test for _mm_haddq_epu8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddubq on a <16 x i8> operand, yielding <2 x i64>.
131 __m128i test_mm_haddq_epu8(__m128i a) {
132 // CHECK-LABEL: test_mm_haddq_epu8
133 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}})
134 return _mm_haddq_epu8(a);
// Codegen test for _mm_haddd_epu16(a): the emitted IR is expected to call
// @llvm.x86.xop.vphadduwd on an <8 x i16> operand, yielding <4 x i32>.
137 __m128i test_mm_haddd_epu16(__m128i a) {
138 // CHECK-LABEL: test_mm_haddd_epu16
139 // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}})
140 return _mm_haddd_epu16(a);
// Codegen test for _mm_haddq_epu16(a): the emitted IR is expected to call
// @llvm.x86.xop.vphadduwq on an <8 x i16> operand, yielding <2 x i64>.
143 __m128i test_mm_haddq_epu16(__m128i a) {
144 // CHECK-LABEL: test_mm_haddq_epu16
145 // CHECK: call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}})
146 return _mm_haddq_epu16(a);
// Codegen test for _mm_haddq_epu32(a): the emitted IR is expected to call
// @llvm.x86.xop.vphaddudq on a <4 x i32> operand, yielding <2 x i64>.
149 __m128i test_mm_haddq_epu32(__m128i a) {
150 // CHECK-LABEL: test_mm_haddq_epu32
151 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}})
152 return _mm_haddq_epu32(a);
// Codegen test for _mm_hsubw_epi8(a): the emitted IR is expected to call
// @llvm.x86.xop.vphsubbw on a <16 x i8> operand, yielding <8 x i16>.
155 __m128i test_mm_hsubw_epi8(__m128i a) {
156 // CHECK-LABEL: test_mm_hsubw_epi8
157 // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}})
158 return _mm_hsubw_epi8(a);
// Codegen test for _mm_hsubd_epi16(a): the emitted IR is expected to call
// @llvm.x86.xop.vphsubwd on an <8 x i16> operand, yielding <4 x i32>.
161 __m128i test_mm_hsubd_epi16(__m128i a) {
162 // CHECK-LABEL: test_mm_hsubd_epi16
163 // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}})
164 return _mm_hsubd_epi16(a);
// Codegen test for _mm_hsubq_epi32(a): the emitted IR is expected to call
// @llvm.x86.xop.vphsubdq on a <4 x i32> operand, yielding <2 x i64>.
167 __m128i test_mm_hsubq_epi32(__m128i a) {
168 // CHECK-LABEL: test_mm_hsubq_epi32
169 // CHECK: call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}})
170 return _mm_hsubq_epi32(a);
// Codegen test for _mm_cmov_si128(a, b, c). No XOP intrinsic call is expected
// here; the expected IR is plain <2 x i64> bitwise logic: an `and`, then an
// `xor` with all-ones whose result feeds a second `and`, then an `or` of the
// two `and` results. The xor, second and, and or must be consecutive
// instructions (the directives below use the -NEXT form).
173 __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
174 // CHECK-LABEL: test_mm_cmov_si128
175 // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}}
176 // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
177 // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]]
178 // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]]
179 return _mm_cmov_si128(a, b, c);
// Codegen test for _mm256_cmov_si256(a, b, c). No XOP intrinsic call is
// expected here; the expected IR is plain <4 x i64> bitwise logic: an `and`,
// then an `xor` with all-ones whose result feeds a second `and`, then an `or`
// of the two `and` results. The xor, second and, and or must be consecutive
// instructions (the directives below use the -NEXT form).
182 __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
183 // CHECK-LABEL: test_mm256_cmov_si256
184 // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}}
185 // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
186 // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]]
187 // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]]
188 return _mm256_cmov_si256(a, b, c);
// Codegen test for _mm_perm_epi8(a, b, c): the emitted IR is expected to call
// @llvm.x86.xop.vpperm with three <16 x i8> operands, yielding <16 x i8>.
191 __m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
192 // CHECK-LABEL: test_mm_perm_epi8
193 // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
194 return _mm_perm_epi8(a, b, c);
// Codegen test for _mm_rot_epi8(a, b): the expected IR is a call to the
// target-independent @llvm.fshl.v16i8 intrinsic (three <16 x i8> operands)
// rather than an x86-specific XOP intrinsic.
197 __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
198 // CHECK-LABEL: test_mm_rot_epi8
199 // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
200 return _mm_rot_epi8(a, b);
// Codegen test for _mm_rot_epi16(a, b): the expected IR is a call to the
// target-independent @llvm.fshl.v8i16 intrinsic (three <8 x i16> operands)
// rather than an x86-specific XOP intrinsic.
203 __m128i test_mm_rot_epi16(__m128i a, __m128i b) {
204 // CHECK-LABEL: test_mm_rot_epi16
205 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
206 return _mm_rot_epi16(a, b);
// Codegen test for _mm_rot_epi32(a, b): the expected IR is a call to the
// target-independent @llvm.fshl.v4i32 intrinsic (three <4 x i32> operands)
// rather than an x86-specific XOP intrinsic.
209 __m128i test_mm_rot_epi32(__m128i a, __m128i b) {
210 // CHECK-LABEL: test_mm_rot_epi32
211 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
212 return _mm_rot_epi32(a, b);
// Codegen test for _mm_rot_epi64(a, b): the expected IR is a call to the
// target-independent @llvm.fshl.v2i64 intrinsic (three <2 x i64> operands)
// rather than an x86-specific XOP intrinsic.
215 __m128i test_mm_rot_epi64(__m128i a, __m128i b) {
216 // CHECK-LABEL: test_mm_rot_epi64
217 // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
218 return _mm_rot_epi64(a, b);
// Codegen test for _mm_roti_epi8(a, 1): the expected IR is a call to
// @llvm.fshl.v16i8 whose third operand is a constant vector with i8 1 in
// every one of its 16 lanes.
221 __m128i test_mm_roti_epi8(__m128i a) {
222 // CHECK-LABEL: test_mm_roti_epi8
223 // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
224 return _mm_roti_epi8(a, 1);
// Codegen test for _mm_roti_epi16(a, 50): the expected IR is a call to
// @llvm.fshl.v8i16 whose third operand is a constant vector of i16 50.
// The amount appears in the IR as written (50), not reduced to the lane width.
227 __m128i test_mm_roti_epi16(__m128i a) {
228 // CHECK-LABEL: test_mm_roti_epi16
229 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50>)
230 return _mm_roti_epi16(a, 50);
// Codegen test for _mm_roti_epi32(a, -30): the expected IR is a call to
// @llvm.fshl.v4i32 whose third operand is a constant vector of i32 226.
// NOTE(review): 226 == (-30 & 0xff), so the immediate is presumably narrowed
// to 8 bits before being widened to the lane type -- confirm against the
// _mm_roti_epi32 definition in xopintrin.h.
233 __m128i test_mm_roti_epi32(__m128i a) {
234 // CHECK-LABEL: test_mm_roti_epi32
235 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 226, i32 226, i32 226, i32 226>)
236 return _mm_roti_epi32(a, -30);
// Codegen test for _mm_roti_epi64(a, 100): the expected IR is a call to
// @llvm.fshl.v2i64 whose third operand is a constant vector of i64 100.
// The amount appears in the IR as written (100), not reduced to the lane width.
239 __m128i test_mm_roti_epi64(__m128i a) {
240 // CHECK-LABEL: test_mm_roti_epi64
241 // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 100, i64 100>)
242 return _mm_roti_epi64(a, 100);
// Codegen test for _mm_shl_epi8(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshlb with two <16 x i8> operands, yielding <16 x i8>.
245 __m128i test_mm_shl_epi8(__m128i a, __m128i b) {
246 // CHECK-LABEL: test_mm_shl_epi8
247 // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
248 return _mm_shl_epi8(a, b);
// Codegen test for _mm_shl_epi16(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshlw with two <8 x i16> operands, yielding <8 x i16>.
251 __m128i test_mm_shl_epi16(__m128i a, __m128i b) {
252 // CHECK-LABEL: test_mm_shl_epi16
253 // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
254 return _mm_shl_epi16(a, b);
// Codegen test for _mm_shl_epi32(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshld with two <4 x i32> operands, yielding <4 x i32>.
257 __m128i test_mm_shl_epi32(__m128i a, __m128i b) {
258 // CHECK-LABEL: test_mm_shl_epi32
259 // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
260 return _mm_shl_epi32(a, b);
// Codegen test for _mm_shl_epi64(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshlq with two <2 x i64> operands, yielding <2 x i64>.
263 __m128i test_mm_shl_epi64(__m128i a, __m128i b) {
264 // CHECK-LABEL: test_mm_shl_epi64
265 // CHECK: call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
266 return _mm_shl_epi64(a, b);
// Codegen test for _mm_sha_epi8(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshab with two <16 x i8> operands, yielding <16 x i8>.
269 __m128i test_mm_sha_epi8(__m128i a, __m128i b) {
270 // CHECK-LABEL: test_mm_sha_epi8
271 // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
272 return _mm_sha_epi8(a, b);
// Codegen test for _mm_sha_epi16(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshaw with two <8 x i16> operands, yielding <8 x i16>.
275 __m128i test_mm_sha_epi16(__m128i a, __m128i b) {
276 // CHECK-LABEL: test_mm_sha_epi16
277 // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
278 return _mm_sha_epi16(a, b);
// Codegen test for _mm_sha_epi32(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshad with two <4 x i32> operands, yielding <4 x i32>.
281 __m128i test_mm_sha_epi32(__m128i a, __m128i b) {
282 // CHECK-LABEL: test_mm_sha_epi32
283 // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
284 return _mm_sha_epi32(a, b);
// Codegen test for _mm_sha_epi64(a, b): the emitted IR is expected to call
// @llvm.x86.xop.vpshaq with two <2 x i64> operands, yielding <2 x i64>.
287 __m128i test_mm_sha_epi64(__m128i a, __m128i b) {
288 // CHECK-LABEL: test_mm_sha_epi64
289 // CHECK: call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
290 return _mm_sha_epi64(a, b);
// Codegen test for _mm_com_epu8(a, b, 0): the expected IR is a generic
// `icmp ult` on <16 x i8> followed by a sign-extension of the <16 x i1>
// result back to <16 x i8>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
293 __m128i test_mm_com_epu8(__m128i a, __m128i b) {
294 // CHECK-LABEL: test_mm_com_epu8
295 // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
296 // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
297 return _mm_com_epu8(a, b, 0);
// Codegen test for _mm_com_epu16(a, b, 0): the expected IR is a generic
// `icmp ult` on <8 x i16> followed by a sign-extension of the <8 x i1>
// result back to <8 x i16>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
300 __m128i test_mm_com_epu16(__m128i a, __m128i b) {
301 // CHECK-LABEL: test_mm_com_epu16
302 // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
303 // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
304 return _mm_com_epu16(a, b, 0);
// Codegen test for _mm_com_epu32(a, b, 0): the expected IR is a generic
// `icmp ult` on <4 x i32> followed by a sign-extension of the <4 x i1>
// result back to <4 x i32>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
307 __m128i test_mm_com_epu32(__m128i a, __m128i b) {
308 // CHECK-LABEL: test_mm_com_epu32
309 // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
310 // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
311 return _mm_com_epu32(a, b, 0);
// Codegen test for _mm_com_epu64(a, b, 0): the expected IR is a generic
// `icmp ult` on <2 x i64> followed by a sign-extension of the <2 x i1>
// result back to <2 x i64>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
314 __m128i test_mm_com_epu64(__m128i a, __m128i b) {
315 // CHECK-LABEL: test_mm_com_epu64
316 // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
317 // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
318 return _mm_com_epu64(a, b, 0);
// Codegen test for _mm_com_epi8(a, b, 0): the expected IR is a generic
// `icmp slt` on <16 x i8> followed by a sign-extension of the <16 x i1>
// result back to <16 x i8>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
321 __m128i test_mm_com_epi8(__m128i a, __m128i b) {
322 // CHECK-LABEL: test_mm_com_epi8
323 // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
324 // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
325 return _mm_com_epi8(a, b, 0);
// Codegen test for _mm_com_epi16(a, b, 0): the expected IR is a generic
// `icmp slt` on <8 x i16> followed by a sign-extension of the <8 x i1>
// result back to <8 x i16>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
328 __m128i test_mm_com_epi16(__m128i a, __m128i b) {
329 // CHECK-LABEL: test_mm_com_epi16
330 // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
331 // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
332 return _mm_com_epi16(a, b, 0);
// Codegen test for _mm_com_epi32(a, b, 0): the expected IR is a generic
// `icmp slt` on <4 x i32> followed by a sign-extension of the <4 x i1>
// result back to <4 x i32>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
335 __m128i test_mm_com_epi32(__m128i a, __m128i b) {
336 // CHECK-LABEL: test_mm_com_epi32
337 // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
338 // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
339 return _mm_com_epi32(a, b, 0);
// Codegen test for _mm_com_epi64(a, b, 0): the expected IR is a generic
// `icmp slt` on <2 x i64> followed by a sign-extension of the <2 x i1>
// result back to <2 x i64>, not an XOP intrinsic call.
// NOTE(review): predicate 0 presumably selects "less than" -- confirm against
// the comparison-predicate constants in xopintrin.h.
342 __m128i test_mm_com_epi64(__m128i a, __m128i b) {
343 // CHECK-LABEL: test_mm_com_epi64
344 // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
345 // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
346 return _mm_com_epi64(a, b, 0);
// Codegen test for _mm_permute2_pd(a, b, c, 0): the emitted IR is expected to
// call @llvm.x86.xop.vpermil2pd with two <2 x double> operands, a <2 x i64>
// operand, and the immediate passed through as `i8 0`.
349 __m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) {
350 // CHECK-LABEL: test_mm_permute2_pd
351 // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
352 return _mm_permute2_pd(a, b, c, 0);
// Codegen test for _mm256_permute2_pd(a, b, c, 0): the emitted IR is expected
// to call @llvm.x86.xop.vpermil2pd.256 with two <4 x double> operands, a
// <4 x i64> operand, and the immediate passed through as `i8 0`.
355 __m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) {
356 // CHECK-LABEL: test_mm256_permute2_pd
357 // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0)
358 return _mm256_permute2_pd(a, b, c, 0);
// Codegen test for _mm_permute2_ps(a, b, c, 0): the emitted IR is expected to
// call @llvm.x86.xop.vpermil2ps with two <4 x float> operands, a <4 x i32>
// operand, and the immediate passed through as `i8 0`.
361 __m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) {
362 // CHECK-LABEL: test_mm_permute2_ps
363 // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
364 return _mm_permute2_ps(a, b, c, 0);
// Codegen test for _mm256_permute2_ps(a, b, c, 0): the emitted IR is expected
// to call @llvm.x86.xop.vpermil2ps.256 with two <8 x float> operands, an
// <8 x i32> operand, and the immediate passed through as `i8 0`.
367 __m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) {
368 // CHECK-LABEL: test_mm256_permute2_ps
369 // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0)
370 return _mm256_permute2_ps(a, b, c, 0);
// Codegen test for _mm_frcz_ss(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.ss on a <4 x float> operand, yielding <4 x float>.
373 __m128 test_mm_frcz_ss(__m128 a) {
374 // CHECK-LABEL: test_mm_frcz_ss
375 // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}})
376 return _mm_frcz_ss(a);
// Codegen test for _mm_frcz_sd(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.sd on a <2 x double> operand, yielding <2 x double>.
379 __m128d test_mm_frcz_sd(__m128d a) {
380 // CHECK-LABEL: test_mm_frcz_sd
381 // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}})
382 return _mm_frcz_sd(a);
// Codegen test for _mm_frcz_ps(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.ps on a <4 x float> operand, yielding <4 x float>.
385 __m128 test_mm_frcz_ps(__m128 a) {
386 // CHECK-LABEL: test_mm_frcz_ps
387 // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}})
388 return _mm_frcz_ps(a);
// Codegen test for _mm_frcz_pd(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.pd on a <2 x double> operand, yielding <2 x double>.
391 __m128d test_mm_frcz_pd(__m128d a) {
392 // CHECK-LABEL: test_mm_frcz_pd
393 // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}})
394 return _mm_frcz_pd(a);
// Codegen test for _mm256_frcz_ps(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.ps.256 on an <8 x float> operand, yielding <8 x float>.
397 __m256 test_mm256_frcz_ps(__m256 a) {
398 // CHECK-LABEL: test_mm256_frcz_ps
399 // CHECK: call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}})
400 return _mm256_frcz_ps(a);
// Codegen test for _mm256_frcz_pd(a): the emitted IR is expected to call
// @llvm.x86.xop.vfrcz.pd.256 on a <4 x double> operand, yielding <4 x double>.
403 __m256d test_mm256_frcz_pd(__m256d a) {
404 // CHECK-LABEL: test_mm256_frcz_pd
405 // CHECK: call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}})
406 return _mm256_frcz_pd(a);