// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
// NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
// Expects _mm_maccs_epi16 to be emitted as a call to @llvm.x86.xop.vpmacssww.
__m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_maccs_epi16(a, b, c);
}
// Expects _mm_macc_epi16 to be emitted as a call to @llvm.x86.xop.vpmacsww.
__m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_macc_epi16(a, b, c);
}
// Expects _mm_maccsd_epi16 to be emitted as a call to @llvm.x86.xop.vpmacsswd
// (16-bit inputs, 32-bit accumulator and result).
__m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccsd_epi16(a, b, c);
}
// Expects _mm_maccd_epi16 to be emitted as a call to @llvm.x86.xop.vpmacswd
// (16-bit inputs, 32-bit accumulator and result).
__m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccd_epi16(a, b, c);
}
// Expects _mm_maccs_epi32 to be emitted as a call to @llvm.x86.xop.vpmacssdd.
__m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccs_epi32(a, b, c);
}
// Expects _mm_macc_epi32 to be emitted as a call to @llvm.x86.xop.vpmacsdd.
__m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_macc_epi32(a, b, c);
}
// Expects _mm_maccslo_epi32 to be emitted as a call to @llvm.x86.xop.vpmacssdql
// (32-bit inputs, 64-bit accumulator and result).
__m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccslo_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccslo_epi32(a, b, c);
}
// Expects _mm_macclo_epi32 to be emitted as a call to @llvm.x86.xop.vpmacsdql
// (32-bit inputs, 64-bit accumulator and result).
__m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macclo_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macclo_epi32(a, b, c);
}
// Expects _mm_maccshi_epi32 to be emitted as a call to @llvm.x86.xop.vpmacssdqh
// (32-bit inputs, 64-bit accumulator and result).
__m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccshi_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccshi_epi32(a, b, c);
}
// Expects _mm_macchi_epi32 to be emitted as a call to @llvm.x86.xop.vpmacsdqh
// (32-bit inputs, 64-bit accumulator and result).
__m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macchi_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macchi_epi32(a, b, c);
}
// Expects _mm_maddsd_epi16 to be emitted as a call to @llvm.x86.xop.vpmadcsswd
// (16-bit inputs, 32-bit accumulator and result).
__m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddsd_epi16(a, b, c);
}
// Expects _mm_maddd_epi16 to be emitted as a call to @llvm.x86.xop.vpmadcswd
// (16-bit inputs, 32-bit accumulator and result).
__m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddd_epi16(a, b, c);
}
// Expects _mm_haddw_epi8 to be emitted as a call to @llvm.x86.xop.vphaddbw
// (<16 x i8> in, <8 x i16> out).
__m128i test_mm_haddw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
  return _mm_haddw_epi8(a);
}
// Expects _mm_haddd_epi8 to be emitted as a call to @llvm.x86.xop.vphaddbd
// (<16 x i8> in, <4 x i32> out).
__m128i test_mm_haddd_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
  return _mm_haddd_epi8(a);
}
// Expects _mm_haddq_epi8 to be emitted as a call to @llvm.x86.xop.vphaddbq
// (<16 x i8> in, <2 x i64> out).
__m128i test_mm_haddq_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi8
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}})
  return _mm_haddq_epi8(a);
}
// Expects _mm_haddd_epi16 to be emitted as a call to @llvm.x86.xop.vphaddwd
// (<8 x i16> in, <4 x i32> out).
__m128i test_mm_haddd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}})
  return _mm_haddd_epi16(a);
}
// Expects _mm_haddq_epi16 to be emitted as a call to @llvm.x86.xop.vphaddwq
// (<8 x i16> in, <2 x i64> out).
__m128i test_mm_haddq_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi16
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}})
  return _mm_haddq_epi16(a);
}
// Expects _mm_haddq_epi32 to be emitted as a call to @llvm.x86.xop.vphadddq
// (<4 x i32> in, <2 x i64> out).
__m128i test_mm_haddq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}})
  return _mm_haddq_epi32(a);
}
// Expects _mm_haddw_epu8 to be emitted as a call to @llvm.x86.xop.vphaddubw
// (<16 x i8> in, <8 x i16> out).
__m128i test_mm_haddw_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epu8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}})
  return _mm_haddw_epu8(a);
}
// Expects _mm_haddd_epu8 to be emitted as a call to @llvm.x86.xop.vphaddubd
// (<16 x i8> in, <4 x i32> out).
__m128i test_mm_haddd_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}})
  return _mm_haddd_epu8(a);
}
// Expects _mm_haddq_epu8 to be emitted as a call to @llvm.x86.xop.vphaddubq
// (<16 x i8> in, <2 x i64> out).
__m128i test_mm_haddq_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu8
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}})
  return _mm_haddq_epu8(a);
}
// Expects _mm_haddd_epu16 to be emitted as a call to @llvm.x86.xop.vphadduwd
// (<8 x i16> in, <4 x i32> out).
__m128i test_mm_haddd_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}})
  return _mm_haddd_epu16(a);
}
// Expects _mm_haddq_epu16 to be emitted as a call to @llvm.x86.xop.vphadduwq
// (<8 x i16> in, <2 x i64> out).
__m128i test_mm_haddq_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu16
  // CHECK: call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}})
  return _mm_haddq_epu16(a);
}
// Expects _mm_haddq_epu32 to be emitted as a call to @llvm.x86.xop.vphaddudq
// (<4 x i32> in, <2 x i64> out).
__m128i test_mm_haddq_epu32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}})
  return _mm_haddq_epu32(a);
}
// Expects _mm_hsubw_epi8 to be emitted as a call to @llvm.x86.xop.vphsubbw
// (<16 x i8> in, <8 x i16> out).
__m128i test_mm_hsubw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_hsubw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}})
  return _mm_hsubw_epi8(a);
}
// Expects _mm_hsubd_epi16 to be emitted as a call to @llvm.x86.xop.vphsubwd
// (<8 x i16> in, <4 x i32> out).
__m128i test_mm_hsubd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_hsubd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}})
  return _mm_hsubd_epi16(a);
}
// Expects _mm_hsubq_epi32 to be emitted as a call to @llvm.x86.xop.vphsubdq
// (<4 x i32> in, <2 x i64> out).
__m128i test_mm_hsubq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_hsubq_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}})
  return _mm_hsubq_epi32(a);
}
// Expects _mm_cmov_si128 to be emitted as plain bitwise IR on <2 x i64>
// rather than an intrinsic call: an `and`, an `xor` with all-ones, a second
// `and` using the negated value, and a final `or` combining both halves.
__m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_cmov_si128
  // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]]
  return _mm_cmov_si128(a, b, c);
}
// Expects _mm256_cmov_si256 to be emitted as plain bitwise IR on <4 x i64>
// rather than an intrinsic call: an `and`, an `xor` with all-ones, a second
// `and` using the negated value, and a final `or` combining both halves.
__m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
  // CHECK-LABEL: test_mm256_cmov_si256
  // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}}
  // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]]
  return _mm256_cmov_si256(a, b, c);
}
// Expects _mm_perm_epi8 to be emitted as a call to @llvm.x86.xop.vpperm.
__m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_perm_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_perm_epi8(a, b, c);
}
// Expects the variable rotate _mm_rot_epi8 to be emitted as a call to the
// generic @llvm.fshl.v16i8 intrinsic.
__m128i test_mm_rot_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi8
  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_rot_epi8(a, b);
}
// Expects the variable rotate _mm_rot_epi16 to be emitted as a call to the
// generic @llvm.fshl.v8i16 intrinsic.
__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi16
  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_rot_epi16(a, b);
}
// Expects the variable rotate _mm_rot_epi32 to be emitted as a call to the
// generic @llvm.fshl.v4i32 intrinsic.
__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi32
  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_rot_epi32(a, b);
}
// Expects the variable rotate _mm_rot_epi64 to be emitted as a call to the
// generic @llvm.fshl.v2i64 intrinsic.
__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi64
  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_rot_epi64(a, b);
}
// Expects the immediate rotate _mm_roti_epi8(a, 1) to be emitted as a call to
// @llvm.fshl.v16i8 whose shift operand is a splat of i8 1.
__m128i test_mm_roti_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi8
  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1))
  return _mm_roti_epi8(a, 1);
}
// Expects the immediate rotate _mm_roti_epi16(a, 50) to be emitted as a call
// to @llvm.fshl.v8i16 whose shift operand is a splat of i16 50 (the
// out-of-range count is passed through unreduced).
__m128i test_mm_roti_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi16
  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50))
  return _mm_roti_epi16(a, 50);
}
// Expects the immediate rotate _mm_roti_epi32(a, -30) to be emitted as a call
// to @llvm.fshl.v4i32 whose shift operand is a splat of i32 226, i.e. the
// negative immediate shows up as its low 8 bits (-30 & 0xff == 226).
__m128i test_mm_roti_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi32
  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226))
  return _mm_roti_epi32(a, -30);
}
// Expects the immediate rotate _mm_roti_epi64(a, 100) to be emitted as a call
// to @llvm.fshl.v2i64 whose shift operand is a splat of i64 100 (the
// out-of-range count is passed through unreduced).
__m128i test_mm_roti_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi64
  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100))
  return _mm_roti_epi64(a, 100);
}
// Expects _mm_shl_epi8 to be emitted as a call to @llvm.x86.xop.vpshlb.
__m128i test_mm_shl_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_shl_epi8(a, b);
}
// Expects _mm_shl_epi16 to be emitted as a call to @llvm.x86.xop.vpshlw.
__m128i test_mm_shl_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_shl_epi16(a, b);
}
// Expects _mm_shl_epi32 to be emitted as a call to @llvm.x86.xop.vpshld.
__m128i test_mm_shl_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_shl_epi32(a, b);
}
// Expects _mm_shl_epi64 to be emitted as a call to @llvm.x86.xop.vpshlq.
__m128i test_mm_shl_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_shl_epi64(a, b);
}
// Expects _mm_sha_epi8 to be emitted as a call to @llvm.x86.xop.vpshab.
__m128i test_mm_sha_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sha_epi8(a, b);
}
// Expects _mm_sha_epi16 to be emitted as a call to @llvm.x86.xop.vpshaw.
__m128i test_mm_sha_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sha_epi16(a, b);
}
// Expects _mm_sha_epi32 to be emitted as a call to @llvm.x86.xop.vpshad.
__m128i test_mm_sha_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sha_epi32(a, b);
}
// Expects _mm_sha_epi64 to be emitted as a call to @llvm.x86.xop.vpshaq.
__m128i test_mm_sha_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sha_epi64(a, b);
}
293 __m128i
test_mm_com_epu8(__m128i a
, __m128i b
) {
294 // CHECK-LABEL: test_mm_com_epu8
295 // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
296 // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
297 return _mm_com_epu8(a
, b
, 0);
300 __m128i
test_mm_com_epu16(__m128i a
, __m128i b
) {
301 // CHECK-LABEL: test_mm_com_epu16
302 // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
303 // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
304 return _mm_com_epu16(a
, b
, 0);
307 __m128i
test_mm_com_epu32(__m128i a
, __m128i b
) {
308 // CHECK-LABEL: test_mm_com_epu32
309 // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
310 // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
311 return _mm_com_epu32(a
, b
, 0);
314 __m128i
test_mm_com_epu64(__m128i a
, __m128i b
) {
315 // CHECK-LABEL: test_mm_com_epu64
316 // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
317 // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
318 return _mm_com_epu64(a
, b
, 0);
321 __m128i
test_mm_com_epi8(__m128i a
, __m128i b
) {
322 // CHECK-LABEL: test_mm_com_epi8
323 // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
324 // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
325 return _mm_com_epi8(a
, b
, 0);
328 __m128i
test_mm_com_epi16(__m128i a
, __m128i b
) {
329 // CHECK-LABEL: test_mm_com_epi16
330 // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
331 // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
332 return _mm_com_epi16(a
, b
, 0);
335 __m128i
test_mm_com_epi32(__m128i a
, __m128i b
) {
336 // CHECK-LABEL: test_mm_com_epi32
337 // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
338 // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
339 return _mm_com_epi32(a
, b
, 0);
342 __m128i
test_mm_com_epi64(__m128i a
, __m128i b
) {
343 // CHECK-LABEL: test_mm_com_epi64
344 // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
345 // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
346 return _mm_com_epi64(a
, b
, 0);
// Expects _mm_permute2_pd with immediate 0 to be emitted as a call to
// @llvm.x86.xop.vpermil2pd, with the immediate passed as a trailing i8 0.
__m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_pd
  // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
  return _mm_permute2_pd(a, b, c, 0);
}
// Expects _mm256_permute2_pd with immediate 0 to be emitted as a call to
// @llvm.x86.xop.vpermil2pd.256, with the immediate passed as a trailing i8 0.
__m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_pd
  // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0)
  return _mm256_permute2_pd(a, b, c, 0);
}
// Expects _mm_permute2_ps with immediate 0 to be emitted as a call to
// @llvm.x86.xop.vpermil2ps, with the immediate passed as a trailing i8 0.
__m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_ps
  // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
  return _mm_permute2_ps(a, b, c, 0);
}
// Expects _mm256_permute2_ps with immediate 0 to be emitted as a call to
// @llvm.x86.xop.vpermil2ps.256, with the immediate passed as a trailing i8 0.
__m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_ps
  // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0)
  return _mm256_permute2_ps(a, b, c, 0);
}
373 __m128
test_mm_frcz_ss(__m128 a
) {
374 // CHECK-LABEL: test_mm_frcz_ss
375 // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}})
376 return _mm_frcz_ss(a
);
379 __m128d
test_mm_frcz_sd(__m128d a
) {
380 // CHECK-LABEL: test_mm_frcz_sd
381 // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}})
382 return _mm_frcz_sd(a
);
// Expects _mm_frcz_ps to be emitted as a call to @llvm.x86.xop.vfrcz.ps.
__m128 test_mm_frcz_ps(__m128 a) {
  // CHECK-LABEL: test_mm_frcz_ps
  // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}})
  return _mm_frcz_ps(a);
}
// Expects _mm_frcz_pd to be emitted as a call to @llvm.x86.xop.vfrcz.pd.
__m128d test_mm_frcz_pd(__m128d a) {
  // CHECK-LABEL: test_mm_frcz_pd
  // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}})
  return _mm_frcz_pd(a);
}
// Expects _mm256_frcz_ps to be emitted as a call to @llvm.x86.xop.vfrcz.ps.256.
__m256 test_mm256_frcz_ps(__m256 a) {
  // CHECK-LABEL: test_mm256_frcz_ps
  // CHECK: call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}})
  return _mm256_frcz_ps(a);
}
// Expects _mm256_frcz_pd to be emitted as a call to @llvm.x86.xop.vfrcz.pd.256.
__m256d test_mm256_frcz_pd(__m256d a) {
  // CHECK-LABEL: test_mm256_frcz_pd
  // CHECK: call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}})
  return _mm256_frcz_pd(a);
}