// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
11 #include <immintrin.h>
12 #include "builtin_test_helpers.h"
// _mm_abs_pi8: expects a call to the llvm.abs intrinsic on <8 x i8>.
__m64 test_mm_abs_pi8(__m64 a) {
  // CHECK-LABEL: test_mm_abs_pi8
  // CHECK: call <8 x i8> @llvm.abs.v8i8(
  return _mm_abs_pi8(a);
}
// _mm_abs_pi16: expects a call to the llvm.abs intrinsic on <4 x i16>.
__m64 test_mm_abs_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_abs_pi16
  // CHECK: call <4 x i16> @llvm.abs.v4i16(
  return _mm_abs_pi16(a);
}
// _mm_abs_pi32: expects a call to the llvm.abs intrinsic on <2 x i32>.
__m64 test_mm_abs_pi32(__m64 a) {
  // CHECK-LABEL: test_mm_abs_pi32
  // CHECK: call <2 x i32> @llvm.abs.v2i32(
  return _mm_abs_pi32(a);
}
// _mm_add_pi8: expects a plain vector add on <8 x i8>.
__m64 test_mm_add_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_add_pi8
  // CHECK: add <8 x i8> {{%.*}}, {{%.*}}
  return _mm_add_pi8(a, b);
}
// _mm_add_pi16: expects a plain vector add on <4 x i16>.
__m64 test_mm_add_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_add_pi16
  // CHECK: add <4 x i16> {{%.*}}, {{%.*}}
  return _mm_add_pi16(a, b);
}
// _mm_add_pi32: expects a plain vector add on <2 x i32>.
__m64 test_mm_add_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_add_pi32
  // CHECK: add <2 x i32> {{%.*}}, {{%.*}}
  return _mm_add_pi32(a, b);
}
// _mm_add_si64: expects a scalar i64 add.
__m64 test_mm_add_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_add_si64
  // CHECK: add i64 {{%.*}}, {{%.*}}
  return _mm_add_si64(a, b);
}
// _mm_adds_pi8: expects the signed saturating add intrinsic on <8 x i8>.
__m64 test_mm_adds_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_adds_pi8
  // CHECK: call <8 x i8> @llvm.sadd.sat.v8i8(
  return _mm_adds_pi8(a, b);
}
// _mm_adds_pi16: expects the signed saturating add intrinsic on <4 x i16>.
__m64 test_mm_adds_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_adds_pi16
  // CHECK: call <4 x i16> @llvm.sadd.sat.v4i16(
  return _mm_adds_pi16(a, b);
}
// _mm_adds_pu8: expects the unsigned saturating add intrinsic on <8 x i8>.
__m64 test_mm_adds_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_adds_pu8
  // CHECK: call <8 x i8> @llvm.uadd.sat.v8i8(
  return _mm_adds_pu8(a, b);
}
// _mm_adds_pu16: expects the unsigned saturating add intrinsic on <4 x i16>.
__m64 test_mm_adds_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_adds_pu16
  // CHECK: call <4 x i16> @llvm.uadd.sat.v4i16(
  return _mm_adds_pu16(a, b);
}
// _mm_alignr_pi8 with an immediate of 2: expects a 16-lane byte shuffle
// against zeroinitializer selecting indices 2..17.
__m64 test_mm_alignr_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_alignr_pi8
  // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  return _mm_alignr_pi8(a, b, 2);
}
// _mm_and_si64: expects a bitwise and on <1 x i64>.
__m64 test_mm_and_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_and_si64
  // CHECK: and <1 x i64> {{%.*}}, {{%.*}}
  return _mm_and_si64(a, b);
}
// _mm_andnot_si64: expects an xor with all-ones whose result feeds an and,
// both on <1 x i64>.
__m64 test_mm_andnot_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_andnot_si64
  // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, splat (i64 -1)
  // CHECK: and <1 x i64> [[TMP]], {{%.*}}
  return _mm_andnot_si64(a, b);
}
// _mm_avg_pu8: expects the 128-bit SSE2 pavg.b intrinsic on <16 x i8>.
__m64 test_mm_avg_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_avg_pu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(
  return _mm_avg_pu8(a, b);
}
// _mm_avg_pu16: expects the 128-bit SSE2 pavg.w intrinsic on <8 x i16>.
__m64 test_mm_avg_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_avg_pu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(
  return _mm_avg_pu16(a, b);
}
// _mm_cmpeq_pi8: expects an icmp eq on <8 x i8> immediately followed by a
// sign-extension of the <8 x i1> mask back to <8 x i8>.
__m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpeq_pi8
  // CHECK: [[CMP:%.*]] = icmp eq <8 x i8> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
  return _mm_cmpeq_pi8(a, b);
}
// _mm_cmpeq_pi16: expects an icmp eq on <4 x i16> immediately followed by a
// sign-extension of the <4 x i1> mask back to <4 x i16>.
__m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpeq_pi16
  // CHECK: [[CMP:%.*]] = icmp eq <4 x i16> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
  return _mm_cmpeq_pi16(a, b);
}
// _mm_cmpeq_pi32: expects an icmp eq on <2 x i32> immediately followed by a
// sign-extension of the <2 x i1> mask back to <2 x i32>.
__m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpeq_pi32
  // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
  return _mm_cmpeq_pi32(a, b);
}
// _mm_cmpgt_pi8: expects a signed icmp sgt on <8 x i8> immediately followed
// by a sign-extension of the <8 x i1> mask back to <8 x i8>.
__m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpgt_pi8
  // CHECK: [[CMP:%.*]] = icmp sgt <8 x i8> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
  return _mm_cmpgt_pi8(a, b);
}
// _mm_cmpgt_pi16: expects a signed icmp sgt on <4 x i16> immediately followed
// by a sign-extension of the <4 x i1> mask back to <4 x i16>.
__m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpgt_pi16
  // CHECK: [[CMP:%.*]] = icmp sgt <4 x i16> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
  return _mm_cmpgt_pi16(a, b);
}
// _mm_cmpgt_pi32: expects a signed icmp sgt on <2 x i32> immediately followed
// by a sign-extension of the <2 x i1> mask back to <2 x i32>.
__m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cmpgt_pi32
  // CHECK: [[CMP:%.*]] = icmp sgt <2 x i32> {{%.*}}, {{%.*}}
  // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
  return _mm_cmpgt_pi32(a, b);
}
// _mm_cvt_pi2ps: expects a sitofp from <4 x i32> to <4 x float>.
__m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
  // CHECK-LABEL: test_mm_cvt_pi2ps
  // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
  return _mm_cvt_pi2ps(a, b);
}
// _mm_cvt_ps2pi: expects the SSE2 cvtps2dq intrinsic returning <4 x i32>.
__m64 test_mm_cvt_ps2pi(__m128 a) {
  // CHECK-LABEL: test_mm_cvt_ps2pi
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(
  return _mm_cvt_ps2pi(a);
}
// _mm_cvtpd_pi32: expects the SSE2 cvtpd2dq intrinsic returning <4 x i32>.
__m64 test_mm_cvtpd_pi32(__m128d a) {
  // CHECK-LABEL: test_mm_cvtpd_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(
  return _mm_cvtpd_pi32(a);
}
// _mm_cvtpi16_ps: expects a sitofp from <4 x i16> to <4 x float>.
__m128 test_mm_cvtpi16_ps(__m64 a) {
  // CHECK-LABEL: test_mm_cvtpi16_ps
  // CHECK: sitofp <4 x i16> {{%.*}} to <4 x float>
  return _mm_cvtpi16_ps(a);
}
// _mm_cvtpi32_pd: expects a sitofp from <2 x i32> to <2 x double>.
__m128d test_mm_cvtpi32_pd(__m64 a) {
  // CHECK-LABEL: test_mm_cvtpi32_pd
  // CHECK: sitofp <2 x i32> {{%.*}} to <2 x double>
  return _mm_cvtpi32_pd(a);
}
// _mm_cvtpi32_ps: expects a sitofp from <4 x i32> to <4 x float>.
__m128 test_mm_cvtpi32_ps(__m128 a, __m64 b) {
  // CHECK-LABEL: test_mm_cvtpi32_ps
  // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
  return _mm_cvtpi32_ps(a, b);
}
// _mm_cvtpi32x2_ps: expects a sitofp from <4 x i32> to <4 x float>.
__m128 test_mm_cvtpi32x2_ps(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_cvtpi32x2_ps
  // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
  return _mm_cvtpi32x2_ps(a, b);
}
// _mm_cvtps_pi16: expects cvtps2dq whose <4 x i32> result is the first
// operand of a packssdw.128 call.
__m64 test_mm_cvtps_pi16(__m128 a) {
  // CHECK-LABEL: test_mm_cvtps_pi16
  // CHECK: [[TMP0:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> {{%.*}})
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP0]],
  return _mm_cvtps_pi16(a);
}
// _mm_cvtps_pi32: expects the SSE2 cvtps2dq intrinsic returning <4 x i32>.
__m64 test_mm_cvtps_pi32(__m128 a) {
  // CHECK-LABEL: test_mm_cvtps_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(
  return _mm_cvtps_pi32(a);
}
// _mm_cvtsi32_si64: expects an insertelement into a <2 x i32> vector.
__m64 test_mm_cvtsi32_si64(int a) {
  // CHECK-LABEL: test_mm_cvtsi32_si64
  // CHECK: insertelement <2 x i32>
  return _mm_cvtsi32_si64(a);
}
// _mm_cvtsi64_si32: expects an extractelement from a <2 x i32> vector.
int test_mm_cvtsi64_si32(__m64 a) {
  // CHECK-LABEL: test_mm_cvtsi64_si32
  // CHECK: extractelement <2 x i32>
  return _mm_cvtsi64_si32(a);
}
// _mm_cvttpd_pi32: expects the SSE2 cvttpd2dq intrinsic returning <4 x i32>.
__m64 test_mm_cvttpd_pi32(__m128d a) {
  // CHECK-LABEL: test_mm_cvttpd_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(
  return _mm_cvttpd_pi32(a);
}
// _mm_cvttps_pi32: expects the SSE2 cvttps2dq intrinsic returning <4 x i32>.
__m64 test_mm_cvttps_pi32(__m128 a) {
  // CHECK-LABEL: test_mm_cvttps_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(
  return _mm_cvttps_pi32(a);
}
// _mm_extract_pi16 with index 2: expects an extractelement of lane 2 from
// a <4 x i16> vector.
int test_mm_extract_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_extract_pi16
  // CHECK: extractelement <4 x i16> {{%.*}}, i64 2
  return _mm_extract_pi16(a, 2);
}
// _m_from_int: expects an insertelement into a <2 x i32> vector.
__m64 test_m_from_int(int a) {
  // CHECK-LABEL: test_m_from_int
  // CHECK: insertelement <2 x i32>
  return _m_from_int(a);
}
// _m_from_int64: only the function label is matched; no particular
// instruction is required in the body.
__m64 test_m_from_int64(long long a) {
  // CHECK-LABEL: test_m_from_int64
  return _m_from_int64(a);
}
// _mm_hadd_pi16: expects the 128-bit SSSE3 phadd.w intrinsic.
__m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hadd_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
  return _mm_hadd_pi16(a, b);
}
// _mm_hadd_pi32: expects the 128-bit SSSE3 phadd.d intrinsic.
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hadd_pi32
  // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
  return _mm_hadd_pi32(a, b);
}
// _mm_hadds_pi16: expects the 128-bit SSSE3 phadd.sw intrinsic.
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hadds_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
  return _mm_hadds_pi16(a, b);
}
// _mm_hsub_pi16: expects the 128-bit SSSE3 phsub.w intrinsic.
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hsub_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
  return _mm_hsub_pi16(a, b);
}
// _mm_hsub_pi32: expects the 128-bit SSSE3 phsub.d intrinsic.
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hsub_pi32
  // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
  return _mm_hsub_pi32(a, b);
}
// _mm_hsubs_pi16: expects the 128-bit SSSE3 phsub.sw intrinsic.
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_hsubs_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
  return _mm_hsubs_pi16(a, b);
}
// _mm_insert_pi16 with index 2: expects an insertelement into <4 x i16>.
__m64 test_mm_insert_pi16(__m64 a, int d) {
  // CHECK-LABEL: test_mm_insert_pi16
  // CHECK: insertelement <4 x i16>
  return _mm_insert_pi16(a, d, 2);
}
// _mm_madd_pi16: expects the SSE2 pmadd.wd intrinsic returning <4 x i32>.
__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_madd_pi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(
  return _mm_madd_pi16(a, b);
}
// _mm_maddubs_pi16: expects the 128-bit SSSE3 pmadd.ub.sw intrinsic.
__m64 test_mm_maddubs_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_maddubs_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(
  return _mm_maddubs_pi16(a, b);
}
// _mm_maskmove_si64: expects a call to the SSE2 maskmov.dqu intrinsic.
void test_mm_maskmove_si64(__m64 d, __m64 n, char *p) {
  // CHECK-LABEL: test_mm_maskmove_si64
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(
  _mm_maskmove_si64(d, n, p);
}
// _mm_max_pi16: expects the signed max intrinsic on <4 x i16>.
__m64 test_mm_max_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_max_pi16
  // CHECK: call <4 x i16> @llvm.smax.v4i16(
  return _mm_max_pi16(a, b);
}
// _mm_max_pu8: expects the unsigned max intrinsic on <8 x i8>.
__m64 test_mm_max_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_max_pu8
  // CHECK: call <8 x i8> @llvm.umax.v8i8(
  return _mm_max_pu8(a, b);
}
// _mm_min_pi16: expects the signed min intrinsic on <4 x i16>.
__m64 test_mm_min_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_min_pi16
  // CHECK: call <4 x i16> @llvm.smin.v4i16(
  return _mm_min_pi16(a, b);
}
// _mm_min_pu8: expects the unsigned min intrinsic on <8 x i8>.
__m64 test_mm_min_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_min_pu8
  // CHECK: call <8 x i8> @llvm.umin.v8i8(
  return _mm_min_pu8(a, b);
}
// _mm_movemask_pi8: expects the 128-bit SSE2 pmovmskb intrinsic returning i32.
int test_mm_movemask_pi8(__m64 a) {
  // CHECK-LABEL: test_mm_movemask_pi8
  // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
  return _mm_movemask_pi8(a);
}
// _mm_mul_su32: expects two <2 x i64> masks with 4294967295 followed by a
// <2 x i64> multiply.
__m64 test_mm_mul_su32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mul_su32
  // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)
  // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_su32(a, b);
}
// _mm_mulhi_pi16: expects the SSE2 pmulh.w intrinsic on <8 x i16>.
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mulhi_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(
  return _mm_mulhi_pi16(a, b);
}
// _mm_mulhi_pu16: expects the SSE2 pmulhu.w intrinsic on <8 x i16>.
__m64 test_mm_mulhi_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mulhi_pu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(
  return _mm_mulhi_pu16(a, b);
}
// _mm_mulhrs_pi16: expects the 128-bit SSSE3 pmul.hr.sw intrinsic.
__m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mulhrs_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(
  return _mm_mulhrs_pi16(a, b);
}
// _mm_mullo_pi16: expects a plain vector multiply on <4 x i16>.
__m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mullo_pi16
  // CHECK: mul <4 x i16> {{%.*}}, {{%.*}}
  return _mm_mullo_pi16(a, b);
}
// _mm_or_si64: expects a bitwise or on <1 x i64>.
__m64 test_mm_or_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_or_si64
  // CHECK: or <1 x i64> {{%.*}}, {{%.*}}
  return _mm_or_si64(a, b);
}
// _mm_packs_pi16: expects the 128-bit SSE2 packsswb intrinsic.
__m64 test_mm_packs_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_packs_pi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(
  return _mm_packs_pi16(a, b);
}
// _mm_packs_pi32: expects the 128-bit SSE2 packssdw intrinsic.
__m64 test_mm_packs_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_packs_pi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(
  return _mm_packs_pi32(a, b);
}
// _mm_packs_pu16: expects the 128-bit SSE2 packuswb intrinsic.
__m64 test_mm_packs_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_packs_pu16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(
  return _mm_packs_pu16(a, b);
}
// _mm_sad_pu8: expects the SSE2 psad.bw intrinsic taking <16 x i8> and
// returning <2 x i64>.
__m64 test_mm_sad_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sad_pu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>
  return _mm_sad_pu8(a, b);
}
401 __m64
test_mm_set_pi8(char a
, char b
, char c
, char d
, char e
, char f
, char g
, char h
) {
402 // CHECK-LABEL: test_mm_set_pi8
403 // CHECK: insertelement <8 x i8>
404 // CHECK: insertelement <8 x i8>
405 // CHECK: insertelement <8 x i8>
406 // CHECK: insertelement <8 x i8>
407 // CHECK: insertelement <8 x i8>
408 // CHECK: insertelement <8 x i8>
409 // CHECK: insertelement <8 x i8>
410 // CHECK: insertelement <8 x i8>
411 return _mm_set_pi8(a
, b
, c
, d
, e
, f
, g
, h
);
413 TEST_CONSTEXPR(match_v8qi(_mm_set_pi8(0, -1, 2, -3, 4, -5, 6, -7), -7, 6, -5, 4, -3, 2, -1, 0));
415 __m64
test_mm_set_pi16(short a
, short b
, short c
, short d
) {
416 // CHECK-LABEL: test_mm_set_pi16
417 // CHECK: insertelement <4 x i16>
418 // CHECK: insertelement <4 x i16>
419 // CHECK: insertelement <4 x i16>
420 // CHECK: insertelement <4 x i16>
421 return _mm_set_pi16(a
, b
, c
, d
);
423 TEST_CONSTEXPR(match_v4hi(_mm_set_pi16(101, 102, -103, -104), -104, -103, 102, 101));
425 __m64
test_mm_set_pi32(int a
, int b
) {
426 // CHECK-LABEL: test_mm_set_pi32
427 // CHECK: insertelement <2 x i32>
428 // CHECK: insertelement <2 x i32>
429 return _mm_set_pi32(a
, b
);
431 TEST_CONSTEXPR(match_v2si(_mm_set_pi32(5000, -1500), -1500, 5000));
433 __m64
test_mm_setr_pi8(char a
, char b
, char c
, char d
, char e
, char f
, char g
, char h
) {
434 // CHECK-LABEL: test_mm_setr_pi8
435 // CHECK: insertelement <8 x i8>
436 // CHECK: insertelement <8 x i8>
437 // CHECK: insertelement <8 x i8>
438 // CHECK: insertelement <8 x i8>
439 // CHECK: insertelement <8 x i8>
440 // CHECK: insertelement <8 x i8>
441 // CHECK: insertelement <8 x i8>
442 // CHECK: insertelement <8 x i8>
443 return _mm_setr_pi8(a
, b
, c
, d
, e
, f
, g
, h
);
445 TEST_CONSTEXPR(match_v8qi(_mm_setr_pi8(0, -1, 2, -3, 4, -5, 6, -7), 0, -1, 2, -3, 4, -5, 6, -7));
447 __m64
test_mm_setr_pi16(short a
, short b
, short c
, short d
) {
448 // CHECK-LABEL: test_mm_setr_pi16
449 // CHECK: insertelement <4 x i16>
450 // CHECK: insertelement <4 x i16>
451 // CHECK: insertelement <4 x i16>
452 // CHECK: insertelement <4 x i16>
453 return _mm_setr_pi16(a
, b
, c
, d
);
455 TEST_CONSTEXPR(match_v4hi(_mm_setr_pi16(101, 102, -103, -104), 101, 102, -103, -104));
457 __m64
test_mm_setr_pi32(int a
, int b
) {
458 // CHECK-LABEL: test_mm_setr_pi32
459 // CHECK: insertelement <2 x i32>
460 // CHECK: insertelement <2 x i32>
461 return _mm_setr_pi32(a
, b
);
463 TEST_CONSTEXPR(match_v2si(_mm_setr_pi32(5000, -1500), 5000, -1500));
465 __m64
test_mm_setzero_si64() {
466 // CHECK-LABEL: test_mm_setzero_si64
467 // CHECK: zeroinitializer
468 return _mm_setzero_si64();
470 TEST_CONSTEXPR(match_m64(_mm_setzero_si64(), 0ULL));
472 __m64
test_mm_set1_pi8(char a
) {
473 // CHECK-LABEL: test_mm_set1_pi8
474 // CHECK: insertelement <8 x i8>
475 // CHECK: insertelement <8 x i8>
476 // CHECK: insertelement <8 x i8>
477 // CHECK: insertelement <8 x i8>
478 // CHECK: insertelement <8 x i8>
479 // CHECK: insertelement <8 x i8>
480 // CHECK: insertelement <8 x i8>
481 // CHECK: insertelement <8 x i8>
482 return _mm_set1_pi8(a
);
484 TEST_CONSTEXPR(match_v8qi(_mm_set1_pi8(99), 99, 99, 99, 99, 99, 99, 99, 99));
486 __m64
test_mm_set1_pi16(short a
) {
487 // CHECK-LABEL: test_mm_set1_pi16
488 // CHECK: insertelement <4 x i16>
489 // CHECK: insertelement <4 x i16>
490 // CHECK: insertelement <4 x i16>
491 // CHECK: insertelement <4 x i16>
492 return _mm_set1_pi16(a
);
494 TEST_CONSTEXPR(match_v4hi(_mm_set1_pi16(-128), -128, -128, -128, -128));
496 __m64
test_mm_set1_pi32(int a
) {
497 // CHECK-LABEL: test_mm_set1_pi32
498 // CHECK: insertelement <2 x i32>
499 // CHECK: insertelement <2 x i32>
500 return _mm_set1_pi32(a
);
502 TEST_CONSTEXPR(match_v2si(_mm_set1_pi32(55), 55, 55));
// _mm_shuffle_pi8: expects the 128-bit SSSE3 pshuf.b intrinsic.
__m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_shuffle_pi8
  // CHECK: call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(
  return _mm_shuffle_pi8(a, b);
}
// _mm_shuffle_pi16 with an immediate of 3: expects a <4 x i16> shufflevector
// with mask <3, 0, 0, 0>.
__m64 test_mm_shuffle_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_shuffle_pi16
  // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
  return _mm_shuffle_pi16(a, 3);
}
// _mm_sign_pi8: expects the 128-bit SSSE3 psign.b intrinsic.
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sign_pi8
  // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
  return _mm_sign_pi8(a, b);
}
// _mm_sign_pi16: expects the 128-bit SSSE3 psign.w intrinsic.
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sign_pi16
  // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
  return _mm_sign_pi16(a, b);
}
// _mm_sign_pi32: expects the 128-bit SSSE3 psign.d intrinsic.
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sign_pi32
  // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
  return _mm_sign_pi32(a, b);
}
// _mm_sll_pi16: expects the SSE2 psll.w intrinsic on <8 x i16>.
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sll_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(
  return _mm_sll_pi16(a, b);
}
// _mm_sll_pi32: expects the SSE2 psll.d intrinsic on <4 x i32>.
__m64 test_mm_sll_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sll_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(
  return _mm_sll_pi32(a, b);
}
// _mm_sll_si64: expects the SSE2 psll.q intrinsic on <2 x i64>.
__m64 test_mm_sll_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sll_si64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(
  return _mm_sll_si64(a, b);
}
// _mm_slli_pi16 with a count of 3: expects the SSE2 pslli.w intrinsic.
__m64 test_mm_slli_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_slli_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(
  return _mm_slli_pi16(a, 3);
}
// _mm_slli_pi32 with a count of 3: expects the SSE2 pslli.d intrinsic.
__m64 test_mm_slli_pi32(__m64 a) {
  // CHECK-LABEL: test_mm_slli_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(
  return _mm_slli_pi32(a, 3);
}
// _mm_slli_si64 with a count of 3: expects the SSE2 pslli.q intrinsic.
__m64 test_mm_slli_si64(__m64 a) {
  // CHECK-LABEL: test_mm_slli_si64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(
  return _mm_slli_si64(a, 3);
}
// _mm_sra_pi16: expects the SSE2 psra.w intrinsic on <8 x i16>.
__m64 test_mm_sra_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sra_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(
  return _mm_sra_pi16(a, b);
}
// _mm_sra_pi32: expects the SSE2 psra.d intrinsic on <4 x i32>.
__m64 test_mm_sra_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sra_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(
  return _mm_sra_pi32(a, b);
}
// _mm_srai_pi16 with a count of 3: expects the SSE2 psrai.w intrinsic.
__m64 test_mm_srai_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_srai_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(
  return _mm_srai_pi16(a, 3);
}
// _mm_srai_pi32 with a count of 3: expects the SSE2 psrai.d intrinsic.
__m64 test_mm_srai_pi32(__m64 a) {
  // CHECK-LABEL: test_mm_srai_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(
  return _mm_srai_pi32(a, 3);
}
// _mm_srl_pi16: expects the SSE2 psrl.w intrinsic on <8 x i16>.
__m64 test_mm_srl_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_srl_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(
  return _mm_srl_pi16(a, b);
}
// _mm_srl_pi32: expects the SSE2 psrl.d intrinsic on <4 x i32>.
__m64 test_mm_srl_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_srl_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(
  return _mm_srl_pi32(a, b);
}
// _mm_srl_si64: expects the SSE2 psrl.q intrinsic on <2 x i64>.
__m64 test_mm_srl_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_srl_si64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(
  return _mm_srl_si64(a, b);
}
// _mm_srli_pi16 with a count of 3: expects the SSE2 psrli.w intrinsic.
__m64 test_mm_srli_pi16(__m64 a) {
  // CHECK-LABEL: test_mm_srli_pi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(
  return _mm_srli_pi16(a, 3);
}
// _mm_srli_pi32 with a count of 3: expects the SSE2 psrli.d intrinsic.
__m64 test_mm_srli_pi32(__m64 a) {
  // CHECK-LABEL: test_mm_srli_pi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(
  return _mm_srli_pi32(a, 3);
}
// _mm_srli_si64 with a count of 3: expects the SSE2 psrli.q intrinsic.
__m64 test_mm_srli_si64(__m64 a) {
  // CHECK-LABEL: test_mm_srli_si64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(
  return _mm_srli_si64(a, 3);
}
// _mm_stream_pi through a typed __m64 pointer: expects an 8-byte-aligned
// nontemporal store of <1 x i64>.
void test_mm_stream_pi(__m64 *p, __m64 a) {
  // CHECK-LABEL: test_mm_stream_pi
  // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal
  // NOTE(review): the body had no statement, so the store expected above
  // could never be emitted; the call is inferred from the test name and
  // its two parameters. Confirm against the upstream test.
  _mm_stream_pi(p, a);
}
// _mm_stream_pi through an untyped void pointer: expects an 8-byte-aligned
// nontemporal store of <1 x i64>.
void test_mm_stream_pi_void(void *p, __m64 a) {
  // CHECK-LABEL: test_mm_stream_pi_void
  // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal
  // NOTE(review): the body had no statement, so the store expected above
  // could never be emitted; the call is inferred from the test name and
  // its two parameters. Confirm against the upstream test.
  _mm_stream_pi(p, a);
}
// _mm_sub_pi8: expects a plain vector sub on <8 x i8>.
__m64 test_mm_sub_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sub_pi8
  // CHECK: sub <8 x i8> {{%.*}}, {{%.*}}
  return _mm_sub_pi8(a, b);
}
// _mm_sub_pi16: expects a plain vector sub on <4 x i16>.
__m64 test_mm_sub_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sub_pi16
  // CHECK: sub <4 x i16> {{%.*}}, {{%.*}}
  return _mm_sub_pi16(a, b);
}
// _mm_sub_pi32: expects a plain vector sub on <2 x i32>.
__m64 test_mm_sub_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sub_pi32
  // CHECK: sub <2 x i32> {{%.*}}, {{%.*}}
  return _mm_sub_pi32(a, b);
}
// _mm_sub_si64: expects a scalar i64 sub.
__m64 test_mm_sub_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_sub_si64
  // CHECK: sub i64 {{%.*}}, {{%.*}}
  return _mm_sub_si64(a, b);
}
// _mm_subs_pi8: expects the signed saturating sub intrinsic on <8 x i8>.
__m64 test_mm_subs_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_subs_pi8
  // CHECK: call <8 x i8> @llvm.ssub.sat.v8i8(
  return _mm_subs_pi8(a, b);
}
// _mm_subs_pi16: expects the signed saturating sub intrinsic on <4 x i16>.
__m64 test_mm_subs_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_subs_pi16
  // CHECK: call <4 x i16> @llvm.ssub.sat.v4i16(
  return _mm_subs_pi16(a, b);
}
// _mm_subs_pu8: expects the unsigned saturating sub intrinsic on <8 x i8>.
__m64 test_mm_subs_pu8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_subs_pu8
  // CHECK: call <8 x i8> @llvm.usub.sat.v8i8(
  return _mm_subs_pu8(a, b);
}
// _mm_subs_pu16: expects the unsigned saturating sub intrinsic on <4 x i16>.
__m64 test_mm_subs_pu16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_subs_pu16
  // CHECK: call <4 x i16> @llvm.usub.sat.v4i16(
  return _mm_subs_pu16(a, b);
}
// _m_to_int: expects an extractelement from a <2 x i32> vector.
int test_m_to_int(__m64 a) {
  // CHECK-LABEL: test_m_to_int
  // CHECK: extractelement <2 x i32>
  // NOTE(review): the body had no return statement even though the function
  // returns int; the call is inferred from the test name. Confirm against
  // the upstream test.
  return _m_to_int(a);
}
// _m_to_int64: only the function label is matched; no particular
// instruction is required in the body.
long long test_m_to_int64(__m64 a) {
  // CHECK-LABEL: test_m_to_int64
  return _m_to_int64(a);
}
// _mm_unpackhi_pi8: expects an <8 x i8> shufflevector interleaving lanes
// 4..7 of the first operand with lanes 4..7 of the second (indices 12..15).
__m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpackhi_pi8
  // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_pi8(a, b);
}
// _mm_unpackhi_pi16: expects a <4 x i16> shufflevector with mask <2, 6, 3, 7>.
__m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpackhi_pi16
  // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_pi16(a, b);
}
// _mm_unpackhi_pi32: expects a <2 x i32> shufflevector with mask <1, 3>.
__m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpackhi_pi32
  // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pi32(a, b);
}
// _mm_unpacklo_pi8: expects an <8 x i8> shufflevector interleaving lanes
// 0..3 of the first operand with lanes 0..3 of the second (indices 8..11).
__m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpacklo_pi8
  // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_pi8(a, b);
}
// _mm_unpacklo_pi16: expects a <4 x i16> shufflevector with mask <0, 4, 1, 5>.
__m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpacklo_pi16
  // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_pi16(a, b);
}
// _mm_unpacklo_pi32: expects a <2 x i32> shufflevector with mask <0, 2>.
__m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpacklo_pi32
  // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pi32(a, b);
}
// _mm_xor_si64: expects a bitwise xor on <1 x i64>.
__m64 test_mm_xor_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_xor_si64
  // CHECK: xor <1 x i64> {{%.*}}, {{%.*}}
  return _mm_xor_si64(a, b);
}