[LLVM][IR] Use splat syntax when printing ConstantExpr based splats. (#116856)
[llvm-project.git] / clang / test / CodeGen / X86 / sse2-builtins.c
blob04df59ef331a5b6368c0146da4584b3081d6b4d6
1 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
2 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
3 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
4 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
5 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
6 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
7 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
8 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
9 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
10 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
13 #include <immintrin.h>
14 #include "builtin_test_helpers.h"
16 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
// Expects _mm_add_epi8 to be emitted as an `add <16 x i8>` instruction.
18 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
19 // CHECK-LABEL: test_mm_add_epi8
20 // CHECK: add <16 x i8>
21 return _mm_add_epi8(A, B);
// Expects _mm_add_epi16 to be emitted as an `add <8 x i16>` instruction.
24 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
25 // CHECK-LABEL: test_mm_add_epi16
26 // CHECK: add <8 x i16>
27 return _mm_add_epi16(A, B);
// Expects _mm_add_epi32 to be emitted as an `add <4 x i32>` instruction.
// The TEST_CONSTEXPR line pairs constant operands with their lane-wise sums.
30 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
31 // CHECK-LABEL: test_mm_add_epi32
32 // CHECK: add <4 x i32>
33 return _mm_add_epi32(A, B);
35 TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8));
// Expects _mm_add_epi64 to be emitted as an `add <2 x i64>` instruction.
// The TEST_CONSTEXPR line pairs constant operands with their lane-wise sums.
37 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
38 // CHECK-LABEL: test_mm_add_epi64
39 // CHECK: add <2 x i64>
40 return _mm_add_epi64(A, B);
42 TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5));
// Expects _mm_add_pd to be emitted as an `fadd <2 x double>` instruction.
// The TEST_CONSTEXPR line pairs constant operands with their lane-wise sums.
44 __m128d test_mm_add_pd(__m128d A, __m128d B) {
45 // CHECK-LABEL: test_mm_add_pd
46 // CHECK: fadd <2 x double>
47 return _mm_add_pd(A, B);
49 TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -8.0));
// Expects _mm_add_sd to extract lane 0 twice, add the scalars with
// `fadd double`, and insert the sum back at lane 0.
// In the TEST_CONSTEXPR line the second expected lane (-3.0) equals the first
// operand's second lane.
51 __m128d test_mm_add_sd(__m128d A, __m128d B) {
52 // CHECK-LABEL: test_mm_add_sd
53 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
54 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
55 // CHECK: fadd double
56 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
57 return _mm_add_sd(A, B);
59 TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0));
// Expects _mm_adds_epi8 to call the llvm.sadd.sat.v16i8 intrinsic.
61 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
62 // CHECK-LABEL: test_mm_adds_epi8
63 // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
64 return _mm_adds_epi8(A, B);
// Expects _mm_adds_epi16 to call the llvm.sadd.sat.v8i16 intrinsic.
67 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
68 // CHECK-LABEL: test_mm_adds_epi16
69 // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
70 return _mm_adds_epi16(A, B);
// Expects _mm_adds_epu8 to call llvm.uadd.sat.v16i8, and rejects a call to
// llvm.x86.sse2.paddus.b appearing before it.
73 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
74 // CHECK-LABEL: test_mm_adds_epu8
75 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
76 // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
77 return _mm_adds_epu8(A, B);
// Expects _mm_adds_epu16 to call llvm.uadd.sat.v8i16, and rejects a call to
// llvm.x86.sse2.paddus.w appearing before it.
80 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
81 // CHECK-LABEL: test_mm_adds_epu16
82 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
83 // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
84 return _mm_adds_epu16(A, B);
// Expects _mm_and_pd to be emitted as an integer `and <2 x i64>`.
// The TEST_CONSTEXPR line masks {+1.0, -3.0} with {+0.0, -0.0} and expects
// {+0.0, -0.0}.
87 __m128d test_mm_and_pd(__m128d A, __m128d B) {
88 // CHECK-LABEL: test_mm_and_pd
89 // CHECK: and <2 x i64>
90 return _mm_and_pd(A, B);
92 TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
// Expects _mm_and_si128 to be emitted as an `and <2 x i64>` instruction.
94 __m128i test_mm_and_si128(__m128i A, __m128i B) {
95 // CHECK-LABEL: test_mm_and_si128
96 // CHECK: and <2 x i64>
97 return _mm_and_si128(A, B);
// Expects _mm_andnot_pd to be emitted as an xor with an all-ones <2 x i64>
// splat followed by an `and <2 x i64>`.
// Constant-evaluation expectation: and-not computes (~A) & B, so for lane 1
// the sign bit of -3.0 is inverted to 0 before masking with -0.0, giving +0.0.
// The expected lanes are therefore {+0.0, +0.0}. NOTE(review): the previous
// expectation of -0.0 presumably only passed because the helper compares with
// ==, which treats +0.0 and -0.0 as equal; confirm in builtin_test_helpers.h.
100 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
101 // CHECK-LABEL: test_mm_andnot_pd
102 // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
103 // CHECK: and <2 x i64>
104 return _mm_andnot_pd(A, B);
106 TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, +0.0));
// Expects _mm_andnot_si128 to be emitted as an xor with an all-ones <2 x i64>
// splat followed by an `and <2 x i64>`.
108 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
109 // CHECK-LABEL: test_mm_andnot_si128
110 // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
111 // CHECK: and <2 x i64>
112 return _mm_andnot_si128(A, B);
// Expects _mm_avg_epu8 to call the llvm.x86.sse2.pavg.b intrinsic.
115 __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
116 // CHECK-LABEL: test_mm_avg_epu8
117 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
118 return _mm_avg_epu8(A, B);
// Expects _mm_avg_epu16 to call the llvm.x86.sse2.pavg.w intrinsic.
121 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
122 // CHECK-LABEL: test_mm_avg_epu16
123 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
124 return _mm_avg_epu16(A, B);
// Expects _mm_bslli_si128 with a count of 5 to be emitted as a shufflevector
// of a zero vector (first operand) with the input, selecting indices 11..26.
127 __m128i test_mm_bslli_si128(__m128i A) {
128 // CHECK-LABEL: test_mm_bslli_si128
129 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
130 return _mm_bslli_si128(A, 5);
// Expects _mm_bsrli_si128 with a count of 5 to be emitted as a shufflevector
// of the input with a zero vector (second operand), selecting indices 5..20.
133 __m128i test_mm_bsrli_si128(__m128i A) {
134 // CHECK-LABEL: test_mm_bsrli_si128
135 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
136 return _mm_bsrli_si128(A, 5);
// Only the function label is matched for _mm_castpd_ps; no instruction is
// required. The TEST_CONSTEXPR line pins the four floats obtained by
// reinterpreting the doubles {-1.0, +2.0}.
139 __m128 test_mm_castpd_ps(__m128d A) {
140 // CHECK-LABEL: test_mm_castpd_ps
141 return _mm_castpd_ps(A);
143 TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f));
// Only the function label is matched for _mm_castpd_si128. The TEST_CONSTEXPR
// line pins the 64-bit patterns of the doubles {-1.0, +2.0}.
145 __m128i test_mm_castpd_si128(__m128d A) {
146 // CHECK-LABEL: test_mm_castpd_si128
147 return _mm_castpd_si128(A);
149 TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL));
// Only the function label is matched for _mm_castps_pd. The TEST_CONSTEXPR
// line pins the two doubles obtained by reinterpreting four floats.
151 __m128d test_mm_castps_pd(__m128 A) {
152 // CHECK-LABEL: test_mm_castps_pd
153 return _mm_castps_pd(A);
155 TEST_CONSTEXPR(match_m128d(_mm_castps_pd((__m128){0.0f, -1.0f, 0.0f, 4.0f}), -0.0078125, 512.0));
// Only the function label is matched for _mm_castps_si128. The TEST_CONSTEXPR
// line pins the two 64-bit patterns formed by the four input floats.
157 __m128i test_mm_castps_si128(__m128 A) {
158 // CHECK-LABEL: test_mm_castps_si128
159 return _mm_castps_si128(A);
161 TEST_CONSTEXPR(match_m128i(_mm_castps_si128((__m128){1.0f, -2.0f, -4.0f, 8.0f}), 0xC00000003F800000ULL, 0x41000000c0800000ULL));
// Only the function label is matched for _mm_castsi128_pd. The TEST_CONSTEXPR
// line pins the doubles obtained by reinterpreting two 64-bit patterns.
163 __m128d test_mm_castsi128_pd(__m128i A) {
164 // CHECK-LABEL: test_mm_castsi128_pd
165 return _mm_castsi128_pd(A);
167 TEST_CONSTEXPR(match_m128d(_mm_castsi128_pd((__m128i)(__v2du){0x4070000000000000ULL, 0xC000000000000000ULL}), 256.0, -2.0));
// Only the function label is matched for _mm_castsi128_ps. The TEST_CONSTEXPR
// line pins the four floats obtained by reinterpreting two 64-bit patterns.
169 __m128 test_mm_castsi128_ps(__m128i A) {
170 // CHECK-LABEL: test_mm_castsi128_ps
171 return _mm_castsi128_ps(A);
173 TEST_CONSTEXPR(match_m128(_mm_castsi128_ps((__m128i)(__v2du){0x42000000c1800000ULL, 0x43000000c2800000ULL}), -16.0f, 32.0f, -64.0f, 128.0f));
// Expects _mm_clflush to call llvm.x86.sse2.clflush with a pointer argument.
175 void test_mm_clflush(void* A) {
176 // CHECK-LABEL: test_mm_clflush
177 // CHECK: call void @llvm.x86.sse2.clflush(ptr %{{.*}})
178 _mm_clflush(A);
// Expects _mm_cmp_pd with _CMP_EQ_OQ to be emitted as `fcmp oeq <2 x double>`.
181 __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
182 // CHECK-LABEL: test_mm_cmp_pd_eq_oq
183 // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
184 return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
// Expects _mm_cmp_pd with _CMP_LT_OS to be emitted as `fcmp olt <2 x double>`.
187 __m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
188 // CHECK-LABEL: test_mm_cmp_pd_lt_os
189 // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
190 return _mm_cmp_pd(a, b, _CMP_LT_OS);
// Expects _mm_cmp_pd with _CMP_LE_OS to be emitted as `fcmp ole <2 x double>`.
193 __m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
194 // CHECK-LABEL: test_mm_cmp_pd_le_os
195 // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
196 return _mm_cmp_pd(a, b, _CMP_LE_OS);
// Expects _mm_cmp_pd with _CMP_UNORD_Q to be emitted as `fcmp uno <2 x double>`.
199 __m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
200 // CHECK-LABEL: test_mm_cmp_pd_unord_q
201 // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
202 return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
// Expects _mm_cmp_pd with _CMP_NEQ_UQ to be emitted as `fcmp une <2 x double>`.
205 __m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
206 // CHECK-LABEL: test_mm_cmp_pd_neq_uq
207 // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
208 return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
// Expects _mm_cmp_pd with _CMP_NLT_US to be emitted as `fcmp uge <2 x double>`.
211 __m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
212 // CHECK-LABEL: test_mm_cmp_pd_nlt_us
213 // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
214 return _mm_cmp_pd(a, b, _CMP_NLT_US);
// Expects _mm_cmp_pd with _CMP_NLE_US to be emitted as `fcmp ugt <2 x double>`.
217 __m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
218 // CHECK-LABEL: test_mm_cmp_pd_nle_us
219 // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
220 return _mm_cmp_pd(a, b, _CMP_NLE_US);
// Expects _mm_cmp_pd with _CMP_ORD_Q to be emitted as `fcmp ord <2 x double>`.
223 __m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
224 // CHECK-LABEL: test_mm_cmp_pd_ord_q
225 // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
226 return _mm_cmp_pd(a, b, _CMP_ORD_Q);
// Expects _mm_cmp_sd with _CMP_ORD_Q to call llvm.x86.sse2.cmp.sd with
// predicate immediate 7.
229 __m128d test_mm_cmp_sd(__m128d A, __m128d B) {
230 // CHECK-LABEL: test_mm_cmp_sd
231 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
232 return _mm_cmp_sd(A, B, _CMP_ORD_Q);
// Expects _mm_cmpeq_epi8 to be emitted as `icmp eq <16 x i8>`.
235 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
236 // CHECK-LABEL: test_mm_cmpeq_epi8
237 // CHECK: icmp eq <16 x i8>
238 return _mm_cmpeq_epi8(A, B);
// Expects _mm_cmpeq_epi16 to be emitted as `icmp eq <8 x i16>`.
241 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
242 // CHECK-LABEL: test_mm_cmpeq_epi16
243 // CHECK: icmp eq <8 x i16>
244 return _mm_cmpeq_epi16(A, B);
// Expects _mm_cmpeq_epi32 to be emitted as `icmp eq <4 x i32>`.
247 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
248 // CHECK-LABEL: test_mm_cmpeq_epi32
249 // CHECK: icmp eq <4 x i32>
250 return _mm_cmpeq_epi32(A, B);
// Expects _mm_cmpeq_pd to be emitted as `fcmp oeq`, immediately followed by a
// sign-extension of the <2 x i1> result to <2 x i64> and a bitcast to
// <2 x double>.
253 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
254 // CHECK-LABEL: test_mm_cmpeq_pd
255 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
256 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
257 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
258 return _mm_cmpeq_pd(A, B);
// Expects _mm_cmpeq_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 0.
261 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
262 // CHECK-LABEL: test_mm_cmpeq_sd
263 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
264 return _mm_cmpeq_sd(A, B);
// Expects _mm_cmpge_pd to be emitted as `fcmp ole`, then sext and bitcast.
// NOTE(review): the `ole` predicate for a >= compare presumably reflects the
// header swapping the operands; confirm against emmintrin.h.
267 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
268 // CHECK-LABEL: test_mm_cmpge_pd
269 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
270 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
271 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
272 return _mm_cmpge_pd(A, B);
// Expects _mm_cmpge_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 2
// (the same immediate the cmple_sd test expects), then rebuild the result from
// an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the header swaps operands and restores the upper
// lane from the first argument; confirm against emmintrin.h.
275 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
276 // CHECK-LABEL: test_mm_cmpge_sd
277 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
278 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
279 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
280 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
281 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
282 return _mm_cmpge_sd(A, B);
// Expects _mm_cmpgt_epi8 to be emitted as `icmp sgt <16 x i8>`.
285 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
286 // CHECK-LABEL: test_mm_cmpgt_epi8
287 // CHECK: icmp sgt <16 x i8>
288 return _mm_cmpgt_epi8(A, B);
// Expects _mm_cmpgt_epi16 to be emitted as `icmp sgt <8 x i16>`.
291 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
292 // CHECK-LABEL: test_mm_cmpgt_epi16
293 // CHECK: icmp sgt <8 x i16>
294 return _mm_cmpgt_epi16(A, B);
// Expects _mm_cmpgt_epi32 to be emitted as `icmp sgt <4 x i32>`.
297 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
298 // CHECK-LABEL: test_mm_cmpgt_epi32
299 // CHECK: icmp sgt <4 x i32>
300 return _mm_cmpgt_epi32(A, B);
// Expects _mm_cmpgt_pd to be emitted as `fcmp olt`, then sext and bitcast.
// NOTE(review): the `olt` predicate for a > compare presumably reflects the
// header swapping the operands; confirm against emmintrin.h.
303 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
304 // CHECK-LABEL: test_mm_cmpgt_pd
305 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
306 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
307 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
308 return _mm_cmpgt_pd(A, B);
// Expects _mm_cmpgt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 1
// (the same immediate the cmplt_sd test expects), then rebuild the result from
// an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the header swaps operands and restores the upper
// lane from the first argument; confirm against emmintrin.h.
311 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
312 // CHECK-LABEL: test_mm_cmpgt_sd
313 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
314 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
315 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
316 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
317 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
318 return _mm_cmpgt_sd(A, B);
// Expects _mm_cmple_pd to be emitted as `fcmp ole`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
321 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
322 // CHECK-LABEL: test_mm_cmple_pd
323 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
324 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
325 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
326 return _mm_cmple_pd(A, B);
// Expects _mm_cmple_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 2.
329 __m128d test_mm_cmple_sd(__m128d A, __m128d B) {
330 // CHECK-LABEL: test_mm_cmple_sd
331 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
332 return _mm_cmple_sd(A, B);
// Expects _mm_cmplt_epi8 to be emitted as `icmp sgt <16 x i8>`.
// NOTE(review): `sgt` for a < compare presumably reflects swapped operands in
// the header; confirm against emmintrin.h.
335 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
336 // CHECK-LABEL: test_mm_cmplt_epi8
337 // CHECK: icmp sgt <16 x i8>
338 return _mm_cmplt_epi8(A, B);
// Expects _mm_cmplt_epi16 to be emitted as `icmp sgt <8 x i16>`.
// NOTE(review): `sgt` for a < compare presumably reflects swapped operands in
// the header; confirm against emmintrin.h.
341 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
342 // CHECK-LABEL: test_mm_cmplt_epi16
343 // CHECK: icmp sgt <8 x i16>
344 return _mm_cmplt_epi16(A, B);
// Expects _mm_cmplt_epi32 to be emitted as `icmp sgt <4 x i32>`.
// NOTE(review): `sgt` for a < compare presumably reflects swapped operands in
// the header; confirm against emmintrin.h.
347 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
348 // CHECK-LABEL: test_mm_cmplt_epi32
349 // CHECK: icmp sgt <4 x i32>
350 return _mm_cmplt_epi32(A, B);
// Expects _mm_cmplt_pd to be emitted as `fcmp olt`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
353 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
354 // CHECK-LABEL: test_mm_cmplt_pd
355 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
356 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
357 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
358 return _mm_cmplt_pd(A, B);
// Expects _mm_cmplt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 1.
361 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
362 // CHECK-LABEL: test_mm_cmplt_sd
363 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
364 return _mm_cmplt_sd(A, B);
// Expects _mm_cmpneq_pd to be emitted as `fcmp une`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
367 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
368 // CHECK-LABEL: test_mm_cmpneq_pd
369 // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
370 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
371 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
372 return _mm_cmpneq_pd(A, B);
// Expects _mm_cmpneq_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 4.
375 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
376 // CHECK-LABEL: test_mm_cmpneq_sd
377 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
378 return _mm_cmpneq_sd(A, B);
// Expects _mm_cmpnge_pd to be emitted as `fcmp ugt`, then sext and bitcast.
// NOTE(review): `ugt` for a not->= compare presumably reflects swapped
// operands in the header; confirm against emmintrin.h.
381 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
382 // CHECK-LABEL: test_mm_cmpnge_pd
383 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
384 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
385 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
386 return _mm_cmpnge_pd(A, B);
// Expects _mm_cmpnge_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 6
// (the same immediate the cmpnle_sd test expects), then rebuild the result
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the header swaps operands and restores the upper
// lane from the first argument; confirm against emmintrin.h.
389 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
390 // CHECK-LABEL: test_mm_cmpnge_sd
391 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
392 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
393 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
394 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
395 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
396 return _mm_cmpnge_sd(A, B);
// Expects _mm_cmpngt_pd to be emitted as `fcmp uge`, then sext and bitcast.
// NOTE(review): `uge` for a not-> compare presumably reflects swapped
// operands in the header; confirm against emmintrin.h.
399 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
400 // CHECK-LABEL: test_mm_cmpngt_pd
401 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
402 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
403 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
404 return _mm_cmpngt_pd(A, B);
// Expects _mm_cmpngt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 5
// (the same immediate the cmpnlt_sd test expects), then rebuild the result
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the header swaps operands and restores the upper
// lane from the first argument; confirm against emmintrin.h.
407 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
408 // CHECK-LABEL: test_mm_cmpngt_sd
409 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
410 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
411 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
412 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
413 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
414 return _mm_cmpngt_sd(A, B);
// Expects _mm_cmpnle_pd to be emitted as `fcmp ugt`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
417 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
418 // CHECK-LABEL: test_mm_cmpnle_pd
419 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
420 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
421 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
422 return _mm_cmpnle_pd(A, B);
// Expects _mm_cmpnle_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 6.
425 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
426 // CHECK-LABEL: test_mm_cmpnle_sd
427 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
428 return _mm_cmpnle_sd(A, B);
// Expects _mm_cmpnlt_pd to be emitted as `fcmp uge`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
431 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
432 // CHECK-LABEL: test_mm_cmpnlt_pd
433 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
434 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
435 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
436 return _mm_cmpnlt_pd(A, B);
// Expects _mm_cmpnlt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 5.
439 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
440 // CHECK-LABEL: test_mm_cmpnlt_sd
441 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
442 return _mm_cmpnlt_sd(A, B);
// Expects _mm_cmpord_pd to be emitted as `fcmp ord`, immediately followed by a
// sext to <2 x i64> and a bitcast to <2 x double>.
445 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
446 // CHECK-LABEL: test_mm_cmpord_pd
447 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
448 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
449 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
450 return _mm_cmpord_pd(A, B);
// Expects _mm_cmpord_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 7.
453 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
454 // CHECK-LABEL: test_mm_cmpord_sd
455 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
456 return _mm_cmpord_sd(A, B);
// Expects _mm_cmpunord_pd to be emitted as `fcmp uno`, immediately followed by
// a sext to <2 x i64> and a bitcast to <2 x double>.
459 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
460 // CHECK-LABEL: test_mm_cmpunord_pd
461 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
462 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
463 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
464 return _mm_cmpunord_pd(A, B);
// Expects _mm_cmpunord_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 3.
467 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
468 // CHECK-LABEL: test_mm_cmpunord_sd
469 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
470 return _mm_cmpunord_sd(A, B);
// Expects _mm_comieq_sd to call llvm.x86.sse2.comieq.sd, returning i32.
473 int test_mm_comieq_sd(__m128d A, __m128d B) {
474 // CHECK-LABEL: test_mm_comieq_sd
475 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
476 return _mm_comieq_sd(A, B);
// Expects _mm_comige_sd to call llvm.x86.sse2.comige.sd, returning i32.
479 int test_mm_comige_sd(__m128d A, __m128d B) {
480 // CHECK-LABEL: test_mm_comige_sd
481 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
482 return _mm_comige_sd(A, B);
// Expects _mm_comigt_sd to call llvm.x86.sse2.comigt.sd, returning i32.
485 int test_mm_comigt_sd(__m128d A, __m128d B) {
486 // CHECK-LABEL: test_mm_comigt_sd
487 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
488 return _mm_comigt_sd(A, B);
// Expects _mm_comile_sd to call llvm.x86.sse2.comile.sd, returning i32.
491 int test_mm_comile_sd(__m128d A, __m128d B) {
492 // CHECK-LABEL: test_mm_comile_sd
493 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
494 return _mm_comile_sd(A, B);
// Expects _mm_comilt_sd to call llvm.x86.sse2.comilt.sd, returning i32.
497 int test_mm_comilt_sd(__m128d A, __m128d B) {
498 // CHECK-LABEL: test_mm_comilt_sd
499 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
500 return _mm_comilt_sd(A, B);
// Expects _mm_comineq_sd to call llvm.x86.sse2.comineq.sd, returning i32.
503 int test_mm_comineq_sd(__m128d A, __m128d B) {
504 // CHECK-LABEL: test_mm_comineq_sd
505 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
506 return _mm_comineq_sd(A, B);
// Expects _mm_cvtepi32_pd to select elements 0 and 1 with a shufflevector and
// convert them with `sitofp <2 x i32> ... to <2 x double>`.
// The TEST_CONSTEXPR line expects only the first two inputs to be converted.
509 __m128d test_mm_cvtepi32_pd(__m128i A) {
510 // CHECK-LABEL: test_mm_cvtepi32_pd
511 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
512 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
513 return _mm_cvtepi32_pd(A);
515 TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0));
// Expects _mm_cvtepi32_ps to be emitted as `sitofp <4 x i32> ... to <4 x float>`.
// The TEST_CONSTEXPR line pairs constant inputs with their float values.
517 __m128 test_mm_cvtepi32_ps(__m128i A) {
518 // CHECK-LABEL: test_mm_cvtepi32_ps
519 // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
520 return _mm_cvtepi32_ps(A);
522 TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f));
// Expects _mm_cvtpd_epi32 to call the llvm.x86.sse2.cvtpd2dq intrinsic.
524 __m128i test_mm_cvtpd_epi32(__m128d A) {
525 // CHECK-LABEL: test_mm_cvtpd_epi32
526 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
527 return _mm_cvtpd_epi32(A);
// Expects _mm_cvtpd_ps to call the llvm.x86.sse2.cvtpd2ps intrinsic.
530 __m128 test_mm_cvtpd_ps(__m128d A) {
531 // CHECK-LABEL: test_mm_cvtpd_ps
532 // CHECK: call {{.*}}<4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
533 return _mm_cvtpd_ps(A);
// Expects _mm_cvtps_epi32 to call the llvm.x86.sse2.cvtps2dq intrinsic.
536 __m128i test_mm_cvtps_epi32(__m128 A) {
537 // CHECK-LABEL: test_mm_cvtps_epi32
538 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
539 return _mm_cvtps_epi32(A);
// Expects _mm_cvtps_pd to select elements 0 and 1 with a shufflevector and
// widen them with `fpext <2 x float> ... to <2 x double>`.
// The TEST_CONSTEXPR line expects only the first two inputs to be converted.
542 __m128d test_mm_cvtps_pd(__m128 A) {
543 // CHECK-LABEL: test_mm_cvtps_pd
544 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
545 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
546 return _mm_cvtps_pd(A);
548 TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0));
// Expects _mm_cvtsd_f64 to be emitted as an extractelement of lane 0.
// The TEST_CONSTEXPR line expects the first element (-4.0) to be returned.
550 double test_mm_cvtsd_f64(__m128d A) {
551 // CHECK-LABEL: test_mm_cvtsd_f64
552 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
553 return _mm_cvtsd_f64(A);
555 TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0);
// Expects _mm_cvtsd_si32 to call the llvm.x86.sse2.cvtsd2si intrinsic.
557 int test_mm_cvtsd_si32(__m128d A) {
558 // CHECK-LABEL: test_mm_cvtsd_si32
559 // CHECK: call {{.*}}i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
560 return _mm_cvtsd_si32(A);
563 #ifdef __x86_64__
// Expects _mm_cvtsd_si64 to call the llvm.x86.sse2.cvtsd2si64 intrinsic.
// Compiled only when __x86_64__ is defined and verified only by the run lines
// that enable the 64-bit prefix.
564 long long test_mm_cvtsd_si64(__m128d A) {
565 // X64-LABEL: test_mm_cvtsd_si64
566 // X64: call {{.*}}i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
567 return _mm_cvtsd_si64(A);
569 #endif
// Expects _mm_cvtsd_ss to call the llvm.x86.sse2.cvtsd2ss intrinsic with a
// <4 x float> and a <2 x double> operand.
571 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
572 // CHECK-LABEL: test_mm_cvtsd_ss
573 // CHECK: call {{.*}}<4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
574 return _mm_cvtsd_ss(A, B);
// Expects _mm_cvtsi128_si32 to be emitted as an extractelement of lane 0 from
// a <4 x i32> vector.
577 int test_mm_cvtsi128_si32(__m128i A) {
578 // CHECK-LABEL: test_mm_cvtsi128_si32
579 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
580 return _mm_cvtsi128_si32(A);
// Expects _mm_cvtsi128_si64 to be emitted as an extractelement of lane 0 from
// a <2 x i64> vector. Unlike the other 64-bit conversion tests here, this one
// is not wrapped in an __x86_64__ guard and is verified on every run line.
583 long long test_mm_cvtsi128_si64(__m128i A) {
584 // CHECK-LABEL: test_mm_cvtsi128_si64
585 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
586 return _mm_cvtsi128_si64(A);
// Expects _mm_cvtsi32_sd to convert the integer with `sitofp i32 ... to double`
// and insert it at lane 0. The TEST_CONSTEXPR line expects the second lane
// (+42.0) to come from the first operand unchanged.
589 __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
590 // CHECK-LABEL: test_mm_cvtsi32_sd
591 // CHECK: sitofp i32 %{{.*}} to double
592 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
593 return _mm_cvtsi32_sd(A, B);
595 TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0));
// Expects _mm_cvtsi32_si128 to insert the scalar at lane 0 of a poison
// <4 x i32> and then insert constant zeros at lanes 1, 2 and 3.
597 __m128i test_mm_cvtsi32_si128(int A) {
598 // CHECK-LABEL: test_mm_cvtsi32_si128
599 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
600 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
601 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
602 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
603 return _mm_cvtsi32_si128(A);
606 #ifdef __x86_64__
// Expects _mm_cvtsi64_sd to convert the integer with `sitofp i64 ... to double`
// and insert it at lane 0. Compiled only when __x86_64__ is defined and
// verified only by the run lines that enable the 64-bit prefix.
// The TEST_CONSTEXPR line expects the second lane (+99.0) to be preserved.
607 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
608 // X64-LABEL: test_mm_cvtsi64_sd
609 // X64: sitofp i64 %{{.*}} to double
610 // X64: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
611 return _mm_cvtsi64_sd(A, B);
613 TEST_CONSTEXPR(match_m128d(_mm_cvtsi64_sd((__m128d){-42.0, +99.0}, 55), +55.0, +99.0));
614 #endif
// Expects _mm_cvtsi64_si128 to insert the scalar at lane 0 of a poison
// <2 x i64> and then insert a constant zero at lane 1.
616 __m128i test_mm_cvtsi64_si128(long long A) {
617 // CHECK-LABEL: test_mm_cvtsi64_si128
618 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
619 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
620 return _mm_cvtsi64_si128(A);
// Expects _mm_cvtss_sd to extract lane 0 of the float vector, widen it with
// `fpext float ... to double`, and insert it at lane 0 of the double vector.
// The TEST_CONSTEXPR line expects the second lane (+8.0) to be preserved.
623 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
624 // CHECK-LABEL: test_mm_cvtss_sd
625 // CHECK: extractelement <4 x float> %{{.*}}, i32 0
626 // CHECK: fpext float %{{.*}} to double
627 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
628 return _mm_cvtss_sd(A, B);
630 TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0));
// Expects _mm_cvttpd_epi32 to call the llvm.x86.sse2.cvttpd2dq intrinsic.
632 __m128i test_mm_cvttpd_epi32(__m128d A) {
633 // CHECK-LABEL: test_mm_cvttpd_epi32
634 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
635 return _mm_cvttpd_epi32(A);
// Expects _mm_cvttps_epi32 to call the llvm.x86.sse2.cvttps2dq intrinsic.
638 __m128i test_mm_cvttps_epi32(__m128 A) {
639 // CHECK-LABEL: test_mm_cvttps_epi32
640 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
641 return _mm_cvttps_epi32(A);
// Expects _mm_cvttsd_si32 to call the llvm.x86.sse2.cvttsd2si intrinsic.
644 int test_mm_cvttsd_si32(__m128d A) {
645 // CHECK-LABEL: test_mm_cvttsd_si32
646 // CHECK: call {{.*}}i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
647 return _mm_cvttsd_si32(A);
650 #ifdef __x86_64__
// Expects _mm_cvttsd_si64 to call the llvm.x86.sse2.cvttsd2si64 intrinsic.
// Compiled only when __x86_64__ is defined and verified only by the run lines
// that enable the 64-bit prefix.
651 long long test_mm_cvttsd_si64(__m128d A) {
652 // X64-LABEL: test_mm_cvttsd_si64
653 // X64: call {{.*}}i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
654 return _mm_cvttsd_si64(A);
656 #endif
// Expects _mm_div_pd to be emitted as an `fdiv <2 x double>` instruction.
// The TEST_CONSTEXPR line pairs constant operands with their lane-wise quotients.
658 __m128d test_mm_div_pd(__m128d A, __m128d B) {
659 // CHECK-LABEL: test_mm_div_pd
660 // CHECK: fdiv <2 x double>
661 return _mm_div_pd(A, B);
663 TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0));
// Expects _mm_div_sd to extract lane 0 twice, divide the scalars with
// `fdiv double`, and insert the quotient back at lane 0.
// In the TEST_CONSTEXPR line the second expected lane (+8.0) equals the first
// operand's second lane.
665 __m128d test_mm_div_sd(__m128d A, __m128d B) {
666 // CHECK-LABEL: test_mm_div_sd
667 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
668 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
669 // CHECK: fdiv double
670 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
671 return _mm_div_sd(A, B);
673 TEST_CONSTEXPR(match_m128d(_mm_div_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +8.0));
// Expects _mm_extract_epi16 with index 1 to be emitted as an extractelement
// from <8 x i16> (index typed i32 or i64) followed by a zext of the i16 to i32.
675 // Lowering to pextrw requires optimization.
676 int test_mm_extract_epi16(__m128i A) {
677 // CHECK-LABEL: test_mm_extract_epi16
678 // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
679 // CHECK: zext i16 %{{.*}} to i32
680 return _mm_extract_epi16(A, 1);
// Expects _mm_insert_epi16 with index 0 to be emitted as an insertelement into
// <8 x i16> (index typed i32 or i64).
// NOTE(review): the pattern has no separate slot for the inserted i16 operand;
// it presumably matches because the `.*` wildcard also spans that operand.
683 __m128i test_mm_insert_epi16(__m128i A, int B) {
684 // CHECK-LABEL: test_mm_insert_epi16
685 // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
686 return _mm_insert_epi16(A, B, 0);
// Expects _mm_lfence to call the llvm.x86.sse2.lfence intrinsic.
689 void test_mm_lfence(void) {
690 // CHECK-LABEL: test_mm_lfence
691 // CHECK: call void @llvm.x86.sse2.lfence()
692 _mm_lfence();
// Expects _mm_load_pd to be emitted as a 16-byte-aligned <2 x double> load.
695 __m128d test_mm_load_pd(double const* A) {
696 // CHECK-LABEL: test_mm_load_pd
697 // CHECK: load <2 x double>, ptr %{{.*}}, align 16
698 return _mm_load_pd(A);
// Expects _mm_load_pd1 to load one double (align 8) and insert it at lane 0 of
// a poison vector and then at lane 1.
701 __m128d test_mm_load_pd1(double const* A) {
702 // CHECK-LABEL: test_mm_load_pd1
703 // CHECK: load double, ptr %{{.*}}, align 8
704 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
705 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
706 return _mm_load_pd1(A);
// Expects _mm_load_sd to load a double with alignment exactly 1; the
// end-of-line anchor keeps `align 16` and similar from matching.
709 __m128d test_mm_load_sd(double const* A) {
710 // CHECK-LABEL: test_mm_load_sd
711 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
712 return _mm_load_sd(A);
// Expects _mm_load_si128 to be emitted as a 16-byte-aligned <2 x i64> load.
715 __m128i test_mm_load_si128(__m128i const* A) {
716 // CHECK-LABEL: test_mm_load_si128
717 // CHECK: load <2 x i64>, ptr %{{.*}}, align 16
718 return _mm_load_si128(A);
// Expects _mm_load1_pd to load one double (align 8) and insert it at lane 0 of
// a poison vector and then at lane 1.
721 __m128d test_mm_load1_pd(double const* A) {
722 // CHECK-LABEL: test_mm_load1_pd
723 // CHECK: load double, ptr %{{.*}}, align 8
724 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
725 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
726 return _mm_load1_pd(A);
// Expects _mm_loadh_pd to load a double with alignment exactly 1 and insert it
// at lane 1 of the vector.
729 __m128d test_mm_loadh_pd(__m128d x, double const* y) {
730 // CHECK-LABEL: test_mm_loadh_pd
731 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
732 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
733 return _mm_loadh_pd(x, y);
// Expects _mm_loadl_epi64 to load an i64 with alignment exactly 1, insert it
// at lane 0 of a poison <2 x i64>, and insert a constant zero at lane 1.
// The function name is matched with a label directive, like every other test
// in this file, so the matches below are confined to this function's IR.
736 __m128i test_mm_loadl_epi64(__m128i* y) {
737 // CHECK-LABEL: test_mm_loadl_epi64
738 // CHECK: load i64, ptr {{.*}}, align 1{{$}}
739 // CHECK: insertelement <2 x i64> poison, i64 {{.*}}, i32 0
740 // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
741 return _mm_loadl_epi64(y);
// Expects _mm_loadl_pd to load a double with alignment exactly 1, insert it at
// lane 0 of a poison vector, then extract lane 1 from a vector and insert it
// at lane 1 of the result.
744 __m128d test_mm_loadl_pd(__m128d x, double const* y) {
745 // CHECK-LABEL: test_mm_loadl_pd
746 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
747 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
748 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
749 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
750 return _mm_loadl_pd(x, y);
// Expects _mm_loadr_pd to perform a 16-byte-aligned <2 x double> load followed
// by a shufflevector that swaps the two lanes (mask <1, 0>).
753 __m128d test_mm_loadr_pd(double const* A) {
754 // CHECK-LABEL: test_mm_loadr_pd
755 // CHECK: load <2 x double>, ptr %{{.*}}, align 16
756 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
757 return _mm_loadr_pd(A);
// Expects _mm_loadu_pd to be emitted as a <2 x double> load with alignment
// exactly 1.
760 __m128d test_mm_loadu_pd(double const* A) {
761 // CHECK-LABEL: test_mm_loadu_pd
762 // CHECK: load <2 x double>, ptr %{{.*}}, align 1{{$}}
763 return _mm_loadu_pd(A);
// Expects _mm_loadu_si128 to be emitted as a <2 x i64> load with alignment
// exactly 1.
766 __m128i test_mm_loadu_si128(__m128i const* A) {
767 // CHECK-LABEL: test_mm_loadu_si128
768 // CHECK: load <2 x i64>, ptr %{{.*}}, align 1{{$}}
769 return _mm_loadu_si128(A);
// Expects _mm_loadu_si64 to load an i64 with alignment exactly 1, insert it at
// lane 0 of a poison <2 x i64>, and insert a constant zero at lane 1.
772 __m128i test_mm_loadu_si64(void const* A) {
773 // CHECK-LABEL: test_mm_loadu_si64
774 // CHECK: load i64, ptr %{{.*}}, align 1{{$}}
775 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
776 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
777 return _mm_loadu_si64(A);
// Expects _mm_loadu_si32 to load an i32 with alignment exactly 1, insert it at
// lane 0 of a poison <4 x i32>, and insert constant zeros at lanes 1 to 3.
780 __m128i test_mm_loadu_si32(void const* A) {
781 // CHECK-LABEL: test_mm_loadu_si32
782 // CHECK: load i32, ptr %{{.*}}, align 1{{$}}
783 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
784 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
785 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
786 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
787 return _mm_loadu_si32(A);
// Expects _mm_loadu_si16 to load an i16 with alignment exactly 1, insert it at
// lane 0 of a poison <8 x i16>, and insert constant zeros at lanes 1 to 7.
790 __m128i test_mm_loadu_si16(void const* A) {
791 // CHECK-LABEL: test_mm_loadu_si16
792 // CHECK: load i16, ptr %{{.*}}, align 1{{$}}
793 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
794 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
795 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
796 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
797 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
798 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
799 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
800 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
801 return _mm_loadu_si16(A);
// Expects _mm_madd_epi16 to call llvm.x86.sse2.pmadd.wd, taking two <8 x i16>
// operands and returning <4 x i32>.
804 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
805 // CHECK-LABEL: test_mm_madd_epi16
806 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
807 return _mm_madd_epi16(A, B);
// Expects _mm_maskmoveu_si128 to call llvm.x86.sse2.maskmov.dqu with two
// <16 x i8> operands and a pointer.
810 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
811 // CHECK-LABEL: test_mm_maskmoveu_si128
812 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, ptr %{{.*}})
813 _mm_maskmoveu_si128(A, B, C);
// Expects _mm_max_epi16 to call the llvm.smax.v8i16 intrinsic.
816 __m128i test_mm_max_epi16(__m128i A, __m128i B) {
817 // CHECK-LABEL: test_mm_max_epi16
818 // CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
819 return _mm_max_epi16(A, B);
// Expects _mm_max_epu8 to call the llvm.umax.v16i8 intrinsic.
822 __m128i test_mm_max_epu8(__m128i A, __m128i B) {
823 // CHECK-LABEL: test_mm_max_epu8
824 // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
825 return _mm_max_epu8(A, B);
828 __m128d test_mm_max_pd(__m128d A, __m128d B) {
829 // CHECK-LABEL: test_mm_max_pd
830 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
831 return _mm_max_pd(A, B);
834 __m128d test_mm_max_sd(__m128d A, __m128d B) {
835 // CHECK-LABEL: test_mm_max_sd
836 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
837 return _mm_max_sd(A, B);
840 void test_mm_mfence(void) {
841 // CHECK-LABEL: test_mm_mfence
842 // CHECK: call void @llvm.x86.sse2.mfence()
843 _mm_mfence();
846 __m128i test_mm_min_epi16(__m128i A, __m128i B) {
847 // CHECK-LABEL: test_mm_min_epi16
848 // CHECK: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
849 return _mm_min_epi16(A, B);
852 __m128i test_mm_min_epu8(__m128i A, __m128i B) {
853 // CHECK-LABEL: test_mm_min_epu8
854 // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
855 return _mm_min_epu8(A, B);
858 __m128d test_mm_min_pd(__m128d A, __m128d B) {
859 // CHECK-LABEL: test_mm_min_pd
860 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
861 return _mm_min_pd(A, B);
864 __m128d test_mm_min_sd(__m128d A, __m128d B) {
865 // CHECK-LABEL: test_mm_min_sd
866 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
867 return _mm_min_sd(A, B);
870 __m64 test_mm_movepi64_pi64(__m128i A) {
871 // CHECK-LABEL: test_mm_movepi64_pi64
872 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
873 return _mm_movepi64_pi64(A);
875 TEST_CONSTEXPR(match_m64(_mm_movepi64_pi64((__m128i){8, -8}), 8ULL));
877 __m128i test_mm_movpi64_epi64(__m64 A) {
878 // CHECK-LABEL: test_mm_movpi64_epi64
879 // CHECK: shufflevector <1 x i64> %{{.*}}, <1 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
880 return _mm_movpi64_epi64(A);
882 TEST_CONSTEXPR(match_m128i(_mm_movpi64_epi64((__m64){5LL}), 5ULL, 0ULL));
884 __m128i test_mm_move_epi64(__m128i A) {
885 // CHECK-LABEL: test_mm_move_epi64
886 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
887 return _mm_move_epi64(A);
889 TEST_CONSTEXPR(match_m128i(_mm_move_epi64((__m128i){16LL, 15LL}), 16ULL, 0ULL));
891 __m128d test_mm_move_sd(__m128d A, __m128d B) {
892 // CHECK-LABEL: test_mm_move_sd
893 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
894 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
895 return _mm_move_sd(A, B);
897 TEST_CONSTEXPR(match_m128d(_mm_move_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -4.0, +8.0));
899 int test_mm_movemask_epi8(__m128i A) {
900 // CHECK-LABEL: test_mm_movemask_epi8
901 // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
902 return _mm_movemask_epi8(A);
905 int test_mm_movemask_pd(__m128d A) {
906 // CHECK-LABEL: test_mm_movemask_pd
907 // CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
908 return _mm_movemask_pd(A);
911 __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
912 // CHECK-LABEL: test_mm_mul_epu32
913 // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
914 // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
915 // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
916 return _mm_mul_epu32(A, B);
919 __m128d test_mm_mul_pd(__m128d A, __m128d B) {
920 // CHECK-LABEL: test_mm_mul_pd
921 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
922 return _mm_mul_pd(A, B);
924 TEST_CONSTEXPR(match_m128d(_mm_mul_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, +15.0));
926 __m128d test_mm_mul_sd(__m128d A, __m128d B) {
927 // CHECK-LABEL: test_mm_mul_sd
928 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
929 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
930 // CHECK: fmul double
931 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
932 return _mm_mul_sd(A, B);
934 TEST_CONSTEXPR(match_m128d(_mm_mul_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, -3.0));
936 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
937 // CHECK-LABEL: test_mm_mulhi_epi16
938 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
939 return _mm_mulhi_epi16(A, B);
942 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
943 // CHECK-LABEL: test_mm_mulhi_epu16
944 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
945 return _mm_mulhi_epu16(A, B);
948 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
949 // CHECK-LABEL: test_mm_mullo_epi16
950 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
951 return _mm_mullo_epi16(A, B);
954 __m128d test_mm_or_pd(__m128d A, __m128d B) {
955 // CHECK-LABEL: test_mm_or_pd
956 // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
957 return _mm_or_pd(A, B);
959 TEST_CONSTEXPR(match_m128d(_mm_or_pd((__m128d){+1.0, -3.0}, (__m128d){-0.0, +0.0}), -1.0, -3.0));
961 __m128i test_mm_or_si128(__m128i A, __m128i B) {
962 // CHECK-LABEL: test_mm_or_si128
963 // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
964 return _mm_or_si128(A, B);
967 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
968 // CHECK-LABEL: test_mm_packs_epi16
969 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
970 return _mm_packs_epi16(A, B);
973 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
974 // CHECK-LABEL: test_mm_packs_epi32
975 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
976 return _mm_packs_epi32(A, B);
979 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
980 // CHECK-LABEL: test_mm_packus_epi16
981 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
982 return _mm_packus_epi16(A, B);
985 void test_mm_pause(void) {
986 // CHECK-LABEL: test_mm_pause
987 // CHECK: call void @llvm.x86.sse2.pause()
988 _mm_pause(); // plain call, like test_mm_mfence: C forbids `return expr;` in a void function
991 __m128i test_mm_sad_epu8(__m128i A, __m128i B) {
992 // CHECK-LABEL: test_mm_sad_epu8
993 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
994 return _mm_sad_epu8(A, B);
997 __m128i test_mm_set_epi8(char A, char B, char C, char D,
998 char E, char F, char G, char H,
999 char I, char J, char K, char L,
1000 char M, char N, char O, char P) {
1001 // CHECK-LABEL: test_mm_set_epi8
1002 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
1003 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1004 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1005 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1006 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1007 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1008 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1009 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1010 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1011 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1012 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1013 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1014 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1015 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1016 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1017 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1018 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
1020 TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
1022 __m128i test_mm_set_epi16(short A, short B, short C, short D,
1023 short E, short F, short G, short H) {
1024 // CHECK-LABEL: test_mm_set_epi16
1025 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
1026 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1027 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1028 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1029 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1030 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1031 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1032 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1033 return _mm_set_epi16(A, B, C, D, E, F, G, H);
1035 TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0));
1037 __m128i test_mm_set_epi32(int A, int B, int C, int D) {
1038 // CHECK-LABEL: test_mm_set_epi32
1039 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
1040 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1041 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1042 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1043 return _mm_set_epi32(A, B, C, D);
1045 TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1));
1047 __m128i test_mm_set_epi64(__m64 A, __m64 B) {
1048 // CHECK-LABEL: test_mm_set_epi64
1049 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
1050 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1051 return _mm_set_epi64(A, B);
1053 TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1));
1055 __m128i test_mm_set_epi64x(long long A, long long B) {
1056 // CHECK-LABEL: test_mm_set_epi64x
1057 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
1058 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1059 return _mm_set_epi64x(A, B);
1061 TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100));
1063 __m128d test_mm_set_pd(double A, double B) {
1064 // CHECK-LABEL: test_mm_set_pd
1065 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
1066 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1067 return _mm_set_pd(A, B);
1069 TEST_CONSTEXPR(match_m128d(_mm_set_pd(-9.0, +3.0), +3.0, -9.0));
1071 __m128d test_mm_set_pd1(double A) {
1072 // CHECK-LABEL: test_mm_set_pd1
1073 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
1074 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1075 return _mm_set_pd1(A);
1077 TEST_CONSTEXPR(match_m128d(_mm_set_pd1(+5.0), +5.0, +5.0));
1079 __m128d test_mm_set_sd(double A) {
1080 // CHECK-LABEL: test_mm_set_sd
1081 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
1082 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
1083 return _mm_set_sd(A);
1085 TEST_CONSTEXPR(match_m128d(_mm_set_sd(+1.0), +1.0, +0.0));
1087 __m128i test_mm_set1_epi8(char A) {
1088 // CHECK-LABEL: test_mm_set1_epi8
1089 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
1090 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1091 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1092 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1093 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1094 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1095 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1096 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1097 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1098 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1099 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1100 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1101 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1102 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1103 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1104 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1105 return _mm_set1_epi8(A);
1107 TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99));
1109 __m128i test_mm_set1_epi16(short A) {
1110 // CHECK-LABEL: test_mm_set1_epi16
1111 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
1112 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1113 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1114 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1115 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1116 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1117 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1118 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1119 return _mm_set1_epi16(A);
1121 TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128));
1123 __m128i test_mm_set1_epi32(int A) {
1124 // CHECK-LABEL: test_mm_set1_epi32
1125 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
1126 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1127 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1128 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1129 return _mm_set1_epi32(A);
1131 TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55));
1133 __m128i test_mm_set1_epi64(__m64 A) {
1134 // CHECK-LABEL: test_mm_set1_epi64
1135 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
1136 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1137 return _mm_set1_epi64(A);
1139 TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535));
1141 __m128i test_mm_set1_epi64x(long long A) {
1142 // CHECK-LABEL: test_mm_set1_epi64x
1143 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
1144 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1145 return _mm_set1_epi64x(A);
1147 TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536));
1149 __m128d test_mm_set1_pd(double A) {
1150 // CHECK-LABEL: test_mm_set1_pd
1151 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
1152 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1153 return _mm_set1_pd(A);
1155 TEST_CONSTEXPR(match_m128d(_mm_set1_pd(-42.0), -42.0, -42.0));
1157 __m128i test_mm_setr_epi8(char A, char B, char C, char D,
1158 char E, char F, char G, char H,
1159 char I, char J, char K, char L,
1160 char M, char N, char O, char P) {
1161 // CHECK-LABEL: test_mm_setr_epi8
1162 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
1163 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1164 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1165 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1166 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1167 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1168 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1169 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1170 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1171 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1172 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1173 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1174 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1175 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1176 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1177 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1178 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
1180 TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
1182 __m128i test_mm_setr_epi16(short A, short B, short C, short D,
1183 short E, short F, short G, short H) {
1184 // CHECK-LABEL: test_mm_setr_epi16
1185 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
1186 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1187 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1188 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1189 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1190 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1191 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1192 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1193 return _mm_setr_epi16(A, B, C, D, E, F, G, H);
1195 TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7));
1197 __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
1198 // CHECK-LABEL: test_mm_setr_epi32
1199 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
1200 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1201 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1202 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1203 return _mm_setr_epi32(A, B, C, D);
1205 TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7));
1207 __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
1208 // CHECK-LABEL: test_mm_setr_epi64
1209 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
1210 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1211 return _mm_setr_epi64(A, B);
1213 TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42));
1215 __m128d test_mm_setr_pd(double A, double B) {
1216 // CHECK-LABEL: test_mm_setr_pd
1217 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
1218 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1219 return _mm_setr_pd(A, B);
1221 TEST_CONSTEXPR(match_m128d(_mm_setr_pd(-9.0, +3.0), -9.0, +3.0));
1223 __m128d test_mm_setzero_pd(void) {
1224 // CHECK-LABEL: test_mm_setzero_pd
1225 // CHECK: store <2 x double> zeroinitializer
1226 return _mm_setzero_pd();
1228 TEST_CONSTEXPR(match_m128d(_mm_setzero_pd(), +0.0, +0.0));
1230 __m128i test_mm_setzero_si128(void) {
1231 // CHECK-LABEL: test_mm_setzero_si128
1232 // CHECK: store <2 x i64> zeroinitializer
1233 return _mm_setzero_si128();
1235 TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0));
1237 __m128i test_mm_shuffle_epi32(__m128i A) {
1238 // CHECK-LABEL: test_mm_shuffle_epi32
1239 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
1240 return _mm_shuffle_epi32(A, 0);
1243 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
1244 // CHECK-LABEL: test_mm_shuffle_pd
1245 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
1246 return _mm_shuffle_pd(A, B, 1);
1249 __m128i test_mm_shufflehi_epi16(__m128i A) {
1250 // CHECK-LABEL: test_mm_shufflehi_epi16
1251 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
1252 return _mm_shufflehi_epi16(A, 0);
1255 __m128i test_mm_shufflelo_epi16(__m128i A) {
1256 // CHECK-LABEL: test_mm_shufflelo_epi16
1257 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
1258 return _mm_shufflelo_epi16(A, 0);
1261 __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
1262 // CHECK-LABEL: test_mm_sll_epi16
1263 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1264 return _mm_sll_epi16(A, B);
1267 __m128i test_mm_sll_epi32(__m128i A, __m128i B) {
1268 // CHECK-LABEL: test_mm_sll_epi32
1269 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1270 return _mm_sll_epi32(A, B);
1273 __m128i test_mm_sll_epi64(__m128i A, __m128i B) {
1274 // CHECK-LABEL: test_mm_sll_epi64
1275 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1276 return _mm_sll_epi64(A, B);
1279 __m128i test_mm_slli_epi16(__m128i A) {
1280 // CHECK-LABEL: test_mm_slli_epi16
1281 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1282 return _mm_slli_epi16(A, 1);
1285 __m128i test_mm_slli_epi16_1(__m128i A) {
1286 // CHECK-LABEL: test_mm_slli_epi16_1
1287 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1288 return _mm_slli_epi16(A, -1);
1291 __m128i test_mm_slli_epi16_2(__m128i A, int B) {
1292 // CHECK-LABEL: test_mm_slli_epi16_2
1293 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1294 return _mm_slli_epi16(A, B);
1297 __m128i test_mm_slli_epi32(__m128i A) {
1298 // CHECK-LABEL: test_mm_slli_epi32
1299 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1300 return _mm_slli_epi32(A, 1);
1303 __m128i test_mm_slli_epi32_1(__m128i A) {
1304 // CHECK-LABEL: test_mm_slli_epi32_1
1305 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1306 return _mm_slli_epi32(A, -1);
1309 __m128i test_mm_slli_epi32_2(__m128i A, int B) {
1310 // CHECK-LABEL: test_mm_slli_epi32_2
1311 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1312 return _mm_slli_epi32(A, B);
1315 __m128i test_mm_slli_epi64(__m128i A) {
1316 // CHECK-LABEL: test_mm_slli_epi64
1317 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1318 return _mm_slli_epi64(A, 1);
1321 __m128i test_mm_slli_epi64_1(__m128i A) {
1322 // CHECK-LABEL: test_mm_slli_epi64_1
1323 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1324 return _mm_slli_epi64(A, -1);
1327 __m128i test_mm_slli_epi64_2(__m128i A, int B) {
1328 // CHECK-LABEL: test_mm_slli_epi64_2
1329 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1330 return _mm_slli_epi64(A, B);
1333 __m128i test_mm_slli_si128(__m128i A) {
1334 // CHECK-LABEL: test_mm_slli_si128
1335 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
1336 return _mm_slli_si128(A, 5);
1339 __m128i test_mm_slli_si128_2(__m128i A) {
1340 // CHECK-LABEL: test_mm_slli_si128_2
1341 // CHECK: ret <2 x i64> zeroinitializer
1342 return _mm_slli_si128(A, 17);
1345 __m128d test_mm_sqrt_pd(__m128d A) {
1346 // CHECK-LABEL: test_mm_sqrt_pd
1347 // CHECK: call {{.*}}<2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
1348 return _mm_sqrt_pd(A);
1351 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
1352 // CHECK-LABEL: test_mm_sqrt_sd
1353 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
1354 // CHECK: call double @llvm.sqrt.f64(double {{.*}})
1355 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
1356 return _mm_sqrt_sd(A, B);
1359 __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
1360 // CHECK-LABEL: test_mm_sra_epi16
1361 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1362 return _mm_sra_epi16(A, B);
1365 __m128i test_mm_sra_epi32(__m128i A, __m128i B) {
1366 // CHECK-LABEL: test_mm_sra_epi32
1367 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1368 return _mm_sra_epi32(A, B);
1371 __m128i test_mm_srai_epi16(__m128i A) {
1372 // CHECK-LABEL: test_mm_srai_epi16
1373 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1374 return _mm_srai_epi16(A, 1);
1377 __m128i test_mm_srai_epi16_1(__m128i A) {
1378 // CHECK-LABEL: test_mm_srai_epi16_1
1379 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1380 return _mm_srai_epi16(A, -1);
1383 __m128i test_mm_srai_epi16_2(__m128i A, int B) {
1384 // CHECK-LABEL: test_mm_srai_epi16_2
1385 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1386 return _mm_srai_epi16(A, B);
1389 __m128i test_mm_srai_epi32(__m128i A) {
1390 // CHECK-LABEL: test_mm_srai_epi32
1391 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1392 return _mm_srai_epi32(A, 1);
1395 __m128i test_mm_srai_epi32_1(__m128i A) {
1396 // CHECK-LABEL: test_mm_srai_epi32_1
1397 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1398 return _mm_srai_epi32(A, -1);
1401 __m128i test_mm_srai_epi32_2(__m128i A, int B) {
1402 // CHECK-LABEL: test_mm_srai_epi32_2
1403 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1404 return _mm_srai_epi32(A, B);
1407 __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
1408 // CHECK-LABEL: test_mm_srl_epi16
1409 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1410 return _mm_srl_epi16(A, B);
1413 __m128i test_mm_srl_epi32(__m128i A, __m128i B) {
1414 // CHECK-LABEL: test_mm_srl_epi32
1415 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1416 return _mm_srl_epi32(A, B);
1419 __m128i test_mm_srl_epi64(__m128i A, __m128i B) {
1420 // CHECK-LABEL: test_mm_srl_epi64
1421 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1422 return _mm_srl_epi64(A, B);
1425 __m128i test_mm_srli_epi16(__m128i A) {
1426 // CHECK-LABEL: test_mm_srli_epi16
1427 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1428 return _mm_srli_epi16(A, 1);
1431 __m128i test_mm_srli_epi16_1(__m128i A) {
1432 // CHECK-LABEL: test_mm_srli_epi16_1
1433 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1434 return _mm_srli_epi16(A, -1);
1437 __m128i test_mm_srli_epi16_2(__m128i A, int B) {
1438 // CHECK-LABEL: test_mm_srli_epi16_2
1439 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1440 return _mm_srli_epi16(A, B);
1443 __m128i test_mm_srli_epi32(__m128i A) {
1444 // CHECK-LABEL: test_mm_srli_epi32
1445 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1446 return _mm_srli_epi32(A, 1);
1449 __m128i test_mm_srli_epi32_1(__m128i A) {
1450 // CHECK-LABEL: test_mm_srli_epi32_1
1451 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1452 return _mm_srli_epi32(A, -1);
1455 __m128i test_mm_srli_epi32_2(__m128i A, int B) {
1456 // CHECK-LABEL: test_mm_srli_epi32_2
1457 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1458 return _mm_srli_epi32(A, B);
1461 __m128i test_mm_srli_epi64(__m128i A) {
1462 // CHECK-LABEL: test_mm_srli_epi64
1463 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1464 return _mm_srli_epi64(A, 1);
1467 __m128i test_mm_srli_epi64_1(__m128i A) {
1468 // CHECK-LABEL: test_mm_srli_epi64_1
1469 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1470 return _mm_srli_epi64(A, -1);
1473 __m128i test_mm_srli_epi64_2(__m128i A, int B) {
1474 // CHECK-LABEL: test_mm_srli_epi64_2
1475 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1476 return _mm_srli_epi64(A, B);
1479 __m128i test_mm_srli_si128(__m128i A) {
1480 // CHECK-LABEL: test_mm_srli_si128
1481 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
1482 return _mm_srli_si128(A, 5);
1485 __m128i test_mm_srli_si128_2(__m128i A) {
1486 // CHECK-LABEL: test_mm_srli_si128_2
1487 // CHECK: ret <2 x i64> zeroinitializer
1488 return _mm_srli_si128(A, 17);
1491 void test_mm_store_pd(double* A, __m128d B) {
1492 // CHECK-LABEL: test_mm_store_pd
1493 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16
1494 _mm_store_pd(A, B);
1497 void test_mm_store_pd1(double* x, __m128d y) {
1498 // CHECK-LABEL: test_mm_store_pd1
1499 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1500 // CHECK: store <2 x double> %{{.*}}, ptr {{.*}}, align 16
1501 _mm_store_pd1(x, y);
1504 void test_mm_store_sd(double* A, __m128d B) {
1505 // CHECK-LABEL: test_mm_store_sd
1506 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1507 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1508 _mm_store_sd(A, B);
1511 void test_mm_store_si128(__m128i* A, __m128i B) {
1512 // CHECK-LABEL: test_mm_store_si128
1513 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16
1514 _mm_store_si128(A, B);
1517 void test_mm_store1_pd(double* x, __m128d y) {
1518 // CHECK-LABEL: test_mm_store1_pd
1519 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1520 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16
1521 _mm_store1_pd(x, y);
1524 void test_mm_storeh_pd(double* A, __m128d B) {
1525 // CHECK-LABEL: test_mm_storeh_pd
1526 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
1527 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1528 _mm_storeh_pd(A, B);
1531 void test_mm_storel_epi64(__m128i x, void* y) {
1532 // CHECK-LABEL: test_mm_storel_epi64
1533 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
1534 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}}
1535 _mm_storel_epi64((__m128i_u*)y, x);
1538 void test_mm_storel_pd(double* A, __m128d B) {
1539 // CHECK-LABEL: test_mm_storel_pd
1540 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1541 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1542 _mm_storel_pd(A, B);
1545 void test_mm_storer_pd(__m128d A, double* B) {
1546 // CHECK-LABEL: test_mm_storer_pd
1547 // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
1548 // CHECK: store {{.*}} ptr {{.*}}, align 16{{$}}
1549 _mm_storer_pd(B, A);
1552 void test_mm_storeu_pd(double* A, __m128d B) {
1553 // CHECK-LABEL: test_mm_storeu_pd
1554 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}}
1555 // CHECK-NEXT: ret void
1556 _mm_storeu_pd(A, B);
1559 void test_mm_storeu_si128(__m128i* A, __m128i B) {
1560 // CHECK-LABEL: test_mm_storeu_si128
1561 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}}
1562 // CHECK-NEXT: ret void
1563 _mm_storeu_si128(A, B);
1566 void test_mm_storeu_si64(void* A, __m128i B) {
1567 // CHECK-LABEL: test_mm_storeu_si64
1568 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
1569 // CHECK: store i64 [[EXT]], ptr %{{.*}}, align 1{{$}}
1570 // CHECK-NEXT: ret void
1571 _mm_storeu_si64(A, B);
1574 void test_mm_storeu_si32(void* A, __m128i B) {
1575 // CHECK-LABEL: test_mm_storeu_si32
1576 // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
1577 // CHECK: store i32 [[EXT]], ptr %{{.*}}, align 1{{$}}
1578 // CHECK-NEXT: ret void
1579 _mm_storeu_si32(A, B);
1582 void test_mm_storeu_si16(void* A, __m128i B) {
1583 // CHECK-LABEL: test_mm_storeu_si16
1584 // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
1585 // CHECK: store i16 [[EXT]], ptr %{{.*}}, align 1{{$}}
1586 // CHECK-NEXT: ret void
1587 _mm_storeu_si16(A, B);
1590 void test_mm_stream_pd(double *A, __m128d B) {
1591 // CHECK-LABEL: test_mm_stream_pd
1592 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1593 _mm_stream_pd(A, B);
1596 void test_mm_stream_pd_void(void *A, __m128d B) {
1597 // CHECK-LABEL: test_mm_stream_pd_void
1598 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1599 _mm_stream_pd(A, B);
1602 void test_mm_stream_si32(int *A, int B) {
1603 // CHECK-LABEL: test_mm_stream_si32
1604 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1605 _mm_stream_si32(A, B);
1608 void test_mm_stream_si32_void(void *A, int B) {
1609 // CHECK-LABEL: test_mm_stream_si32_void
1610 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1611 _mm_stream_si32(A, B);
1614 #ifdef __x86_64__
1615 void test_mm_stream_si64(long long *A, long long B) {
1616 // X64-LABEL: test_mm_stream_si64
1617 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1618 _mm_stream_si64(A, B);
1621 void test_mm_stream_si64_void(void *A, long long B) {
1622 // X64-LABEL: test_mm_stream_si64_void
1623 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1624 _mm_stream_si64(A, B);
1626 #endif
// Checks that _mm_stream_si128, called with a `__m128i *` destination, becomes
// a 16-byte-aligned <2 x i64> store carrying !nontemporal metadata.
1628 void test_mm_stream_si128(__m128i *A, __m128i B) {
1629 // CHECK-LABEL: test_mm_stream_si128
1630 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1631 _mm_stream_si128(A, B);
// Same expectation as test_mm_stream_si128, but the destination is passed as
// `void *`: a 16-byte-aligned <2 x i64> store with !nontemporal metadata.
1634 void test_mm_stream_si128_void(void *A, __m128i B) {
1635 // CHECK-LABEL: test_mm_stream_si128_void
1636 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1637 _mm_stream_si128(A, B);
// Checks that _mm_sub_epi8 is emitted as a plain `sub` on <16 x i8>.
1640 __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
1641 // CHECK-LABEL: test_mm_sub_epi8
1642 // CHECK: sub <16 x i8>
1643 return _mm_sub_epi8(A, B);
// Checks that _mm_sub_epi16 is emitted as a plain `sub` on <8 x i16>.
1646 __m128i test_mm_sub_epi16(__m128i A, __m128i B) {
1647 // CHECK-LABEL: test_mm_sub_epi16
1648 // CHECK: sub <8 x i16>
1649 return _mm_sub_epi16(A, B);
// Checks that _mm_sub_epi32 is emitted as a plain `sub` on <4 x i32>.
1652 __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
1653 // CHECK-LABEL: test_mm_sub_epi32
1654 // CHECK: sub <4 x i32>
1655 return _mm_sub_epi32(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_v4si are defined outside
// this chunk; presumably a compile-time assertion): lane-wise
// {1, -2, 3, -4} - {-10, 8, 6, -4} must equal {11, -10, -3, 0}.
1657 TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0));
// Checks that _mm_sub_epi64 is emitted as a plain `sub` on <2 x i64>.
1659 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
1660 // CHECK-LABEL: test_mm_sub_epi64
1661 // CHECK: sub <2 x i64>
1662 return _mm_sub_epi64(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_v2di are defined outside
// this chunk; presumably a compile-time assertion): lane-wise
// {5, -3} - {-9, 8} must equal {14, -11}.
1664 TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11));
// Checks that _mm_sub_pd is emitted as an `fsub` on <2 x double>.
1666 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
1667 // CHECK-LABEL: test_mm_sub_pd
1668 // CHECK: fsub <2 x double>
1669 return _mm_sub_pd(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_m128d are defined outside
// this chunk; presumably a compile-time assertion): lane-wise
// {1.0, -3.0} - {5.0, -5.0} must equal {-4.0, 2.0}.
1671 TEST_CONSTEXPR(match_m128d(_mm_sub_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, +2.0));
// Checks the scalar lowering of _mm_sub_sd: lane 0 is extracted from two
// <2 x double> values, subtracted with a scalar `fsub double`, and the result
// is inserted back into lane 0 of a <2 x double>.
1673 __m128d test_mm_sub_sd(__m128d A, __m128d B) {
1674 // CHECK-LABEL: test_mm_sub_sd
1675 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1676 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1677 // CHECK: fsub double
1678 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
1679 return _mm_sub_sd(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_m128d are defined outside
// this chunk; presumably a compile-time assertion): only lane 0 is subtracted
// (1.0 - 5.0 = -4.0); the expected upper lane is -3.0, the same value as the
// first operand's upper lane.
1681 TEST_CONSTEXPR(match_m128d(_mm_sub_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, -3.0));
// Checks that _mm_subs_epi8 is emitted as a call to the generic
// @llvm.ssub.sat.v16i8 intrinsic on two <16 x i8> operands.
1683 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
1684 // CHECK-LABEL: test_mm_subs_epi8
1685 // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1686 return _mm_subs_epi8(A, B);
// Checks that _mm_subs_epi16 is emitted as a call to the generic
// @llvm.ssub.sat.v8i16 intrinsic on two <8 x i16> operands.
1689 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
1690 // CHECK-LABEL: test_mm_subs_epi16
1691 // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1692 return _mm_subs_epi16(A, B);
// Checks that _mm_subs_epu8 is emitted as a call to the generic
// @llvm.usub.sat.v16i8 intrinsic, and that no call to the target-specific
// @llvm.x86.sse2.psubus.b intrinsic appears before it.
1695 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
1696 // CHECK-LABEL: test_mm_subs_epu8
1697 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1698 // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1699 return _mm_subs_epu8(A, B);
// Checks that _mm_subs_epu16 is emitted as a call to the generic
// @llvm.usub.sat.v8i16 intrinsic, and that no call to the target-specific
// @llvm.x86.sse2.psubus.w intrinsic appears before it.
1702 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
1703 // CHECK-LABEL: test_mm_subs_epu16
1704 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1705 // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1706 return _mm_subs_epu16(A, B);
// Checks that _mm_ucomieq_sd is emitted as a call to @llvm.x86.sse2.ucomieq.sd
// returning i32. The `{{.*}}` after `call` tolerates arbitrary text before the
// return type.
1709 int test_mm_ucomieq_sd(__m128d A, __m128d B) {
1710 // CHECK-LABEL: test_mm_ucomieq_sd
1711 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1712 return _mm_ucomieq_sd(A, B);
// Checks that _mm_ucomige_sd is emitted as a call to @llvm.x86.sse2.ucomige.sd
// returning i32. The `{{.*}}` after `call` tolerates arbitrary text before the
// return type.
1715 int test_mm_ucomige_sd(__m128d A, __m128d B) {
1716 // CHECK-LABEL: test_mm_ucomige_sd
1717 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1718 return _mm_ucomige_sd(A, B);
// Checks that _mm_ucomigt_sd is emitted as a call to @llvm.x86.sse2.ucomigt.sd
// returning i32. The `{{.*}}` after `call` tolerates arbitrary text before the
// return type.
1721 int test_mm_ucomigt_sd(__m128d A, __m128d B) {
1722 // CHECK-LABEL: test_mm_ucomigt_sd
1723 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1724 return _mm_ucomigt_sd(A, B);
// Checks that _mm_ucomile_sd is emitted as a call to @llvm.x86.sse2.ucomile.sd
// returning i32. The `{{.*}}` after `call` tolerates arbitrary text before the
// return type.
1727 int test_mm_ucomile_sd(__m128d A, __m128d B) {
1728 // CHECK-LABEL: test_mm_ucomile_sd
1729 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1730 return _mm_ucomile_sd(A, B);
// Checks that _mm_ucomilt_sd is emitted as a call to @llvm.x86.sse2.ucomilt.sd
// returning i32. The `{{.*}}` after `call` tolerates arbitrary text before the
// return type.
1733 int test_mm_ucomilt_sd(__m128d A, __m128d B) {
1734 // CHECK-LABEL: test_mm_ucomilt_sd
1735 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1736 return _mm_ucomilt_sd(A, B);
// Checks that _mm_ucomineq_sd is emitted as a call to
// @llvm.x86.sse2.ucomineq.sd returning i32. The `{{.*}}` after `call`
// tolerates arbitrary text before the return type.
1739 int test_mm_ucomineq_sd(__m128d A, __m128d B) {
1740 // CHECK-LABEL: test_mm_ucomineq_sd
1741 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1742 return _mm_ucomineq_sd(A, B);
// Checks that _mm_undefined_pd yields an all-zero <2 x double>. The expected
// IR differs per target prefix: the 64-bit runs expect the zero vector to be
// returned directly, while the 32-bit runs expect it to be written with a
// store.
1745 __m128d test_mm_undefined_pd(void) {
1746 // X64-LABEL: test_mm_undefined_pd
1747 // X64: ret <2 x double> zeroinitializer
1749 // X86-LABEL: test_mm_undefined_pd
1750 // X86: store <2 x double> zeroinitializer
1751 return _mm_undefined_pd();
// Checks that _mm_undefined_si128 returns an all-zero <2 x i64> on every
// tested target (shared prefix, no per-target split).
1754 __m128i test_mm_undefined_si128(void) {
1755 // CHECK-LABEL: test_mm_undefined_si128
1756 // CHECK: ret <2 x i64> zeroinitializer
1757 return _mm_undefined_si128();
// Checks that _mm_unpackhi_epi8 is emitted as a shufflevector on <16 x i8>
// whose mask alternates indices 8..15 with indices 24..31.
1760 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
1761 // CHECK-LABEL: test_mm_unpackhi_epi8
1762 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1763 return _mm_unpackhi_epi8(A, B);
// Checks that _mm_unpackhi_epi16 is emitted as a shufflevector on <8 x i16>
// whose mask alternates indices 4..7 with indices 12..15.
1766 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
1767 // CHECK-LABEL: test_mm_unpackhi_epi16
1768 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1769 return _mm_unpackhi_epi16(A, B);
// Checks that _mm_unpackhi_epi32 is emitted as a shufflevector on <4 x i32>
// with mask <2, 6, 3, 7>.
1772 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
1773 // CHECK-LABEL: test_mm_unpackhi_epi32
1774 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1775 return _mm_unpackhi_epi32(A, B);
// Checks that _mm_unpackhi_epi64 is emitted as a shufflevector on <2 x i64>
// with mask <1, 3>.
1778 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
1779 // CHECK-LABEL: test_mm_unpackhi_epi64
1780 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
1781 return _mm_unpackhi_epi64(A, B);
// Checks that _mm_unpackhi_pd is emitted as a shufflevector on <2 x double>
// with mask <1, 3>.
1784 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
1785 // CHECK-LABEL: test_mm_unpackhi_pd
1786 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
1787 return _mm_unpackhi_pd(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_m128d are defined outside
// this chunk; presumably a compile-time assertion): the result must pair the
// upper lane of each operand, {8.0, -2.0}.
1789 TEST_CONSTEXPR(match_m128d(_mm_unpackhi_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +8.0, -2.0));
// Checks that _mm_unpacklo_epi8 is emitted as a shufflevector on <16 x i8>
// whose mask alternates indices 0..7 with indices 16..23.
1791 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
1792 // CHECK-LABEL: test_mm_unpacklo_epi8
1793 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1794 return _mm_unpacklo_epi8(A, B);
// Checks that _mm_unpacklo_epi16 is emitted as a shufflevector on <8 x i16>
// whose mask alternates indices 0..3 with indices 8..11.
1797 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
1798 // CHECK-LABEL: test_mm_unpacklo_epi16
1799 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1800 return _mm_unpacklo_epi16(A, B);
// Checks that _mm_unpacklo_epi32 is emitted as a shufflevector on <4 x i32>
// with mask <0, 4, 1, 5>.
1803 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
1804 // CHECK-LABEL: test_mm_unpacklo_epi32
1805 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1806 return _mm_unpacklo_epi32(A, B);
// Checks that _mm_unpacklo_epi64 is emitted as a shufflevector on <2 x i64>
// with mask <0, 2>.
1809 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
1810 // CHECK-LABEL: test_mm_unpacklo_epi64
1811 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
1812 return _mm_unpacklo_epi64(A, B);
// Checks that _mm_unpacklo_pd is emitted as a shufflevector on <2 x double>
// with mask <0, 2>.
1815 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
1816 // CHECK-LABEL: test_mm_unpacklo_pd
1817 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
1818 return _mm_unpacklo_pd(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_m128d are defined outside
// this chunk; presumably a compile-time assertion): the result must pair the
// lower lane of each operand, {2.0, -4.0}.
1820 TEST_CONSTEXPR(match_m128d(_mm_unpacklo_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +2.0, -4.0));
// Checks that _mm_xor_pd, although it takes and returns __m128d, is emitted as
// an integer `xor` on <2 x i64>.
1822 __m128d test_mm_xor_pd(__m128d A, __m128d B) {
1823 // CHECK-LABEL: test_mm_xor_pd
1824 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1825 return _mm_xor_pd(A, B);
// Constant-evaluation check (TEST_CONSTEXPR and match_m128d are defined outside
// this chunk; presumably a compile-time assertion): xor with +0.0 leaves +1.0
// unchanged, and xor with -0.0 turns -3.0 into +3.0.
1827 TEST_CONSTEXPR(match_m128d(_mm_xor_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +1.0, +3.0));
// Checks that _mm_xor_si128 is emitted as an `xor` on <2 x i64>.
1829 __m128i test_mm_xor_si128(__m128i A, __m128i B) {
1830 // CHECK-LABEL: test_mm_xor_si128
1831 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1832 return _mm_xor_si128(A, B);