Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / sse2-builtins.c
blob7165d2791827cfc4d477797bfadf080060170061
1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
3 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
4 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
5 // RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
8 #include <immintrin.h>
10 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
// Expects an `add <16 x i8>` instruction in the IR emitted for _mm_add_epi8.
12 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
13 // CHECK-LABEL: test_mm_add_epi8
14 // CHECK: add <16 x i8>
15 return _mm_add_epi8(A, B);
// Expects an `add <8 x i16>` instruction in the IR emitted for _mm_add_epi16.
18 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
19 // CHECK-LABEL: test_mm_add_epi16
20 // CHECK: add <8 x i16>
21 return _mm_add_epi16(A, B);
// Expects an `add <4 x i32>` instruction in the IR emitted for _mm_add_epi32.
24 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
25 // CHECK-LABEL: test_mm_add_epi32
26 // CHECK: add <4 x i32>
27 return _mm_add_epi32(A, B);
// Expects an `add <2 x i64>` instruction in the IR emitted for _mm_add_epi64.
30 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
31 // CHECK-LABEL: test_mm_add_epi64
32 // CHECK: add <2 x i64>
33 return _mm_add_epi64(A, B);
// Expects an `fadd <2 x double>` instruction in the IR emitted for _mm_add_pd.
36 __m128d test_mm_add_pd(__m128d A, __m128d B) {
37 // CHECK-LABEL: test_mm_add_pd
38 // CHECK: fadd <2 x double>
39 return _mm_add_pd(A, B);
// Expects _mm_add_sd to extract element 0 twice, add the scalars with `fadd double`,
// and insert the result back into element 0 of a <2 x double>.
42 __m128d test_mm_add_sd(__m128d A, __m128d B) {
43 // CHECK-LABEL: test_mm_add_sd
44 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
45 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
46 // CHECK: fadd double
47 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
48 return _mm_add_sd(A, B);
// Expects _mm_adds_epi8 to be emitted as a call to the generic @llvm.sadd.sat.v16i8 intrinsic.
51 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
52 // CHECK-LABEL: test_mm_adds_epi8
53 // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
54 return _mm_adds_epi8(A, B);
// Expects _mm_adds_epi16 to be emitted as a call to the generic @llvm.sadd.sat.v8i16 intrinsic.
57 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
58 // CHECK-LABEL: test_mm_adds_epi16
59 // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
60 return _mm_adds_epi16(A, B);
// Expects _mm_adds_epu8 to call the generic @llvm.uadd.sat.v16i8 intrinsic, and forbids
// a call to the x86-specific paddus.b intrinsic from appearing before it.
63 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
64 // CHECK-LABEL: test_mm_adds_epu8
65 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
66 // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
67 return _mm_adds_epu8(A, B);
// Expects _mm_adds_epu16 to call the generic @llvm.uadd.sat.v8i16 intrinsic, and forbids
// a call to the x86-specific paddus.w intrinsic from appearing before it.
70 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
71 // CHECK-LABEL: test_mm_adds_epu16
72 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
73 // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
74 return _mm_adds_epu16(A, B);
// Expects _mm_and_pd to be emitted as an integer `and <2 x i64>` on the double vectors' bits.
77 __m128d test_mm_and_pd(__m128d A, __m128d B) {
78 // CHECK-LABEL: test_mm_and_pd
79 // CHECK: and <2 x i64>
80 return _mm_and_pd(A, B);
// Expects an `and <2 x i64>` instruction in the IR emitted for _mm_and_si128.
83 __m128i test_mm_and_si128(__m128i A, __m128i B) {
84 // CHECK-LABEL: test_mm_and_si128
85 // CHECK: and <2 x i64>
86 return _mm_and_si128(A, B);
// Expects _mm_andnot_pd to be emitted as an xor with all-ones (bitwise NOT) followed by
// an `and <2 x i64>`.
89 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
90 // CHECK-LABEL: test_mm_andnot_pd
91 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
92 // CHECK: and <2 x i64>
93 return _mm_andnot_pd(A, B);
// Expects _mm_andnot_si128 to be emitted as an xor with all-ones (bitwise NOT) followed by
// an `and <2 x i64>`.
96 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
97 // CHECK-LABEL: test_mm_andnot_si128
98 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
99 // CHECK: and <2 x i64>
100 return _mm_andnot_si128(A, B);
// Expects _mm_avg_epu8 to be emitted as a call to @llvm.x86.sse2.pavg.b.
103 __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
104 // CHECK-LABEL: test_mm_avg_epu8
105 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
106 return _mm_avg_epu8(A, B);
// Expects _mm_avg_epu16 to be emitted as a call to @llvm.x86.sse2.pavg.w.
109 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
110 // CHECK-LABEL: test_mm_avg_epu16
111 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
112 return _mm_avg_epu16(A, B);
// Expects _mm_bslli_si128 with a shift count of 5 to be emitted as a shufflevector of a
// zero vector (first operand) with the input, selecting indices 11 through 26.
115 __m128i test_mm_bslli_si128(__m128i A) {
116 // CHECK-LABEL: test_mm_bslli_si128
117 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
118 return _mm_bslli_si128(A, 5);
// Expects _mm_bsrli_si128 with a shift count of 5 to be emitted as a shufflevector of the
// input with a zero vector (second operand), selecting indices 5 through 20.
121 __m128i test_mm_bsrli_si128(__m128i A) {
122 // CHECK-LABEL: test_mm_bsrli_si128
123 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
124 return _mm_bsrli_si128(A, 5);
// Only the function label is matched for _mm_castpd_ps; no instruction pattern is asserted.
127 __m128 test_mm_castpd_ps(__m128d A) {
128 // CHECK-LABEL: test_mm_castpd_ps
129 return _mm_castpd_ps(A);
// Only the function label is matched for _mm_castpd_si128; no instruction pattern is asserted.
132 __m128i test_mm_castpd_si128(__m128d A) {
133 // CHECK-LABEL: test_mm_castpd_si128
134 return _mm_castpd_si128(A);
// Only the function label is matched for _mm_castps_pd; no instruction pattern is asserted.
137 __m128d test_mm_castps_pd(__m128 A) {
138 // CHECK-LABEL: test_mm_castps_pd
139 return _mm_castps_pd(A);
// Only the function label is matched for _mm_castps_si128; no instruction pattern is asserted.
142 __m128i test_mm_castps_si128(__m128 A) {
143 // CHECK-LABEL: test_mm_castps_si128
144 return _mm_castps_si128(A);
// Only the function label is matched for _mm_castsi128_pd; no instruction pattern is asserted.
147 __m128d test_mm_castsi128_pd(__m128i A) {
148 // CHECK-LABEL: test_mm_castsi128_pd
149 return _mm_castsi128_pd(A);
// Only the function label is matched for _mm_castsi128_ps; no instruction pattern is asserted.
152 __m128 test_mm_castsi128_ps(__m128i A) {
153 // CHECK-LABEL: test_mm_castsi128_ps
154 return _mm_castsi128_ps(A);
// Expects _mm_clflush to be emitted as a call to @llvm.x86.sse2.clflush taking a pointer.
157 void test_mm_clflush(void* A) {
158 // CHECK-LABEL: test_mm_clflush
159 // CHECK: call void @llvm.x86.sse2.clflush(ptr %{{.*}})
160 _mm_clflush(A);
// Expects an `icmp eq <16 x i8>` in the IR emitted for _mm_cmpeq_epi8.
163 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
164 // CHECK-LABEL: test_mm_cmpeq_epi8
165 // CHECK: icmp eq <16 x i8>
166 return _mm_cmpeq_epi8(A, B);
// Expects an `icmp eq <8 x i16>` in the IR emitted for _mm_cmpeq_epi16.
169 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
170 // CHECK-LABEL: test_mm_cmpeq_epi16
171 // CHECK: icmp eq <8 x i16>
172 return _mm_cmpeq_epi16(A, B);
// Expects an `icmp eq <4 x i32>` in the IR emitted for _mm_cmpeq_epi32.
175 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
176 // CHECK-LABEL: test_mm_cmpeq_epi32
177 // CHECK: icmp eq <4 x i32>
178 return _mm_cmpeq_epi32(A, B);
// Expects _mm_cmpeq_pd to emit `fcmp oeq`, immediately followed by a sign-extension of
// the <2 x i1> result to <2 x i64> and a bitcast back to <2 x double>.
181 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
182 // CHECK-LABEL: test_mm_cmpeq_pd
183 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
184 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
185 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
186 return _mm_cmpeq_pd(A, B);
// Expects _mm_cmpeq_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 0.
189 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
190 // CHECK-LABEL: test_mm_cmpeq_sd
191 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
192 return _mm_cmpeq_sd(A, B);
// Expects _mm_cmpge_pd to emit `fcmp ole` (not oge), immediately followed by sext and bitcast.
// NOTE(review): presumably the header implements "ge" as "le" with swapped operands - confirm.
195 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
196 // CHECK-LABEL: test_mm_cmpge_pd
197 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
198 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
199 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
200 return _mm_cmpge_pd(A, B);
// Expects _mm_cmpge_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 2, then
// rebuild the result by inserting element 0 and element 1 into a fresh vector.
// NOTE(review): presumably the operands are swapped and the upper lane restored - confirm.
203 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
204 // CHECK-LABEL: test_mm_cmpge_sd
205 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
206 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
207 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
208 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
209 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
210 return _mm_cmpge_sd(A, B);
// Expects an `icmp sgt <16 x i8>` in the IR emitted for _mm_cmpgt_epi8.
213 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
214 // CHECK-LABEL: test_mm_cmpgt_epi8
215 // CHECK: icmp sgt <16 x i8>
216 return _mm_cmpgt_epi8(A, B);
// Expects an `icmp sgt <8 x i16>` in the IR emitted for _mm_cmpgt_epi16.
219 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
220 // CHECK-LABEL: test_mm_cmpgt_epi16
221 // CHECK: icmp sgt <8 x i16>
222 return _mm_cmpgt_epi16(A, B);
// Expects an `icmp sgt <4 x i32>` in the IR emitted for _mm_cmpgt_epi32.
225 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
226 // CHECK-LABEL: test_mm_cmpgt_epi32
227 // CHECK: icmp sgt <4 x i32>
228 return _mm_cmpgt_epi32(A, B);
// Expects _mm_cmpgt_pd to emit `fcmp olt` (not ogt), immediately followed by sext and bitcast.
// NOTE(review): presumably the header implements "gt" as "lt" with swapped operands - confirm.
231 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
232 // CHECK-LABEL: test_mm_cmpgt_pd
233 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
234 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
235 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
236 return _mm_cmpgt_pd(A, B);
// Expects _mm_cmpgt_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 1, then
// rebuild the result by inserting element 0 and element 1 into a fresh vector.
// NOTE(review): presumably the operands are swapped and the upper lane restored - confirm.
239 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
240 // CHECK-LABEL: test_mm_cmpgt_sd
241 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
242 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
243 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
244 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
245 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
246 return _mm_cmpgt_sd(A, B);
// Expects _mm_cmple_pd to emit `fcmp ole`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
249 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
250 // CHECK-LABEL: test_mm_cmple_pd
251 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
252 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
253 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
254 return _mm_cmple_pd(A, B);
// Expects _mm_cmple_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 2.
257 __m128d test_mm_cmple_sd(__m128d A, __m128d B) {
258 // CHECK-LABEL: test_mm_cmple_sd
259 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
260 return _mm_cmple_sd(A, B);
// Expects _mm_cmplt_epi8 to emit `icmp sgt <16 x i8>` (not slt).
// NOTE(review): presumably the header implements "lt" as "gt" with swapped operands - confirm.
263 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
264 // CHECK-LABEL: test_mm_cmplt_epi8
265 // CHECK: icmp sgt <16 x i8>
266 return _mm_cmplt_epi8(A, B);
// Expects _mm_cmplt_epi16 to emit `icmp sgt <8 x i16>` (not slt).
// NOTE(review): presumably the header implements "lt" as "gt" with swapped operands - confirm.
269 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
270 // CHECK-LABEL: test_mm_cmplt_epi16
271 // CHECK: icmp sgt <8 x i16>
272 return _mm_cmplt_epi16(A, B);
// Expects _mm_cmplt_epi32 to emit `icmp sgt <4 x i32>` (not slt).
// NOTE(review): presumably the header implements "lt" as "gt" with swapped operands - confirm.
275 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
276 // CHECK-LABEL: test_mm_cmplt_epi32
277 // CHECK: icmp sgt <4 x i32>
278 return _mm_cmplt_epi32(A, B);
// Expects _mm_cmplt_pd to emit `fcmp olt`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
281 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
282 // CHECK-LABEL: test_mm_cmplt_pd
283 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
284 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
285 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
286 return _mm_cmplt_pd(A, B);
// Expects _mm_cmplt_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 1.
289 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
290 // CHECK-LABEL: test_mm_cmplt_sd
291 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
292 return _mm_cmplt_sd(A, B);
// Expects _mm_cmpneq_pd to emit `fcmp une`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
295 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
296 // CHECK-LABEL: test_mm_cmpneq_pd
297 // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
298 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
299 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
300 return _mm_cmpneq_pd(A, B);
// Expects _mm_cmpneq_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 4.
303 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
304 // CHECK-LABEL: test_mm_cmpneq_sd
305 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
306 return _mm_cmpneq_sd(A, B);
// Expects _mm_cmpnge_pd to emit `fcmp ugt`, immediately followed by sext and bitcast.
// NOTE(review): presumably "not ge" is implemented as "not le" with swapped operands - confirm.
309 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
310 // CHECK-LABEL: test_mm_cmpnge_pd
311 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
312 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
313 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
314 return _mm_cmpnge_pd(A, B);
// Expects _mm_cmpnge_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 6, then
// rebuild the result by inserting element 0 and element 1 into a fresh vector.
// NOTE(review): presumably the operands are swapped and the upper lane restored - confirm.
317 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
318 // CHECK-LABEL: test_mm_cmpnge_sd
319 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
320 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
321 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
322 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
323 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
324 return _mm_cmpnge_sd(A, B);
// Expects _mm_cmpngt_pd to emit `fcmp uge`, immediately followed by sext and bitcast.
// NOTE(review): presumably "not gt" is implemented as "not lt" with swapped operands - confirm.
327 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
328 // CHECK-LABEL: test_mm_cmpngt_pd
329 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
330 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
331 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
332 return _mm_cmpngt_pd(A, B);
// Expects _mm_cmpngt_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 5, then
// rebuild the result by inserting element 0 and element 1 into a fresh vector.
// NOTE(review): presumably the operands are swapped and the upper lane restored - confirm.
335 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
336 // CHECK-LABEL: test_mm_cmpngt_sd
337 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
338 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
339 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
340 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
341 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
342 return _mm_cmpngt_sd(A, B);
// Expects _mm_cmpnle_pd to emit `fcmp ugt`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
345 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
346 // CHECK-LABEL: test_mm_cmpnle_pd
347 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
348 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
349 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
350 return _mm_cmpnle_pd(A, B);
// Expects _mm_cmpnle_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 6.
353 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
354 // CHECK-LABEL: test_mm_cmpnle_sd
355 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
356 return _mm_cmpnle_sd(A, B);
// Expects _mm_cmpnlt_pd to emit `fcmp uge`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
359 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
360 // CHECK-LABEL: test_mm_cmpnlt_pd
361 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
362 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
363 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
364 return _mm_cmpnlt_pd(A, B);
// Expects _mm_cmpnlt_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 5.
367 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
368 // CHECK-LABEL: test_mm_cmpnlt_sd
369 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
370 return _mm_cmpnlt_sd(A, B);
// Expects _mm_cmpord_pd to emit `fcmp ord`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
373 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
374 // CHECK-LABEL: test_mm_cmpord_pd
375 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
376 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
377 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
378 return _mm_cmpord_pd(A, B);
// Expects _mm_cmpord_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 7.
381 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
382 // CHECK-LABEL: test_mm_cmpord_sd
383 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
384 return _mm_cmpord_sd(A, B);
// Expects _mm_cmpunord_pd to emit `fcmp uno`, immediately followed by sext to <2 x i64> and
// a bitcast back to <2 x double>.
387 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
388 // CHECK-LABEL: test_mm_cmpunord_pd
389 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
390 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
391 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
392 return _mm_cmpunord_pd(A, B);
// Expects _mm_cmpunord_sd to call @llvm.x86.sse2.cmp.sd with predicate immediate 3.
395 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
396 // CHECK-LABEL: test_mm_cmpunord_sd
397 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
398 return _mm_cmpunord_sd(A, B);
// Expects _mm_comieq_sd to be emitted as a call to @llvm.x86.sse2.comieq.sd returning i32.
401 int test_mm_comieq_sd(__m128d A, __m128d B) {
402 // CHECK-LABEL: test_mm_comieq_sd
403 // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
404 return _mm_comieq_sd(A, B);
// Expects _mm_comige_sd to be emitted as a call to @llvm.x86.sse2.comige.sd returning i32.
407 int test_mm_comige_sd(__m128d A, __m128d B) {
408 // CHECK-LABEL: test_mm_comige_sd
409 // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
410 return _mm_comige_sd(A, B);
// Expects _mm_comigt_sd to be emitted as a call to @llvm.x86.sse2.comigt.sd returning i32.
413 int test_mm_comigt_sd(__m128d A, __m128d B) {
414 // CHECK-LABEL: test_mm_comigt_sd
415 // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
416 return _mm_comigt_sd(A, B);
// Expects _mm_comile_sd to be emitted as a call to @llvm.x86.sse2.comile.sd returning i32.
419 int test_mm_comile_sd(__m128d A, __m128d B) {
420 // CHECK-LABEL: test_mm_comile_sd
421 // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
422 return _mm_comile_sd(A, B);
// Expects _mm_comilt_sd to be emitted as a call to @llvm.x86.sse2.comilt.sd returning i32.
425 int test_mm_comilt_sd(__m128d A, __m128d B) {
426 // CHECK-LABEL: test_mm_comilt_sd
427 // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
428 return _mm_comilt_sd(A, B);
// Expects _mm_comineq_sd to be emitted as a call to @llvm.x86.sse2.comineq.sd returning i32.
431 int test_mm_comineq_sd(__m128d A, __m128d B) {
432 // CHECK-LABEL: test_mm_comineq_sd
433 // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
434 return _mm_comineq_sd(A, B);
// Expects _mm_cvtepi32_pd to select the low two i32 lanes with a shufflevector and
// convert them with `sitofp` to <2 x double>.
437 __m128d test_mm_cvtepi32_pd(__m128i A) {
438 // CHECK-LABEL: test_mm_cvtepi32_pd
439 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
440 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
441 return _mm_cvtepi32_pd(A);
// Expects _mm_cvtepi32_ps to be emitted as `sitofp <4 x i32> ... to <4 x float>`.
444 __m128 test_mm_cvtepi32_ps(__m128i A) {
445 // CHECK-LABEL: test_mm_cvtepi32_ps
446 // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
447 return _mm_cvtepi32_ps(A);
// Expects _mm_cvtpd_epi32 to be emitted as a call to @llvm.x86.sse2.cvtpd2dq.
450 __m128i test_mm_cvtpd_epi32(__m128d A) {
451 // CHECK-LABEL: test_mm_cvtpd_epi32
452 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
453 return _mm_cvtpd_epi32(A);
// Expects _mm_cvtpd_ps to be emitted as a call to @llvm.x86.sse2.cvtpd2ps.
456 __m128 test_mm_cvtpd_ps(__m128d A) {
457 // CHECK-LABEL: test_mm_cvtpd_ps
458 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
459 return _mm_cvtpd_ps(A);
// Expects _mm_cvtps_epi32 to be emitted as a call to @llvm.x86.sse2.cvtps2dq.
462 __m128i test_mm_cvtps_epi32(__m128 A) {
463 // CHECK-LABEL: test_mm_cvtps_epi32
464 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
465 return _mm_cvtps_epi32(A);
// Expects _mm_cvtps_pd to select the low two float lanes with a shufflevector and widen
// them with `fpext` to <2 x double>.
468 __m128d test_mm_cvtps_pd(__m128 A) {
469 // CHECK-LABEL: test_mm_cvtps_pd
470 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
471 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
472 return _mm_cvtps_pd(A);
// Expects _mm_cvtsd_f64 to be emitted as an extractelement of lane 0 of a <2 x double>.
475 double test_mm_cvtsd_f64(__m128d A) {
476 // CHECK-LABEL: test_mm_cvtsd_f64
477 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
478 return _mm_cvtsd_f64(A);
// Expects _mm_cvtsd_si32 to be emitted as a call to @llvm.x86.sse2.cvtsd2si.
481 int test_mm_cvtsd_si32(__m128d A) {
482 // CHECK-LABEL: test_mm_cvtsd_si32
483 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
484 return _mm_cvtsd_si32(A);
487 #ifdef __x86_64__
// Expects _mm_cvtsd_si64 to be emitted as a call to @llvm.x86.sse2.cvtsd2si64.
// The directives use the X64 prefix, which only the x86_64 RUN lines enable.
488 long long test_mm_cvtsd_si64(__m128d A) {
489 // X64-LABEL: test_mm_cvtsd_si64
490 // X64: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
491 return _mm_cvtsd_si64(A);
493 #endif
// Expects _mm_cvtsd_ss to be emitted as a call to @llvm.x86.sse2.cvtsd2ss.
495 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
496 // CHECK-LABEL: test_mm_cvtsd_ss
497 // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
498 return _mm_cvtsd_ss(A, B);
// Expects _mm_cvtsi128_si32 to be emitted as an extractelement of lane 0 of a <4 x i32>.
501 int test_mm_cvtsi128_si32(__m128i A) {
502 // CHECK-LABEL: test_mm_cvtsi128_si32
503 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
504 return _mm_cvtsi128_si32(A);
// Expects _mm_cvtsi128_si64 to be emitted as an extractelement of lane 0 of a <2 x i64>.
// Uses the common directive prefix, so it is checked for every RUN line.
507 long long test_mm_cvtsi128_si64(__m128i A) {
508 // CHECK-LABEL: test_mm_cvtsi128_si64
509 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
510 return _mm_cvtsi128_si64(A);
// Expects _mm_cvtsi32_sd to convert the i32 with `sitofp` and insert the double into
// lane 0 of the vector.
513 __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
514 // CHECK-LABEL: test_mm_cvtsi32_sd
515 // CHECK: sitofp i32 %{{.*}} to double
516 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
517 return _mm_cvtsi32_sd(A, B);
// Expects _mm_cvtsi32_si128 to insert the argument into lane 0 of an undef <4 x i32>
// and then insert constant zero into lanes 1, 2 and 3.
520 __m128i test_mm_cvtsi32_si128(int A) {
521 // CHECK-LABEL: test_mm_cvtsi32_si128
522 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
523 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
524 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
525 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
526 return _mm_cvtsi32_si128(A);
529 #ifdef __x86_64__
// Expects _mm_cvtsi64_sd to convert the i64 with `sitofp` and insert the double into
// lane 0. The directives use the X64 prefix, which only the x86_64 RUN lines enable.
530 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
531 // X64-LABEL: test_mm_cvtsi64_sd
532 // X64: sitofp i64 %{{.*}} to double
533 // X64: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
534 return _mm_cvtsi64_sd(A, B);
536 #endif
// Expects _mm_cvtsi64_si128 to insert the argument into lane 0 of an undef <2 x i64>
// and then insert constant zero into lane 1.
538 __m128i test_mm_cvtsi64_si128(long long A) {
539 // CHECK-LABEL: test_mm_cvtsi64_si128
540 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
541 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
542 return _mm_cvtsi64_si128(A);
// Expects _mm_cvtss_sd to extract lane 0 of the float vector, widen it with `fpext`,
// and insert the double into lane 0 of the result.
545 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
546 // CHECK-LABEL: test_mm_cvtss_sd
547 // CHECK: extractelement <4 x float> %{{.*}}, i32 0
548 // CHECK: fpext float %{{.*}} to double
549 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
550 return _mm_cvtss_sd(A, B);
// Expects _mm_cvttpd_epi32 to be emitted as a call to @llvm.x86.sse2.cvttpd2dq.
553 __m128i test_mm_cvttpd_epi32(__m128d A) {
554 // CHECK-LABEL: test_mm_cvttpd_epi32
555 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
556 return _mm_cvttpd_epi32(A);
// Expects _mm_cvttps_epi32 to be emitted as a call to @llvm.x86.sse2.cvttps2dq.
559 __m128i test_mm_cvttps_epi32(__m128 A) {
560 // CHECK-LABEL: test_mm_cvttps_epi32
561 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
562 return _mm_cvttps_epi32(A);
// Expects _mm_cvttsd_si32 to be emitted as a call to @llvm.x86.sse2.cvttsd2si.
565 int test_mm_cvttsd_si32(__m128d A) {
566 // CHECK-LABEL: test_mm_cvttsd_si32
567 // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
568 return _mm_cvttsd_si32(A);
571 #ifdef __x86_64__
// Expects _mm_cvttsd_si64 to be emitted as a call to @llvm.x86.sse2.cvttsd2si64.
// The directives use the X64 prefix, which only the x86_64 RUN lines enable.
572 long long test_mm_cvttsd_si64(__m128d A) {
573 // X64-LABEL: test_mm_cvttsd_si64
574 // X64: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
575 return _mm_cvttsd_si64(A);
577 #endif
// Expects an `fdiv <2 x double>` instruction in the IR emitted for _mm_div_pd.
579 __m128d test_mm_div_pd(__m128d A, __m128d B) {
580 // CHECK-LABEL: test_mm_div_pd
581 // CHECK: fdiv <2 x double>
582 return _mm_div_pd(A, B);
// Expects _mm_div_sd to extract element 0 twice, divide the scalars with `fdiv double`,
// and insert the result back into element 0 of a <2 x double>.
585 __m128d test_mm_div_sd(__m128d A, __m128d B) {
586 // CHECK-LABEL: test_mm_div_sd
587 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
588 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
589 // CHECK: fdiv double
590 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
591 return _mm_div_sd(A, B);
594 // Lowering to pextrw requires optimization.
// Expects _mm_extract_epi16 with index 1 to extract lane 1 of an <8 x i16> (the index
// type may be i32 or i64) and zero-extend the i16 to i32.
595 int test_mm_extract_epi16(__m128i A) {
596 // CHECK-LABEL: test_mm_extract_epi16
597 // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
598 // CHECK: zext i16 %{{.*}} to i32
599 return _mm_extract_epi16(A, 1);
// Expects _mm_insert_epi16 with index 0 to be emitted as an insertelement into an
// <8 x i16> at lane 0 (the index type may be i32 or i64).
602 __m128i test_mm_insert_epi16(__m128i A, int B) {
603 // CHECK-LABEL: test_mm_insert_epi16
604 // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
605 return _mm_insert_epi16(A, B, 0);
// Expects _mm_lfence to be emitted as a call to @llvm.x86.sse2.lfence.
608 void test_mm_lfence(void) {
609 // CHECK-LABEL: test_mm_lfence
610 // CHECK: call void @llvm.x86.sse2.lfence()
611 _mm_lfence();
// Expects _mm_load_pd to be emitted as a <2 x double> load with 16-byte alignment.
614 __m128d test_mm_load_pd(double const* A) {
615 // CHECK-LABEL: test_mm_load_pd
616 // CHECK: load <2 x double>, ptr %{{.*}}, align 16
617 return _mm_load_pd(A);
// Expects _mm_load_pd1 to load one double (align 8) and insert it into lanes 0 and 1
// of a <2 x double>.
620 __m128d test_mm_load_pd1(double const* A) {
621 // CHECK-LABEL: test_mm_load_pd1
622 // CHECK: load double, ptr %{{.*}}, align 8
623 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
624 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
625 return _mm_load_pd1(A);
// Expects _mm_load_sd to load a double with alignment exactly 1; the end-of-line anchor
// rejects larger alignments that merely start with the digit 1.
628 __m128d test_mm_load_sd(double const* A) {
629 // CHECK-LABEL: test_mm_load_sd
630 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
631 return _mm_load_sd(A);
// Expects _mm_load_si128 to be emitted as a <2 x i64> load with 16-byte alignment.
634 __m128i test_mm_load_si128(__m128i const* A) {
635 // CHECK-LABEL: test_mm_load_si128
636 // CHECK: load <2 x i64>, ptr %{{.*}}, align 16
637 return _mm_load_si128(A);
// Expects _mm_load1_pd to load one double (align 8) and insert it into lanes 0 and 1
// of a <2 x double>.
640 __m128d test_mm_load1_pd(double const* A) {
641 // CHECK-LABEL: test_mm_load1_pd
642 // CHECK: load double, ptr %{{.*}}, align 8
643 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
644 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
645 return _mm_load1_pd(A);
// Expects _mm_loadh_pd to load a double with alignment exactly 1 and insert it into
// lane 1 of the vector.
648 __m128d test_mm_loadh_pd(__m128d x, void* y) {
649 // CHECK-LABEL: test_mm_loadh_pd
650 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
651 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
652 return _mm_loadh_pd(x, y);
// Expects _mm_loadl_epi64 to load an i64 with alignment exactly 1, insert it into lane 0
// of an undef <2 x i64>, and insert constant zero into lane 1.
// The function name is matched with the label form, as in the other tests of this file,
// so the following directives are scoped to this function's IR only.
655 __m128i test_mm_loadl_epi64(__m128i* y) {
656 // CHECK-LABEL: test_mm_loadl_epi64
657 // CHECK: load i64, ptr {{.*}}, align 1{{$}}
658 // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
659 // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
660 return _mm_loadl_epi64(y);
// Expects _mm_loadl_pd to load a double with alignment exactly 1 into lane 0 of a fresh
// vector, then extract lane 1 of a vector and insert it into lane 1 of the result.
663 __m128d test_mm_loadl_pd(__m128d x, void* y) {
664 // CHECK-LABEL: test_mm_loadl_pd
665 // CHECK: load double, ptr %{{.*}}, align 1{{$}}
666 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
667 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
668 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
669 return _mm_loadl_pd(x, y);
// Expects _mm_loadr_pd to do a 16-byte-aligned <2 x double> load followed by a
// shufflevector that swaps the two lanes (indices 1, 0).
672 __m128d test_mm_loadr_pd(double const* A) {
673 // CHECK-LABEL: test_mm_loadr_pd
674 // CHECK: load <2 x double>, ptr %{{.*}}, align 16
675 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
676 return _mm_loadr_pd(A);
// Expects _mm_loadu_pd to be emitted as a <2 x double> load with alignment exactly 1.
679 __m128d test_mm_loadu_pd(double const* A) {
680 // CHECK-LABEL: test_mm_loadu_pd
681 // CHECK: load <2 x double>, ptr %{{.*}}, align 1{{$}}
682 return _mm_loadu_pd(A);
// Expects _mm_loadu_si128 to be emitted as a <2 x i64> load with alignment exactly 1.
685 __m128i test_mm_loadu_si128(__m128i const* A) {
686 // CHECK-LABEL: test_mm_loadu_si128
687 // CHECK: load <2 x i64>, ptr %{{.*}}, align 1{{$}}
688 return _mm_loadu_si128(A);
// Expects _mm_loadu_si64 to load an i64 with alignment exactly 1, insert it into lane 0
// of an undef <2 x i64>, and insert constant zero into lane 1.
691 __m128i test_mm_loadu_si64(void const* A) {
692 // CHECK-LABEL: test_mm_loadu_si64
693 // CHECK: load i64, ptr %{{.*}}, align 1{{$}}
694 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
695 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
696 return _mm_loadu_si64(A);
// Expects _mm_loadu_si32 to load an i32 with alignment exactly 1, insert it into lane 0
// of an undef <4 x i32>, and insert constant zero into lanes 1 through 3.
699 __m128i test_mm_loadu_si32(void const* A) {
700 // CHECK-LABEL: test_mm_loadu_si32
701 // CHECK: load i32, ptr %{{.*}}, align 1{{$}}
702 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
703 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
704 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
705 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
706 return _mm_loadu_si32(A);
// Expects _mm_loadu_si16 to load an i16 with alignment exactly 1, insert it into lane 0
// of an undef <8 x i16>, and insert constant zero into lanes 1 through 7.
709 __m128i test_mm_loadu_si16(void const* A) {
710 // CHECK-LABEL: test_mm_loadu_si16
711 // CHECK: load i16, ptr %{{.*}}, align 1{{$}}
712 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
713 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
714 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
715 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
716 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
717 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
718 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
719 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
720 return _mm_loadu_si16(A);
// Expects _mm_madd_epi16 to be emitted as a call to @llvm.x86.sse2.pmadd.wd.
723 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
724 // CHECK-LABEL: test_mm_madd_epi16
725 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
726 return _mm_madd_epi16(A, B);
// Expects _mm_maskmoveu_si128 to be emitted as a call to @llvm.x86.sse2.maskmov.dqu with
// two <16 x i8> operands and a pointer.
729 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
730 // CHECK-LABEL: test_mm_maskmoveu_si128
731 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, ptr %{{.*}})
732 _mm_maskmoveu_si128(A, B, C);
// Expects _mm_max_epi16 to be emitted as a call to the generic @llvm.smax.v8i16 intrinsic.
735 __m128i test_mm_max_epi16(__m128i A, __m128i B) {
736 // CHECK-LABEL: test_mm_max_epi16
737 // CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
738 return _mm_max_epi16(A, B);
// Expects _mm_max_epu8 to be emitted as a call to the generic @llvm.umax.v16i8 intrinsic.
741 __m128i test_mm_max_epu8(__m128i A, __m128i B) {
742 // CHECK-LABEL: test_mm_max_epu8
743 // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
744 return _mm_max_epu8(A, B);
// Expects _mm_max_pd to be emitted as a call to @llvm.x86.sse2.max.pd.
747 __m128d test_mm_max_pd(__m128d A, __m128d B) {
748 // CHECK-LABEL: test_mm_max_pd
749 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
750 return _mm_max_pd(A, B);
// Expects _mm_max_sd to be emitted as a call to @llvm.x86.sse2.max.sd.
753 __m128d test_mm_max_sd(__m128d A, __m128d B) {
754 // CHECK-LABEL: test_mm_max_sd
755 // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
756 return _mm_max_sd(A, B);
// Expects _mm_mfence to be emitted as a call to @llvm.x86.sse2.mfence.
759 void test_mm_mfence(void) {
760 // CHECK-LABEL: test_mm_mfence
761 // CHECK: call void @llvm.x86.sse2.mfence()
762 _mm_mfence();
// Expects _mm_min_epi16 to be emitted as a call to the generic @llvm.smin.v8i16 intrinsic.
765 __m128i test_mm_min_epi16(__m128i A, __m128i B) {
766 // CHECK-LABEL: test_mm_min_epi16
767 // CHECK: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
768 return _mm_min_epi16(A, B);
// Expects _mm_min_epu8 to be emitted as a call to the generic @llvm.umin.v16i8 intrinsic.
771 __m128i test_mm_min_epu8(__m128i A, __m128i B) {
772 // CHECK-LABEL: test_mm_min_epu8
773 // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
774 return _mm_min_epu8(A, B);
// Expects _mm_min_pd to be emitted as a call to @llvm.x86.sse2.min.pd.
777 __m128d test_mm_min_pd(__m128d A, __m128d B) {
778 // CHECK-LABEL: test_mm_min_pd
779 // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
780 return _mm_min_pd(A, B);
// Expects _mm_min_sd to be emitted as a call to @llvm.x86.sse2.min.sd.
783 __m128d test_mm_min_sd(__m128d A, __m128d B) {
784 // CHECK-LABEL: test_mm_min_sd
785 // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
786 return _mm_min_sd(A, B);
// Expects _mm_movepi64_pi64 to be emitted as an extractelement of lane 0 of a <2 x i64>.
// The source operand is matched with a wildcard, like the other tests in this file,
// so the directive does not depend on how the compiler numbers unnamed IR values.
789 __m64 test_mm_movepi64_pi64(__m128i A)
791 // CHECK-LABEL: test_mm_movepi64_pi64
792 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
793 return _mm_movepi64_pi64(A);
// Expects _mm_movpi64_epi64 to bitcast the <1 x i64> argument to i64, insert that value
// into lane 0 of an undef <2 x i64>, and insert constant zero into lane 1.
796 __m128i test_mm_movpi64_epi64(__m64 A)
798 // CHECK-LABEL: test_mm_movpi64_epi64
799 // CHECK: [[CAST:%.*]] = bitcast <1 x i64> %{{.*}} to i64
800 // CHECK: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[CAST]], i32 0
801 // CHECK: insertelement <2 x i64> [[INS]], i64 0, i32 1
802 return _mm_movpi64_epi64(A);
// Expects _mm_move_epi64 to be emitted as a shufflevector selecting index 0 of the first
// operand and index 2 (lane 0 of the second operand).
805 __m128i test_mm_move_epi64(__m128i A) {
806 // CHECK-LABEL: test_mm_move_epi64
807 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
808 return _mm_move_epi64(A);
// Expects _mm_move_sd to extract lane 0 of one vector and insert it into lane 0 of another.
811 __m128d test_mm_move_sd(__m128d A, __m128d B) {
812 // CHECK-LABEL: test_mm_move_sd
813 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
814 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
815 return _mm_move_sd(A, B);
// Expects _mm_movemask_epi8 to be emitted as a call to @llvm.x86.sse2.pmovmskb.128.
818 int test_mm_movemask_epi8(__m128i A) {
819 // CHECK-LABEL: test_mm_movemask_epi8
820 // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
821 return _mm_movemask_epi8(A);
// Expects _mm_movemask_pd to be emitted as a call to @llvm.x86.sse2.movmsk.pd.
824 int test_mm_movemask_pd(__m128d A) {
825 // CHECK-LABEL: test_mm_movemask_pd
826 // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
827 return _mm_movemask_pd(A);
// Expects _mm_mul_epu32 to mask both operands to their low 32 bits per lane
// (and with 4294967295) and then multiply them as <2 x i64>.
830 __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
831 // CHECK-LABEL: test_mm_mul_epu32
832 // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
833 // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
834 // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
835 return _mm_mul_epu32(A, B);
// Expects an `fmul <2 x double>` instruction in the IR emitted for _mm_mul_pd.
838 __m128d test_mm_mul_pd(__m128d A, __m128d B) {
839 // CHECK-LABEL: test_mm_mul_pd
840 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
841 return _mm_mul_pd(A, B);
// Expects _mm_mul_sd to extract element 0 twice, multiply the scalars with `fmul double`,
// and insert the result back into element 0 of a <2 x double>.
844 __m128d test_mm_mul_sd(__m128d A, __m128d B) {
845 // CHECK-LABEL: test_mm_mul_sd
846 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
847 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
848 // CHECK: fmul double
849 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
850 return _mm_mul_sd(A, B);
// Expects _mm_mulhi_epi16 to be emitted as a call to @llvm.x86.sse2.pmulh.w.
853 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
854 // CHECK-LABEL: test_mm_mulhi_epi16
855 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
856 return _mm_mulhi_epi16(A, B);
// Expects _mm_mulhi_epu16 to be emitted as a call to @llvm.x86.sse2.pmulhu.w.
859 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
860 // CHECK-LABEL: test_mm_mulhi_epu16
861 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
862 return _mm_mulhi_epu16(A, B);
// Expects a `mul <8 x i16>` instruction in the IR emitted for _mm_mullo_epi16.
865 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
866 // CHECK-LABEL: test_mm_mullo_epi16
867 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
868 return _mm_mullo_epi16(A, B);
// Expects _mm_or_pd to be emitted as an integer `or <2 x i64>` on the double vectors' bits.
871 __m128d test_mm_or_pd(__m128d A, __m128d B) {
872 // CHECK-LABEL: test_mm_or_pd
873 // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
874 return _mm_or_pd(A, B);
// Expects an `or <2 x i64>` instruction in the IR emitted for _mm_or_si128.
877 __m128i test_mm_or_si128(__m128i A, __m128i B) {
878 // CHECK-LABEL: test_mm_or_si128
879 // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
880 return _mm_or_si128(A, B);
// Expects _mm_packs_epi16 to be emitted as a call to @llvm.x86.sse2.packsswb.128.
883 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
884 // CHECK-LABEL: test_mm_packs_epi16
885 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
886 return _mm_packs_epi16(A, B);
// Expects _mm_packs_epi32 to be emitted as a call to @llvm.x86.sse2.packssdw.128.
889 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
890 // CHECK-LABEL: test_mm_packs_epi32
891 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
892 return _mm_packs_epi32(A, B);
// Expects _mm_packus_epi16 to be emitted as a call to @llvm.x86.sse2.packuswb.128.
895 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
896 // CHECK-LABEL: test_mm_packus_epi16
897 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
898 return _mm_packus_epi16(A, B);
// Expects _mm_pause to be emitted as a call to @llvm.x86.sse2.pause.
// The intrinsic is invoked as a plain statement, matching the other void tests in this
// file; ISO C does not allow a return statement with an expression in a void function.
901 void test_mm_pause(void) {
902 // CHECK-LABEL: test_mm_pause
903 // CHECK: call void @llvm.x86.sse2.pause()
904 _mm_pause();
907 __m128i test_mm_sad_epu8(__m128i A, __m128i B) {
908 // CHECK-LABEL: test_mm_sad_epu8
909 // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
910 return _mm_sad_epu8(A, B);
913 __m128i test_mm_set_epi8(char A, char B, char C, char D,
914 char E, char F, char G, char H,
915 char I, char J, char K, char L,
916 char M, char N, char O, char P) {
917 // CHECK-LABEL: test_mm_set_epi8
918 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
919 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
920 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
921 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
922 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
923 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
924 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
925 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
926 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
927 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
928 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
929 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
930 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
931 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
932 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
933 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
934 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
937 __m128i test_mm_set_epi16(short A, short B, short C, short D,
938 short E, short F, short G, short H) {
939 // CHECK-LABEL: test_mm_set_epi16
940 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
941 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
942 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
943 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
944 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
945 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
946 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
947 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
948 return _mm_set_epi16(A, B, C, D, E, F, G, H);
951 __m128i test_mm_set_epi32(int A, int B, int C, int D) {
952 // CHECK-LABEL: test_mm_set_epi32
953 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
954 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
955 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
956 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
957 return _mm_set_epi32(A, B, C, D);
960 __m128i test_mm_set_epi64(__m64 A, __m64 B) {
961 // CHECK-LABEL: test_mm_set_epi64
962 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
963 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
964 return _mm_set_epi64(A, B);
967 __m128i test_mm_set_epi64x(long long A, long long B) {
968 // CHECK-LABEL: test_mm_set_epi64x
969 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
970 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
971 return _mm_set_epi64x(A, B);
974 __m128d test_mm_set_pd(double A, double B) {
975 // CHECK-LABEL: test_mm_set_pd
976 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
977 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
978 return _mm_set_pd(A, B);
981 __m128d test_mm_set_pd1(double A) {
982 // CHECK-LABEL: test_mm_set_pd1
983 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
984 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
985 return _mm_set_pd1(A);
988 __m128d test_mm_set_sd(double A) {
989 // CHECK-LABEL: test_mm_set_sd
990 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
991 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
992 return _mm_set_sd(A);
995 __m128i test_mm_set1_epi8(char A) {
996 // CHECK-LABEL: test_mm_set1_epi8
997 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
998 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
999 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1000 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1001 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1002 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1003 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1004 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1005 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1006 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1007 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1008 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1009 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1010 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1011 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1012 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1013 return _mm_set1_epi8(A);
1016 __m128i test_mm_set1_epi16(short A) {
1017 // CHECK-LABEL: test_mm_set1_epi16
1018 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
1019 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1020 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1021 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1022 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1023 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1024 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1025 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1026 return _mm_set1_epi16(A);
1029 __m128i test_mm_set1_epi32(int A) {
1030 // CHECK-LABEL: test_mm_set1_epi32
1031 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1032 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1033 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1034 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1035 return _mm_set1_epi32(A);
1038 __m128i test_mm_set1_epi64(__m64 A) {
1039 // CHECK-LABEL: test_mm_set1_epi64
1040 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1041 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1042 return _mm_set1_epi64(A);
1045 __m128i test_mm_set1_epi64x(long long A) {
1046 // CHECK-LABEL: test_mm_set1_epi64x
1047 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1048 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1049 return _mm_set1_epi64x(A);
1052 __m128d test_mm_set1_pd(double A) {
1053 // CHECK-LABEL: test_mm_set1_pd
1054 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1055 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1056 return _mm_set1_pd(A);
1059 __m128i test_mm_setr_epi8(char A, char B, char C, char D,
1060 char E, char F, char G, char H,
1061 char I, char J, char K, char L,
1062 char M, char N, char O, char P) {
1063 // CHECK-LABEL: test_mm_setr_epi8
1064 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
1065 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1066 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1067 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1068 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1069 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1070 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1071 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1072 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1073 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1074 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1075 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1076 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1077 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1078 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1079 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1080 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
1083 __m128i test_mm_setr_epi16(short A, short B, short C, short D,
1084 short E, short F, short G, short H) {
1085 // CHECK-LABEL: test_mm_setr_epi16
1086 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
1087 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1088 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1089 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1090 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1091 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1092 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1093 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1094 return _mm_setr_epi16(A, B, C, D, E, F, G, H);
1097 __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
1098 // CHECK-LABEL: test_mm_setr_epi32
1099 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1100 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1101 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1102 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1103 return _mm_setr_epi32(A, B, C, D);
1106 __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
1107 // CHECK-LABEL: test_mm_setr_epi64
1108 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1109 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1110 return _mm_setr_epi64(A, B);
1113 __m128d test_mm_setr_pd(double A, double B) {
1114 // CHECK-LABEL: test_mm_setr_pd
1115 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1116 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1117 return _mm_setr_pd(A, B);
1120 __m128d test_mm_setzero_pd(void) {
1121 // CHECK-LABEL: test_mm_setzero_pd
1122 // CHECK: store <2 x double> zeroinitializer
1123 return _mm_setzero_pd();
1126 __m128i test_mm_setzero_si128(void) {
1127 // CHECK-LABEL: test_mm_setzero_si128
1128 // CHECK: store <2 x i64> zeroinitializer
1129 return _mm_setzero_si128();
1132 __m128i test_mm_shuffle_epi32(__m128i A) {
1133 // CHECK-LABEL: test_mm_shuffle_epi32
1134 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
1135 return _mm_shuffle_epi32(A, 0);
1138 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
1139 // CHECK-LABEL: test_mm_shuffle_pd
1140 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
1141 return _mm_shuffle_pd(A, B, 1);
1144 __m128i test_mm_shufflehi_epi16(__m128i A) {
1145 // CHECK-LABEL: test_mm_shufflehi_epi16
1146 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
1147 return _mm_shufflehi_epi16(A, 0);
1150 __m128i test_mm_shufflelo_epi16(__m128i A) {
1151 // CHECK-LABEL: test_mm_shufflelo_epi16
1152 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
1153 return _mm_shufflelo_epi16(A, 0);
1156 __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
1157 // CHECK-LABEL: test_mm_sll_epi16
1158 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1159 return _mm_sll_epi16(A, B);
1162 __m128i test_mm_sll_epi32(__m128i A, __m128i B) {
1163 // CHECK-LABEL: test_mm_sll_epi32
1164 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1165 return _mm_sll_epi32(A, B);
1168 __m128i test_mm_sll_epi64(__m128i A, __m128i B) {
1169 // CHECK-LABEL: test_mm_sll_epi64
1170 // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1171 return _mm_sll_epi64(A, B);
1174 __m128i test_mm_slli_epi16(__m128i A) {
1175 // CHECK-LABEL: test_mm_slli_epi16
1176 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1177 return _mm_slli_epi16(A, 1);
1180 __m128i test_mm_slli_epi16_1(__m128i A) {
1181 // CHECK-LABEL: test_mm_slli_epi16_1
1182 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1183 return _mm_slli_epi16(A, -1);
1186 __m128i test_mm_slli_epi16_2(__m128i A, int B) {
1187 // CHECK-LABEL: test_mm_slli_epi16_2
1188 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1189 return _mm_slli_epi16(A, B);
1192 __m128i test_mm_slli_epi32(__m128i A) {
1193 // CHECK-LABEL: test_mm_slli_epi32
1194 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1195 return _mm_slli_epi32(A, 1);
1198 __m128i test_mm_slli_epi32_1(__m128i A) {
1199 // CHECK-LABEL: test_mm_slli_epi32_1
1200 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1201 return _mm_slli_epi32(A, -1);
1204 __m128i test_mm_slli_epi32_2(__m128i A, int B) {
1205 // CHECK-LABEL: test_mm_slli_epi32_2
1206 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1207 return _mm_slli_epi32(A, B);
1210 __m128i test_mm_slli_epi64(__m128i A) {
1211 // CHECK-LABEL: test_mm_slli_epi64
1212 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1213 return _mm_slli_epi64(A, 1);
1216 __m128i test_mm_slli_epi64_1(__m128i A) {
1217 // CHECK-LABEL: test_mm_slli_epi64_1
1218 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1219 return _mm_slli_epi64(A, -1);
1222 __m128i test_mm_slli_epi64_2(__m128i A, int B) {
1223 // CHECK-LABEL: test_mm_slli_epi64_2
1224 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1225 return _mm_slli_epi64(A, B);
1228 __m128i test_mm_slli_si128(__m128i A) {
1229 // CHECK-LABEL: test_mm_slli_si128
1230 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
1231 return _mm_slli_si128(A, 5);
1234 __m128i test_mm_slli_si128_2(__m128i A) {
1235 // CHECK-LABEL: test_mm_slli_si128_2
1236 // CHECK: ret <2 x i64> zeroinitializer
1237 return _mm_slli_si128(A, 17);
1240 __m128d test_mm_sqrt_pd(__m128d A) {
1241 // CHECK-LABEL: test_mm_sqrt_pd
1242 // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
1243 return _mm_sqrt_pd(A);
1246 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
1247 // CHECK-LABEL: test_mm_sqrt_sd
1248 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
1249 // CHECK: call double @llvm.sqrt.f64(double {{.*}})
1250 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
1251 return _mm_sqrt_sd(A, B);
1254 __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
1255 // CHECK-LABEL: test_mm_sra_epi16
1256 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1257 return _mm_sra_epi16(A, B);
1260 __m128i test_mm_sra_epi32(__m128i A, __m128i B) {
1261 // CHECK-LABEL: test_mm_sra_epi32
1262 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1263 return _mm_sra_epi32(A, B);
1266 __m128i test_mm_srai_epi16(__m128i A) {
1267 // CHECK-LABEL: test_mm_srai_epi16
1268 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1269 return _mm_srai_epi16(A, 1);
1272 __m128i test_mm_srai_epi16_1(__m128i A) {
1273 // CHECK-LABEL: test_mm_srai_epi16_1
1274 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1275 return _mm_srai_epi16(A, -1);
1278 __m128i test_mm_srai_epi16_2(__m128i A, int B) {
1279 // CHECK-LABEL: test_mm_srai_epi16_2
1280 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1281 return _mm_srai_epi16(A, B);
1284 __m128i test_mm_srai_epi32(__m128i A) {
1285 // CHECK-LABEL: test_mm_srai_epi32
1286 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1287 return _mm_srai_epi32(A, 1);
1290 __m128i test_mm_srai_epi32_1(__m128i A) {
1291 // CHECK-LABEL: test_mm_srai_epi32_1
1292 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1293 return _mm_srai_epi32(A, -1);
1296 __m128i test_mm_srai_epi32_2(__m128i A, int B) {
1297 // CHECK-LABEL: test_mm_srai_epi32_2
1298 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1299 return _mm_srai_epi32(A, B);
1302 __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
1303 // CHECK-LABEL: test_mm_srl_epi16
1304 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1305 return _mm_srl_epi16(A, B);
1308 __m128i test_mm_srl_epi32(__m128i A, __m128i B) {
1309 // CHECK-LABEL: test_mm_srl_epi32
1310 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1311 return _mm_srl_epi32(A, B);
1314 __m128i test_mm_srl_epi64(__m128i A, __m128i B) {
1315 // CHECK-LABEL: test_mm_srl_epi64
1316 // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1317 return _mm_srl_epi64(A, B);
1320 __m128i test_mm_srli_epi16(__m128i A) {
1321 // CHECK-LABEL: test_mm_srli_epi16
1322 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1323 return _mm_srli_epi16(A, 1);
1326 __m128i test_mm_srli_epi16_1(__m128i A) {
1327 // CHECK-LABEL: test_mm_srli_epi16_1
1328 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1329 return _mm_srli_epi16(A, -1);
1332 __m128i test_mm_srli_epi16_2(__m128i A, int B) {
1333 // CHECK-LABEL: test_mm_srli_epi16_2
1334 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1335 return _mm_srli_epi16(A, B);
1338 __m128i test_mm_srli_epi32(__m128i A) {
1339 // CHECK-LABEL: test_mm_srli_epi32
1340 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1341 return _mm_srli_epi32(A, 1);
1344 __m128i test_mm_srli_epi32_1(__m128i A) {
1345 // CHECK-LABEL: test_mm_srli_epi32_1
1346 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1347 return _mm_srli_epi32(A, -1);
1350 __m128i test_mm_srli_epi32_2(__m128i A, int B) {
1351 // CHECK-LABEL: test_mm_srli_epi32_2
1352 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1353 return _mm_srli_epi32(A, B);
1356 __m128i test_mm_srli_epi64(__m128i A) {
1357 // CHECK-LABEL: test_mm_srli_epi64
1358 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1359 return _mm_srli_epi64(A, 1);
1362 __m128i test_mm_srli_epi64_1(__m128i A) {
1363 // CHECK-LABEL: test_mm_srli_epi64_1
1364 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1365 return _mm_srli_epi64(A, -1);
1368 __m128i test_mm_srli_epi64_2(__m128i A, int B) {
1369 // CHECK-LABEL: test_mm_srli_epi64_2
1370 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1371 return _mm_srli_epi64(A, B);
1374 __m128i test_mm_srli_si128(__m128i A) {
1375 // CHECK-LABEL: test_mm_srli_si128
1376 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
1377 return _mm_srli_si128(A, 5);
1380 __m128i test_mm_srli_si128_2(__m128i A) {
1381 // CHECK-LABEL: test_mm_srli_si128_2
1382 // CHECK: ret <2 x i64> zeroinitializer
1383 return _mm_srli_si128(A, 17);
1386 void test_mm_store_pd(double* A, __m128d B) {
1387 // CHECK-LABEL: test_mm_store_pd
1388 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16
1389 _mm_store_pd(A, B);
1392 void test_mm_store_pd1(double* x, __m128d y) {
1393 // CHECK-LABEL: test_mm_store_pd1
1394 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1395 // CHECK: store <2 x double> %{{.*}}, ptr {{.*}}, align 16
1396 _mm_store_pd1(x, y);
1399 void test_mm_store_sd(double* A, __m128d B) {
1400 // CHECK-LABEL: test_mm_store_sd
1401 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1402 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1403 _mm_store_sd(A, B);
1406 void test_mm_store_si128(__m128i* A, __m128i B) {
1407 // CHECK-LABEL: test_mm_store_si128
1408 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16
1409 _mm_store_si128(A, B);
1412 void test_mm_store1_pd(double* x, __m128d y) {
1413 // CHECK-LABEL: test_mm_store1_pd
1414 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1415 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16
1416 _mm_store1_pd(x, y);
1419 void test_mm_storeh_pd(double* A, __m128d B) {
1420 // CHECK-LABEL: test_mm_storeh_pd
1421 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
1422 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1423 _mm_storeh_pd(A, B);
1426 void test_mm_storel_epi64(__m128i x, void* y) {
1427 // CHECK-LABEL: test_mm_storel_epi64
1428 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
1429 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}}
1430 _mm_storel_epi64(y, x);
1433 void test_mm_storel_pd(double* A, __m128d B) {
1434 // CHECK-LABEL: test_mm_storel_pd
1435 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1436 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}}
1437 _mm_storel_pd(A, B);
1440 void test_mm_storer_pd(__m128d A, double* B) {
1441 // CHECK-LABEL: test_mm_storer_pd
1442 // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
1443 // CHECK: store {{.*}} ptr {{.*}}, align 16{{$}}
1444 _mm_storer_pd(B, A);
1447 void test_mm_storeu_pd(double* A, __m128d B) {
1448 // CHECK-LABEL: test_mm_storeu_pd
1449 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}}
1450 // CHECK-NEXT: ret void
1451 _mm_storeu_pd(A, B);
1454 void test_mm_storeu_si128(__m128i* A, __m128i B) {
1455 // CHECK-LABEL: test_mm_storeu_si128
1456 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}}
1457 // CHECK-NEXT: ret void
1458 _mm_storeu_si128(A, B);
1461 void test_mm_storeu_si64(void* A, __m128i B) {
1462 // CHECK-LABEL: test_mm_storeu_si64
1463 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
1464 // CHECK: store i64 [[EXT]], ptr %{{.*}}, align 1{{$}}
1465 // CHECK-NEXT: ret void
1466 _mm_storeu_si64(A, B);
1469 void test_mm_storeu_si32(void* A, __m128i B) {
1470 // CHECK-LABEL: test_mm_storeu_si32
1471 // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
1472 // CHECK: store i32 [[EXT]], ptr %{{.*}}, align 1{{$}}
1473 // CHECK-NEXT: ret void
1474 _mm_storeu_si32(A, B);
1477 void test_mm_storeu_si16(void* A, __m128i B) {
1478 // CHECK-LABEL: test_mm_storeu_si16
1479 // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
1480 // CHECK: store i16 [[EXT]], ptr %{{.*}}, align 1{{$}}
1481 // CHECK-NEXT: ret void
1482 _mm_storeu_si16(A, B);
1485 void test_mm_stream_pd(double *A, __m128d B) {
1486 // CHECK-LABEL: test_mm_stream_pd
1487 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1488 _mm_stream_pd(A, B);
1491 void test_mm_stream_pd_void(void *A, __m128d B) {
1492 // CHECK-LABEL: test_mm_stream_pd_void
1493 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1494 _mm_stream_pd(A, B);
1497 void test_mm_stream_si32(int *A, int B) {
1498 // CHECK-LABEL: test_mm_stream_si32
1499 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1500 _mm_stream_si32(A, B);
1503 void test_mm_stream_si32_void(void *A, int B) {
1504 // CHECK-LABEL: test_mm_stream_si32_void
1505 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1506 _mm_stream_si32(A, B);
1509 #ifdef __x86_64__
1510 void test_mm_stream_si64(long long *A, long long B) {
1511 // X64-LABEL: test_mm_stream_si64
1512 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1513 _mm_stream_si64(A, B);
1516 void test_mm_stream_si64_void(void *A, long long B) {
1517 // X64-LABEL: test_mm_stream_si64_void
1518 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1519 _mm_stream_si64(A, B);
1521 #endif
1523 void test_mm_stream_si128(__m128i *A, __m128i B) {
1524 // CHECK-LABEL: test_mm_stream_si128
1525 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1526 _mm_stream_si128(A, B);
1529 void test_mm_stream_si128_void(void *A, __m128i B) {
1530 // CHECK-LABEL: test_mm_stream_si128_void
1531 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1532 _mm_stream_si128(A, B);
1535 __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
1536 // CHECK-LABEL: test_mm_sub_epi8
1537 // CHECK: sub <16 x i8>
1538 return _mm_sub_epi8(A, B);
1541 __m128i test_mm_sub_epi16(__m128i A, __m128i B) {
1542 // CHECK-LABEL: test_mm_sub_epi16
1543 // CHECK: sub <8 x i16>
1544 return _mm_sub_epi16(A, B);
1547 __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
1548 // CHECK-LABEL: test_mm_sub_epi32
1549 // CHECK: sub <4 x i32>
1550 return _mm_sub_epi32(A, B);
1553 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
1554 // CHECK-LABEL: test_mm_sub_epi64
1555 // CHECK: sub <2 x i64>
1556 return _mm_sub_epi64(A, B);
1559 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
1560 // CHECK-LABEL: test_mm_sub_pd
1561 // CHECK: fsub <2 x double>
1562 return _mm_sub_pd(A, B);
1565 __m128d test_mm_sub_sd(__m128d A, __m128d B) {
1566 // CHECK-LABEL: test_mm_sub_sd
1567 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1568 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1569 // CHECK: fsub double
1570 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
1571 return _mm_sub_sd(A, B);
1574 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
1575 // CHECK-LABEL: test_mm_subs_epi8
1576 // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1577 return _mm_subs_epi8(A, B);
1580 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
1581 // CHECK-LABEL: test_mm_subs_epi16
1582 // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1583 return _mm_subs_epi16(A, B);
1586 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
1587 // CHECK-LABEL: test_mm_subs_epu8
1588 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1589 // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1590 return _mm_subs_epu8(A, B);
1593 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
1594 // CHECK-LABEL: test_mm_subs_epu16
1595 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1596 // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1597 return _mm_subs_epu16(A, B);
1600 int test_mm_ucomieq_sd(__m128d A, __m128d B) {
1601 // CHECK-LABEL: test_mm_ucomieq_sd
1602 // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1603 return _mm_ucomieq_sd(A, B);
1606 int test_mm_ucomige_sd(__m128d A, __m128d B) {
1607 // CHECK-LABEL: test_mm_ucomige_sd
1608 // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1609 return _mm_ucomige_sd(A, B);
1612 int test_mm_ucomigt_sd(__m128d A, __m128d B) {
1613 // CHECK-LABEL: test_mm_ucomigt_sd
1614 // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1615 return _mm_ucomigt_sd(A, B);
1618 int test_mm_ucomile_sd(__m128d A, __m128d B) {
1619 // CHECK-LABEL: test_mm_ucomile_sd
1620 // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1621 return _mm_ucomile_sd(A, B);
1624 int test_mm_ucomilt_sd(__m128d A, __m128d B) {
1625 // CHECK-LABEL: test_mm_ucomilt_sd
1626 // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1627 return _mm_ucomilt_sd(A, B);
1630 int test_mm_ucomineq_sd(__m128d A, __m128d B) {
1631 // CHECK-LABEL: test_mm_ucomineq_sd
1632 // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1633 return _mm_ucomineq_sd(A, B);
1636 __m128d test_mm_undefined_pd(void) {
1637 // X64-LABEL: test_mm_undefined_pd
1638 // X64: ret <2 x double> zeroinitializer
1640 // X86-LABEL: test_mm_undefined_pd
1641 // X86: store <2 x double> zeroinitializer
1642 return _mm_undefined_pd();
1645 __m128i test_mm_undefined_si128(void) {
1646 // CHECK-LABEL: test_mm_undefined_si128
1647 // CHECK: ret <2 x i64> zeroinitializer
1648 return _mm_undefined_si128();
1651 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
1652 // CHECK-LABEL: test_mm_unpackhi_epi8
1653 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1654 return _mm_unpackhi_epi8(A, B);
1657 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
1658 // CHECK-LABEL: test_mm_unpackhi_epi16
1659 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1660 return _mm_unpackhi_epi16(A, B);
1663 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
1664 // CHECK-LABEL: test_mm_unpackhi_epi32
1665 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1666 return _mm_unpackhi_epi32(A, B);
1669 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
1670 // CHECK-LABEL: test_mm_unpackhi_epi64
1671 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
1672 return _mm_unpackhi_epi64(A, B);
1675 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
1676 // CHECK-LABEL: test_mm_unpackhi_pd
1677 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
1678 return _mm_unpackhi_pd(A, B);
1681 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
1682 // CHECK-LABEL: test_mm_unpacklo_epi8
1683 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1684 return _mm_unpacklo_epi8(A, B);
1687 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
1688 // CHECK-LABEL: test_mm_unpacklo_epi16
1689 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1690 return _mm_unpacklo_epi16(A, B);
1693 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
1694 // CHECK-LABEL: test_mm_unpacklo_epi32
1695 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1696 return _mm_unpacklo_epi32(A, B);
1699 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
1700 // CHECK-LABEL: test_mm_unpacklo_epi64
1701 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
1702 return _mm_unpacklo_epi64(A, B);
1705 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
1706 // CHECK-LABEL: test_mm_unpacklo_pd
1707 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
1708 return _mm_unpacklo_pd(A, B);
1711 __m128d test_mm_xor_pd(__m128d A, __m128d B) {
1712 // CHECK-LABEL: test_mm_xor_pd
1713 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1714 return _mm_xor_pd(A, B);
1717 __m128i test_mm_xor_si128(__m128i A, __m128i B) {
1718 // CHECK-LABEL: test_mm_xor_si128
1719 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1720 return _mm_xor_si128(A, B);