1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
; Byte-wise add: both <2 x i64> arguments are reinterpreted as <16 x i8>, added,
; and the sum is cast back to <2 x i64>. Both RUN configurations are expected
; to select a single paddb.
7 define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8 ; X32-LABEL: test_mm_add_epi8:
10 ; X32-NEXT: paddb %xmm1, %xmm0
13 ; X64-LABEL: test_mm_add_epi8:
15 ; X64-NEXT: paddb %xmm1, %xmm0
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
; 16-bit lane add: operands are viewed as <8 x i16> and added; both targets
; are expected to select paddw.
24 define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25 ; X32-LABEL: test_mm_add_epi16:
27 ; X32-NEXT: paddw %xmm1, %xmm0
30 ; X64-LABEL: test_mm_add_epi16:
32 ; X64-NEXT: paddw %xmm1, %xmm0
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
; 32-bit lane add: operands are viewed as <4 x i32> and added; both targets
; are expected to select paddd.
41 define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42 ; X32-LABEL: test_mm_add_epi32:
44 ; X32-NEXT: paddd %xmm1, %xmm0
47 ; X64-LABEL: test_mm_add_epi32:
49 ; X64-NEXT: paddd %xmm1, %xmm0
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
; 64-bit lane add directly on <2 x i64> (no bitcasts needed); both targets
; are expected to select paddq.
58 define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59 ; X32-LABEL: test_mm_add_epi64:
61 ; X32-NEXT: paddq %xmm1, %xmm0
64 ; X64-LABEL: test_mm_add_epi64:
66 ; X64-NEXT: paddq %xmm1, %xmm0
68 %res = add <2 x i64> %a0, %a1
; Packed double add via a plain vector fadd; both targets are expected to
; select addpd.
72 define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73 ; X32-LABEL: test_mm_add_pd:
75 ; X32-NEXT: addpd %xmm1, %xmm0
78 ; X64-LABEL: test_mm_add_pd:
80 ; X64-NEXT: addpd %xmm1, %xmm0
82 %res = fadd <2 x double> %a0, %a1
; Scalar double add: lane 0 of each operand is extracted and added, and the sum
; is inserted back into lane 0 of %a0 (lane 1 of %a0 is preserved). Both
; targets are expected to fold this into a single addsd.
86 define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87 ; X32-LABEL: test_mm_add_sd:
89 ; X32-NEXT: addsd %xmm1, %xmm0
92 ; X64-LABEL: test_mm_add_sd:
94 ; X64-NEXT: addsd %xmm1, %xmm0
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
; Byte add through the llvm.x86.sse2.padds.b intrinsic on <16 x i8> views of
; the operands; both targets are expected to select paddsb.
103 define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104 ; X32-LABEL: test_mm_adds_epi8:
106 ; X32-NEXT: paddsb %xmm1, %xmm0
109 ; X64-LABEL: test_mm_adds_epi8:
111 ; X64-NEXT: paddsb %xmm1, %xmm0
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
119 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
; 16-bit add through the llvm.x86.sse2.padds.w intrinsic on <8 x i16> views of
; the operands; both targets are expected to select paddsw.
121 define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122 ; X32-LABEL: test_mm_adds_epi16:
124 ; X32-NEXT: paddsw %xmm1, %xmm0
127 ; X64-LABEL: test_mm_adds_epi16:
129 ; X64-NEXT: paddsw %xmm1, %xmm0
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
137 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
; Byte add through the llvm.x86.sse2.paddus.b intrinsic on <16 x i8> views of
; the operands; both targets are expected to select paddusb.
139 define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140 ; X32-LABEL: test_mm_adds_epu8:
142 ; X32-NEXT: paddusb %xmm1, %xmm0
145 ; X64-LABEL: test_mm_adds_epu8:
147 ; X64-NEXT: paddusb %xmm1, %xmm0
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
155 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
; 16-bit add through the llvm.x86.sse2.paddus.w intrinsic on <8 x i16> views of
; the operands; both targets are expected to select paddusw.
157 define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158 ; X32-LABEL: test_mm_adds_epu16:
160 ; X32-NEXT: paddusw %xmm1, %xmm0
163 ; X64-LABEL: test_mm_adds_epu16:
165 ; X64-NEXT: paddusw %xmm1, %xmm0
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
173 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Bitwise AND of two <2 x double> values, performed on <4 x i32> views and cast
; back to <2 x double>. Both targets are expected to select andps.
175 define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176 ; X32-LABEL: test_mm_and_pd:
178 ; X32-NEXT: andps %xmm1, %xmm0
181 ; X64-LABEL: test_mm_and_pd:
183 ; X64-NEXT: andps %xmm1, %xmm0
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
; Bitwise AND directly on <2 x i64>; both targets are expected to select andps
; (the same instruction the floating-point variant expects).
192 define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193 ; X32-LABEL: test_mm_and_si128:
195 ; X32-NEXT: andps %xmm1, %xmm0
198 ; X64-LABEL: test_mm_and_si128:
200 ; X64-NEXT: andps %xmm1, %xmm0
202 %res = and <2 x i64> %a0, %a1
; AND-NOT on <2 x double>: %a0 is inverted (xor with all-ones) on a <4 x i32>
; view and ANDed with %a1. Both targets are expected to fold the xor+and pair
; into a single andnps.
206 define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207 ; X32-LABEL: test_mm_andnot_pd:
209 ; X32-NEXT: andnps %xmm1, %xmm0
212 ; X64-LABEL: test_mm_andnot_pd:
214 ; X64-NEXT: andnps %xmm1, %xmm0
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
; AND-NOT on <2 x i64>: %a0 is inverted (xor with all-ones) and ANDed with %a1.
; Unlike the <2 x double> variant, the checks here expect the unfused
; three-instruction form: pcmpeqd to build all-ones, pxor to invert, then pand.
224 define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225 ; X32-LABEL: test_mm_andnot_si128:
227 ; X32-NEXT: pcmpeqd %xmm2, %xmm2
228 ; X32-NEXT: pxor %xmm2, %xmm0
229 ; X32-NEXT: pand %xmm1, %xmm0
232 ; X64-LABEL: test_mm_andnot_si128:
234 ; X64-NEXT: pcmpeqd %xmm2, %xmm2
235 ; X64-NEXT: pxor %xmm2, %xmm0
236 ; X64-NEXT: pand %xmm1, %xmm0
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
; Rounded byte average written in generic IR: both <16 x i8> operands are
; zero-extended to i16, summed, incremented by 1, shifted right by 1 and
; truncated back to i8. Both targets are expected to match this to pavgb.
243 define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244 ; X32-LABEL: test_mm_avg_epu8:
246 ; X32-NEXT: pavgb %xmm1, %xmm0
249 ; X64-LABEL: test_mm_avg_epu8:
251 ; X64-NEXT: pavgb %xmm1, %xmm0
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
; Widen to i16 so the sum plus the rounding bias cannot wrap.
255 %zext0 = zext <16 x i8> %arg0 to <16 x i16>
256 %zext1 = zext <16 x i8> %arg1 to <16 x i16>
257 %add = add <16 x i16> %zext0, %zext1
258 %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
259 %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
260 %res = trunc <16 x i16> %lshr to <16 x i8>
261 %bc = bitcast <16 x i8> %res to <2 x i64>
; Rounded 16-bit average written in generic IR: both <8 x i16> operands are
; zero-extended to i32, summed, incremented by 1, shifted right by 1 and
; truncated back to i16. Both targets are expected to match this to pavgw.
265 define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
266 ; X32-LABEL: test_mm_avg_epu16:
268 ; X32-NEXT: pavgw %xmm1, %xmm0
271 ; X64-LABEL: test_mm_avg_epu16:
273 ; X64-NEXT: pavgw %xmm1, %xmm0
275 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
276 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
; Widen to i32 so the sum plus the rounding bias cannot wrap.
277 %zext0 = zext <8 x i16> %arg0 to <8 x i32>
278 %zext1 = zext <8 x i16> %arg1 to <8 x i32>
279 %add = add <8 x i32> %zext0, %zext1
280 %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
281 %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
282 %res = trunc <8 x i32> %lshr to <8 x i16>
283 %bc = bitcast <8 x i16> %res to <2 x i64>
; Whole-register byte shift left by 5, expressed as a shufflevector of a zero
; vector and the <16 x i8> view of %a0. Both targets are expected to select
; pslldq, with five zero bytes followed by source bytes 0..10.
287 define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
288 ; X32-LABEL: test_mm_bslli_si128:
290 ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
293 ; X64-LABEL: test_mm_bslli_si128:
295 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
297 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
; Mask indices 11..15 pick zero bytes from the first operand; 16..26 pick
; bytes 0..10 of %arg0 (the second operand).
298 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
299 %bc = bitcast <16 x i8> %res to <2 x i64>
; Whole-register byte shift right by 5, expressed as a shufflevector of the
; <16 x i8> view of %a0 and a zero vector. Both targets are expected to select
; psrldq, with source bytes 5..15 followed by five zero bytes.
303 define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
304 ; X32-LABEL: test_mm_bsrli_si128:
306 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
309 ; X64-LABEL: test_mm_bsrli_si128:
311 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
313 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
; Mask indices 5..15 pick bytes of %arg0; 16..20 pick zero bytes from the
; second operand.
314 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
315 %bc = bitcast <16 x i8> %res to <2 x i64>
; Pure reinterpretation of <2 x double> as <4 x float>; the visible checks list
; no instruction after either label.
319 define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
320 ; X32-LABEL: test_mm_castpd_ps:
324 ; X64-LABEL: test_mm_castpd_ps:
327 %res = bitcast <2 x double> %a0 to <4 x float>
; Pure reinterpretation of <2 x double> as <2 x i64>; the visible checks list
; no instruction after either label.
331 define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
332 ; X32-LABEL: test_mm_castpd_si128:
336 ; X64-LABEL: test_mm_castpd_si128:
339 %res = bitcast <2 x double> %a0 to <2 x i64>
; Pure reinterpretation of <4 x float> as <2 x double>; the visible checks list
; no instruction after either label.
343 define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
344 ; X32-LABEL: test_mm_castps_pd:
348 ; X64-LABEL: test_mm_castps_pd:
351 %res = bitcast <4 x float> %a0 to <2 x double>
352 ret <2 x double> %res
; Pure reinterpretation of <4 x float> as <2 x i64>; the visible checks list
; no instruction after either label.
355 define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
356 ; X32-LABEL: test_mm_castps_si128:
360 ; X64-LABEL: test_mm_castps_si128:
363 %res = bitcast <4 x float> %a0 to <2 x i64>
; Pure reinterpretation of <2 x i64> as <2 x double>; the visible checks list
; no instruction after either label.
367 define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
368 ; X32-LABEL: test_mm_castsi128_pd:
372 ; X64-LABEL: test_mm_castsi128_pd:
375 %res = bitcast <2 x i64> %a0 to <2 x double>
376 ret <2 x double> %res
; Pure reinterpretation of <2 x i64> as <4 x float>; the visible checks list
; no instruction after either label.
379 define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
380 ; X32-LABEL: test_mm_castsi128_ps:
384 ; X64-LABEL: test_mm_castsi128_ps:
387 %res = bitcast <2 x i64> %a0 to <4 x float>
; Cache-line flush of the address in %a0 via llvm.x86.sse2.clflush. On i386 the
; pointer is first loaded from the stack into %eax; on x86_64 it is used
; directly from %rdi. Both targets are expected to emit clflush.
391 define void @test_mm_clflush(i8* %a0) nounwind {
392 ; X32-LABEL: test_mm_clflush:
394 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
395 ; X32-NEXT: clflush (%eax)
398 ; X64-LABEL: test_mm_clflush:
400 ; X64-NEXT: clflush (%rdi)
402 call void @llvm.x86.sse2.clflush(i8* %a0)
; The flush is a side effect, so the declaration must not be marked readnone:
; a readnone+nounwind call with an unused (void) result is formally dead and
; could legally be dropped before instruction selection.
405 declare void @llvm.x86.sse2.clflush(i8*) nounwind
; Byte-wise equality: icmp eq on <16 x i8> views, with the i1 results
; sign-extended to full-width lane masks. Both targets are expected to select
; pcmpeqb.
407 define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
408 ; X32-LABEL: test_mm_cmpeq_epi8:
410 ; X32-NEXT: pcmpeqb %xmm1, %xmm0
413 ; X64-LABEL: test_mm_cmpeq_epi8:
415 ; X64-NEXT: pcmpeqb %xmm1, %xmm0
417 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
418 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
419 %cmp = icmp eq <16 x i8> %arg0, %arg1
420 %res = sext <16 x i1> %cmp to <16 x i8>
421 %bc = bitcast <16 x i8> %res to <2 x i64>
; 16-bit lane equality: icmp eq on <8 x i16> views, sign-extended to lane
; masks. Both targets are expected to select pcmpeqw.
425 define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
426 ; X32-LABEL: test_mm_cmpeq_epi16:
428 ; X32-NEXT: pcmpeqw %xmm1, %xmm0
431 ; X64-LABEL: test_mm_cmpeq_epi16:
433 ; X64-NEXT: pcmpeqw %xmm1, %xmm0
435 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
436 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
437 %cmp = icmp eq <8 x i16> %arg0, %arg1
438 %res = sext <8 x i1> %cmp to <8 x i16>
439 %bc = bitcast <8 x i16> %res to <2 x i64>
; 32-bit lane equality: icmp eq on <4 x i32> views, sign-extended to lane
; masks. Both targets are expected to select pcmpeqd.
443 define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
444 ; X32-LABEL: test_mm_cmpeq_epi32:
446 ; X32-NEXT: pcmpeqd %xmm1, %xmm0
449 ; X64-LABEL: test_mm_cmpeq_epi32:
451 ; X64-NEXT: pcmpeqd %xmm1, %xmm0
453 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
454 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
455 %cmp = icmp eq <4 x i32> %arg0, %arg1
456 %res = sext <4 x i1> %cmp to <4 x i32>
457 %bc = bitcast <4 x i32> %res to <2 x i64>
; Packed double ordered-equal compare: fcmp oeq, sign-extended to <2 x i64>
; masks and returned as <2 x double>. Both targets are expected to select
; cmpeqpd.
461 define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
462 ; X32-LABEL: test_mm_cmpeq_pd:
464 ; X32-NEXT: cmpeqpd %xmm1, %xmm0
467 ; X64-LABEL: test_mm_cmpeq_pd:
469 ; X64-NEXT: cmpeqpd %xmm1, %xmm0
471 %fcmp = fcmp oeq <2 x double> %a0, %a1
472 %sext = sext <2 x i1> %fcmp to <2 x i64>
473 %res = bitcast <2 x i64> %sext to <2 x double>
474 ret <2 x double> %res
; Scalar double equality through llvm.x86.sse2.cmp.sd with immediate 0; both
; targets are expected to select cmpeqsd.
477 define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
478 ; X32-LABEL: test_mm_cmpeq_sd:
480 ; X32-NEXT: cmpeqsd %xmm1, %xmm0
483 ; X64-LABEL: test_mm_cmpeq_sd:
485 ; X64-NEXT: cmpeqsd %xmm1, %xmm0
487 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
488 ret <2 x double> %res
; The trailing i8 operand selects the comparison; the value 0 used above pairs
; with cmpeqsd in the checks.
490 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
; Packed double greater-or-equal, written as fcmp ole with the operands swapped
; (%a1 <= %a0). Both targets are expected to select cmplepd with swapped
; registers, then movapd to move the result from xmm1 into xmm0.
492 define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
493 ; X32-LABEL: test_mm_cmpge_pd:
495 ; X32-NEXT: cmplepd %xmm0, %xmm1
496 ; X32-NEXT: movapd %xmm1, %xmm0
499 ; X64-LABEL: test_mm_cmpge_pd:
501 ; X64-NEXT: cmplepd %xmm0, %xmm1
502 ; X64-NEXT: movapd %xmm1, %xmm0
504 %fcmp = fcmp ole <2 x double> %a1, %a0
505 %sext = sext <2 x i1> %fcmp to <2 x i64>
506 %res = bitcast <2 x i64> %sext to <2 x double>
507 ret <2 x double> %res
; Scalar double greater-or-equal: cmp.sd is called with swapped operands and
; immediate 2 (cmplesd in the checks). The result vector is then rebuilt from
; lane 0 of the compare and lane 1 of %a0, which the checks expect as a movsd
; blend after the compare.
510 define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
511 ; X32-LABEL: test_mm_cmpge_sd:
513 ; X32-NEXT: cmplesd %xmm0, %xmm1
514 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
517 ; X64-LABEL: test_mm_cmpge_sd:
519 ; X64-NEXT: cmplesd %xmm0, %xmm1
520 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
522 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
523 %ext0 = extractelement <2 x double> %cmp, i32 0
524 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
525 %ext1 = extractelement <2 x double> %a0, i32 1
526 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
527 ret <2 x double> %ins1
; Signed byte greater-than: icmp sgt on <16 x i8> views, sign-extended to lane
; masks. Both targets are expected to select pcmpgtb.
530 define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
531 ; X32-LABEL: test_mm_cmpgt_epi8:
533 ; X32-NEXT: pcmpgtb %xmm1, %xmm0
536 ; X64-LABEL: test_mm_cmpgt_epi8:
538 ; X64-NEXT: pcmpgtb %xmm1, %xmm0
540 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
541 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
542 %cmp = icmp sgt <16 x i8> %arg0, %arg1
543 %res = sext <16 x i1> %cmp to <16 x i8>
544 %bc = bitcast <16 x i8> %res to <2 x i64>
; Signed 16-bit greater-than: icmp sgt on <8 x i16> views, sign-extended to
; lane masks. Both targets are expected to select pcmpgtw.
548 define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
549 ; X32-LABEL: test_mm_cmpgt_epi16:
551 ; X32-NEXT: pcmpgtw %xmm1, %xmm0
554 ; X64-LABEL: test_mm_cmpgt_epi16:
556 ; X64-NEXT: pcmpgtw %xmm1, %xmm0
558 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
559 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
560 %cmp = icmp sgt <8 x i16> %arg0, %arg1
561 %res = sext <8 x i1> %cmp to <8 x i16>
562 %bc = bitcast <8 x i16> %res to <2 x i64>
; Signed 32-bit greater-than: icmp sgt on <4 x i32> views, sign-extended to
; lane masks. Both targets are expected to select pcmpgtd.
566 define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
567 ; X32-LABEL: test_mm_cmpgt_epi32:
569 ; X32-NEXT: pcmpgtd %xmm1, %xmm0
572 ; X64-LABEL: test_mm_cmpgt_epi32:
574 ; X64-NEXT: pcmpgtd %xmm1, %xmm0
576 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
577 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
578 %cmp = icmp sgt <4 x i32> %arg0, %arg1
579 %res = sext <4 x i1> %cmp to <4 x i32>
580 %bc = bitcast <4 x i32> %res to <2 x i64>
; Packed double greater-than, written as fcmp olt with the operands swapped
; (%a1 < %a0). Both targets are expected to select cmpltpd with swapped
; registers, then movapd to move the result from xmm1 into xmm0.
584 define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
585 ; X32-LABEL: test_mm_cmpgt_pd:
587 ; X32-NEXT: cmpltpd %xmm0, %xmm1
588 ; X32-NEXT: movapd %xmm1, %xmm0
591 ; X64-LABEL: test_mm_cmpgt_pd:
593 ; X64-NEXT: cmpltpd %xmm0, %xmm1
594 ; X64-NEXT: movapd %xmm1, %xmm0
596 %fcmp = fcmp olt <2 x double> %a1, %a0
597 %sext = sext <2 x i1> %fcmp to <2 x i64>
598 %res = bitcast <2 x i64> %sext to <2 x double>
599 ret <2 x double> %res
; Scalar double greater-than: cmp.sd is called with swapped operands and
; immediate 1 (cmpltsd in the checks). The result vector is rebuilt from lane 0
; of the compare and lane 1 of %a0, expected as a movsd blend.
602 define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
603 ; X32-LABEL: test_mm_cmpgt_sd:
605 ; X32-NEXT: cmpltsd %xmm0, %xmm1
606 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
609 ; X64-LABEL: test_mm_cmpgt_sd:
611 ; X64-NEXT: cmpltsd %xmm0, %xmm1
612 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
614 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
615 %ext0 = extractelement <2 x double> %cmp, i32 0
616 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
617 %ext1 = extractelement <2 x double> %a0, i32 1
618 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
619 ret <2 x double> %ins1
; Packed double less-or-equal: fcmp ole in natural operand order, sign-extended
; to lane masks. Both targets are expected to select cmplepd.
622 define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
623 ; X32-LABEL: test_mm_cmple_pd:
625 ; X32-NEXT: cmplepd %xmm1, %xmm0
628 ; X64-LABEL: test_mm_cmple_pd:
630 ; X64-NEXT: cmplepd %xmm1, %xmm0
632 %fcmp = fcmp ole <2 x double> %a0, %a1
633 %sext = sext <2 x i1> %fcmp to <2 x i64>
634 %res = bitcast <2 x i64> %sext to <2 x double>
635 ret <2 x double> %res
; Scalar double less-or-equal through cmp.sd with immediate 2; both targets
; are expected to select cmplesd.
638 define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
639 ; X32-LABEL: test_mm_cmple_sd:
641 ; X32-NEXT: cmplesd %xmm1, %xmm0
644 ; X64-LABEL: test_mm_cmple_sd:
646 ; X64-NEXT: cmplesd %xmm1, %xmm0
648 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
649 ret <2 x double> %res
; Signed byte less-than, written as icmp sgt with the operands swapped
; (%arg1 > %arg0). Both targets are expected to select pcmpgtb with swapped
; registers, then movdqa to move the result from xmm1 into xmm0.
652 define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
653 ; X32-LABEL: test_mm_cmplt_epi8:
655 ; X32-NEXT: pcmpgtb %xmm0, %xmm1
656 ; X32-NEXT: movdqa %xmm1, %xmm0
659 ; X64-LABEL: test_mm_cmplt_epi8:
661 ; X64-NEXT: pcmpgtb %xmm0, %xmm1
662 ; X64-NEXT: movdqa %xmm1, %xmm0
664 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
665 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
666 %cmp = icmp sgt <16 x i8> %arg1, %arg0
667 %res = sext <16 x i1> %cmp to <16 x i8>
668 %bc = bitcast <16 x i8> %res to <2 x i64>
; Signed 16-bit less-than, written as icmp sgt with the operands swapped.
; Both targets are expected to select pcmpgtw with swapped registers, then
; movdqa to move the result from xmm1 into xmm0.
672 define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
673 ; X32-LABEL: test_mm_cmplt_epi16:
675 ; X32-NEXT: pcmpgtw %xmm0, %xmm1
676 ; X32-NEXT: movdqa %xmm1, %xmm0
679 ; X64-LABEL: test_mm_cmplt_epi16:
681 ; X64-NEXT: pcmpgtw %xmm0, %xmm1
682 ; X64-NEXT: movdqa %xmm1, %xmm0
684 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
685 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
686 %cmp = icmp sgt <8 x i16> %arg1, %arg0
687 %res = sext <8 x i1> %cmp to <8 x i16>
688 %bc = bitcast <8 x i16> %res to <2 x i64>
; Signed 32-bit less-than, written as icmp sgt with the operands swapped.
; Both targets are expected to select pcmpgtd with swapped registers, then
; movdqa to move the result from xmm1 into xmm0.
692 define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
693 ; X32-LABEL: test_mm_cmplt_epi32:
695 ; X32-NEXT: pcmpgtd %xmm0, %xmm1
696 ; X32-NEXT: movdqa %xmm1, %xmm0
699 ; X64-LABEL: test_mm_cmplt_epi32:
701 ; X64-NEXT: pcmpgtd %xmm0, %xmm1
702 ; X64-NEXT: movdqa %xmm1, %xmm0
704 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
705 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
706 %cmp = icmp sgt <4 x i32> %arg1, %arg0
707 %res = sext <4 x i1> %cmp to <4 x i32>
708 %bc = bitcast <4 x i32> %res to <2 x i64>
; Packed double less-than: fcmp olt in natural operand order, sign-extended to
; lane masks. Both targets are expected to select cmpltpd.
712 define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
713 ; X32-LABEL: test_mm_cmplt_pd:
715 ; X32-NEXT: cmpltpd %xmm1, %xmm0
718 ; X64-LABEL: test_mm_cmplt_pd:
720 ; X64-NEXT: cmpltpd %xmm1, %xmm0
722 %fcmp = fcmp olt <2 x double> %a0, %a1
723 %sext = sext <2 x i1> %fcmp to <2 x i64>
724 %res = bitcast <2 x i64> %sext to <2 x double>
725 ret <2 x double> %res
; Scalar double less-than through cmp.sd with immediate 1; both targets are
; expected to select cmpltsd.
728 define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
729 ; X32-LABEL: test_mm_cmplt_sd:
731 ; X32-NEXT: cmpltsd %xmm1, %xmm0
734 ; X64-LABEL: test_mm_cmplt_sd:
736 ; X64-NEXT: cmpltsd %xmm1, %xmm0
738 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
739 ret <2 x double> %res
; Packed double not-equal: fcmp une (unordered or not equal), sign-extended to
; lane masks. Both targets are expected to select cmpneqpd.
742 define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
743 ; X32-LABEL: test_mm_cmpneq_pd:
745 ; X32-NEXT: cmpneqpd %xmm1, %xmm0
748 ; X64-LABEL: test_mm_cmpneq_pd:
750 ; X64-NEXT: cmpneqpd %xmm1, %xmm0
752 %fcmp = fcmp une <2 x double> %a0, %a1
753 %sext = sext <2 x i1> %fcmp to <2 x i64>
754 %res = bitcast <2 x i64> %sext to <2 x double>
755 ret <2 x double> %res
; Scalar double not-equal through cmp.sd with immediate 4; both targets are
; expected to select cmpneqsd.
758 define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
759 ; X32-LABEL: test_mm_cmpneq_sd:
761 ; X32-NEXT: cmpneqsd %xmm1, %xmm0
764 ; X64-LABEL: test_mm_cmpneq_sd:
766 ; X64-NEXT: cmpneqsd %xmm1, %xmm0
768 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
769 ret <2 x double> %res
; Packed double not-greater-or-equal, written as fcmp ugt with the operands
; swapped (%a1 > %a0, or unordered). Both targets are expected to select
; cmpnlepd with swapped registers, then movapd to move the result into xmm0.
772 define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
773 ; X32-LABEL: test_mm_cmpnge_pd:
775 ; X32-NEXT: cmpnlepd %xmm0, %xmm1
776 ; X32-NEXT: movapd %xmm1, %xmm0
779 ; X64-LABEL: test_mm_cmpnge_pd:
781 ; X64-NEXT: cmpnlepd %xmm0, %xmm1
782 ; X64-NEXT: movapd %xmm1, %xmm0
784 %fcmp = fcmp ugt <2 x double> %a1, %a0
785 %sext = sext <2 x i1> %fcmp to <2 x i64>
786 %res = bitcast <2 x i64> %sext to <2 x double>
787 ret <2 x double> %res
; Scalar double not-greater-or-equal: cmp.sd is called with swapped operands
; and immediate 6 (cmpnlesd in the checks). The result vector is rebuilt from
; lane 0 of the compare and lane 1 of %a0, expected as a movsd blend.
790 define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
791 ; X32-LABEL: test_mm_cmpnge_sd:
793 ; X32-NEXT: cmpnlesd %xmm0, %xmm1
794 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
797 ; X64-LABEL: test_mm_cmpnge_sd:
799 ; X64-NEXT: cmpnlesd %xmm0, %xmm1
800 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
802 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
803 %ext0 = extractelement <2 x double> %cmp, i32 0
804 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
805 %ext1 = extractelement <2 x double> %a0, i32 1
806 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
807 ret <2 x double> %ins1
; Packed double not-greater-than, written as fcmp uge with the operands swapped
; (%a1 >= %a0, or unordered). Both targets are expected to select cmpnltpd with
; swapped registers, then movapd to move the result into xmm0.
810 define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
811 ; X32-LABEL: test_mm_cmpngt_pd:
813 ; X32-NEXT: cmpnltpd %xmm0, %xmm1
814 ; X32-NEXT: movapd %xmm1, %xmm0
817 ; X64-LABEL: test_mm_cmpngt_pd:
819 ; X64-NEXT: cmpnltpd %xmm0, %xmm1
820 ; X64-NEXT: movapd %xmm1, %xmm0
822 %fcmp = fcmp uge <2 x double> %a1, %a0
823 %sext = sext <2 x i1> %fcmp to <2 x i64>
824 %res = bitcast <2 x i64> %sext to <2 x double>
825 ret <2 x double> %res
; Scalar double not-greater-than: cmp.sd is called with swapped operands and
; immediate 5 (cmpnltsd in the checks). The result vector is rebuilt from
; lane 0 of the compare and lane 1 of %a0, expected as a movsd blend.
828 define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
829 ; X32-LABEL: test_mm_cmpngt_sd:
831 ; X32-NEXT: cmpnltsd %xmm0, %xmm1
832 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
835 ; X64-LABEL: test_mm_cmpngt_sd:
837 ; X64-NEXT: cmpnltsd %xmm0, %xmm1
838 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
840 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
841 %ext0 = extractelement <2 x double> %cmp, i32 0
842 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
843 %ext1 = extractelement <2 x double> %a0, i32 1
844 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
845 ret <2 x double> %ins1
; Packed double not-less-or-equal: fcmp ugt in natural operand order,
; sign-extended to lane masks. Both targets are expected to select cmpnlepd.
848 define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
849 ; X32-LABEL: test_mm_cmpnle_pd:
851 ; X32-NEXT: cmpnlepd %xmm1, %xmm0
854 ; X64-LABEL: test_mm_cmpnle_pd:
856 ; X64-NEXT: cmpnlepd %xmm1, %xmm0
858 %fcmp = fcmp ugt <2 x double> %a0, %a1
859 %sext = sext <2 x i1> %fcmp to <2 x i64>
860 %res = bitcast <2 x i64> %sext to <2 x double>
861 ret <2 x double> %res
; Scalar double not-less-or-equal through cmp.sd with immediate 6; both
; targets are expected to select cmpnlesd.
864 define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
865 ; X32-LABEL: test_mm_cmpnle_sd:
867 ; X32-NEXT: cmpnlesd %xmm1, %xmm0
870 ; X64-LABEL: test_mm_cmpnle_sd:
872 ; X64-NEXT: cmpnlesd %xmm1, %xmm0
874 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
875 ret <2 x double> %res
; Packed double not-less-than: fcmp uge in natural operand order,
; sign-extended to lane masks. Both targets are expected to select cmpnltpd.
878 define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
879 ; X32-LABEL: test_mm_cmpnlt_pd:
881 ; X32-NEXT: cmpnltpd %xmm1, %xmm0
884 ; X64-LABEL: test_mm_cmpnlt_pd:
886 ; X64-NEXT: cmpnltpd %xmm1, %xmm0
888 %fcmp = fcmp uge <2 x double> %a0, %a1
889 %sext = sext <2 x i1> %fcmp to <2 x i64>
890 %res = bitcast <2 x i64> %sext to <2 x double>
891 ret <2 x double> %res
; Scalar double not-less-than through cmp.sd with immediate 5; both targets
; are expected to select cmpnltsd.
894 define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
895 ; X32-LABEL: test_mm_cmpnlt_sd:
897 ; X32-NEXT: cmpnltsd %xmm1, %xmm0
900 ; X64-LABEL: test_mm_cmpnlt_sd:
902 ; X64-NEXT: cmpnltsd %xmm1, %xmm0
904 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
905 ret <2 x double> %res
; Packed double ordered test: fcmp ord, sign-extended to lane masks. Both
; targets are expected to select cmpordpd.
908 define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
909 ; X32-LABEL: test_mm_cmpord_pd:
911 ; X32-NEXT: cmpordpd %xmm1, %xmm0
914 ; X64-LABEL: test_mm_cmpord_pd:
916 ; X64-NEXT: cmpordpd %xmm1, %xmm0
918 %fcmp = fcmp ord <2 x double> %a0, %a1
919 %sext = sext <2 x i1> %fcmp to <2 x i64>
920 %res = bitcast <2 x i64> %sext to <2 x double>
921 ret <2 x double> %res
; Scalar double ordered test through cmp.sd with immediate 7; both targets
; are expected to select cmpordsd.
924 define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
925 ; X32-LABEL: test_mm_cmpord_sd:
927 ; X32-NEXT: cmpordsd %xmm1, %xmm0
930 ; X64-LABEL: test_mm_cmpord_sd:
932 ; X64-NEXT: cmpordsd %xmm1, %xmm0
934 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
935 ret <2 x double> %res
; Packed double unordered test: fcmp uno, sign-extended to lane masks. Both
; targets are expected to select cmpunordpd.
938 define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
939 ; X32-LABEL: test_mm_cmpunord_pd:
941 ; X32-NEXT: cmpunordpd %xmm1, %xmm0
944 ; X64-LABEL: test_mm_cmpunord_pd:
946 ; X64-NEXT: cmpunordpd %xmm1, %xmm0
948 %fcmp = fcmp uno <2 x double> %a0, %a1
949 %sext = sext <2 x i1> %fcmp to <2 x i64>
950 %res = bitcast <2 x i64> %sext to <2 x double>
951 ret <2 x double> %res
; Scalar double unordered test through cmp.sd with immediate 3; both targets
; are expected to select cmpunordsd.
954 define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
955 ; X32-LABEL: test_mm_cmpunord_sd:
957 ; X32-NEXT: cmpunordsd %xmm1, %xmm0
960 ; X64-LABEL: test_mm_cmpunord_sd:
962 ; X64-NEXT: cmpunordsd %xmm1, %xmm0
964 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
965 ret <2 x double> %res
; Scalar ordered-equal compare returning i32 through llvm.x86.sse2.comieq.sd.
; The visible checks expect comisd, a setnp into %al, an andb that combines
; %al into %cl, and a movzbl of %cl into %eax.
; NOTE(review): the instruction that writes %cl before the andb is not visible
; in this view of the checks - confirm against the full file.
968 define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
969 ; X32-LABEL: test_mm_comieq_sd:
971 ; X32-NEXT: comisd %xmm1, %xmm0
972 ; X32-NEXT: setnp %al
974 ; X32-NEXT: andb %al, %cl
975 ; X32-NEXT: movzbl %cl, %eax
978 ; X64-LABEL: test_mm_comieq_sd:
980 ; X64-NEXT: comisd %xmm1, %xmm0
981 ; X64-NEXT: setnp %al
983 ; X64-NEXT: andb %al, %cl
984 ; X64-NEXT: movzbl %cl, %eax
986 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
989 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar greater-or-equal compare returning i32 through
; llvm.x86.sse2.comige.sd. Both targets are expected to zero %eax first, then
; emit comisd in natural operand order and setae into %al.
991 define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
992 ; X32-LABEL: test_mm_comige_sd:
994 ; X32-NEXT: xorl %eax, %eax
995 ; X32-NEXT: comisd %xmm1, %xmm0
996 ; X32-NEXT: setae %al
999 ; X64-LABEL: test_mm_comige_sd:
1001 ; X64-NEXT: xorl %eax, %eax
1002 ; X64-NEXT: comisd %xmm1, %xmm0
1003 ; X64-NEXT: setae %al
1005 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
1008 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar greater-than compare returning i32 through llvm.x86.sse2.comigt.sd.
; Both targets are expected to zero %eax first, then emit comisd in natural
; operand order and seta into %al.
1010 define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1011 ; X32-LABEL: test_mm_comigt_sd:
1013 ; X32-NEXT: xorl %eax, %eax
1014 ; X32-NEXT: comisd %xmm1, %xmm0
1015 ; X32-NEXT: seta %al
1018 ; X64-LABEL: test_mm_comigt_sd:
1020 ; X64-NEXT: xorl %eax, %eax
1021 ; X64-NEXT: comisd %xmm1, %xmm0
1022 ; X64-NEXT: seta %al
1024 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
1027 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar less-or-equal compare returning i32 through llvm.x86.sse2.comile.sd.
; The checks expect the comisd operands to be swapped relative to the
; greater-or-equal test, followed by the same setae into a pre-zeroed %eax.
1029 define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1030 ; X32-LABEL: test_mm_comile_sd:
1032 ; X32-NEXT: xorl %eax, %eax
1033 ; X32-NEXT: comisd %xmm0, %xmm1
1034 ; X32-NEXT: setae %al
1037 ; X64-LABEL: test_mm_comile_sd:
1039 ; X64-NEXT: xorl %eax, %eax
1040 ; X64-NEXT: comisd %xmm0, %xmm1
1041 ; X64-NEXT: setae %al
1043 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1046 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar less-than compare returning i32 through llvm.x86.sse2.comilt.sd.
; The checks expect the comisd operands to be swapped relative to the
; greater-than test, followed by the same seta into a pre-zeroed %eax.
1048 define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1049 ; X32-LABEL: test_mm_comilt_sd:
1051 ; X32-NEXT: xorl %eax, %eax
1052 ; X32-NEXT: comisd %xmm0, %xmm1
1053 ; X32-NEXT: seta %al
1056 ; X64-LABEL: test_mm_comilt_sd:
1058 ; X64-NEXT: xorl %eax, %eax
1059 ; X64-NEXT: comisd %xmm0, %xmm1
1060 ; X64-NEXT: seta %al
1062 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1065 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar not-equal compare returning i32 through llvm.x86.sse2.comineq.sd.
; Both targets are expected to emit comisd, then OR the parity flag (setp)
; with the not-equal flag (setne) and zero-extend the byte into %eax.
1067 define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1068 ; X32-LABEL: test_mm_comineq_sd:
1070 ; X32-NEXT: comisd %xmm1, %xmm0
1071 ; X32-NEXT: setp %al
1072 ; X32-NEXT: setne %cl
1073 ; X32-NEXT: orb %al, %cl
1074 ; X32-NEXT: movzbl %cl, %eax
1077 ; X64-LABEL: test_mm_comineq_sd:
1079 ; X64-NEXT: comisd %xmm1, %xmm0
1080 ; X64-NEXT: setp %al
1081 ; X64-NEXT: setne %cl
1082 ; X64-NEXT: orb %al, %cl
1083 ; X64-NEXT: movzbl %cl, %eax
1085 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1088 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Signed i32 -> double conversion of the two low lanes: a shufflevector picks
; lanes 0 and 1 of the <4 x i32> view and sitofp converts them. Both targets
; are expected to select cvtdq2pd.
1090 define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1091 ; X32-LABEL: test_mm_cvtepi32_pd:
1093 ; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1096 ; X64-LABEL: test_mm_cvtepi32_pd:
1098 ; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1100 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1101 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1102 %res = sitofp <2 x i32> %ext to <2 x double>
1103 ret <2 x double> %res
; <4 x i32> -> <4 x float> conversion through llvm.x86.sse2.cvtdq2ps; both
; targets are expected to select cvtdq2ps.
1106 define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1107 ; X32-LABEL: test_mm_cvtepi32_ps:
1109 ; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1112 ; X64-LABEL: test_mm_cvtepi32_ps:
1114 ; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1116 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1117 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1118 ret <4 x float> %res
1120 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
; <2 x double> -> <4 x i32> conversion through llvm.x86.sse2.cvtpd2dq, with the
; result viewed as <2 x i64>. Both targets are expected to select cvtpd2dq.
1122 define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1123 ; X32-LABEL: test_mm_cvtpd_epi32:
1125 ; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1128 ; X64-LABEL: test_mm_cvtpd_epi32:
1130 ; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1132 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1133 %bc = bitcast <4 x i32> %res to <2 x i64>
1136 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
; <2 x double> -> <4 x float> conversion through llvm.x86.sse2.cvtpd2ps; both
; targets are expected to select cvtpd2ps.
1138 define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1139 ; X32-LABEL: test_mm_cvtpd_ps:
1141 ; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1144 ; X64-LABEL: test_mm_cvtpd_ps:
1146 ; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1148 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1149 ret <4 x float> %res
1151 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
; <4 x float> -> <4 x i32> conversion through llvm.x86.sse2.cvtps2dq, with the
; result viewed as <2 x i64>. Both targets are expected to select cvtps2dq.
1153 define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1154 ; X32-LABEL: test_mm_cvtps_epi32:
1156 ; X32-NEXT: cvtps2dq %xmm0, %xmm0
1159 ; X64-LABEL: test_mm_cvtps_epi32:
1161 ; X64-NEXT: cvtps2dq %xmm0, %xmm0
1163 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1164 %bc = bitcast <4 x i32> %res to <2 x i64>
1167 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
; float -> double widening of the two low lanes: a shufflevector picks lanes 0
; and 1 of %a0 and fpext widens them. Both targets are expected to select
; cvtps2pd.
1169 define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1170 ; X32-LABEL: test_mm_cvtps_pd:
1172 ; X32-NEXT: cvtps2pd %xmm0, %xmm0
1175 ; X64-LABEL: test_mm_cvtps_pd:
1177 ; X64-NEXT: cvtps2pd %xmm0, %xmm0
1179 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1180 %res = fpext <2 x float> %ext to <2 x double>
1181 ret <2 x double> %res
; Extracts lane 0 of a <2 x double> as a scalar double. On i386 the checks
; expect a frame with an 8-byte-aligned stack slot: the low lane is stored
; with movlps and reloaded with fldl before the frame is torn down. On x86_64
; the visible checks list no instruction after the label.
1184 define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1185 ; X32-LABEL: test_mm_cvtsd_f64:
1187 ; X32-NEXT: pushl %ebp
1188 ; X32-NEXT: movl %esp, %ebp
1189 ; X32-NEXT: andl $-8, %esp
1190 ; X32-NEXT: subl $8, %esp
1191 ; X32-NEXT: movlps %xmm0, (%esp)
1192 ; X32-NEXT: fldl (%esp)
1193 ; X32-NEXT: movl %ebp, %esp
1194 ; X32-NEXT: popl %ebp
1197 ; X64-LABEL: test_mm_cvtsd_f64:
1200 %res = extractelement <2 x double> %a0, i32 0
; Calls the llvm.x86.sse2.cvtsd2si intrinsic on the <2 x double> argument to
; produce an i32. Both runs expect cvtsd2si %xmm0, %eax.
1204 define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1205 ; X32-LABEL: test_mm_cvtsd_si32:
1207 ; X32-NEXT: cvtsd2si %xmm0, %eax
1210 ; X64-LABEL: test_mm_cvtsd_si32:
1212 ; X64-NEXT: cvtsd2si %xmm0, %eax
1214 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1217 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
; Calls the llvm.x86.sse2.cvtsd2ss intrinsic with a <4 x float> and a
; <2 x double> register operand and returns the <4 x float> result.
; Both runs expect cvtsd2ss %xmm1, %xmm0.
1219 define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
1220 ; X32-LABEL: test_mm_cvtsd_ss:
1222 ; X32-NEXT: cvtsd2ss %xmm1, %xmm0
1225 ; X64-LABEL: test_mm_cvtsd_ss:
1227 ; X64-NEXT: cvtsd2ss %xmm1, %xmm0
1229 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1230 ret <4 x float> %res
1232 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
; Same llvm.x86.sse2.cvtsd2ss call as test_mm_cvtsd_ss, but the <2 x double>
; operand is loaded from %p1 first. The checks expect the load to be folded
; into the instruction as a memory operand: cvtsd2ss (%eax), %xmm0 on i386
; (after fetching the pointer from the stack) and cvtsd2ss (%rdi), %xmm0 on
; x86_64.
1234 define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
1235 ; X32-LABEL: test_mm_cvtsd_ss_load:
1237 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1238 ; X32-NEXT: cvtsd2ss (%eax), %xmm0
1241 ; X64-LABEL: test_mm_cvtsd_ss_load:
1243 ; X64-NEXT: cvtsd2ss (%rdi), %xmm0
1245 %a1 = load <2 x double>, <2 x double>* %p1
1246 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1247 ret <4 x float> %res
; Reinterprets the <2 x i64> argument as <4 x i32> and extracts element 0.
; Both runs expect movd %xmm0, %eax.
1250 define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1251 ; X32-LABEL: test_mm_cvtsi128_si32:
1253 ; X32-NEXT: movd %xmm0, %eax
1256 ; X64-LABEL: test_mm_cvtsi128_si32:
1258 ; X64-NEXT: movd %xmm0, %eax
1260 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1261 %res = extractelement <4 x i32> %arg0, i32 0
; Converts the i32 argument to double with sitofp and inserts it into element 0
; of %a0. The checks expect cvtsi2sdl with the integer taken from the stack on
; i386 and from %edi on x86_64.
1265 define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1266 ; X32-LABEL: test_mm_cvtsi32_sd:
1268 ; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
1271 ; X64-LABEL: test_mm_cvtsi32_sd:
1273 ; X64-NEXT: cvtsi2sdl %edi, %xmm0
1275 %cvt = sitofp i32 %a1 to double
1276 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1277 ret <2 x double> %res
; Builds a <4 x i32> whose element 0 is %a0 and whose elements 1-3 are zero,
; then bitcasts it to <2 x i64>. The i386 checks expect a zero-extending movss
; load from memory; the x86_64 checks expect movd %edi, %xmm0.
1280 define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1281 ; X32-LABEL: test_mm_cvtsi32_si128:
1283 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1286 ; X64-LABEL: test_mm_cvtsi32_si128:
1288 ; X64-NEXT: movd %edi, %xmm0
1290 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1291 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1292 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1293 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1294 %res = bitcast <4 x i32> %res3 to <2 x i64>
; Extracts element 0 of the <4 x float> %a1, widens it to double with fpext and
; inserts it into element 0 of %a0. Both runs expect cvtss2sd %xmm1, %xmm0.
1298 define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1299 ; X32-LABEL: test_mm_cvtss_sd:
1301 ; X32-NEXT: cvtss2sd %xmm1, %xmm0
1304 ; X64-LABEL: test_mm_cvtss_sd:
1306 ; X64-NEXT: cvtss2sd %xmm1, %xmm0
1308 %ext = extractelement <4 x float> %a1, i32 0
1309 %cvt = fpext float %ext to double
1310 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1311 ret <2 x double> %res
; Calls the llvm.x86.sse2.cvttpd2dq intrinsic on the <2 x double> argument and
; bitcasts its <4 x i32> result to <2 x i64>. Both runs expect
; cvttpd2dq %xmm0, %xmm0.
1314 define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1315 ; X32-LABEL: test_mm_cvttpd_epi32:
1317 ; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1320 ; X64-LABEL: test_mm_cvttpd_epi32:
1322 ; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1324 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1325 %bc = bitcast <4 x i32> %res to <2 x i64>
1328 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
; Calls the llvm.x86.sse2.cvttps2dq intrinsic on the <4 x float> argument and
; bitcasts its <4 x i32> result to <2 x i64>. Both runs expect
; cvttps2dq %xmm0, %xmm0.
1330 define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1331 ; X32-LABEL: test_mm_cvttps_epi32:
1333 ; X32-NEXT: cvttps2dq %xmm0, %xmm0
1336 ; X64-LABEL: test_mm_cvttps_epi32:
1338 ; X64-NEXT: cvttps2dq %xmm0, %xmm0
1340 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1341 %bc = bitcast <4 x i32> %res to <2 x i64>
1344 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
; Calls the llvm.x86.sse2.cvttsd2si intrinsic on the <2 x double> argument to
; produce an i32. Both runs expect cvttsd2si %xmm0, %eax.
1346 define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1347 ; X32-LABEL: test_mm_cvttsd_si32:
1349 ; X32-NEXT: cvttsd2si %xmm0, %eax
1352 ; X64-LABEL: test_mm_cvttsd_si32:
1354 ; X64-NEXT: cvttsd2si %xmm0, %eax
1356 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1359 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
; Element-wise fdiv of two <2 x double> vectors. Both runs expect
; divpd %xmm1, %xmm0.
1361 define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1362 ; X32-LABEL: test_mm_div_pd:
1364 ; X32-NEXT: divpd %xmm1, %xmm0
1367 ; X64-LABEL: test_mm_div_pd:
1369 ; X64-NEXT: divpd %xmm1, %xmm0
1371 %res = fdiv <2 x double> %a0, %a1
1372 ret <2 x double> %res
; Scalar divide written as extract/fdiv/insert: element 0 of %a0 is divided by
; element 0 of %a1 and the quotient is written back into element 0 of %a0, so
; element 1 of %a0 passes through. Both runs expect divsd %xmm1, %xmm0.
1375 define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1376 ; X32-LABEL: test_mm_div_sd:
1378 ; X32-NEXT: divsd %xmm1, %xmm0
1381 ; X64-LABEL: test_mm_div_sd:
1383 ; X64-NEXT: divsd %xmm1, %xmm0
1385 %ext0 = extractelement <2 x double> %a0, i32 0
1386 %ext1 = extractelement <2 x double> %a1, i32 0
1387 %fdiv = fdiv double %ext0, %ext1
1388 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1389 ret <2 x double> %res
; Reinterprets the argument as <8 x i16>, extracts element 1 and zero-extends
; it to i32. Both runs expect pextrw $1, %xmm0, %eax followed by
; movzwl %ax, %eax.
1392 define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1393 ; X32-LABEL: test_mm_extract_epi16:
1395 ; X32-NEXT: pextrw $1, %xmm0, %eax
1396 ; X32-NEXT: movzwl %ax, %eax
1399 ; X64-LABEL: test_mm_extract_epi16:
1401 ; X64-NEXT: pextrw $1, %xmm0, %eax
1402 ; X64-NEXT: movzwl %ax, %eax
1404 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1405 %ext = extractelement <8 x i16> %arg0, i32 1
1406 %res = zext i16 %ext to i32
; Reinterprets %a0 as <8 x i16>, inserts the i16 %a1 at element 1 and bitcasts
; back to <2 x i64>. The i386 checks expect the value to be loaded from the
; stack with movzwl before pinsrw $1; the x86_64 checks expect pinsrw $1
; directly from %edi.
1410 define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1411 ; X32-LABEL: test_mm_insert_epi16:
1413 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1414 ; X32-NEXT: pinsrw $1, %eax, %xmm0
1417 ; X64-LABEL: test_mm_insert_epi16:
1419 ; X64-NEXT: pinsrw $1, %edi, %xmm0
1421 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1422 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1423 %bc = bitcast <8 x i16> %res to <2 x i64>
; Calls the argument-less llvm.x86.sse2.lfence intrinsic from a void function.
1427 define void @test_mm_lfence() nounwind {
1428 ; X32-LABEL: test_mm_lfence:
1433 ; X64-LABEL: test_mm_lfence:
1437 call void @llvm.x86.sse2.lfence()
1440 declare void @llvm.x86.sse2.lfence() nounwind readnone
; Casts the double* argument to <2 x double>* and performs a 16-byte-aligned
; vector load. The checks expect an aligned movaps from the pointer (fetched
; from the stack into %eax on i386, %rdi on x86_64).
1442 define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1443 ; X32-LABEL: test_mm_load_pd:
1445 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1446 ; X32-NEXT: movaps (%eax), %xmm0
1449 ; X64-LABEL: test_mm_load_pd:
1451 ; X64-NEXT: movaps (%rdi), %xmm0
1453 %arg0 = bitcast double* %a0 to <2 x double>*
1454 %res = load <2 x double>, <2 x double>* %arg0, align 16
1455 ret <2 x double> %res
; Loads one double with align 1 and builds a <2 x double> with that value in
; element 0 and 0.0 in element 1. Both runs expect a movsd load that zeroes
; the upper element (mem[0],zero).
1458 define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1459 ; X32-LABEL: test_mm_load_sd:
1461 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1462 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1465 ; X64-LABEL: test_mm_load_sd:
1467 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1469 %ld = load double, double* %a0, align 1
1470 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1471 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1472 ret <2 x double> %res1
; 16-byte-aligned load of a <2 x i64> from the pointer argument. The checks
; expect movaps from the pointer (%eax on i386 after a stack fetch, %rdi on
; x86_64).
1475 define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1476 ; X32-LABEL: test_mm_load_si128:
1478 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1479 ; X32-NEXT: movaps (%eax), %xmm0
1482 ; X64-LABEL: test_mm_load_si128:
1484 ; X64-NEXT: movaps (%rdi), %xmm0
1486 %res = load <2 x i64>, <2 x i64>* %a0, align 16
; Loads one double (align 8) and places it in both elements of a <2 x double>.
; Both runs expect a movsd load followed by movlhps duplicating element 0
; (xmm0[0,0]).
1490 define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1491 ; X32-LABEL: test_mm_load1_pd:
1493 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1494 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1495 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1498 ; X64-LABEL: test_mm_load1_pd:
1500 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1501 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1503 %ld = load double, double* %a0, align 8
1504 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1505 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1506 ret <2 x double> %res1
; Loads one double (align 8) from %a1 and inserts it into element 1 of %a0,
; leaving element 0 unchanged. Both runs expect movhpd (xmm0[0],mem[0]).
1509 define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1510 ; X32-LABEL: test_mm_loadh_pd:
1512 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1513 ; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1516 ; X64-LABEL: test_mm_loadh_pd:
1518 ; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1520 %ld = load double, double* %a1, align 8
1521 %res = insertelement <2 x double> %a0, double %ld, i32 1
1522 ret <2 x double> %res
; Casts the <2 x i64>* argument to i64*, loads one i64 with align 1 and builds
; a <2 x i64> with that value in element 0 and zero in element 1. Both runs
; expect a movsd load that zeroes the upper element (mem[0],zero).
1525 define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1526 ; X32-LABEL: test_mm_loadl_epi64:
1528 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1529 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1532 ; X64-LABEL: test_mm_loadl_epi64:
1534 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1536 %bc = bitcast <2 x i64>* %a1 to i64*
1537 %ld = load i64, i64* %bc, align 1
1538 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1539 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
; Loads one double (align 8) from %a1 and inserts it into element 0 of %a0,
; leaving element 1 unchanged. Both runs expect movlpd (mem[0],xmm0[1]).
1543 define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1544 ; X32-LABEL: test_mm_loadl_pd:
1546 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1547 ; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1550 ; X64-LABEL: test_mm_loadl_pd:
1552 ; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1554 %ld = load double, double* %a1, align 8
1555 %res = insertelement <2 x double> %a0, double %ld, i32 0
1556 ret <2 x double> %res
; 16-byte-aligned <2 x double> load followed by a shufflevector with mask
; <1, 0>, i.e. the two loaded elements are swapped. Both runs expect movapd
; from the pointer and then shufpd producing xmm0[1,0].
1559 define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1560 ; X32-LABEL: test_mm_loadr_pd:
1562 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1563 ; X32-NEXT: movapd (%eax), %xmm0
1564 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1567 ; X64-LABEL: test_mm_loadr_pd:
1569 ; X64-NEXT: movapd (%rdi), %xmm0
1570 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1572 %arg0 = bitcast double* %a0 to <2 x double>*
1573 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1574 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1575 ret <2 x double> %res
; Casts the double* argument to <2 x double>* and loads with align 1.
; Because the load is not 16-byte aligned, the checks expect the unaligned
; movups form on both targets.
1578 define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1579 ; X32-LABEL: test_mm_loadu_pd:
1581 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1582 ; X32-NEXT: movups (%eax), %xmm0
1585 ; X64-LABEL: test_mm_loadu_pd:
1587 ; X64-NEXT: movups (%rdi), %xmm0
1589 %arg0 = bitcast double* %a0 to <2 x double>*
1590 %res = load <2 x double>, <2 x double>* %arg0, align 1
1591 ret <2 x double> %res
; Loads a <2 x i64> from the pointer argument with align 1. The checks expect
; the unaligned movups form on both targets.
1594 define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1595 ; X32-LABEL: test_mm_loadu_si128:
1597 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1598 ; X32-NEXT: movups (%eax), %xmm0
1601 ; X64-LABEL: test_mm_loadu_si128:
1603 ; X64-NEXT: movups (%rdi), %xmm0
1605 %res = load <2 x i64>, <2 x i64>* %a0, align 1
; Reinterprets both arguments as <8 x i16>, calls the llvm.x86.sse2.pmadd.wd
; intrinsic and bitcasts its <4 x i32> result to <2 x i64>. Both runs expect
; pmaddwd %xmm1, %xmm0.
1609 define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1610 ; X32-LABEL: test_mm_madd_epi16:
1612 ; X32-NEXT: pmaddwd %xmm1, %xmm0
1615 ; X64-LABEL: test_mm_madd_epi16:
1617 ; X64-NEXT: pmaddwd %xmm1, %xmm0
1619 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1620 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1621 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1622 %bc = bitcast <4 x i32> %res to <2 x i64>
1625 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
; Reinterprets both vector arguments as <16 x i8> and passes them, together
; with the i8* %a2, to the llvm.x86.sse2.maskmov.dqu intrinsic.
; Both runs expect maskmovdqu %xmm1, %xmm0. On i386 the checks additionally
; expect %edi to be saved, loaded from the stack before the instruction, and
; restored afterwards.
1627 define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1628 ; X32-LABEL: test_mm_maskmoveu_si128:
1630 ; X32-NEXT: pushl %edi
1631 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1632 ; X32-NEXT: maskmovdqu %xmm1, %xmm0
1633 ; X32-NEXT: popl %edi
1636 ; X64-LABEL: test_mm_maskmoveu_si128:
1638 ; X64-NEXT: maskmovdqu %xmm1, %xmm0
1640 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1641 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1642 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1645 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
; Signed 16-bit maximum written as generic IR: icmp sgt on <8 x i16> followed
; by a select of the greater operand, then a bitcast to <2 x i64>. Both runs
; expect the compare+select pair to become pmaxsw %xmm1, %xmm0.
1647 define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1648 ; X32-LABEL: test_mm_max_epi16:
1650 ; X32-NEXT: pmaxsw %xmm1, %xmm0
1653 ; X64-LABEL: test_mm_max_epi16:
1655 ; X64-NEXT: pmaxsw %xmm1, %xmm0
1657 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1658 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1659 %cmp = icmp sgt <8 x i16> %arg0, %arg1
1660 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1661 %bc = bitcast <8 x i16> %sel to <2 x i64>
; Unsigned 8-bit maximum written as generic IR: icmp ugt on <16 x i8> followed
; by a select of the greater operand, then a bitcast to <2 x i64>. Both runs
; expect pmaxub %xmm1, %xmm0.
1665 define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1666 ; X32-LABEL: test_mm_max_epu8:
1668 ; X32-NEXT: pmaxub %xmm1, %xmm0
1671 ; X64-LABEL: test_mm_max_epu8:
1673 ; X64-NEXT: pmaxub %xmm1, %xmm0
1675 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1676 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1677 %cmp = icmp ugt <16 x i8> %arg0, %arg1
1678 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1679 %bc = bitcast <16 x i8> %sel to <2 x i64>
; Calls the llvm.x86.sse2.max.pd intrinsic on two <2 x double> vectors and
; returns the result. Both runs expect maxpd %xmm1, %xmm0.
1683 define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1684 ; X32-LABEL: test_mm_max_pd:
1686 ; X32-NEXT: maxpd %xmm1, %xmm0
1689 ; X64-LABEL: test_mm_max_pd:
1691 ; X64-NEXT: maxpd %xmm1, %xmm0
1693 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1694 ret <2 x double> %res
1696 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
; Calls the llvm.x86.sse2.max.sd intrinsic on two <2 x double> vectors and
; returns the result. Both runs expect maxsd %xmm1, %xmm0.
1698 define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1699 ; X32-LABEL: test_mm_max_sd:
1701 ; X32-NEXT: maxsd %xmm1, %xmm0
1704 ; X64-LABEL: test_mm_max_sd:
1706 ; X64-NEXT: maxsd %xmm1, %xmm0
1708 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1709 ret <2 x double> %res
1711 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls the argument-less llvm.x86.sse2.mfence intrinsic from a void function.
1713 define void @test_mm_mfence() nounwind {
1714 ; X32-LABEL: test_mm_mfence:
1719 ; X64-LABEL: test_mm_mfence:
1723 call void @llvm.x86.sse2.mfence()
1726 declare void @llvm.x86.sse2.mfence() nounwind readnone
; Signed 16-bit minimum written as generic IR: icmp slt on <8 x i16> followed
; by a select of the smaller operand, then a bitcast to <2 x i64>. Both runs
; expect pminsw %xmm1, %xmm0.
1728 define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1729 ; X32-LABEL: test_mm_min_epi16:
1731 ; X32-NEXT: pminsw %xmm1, %xmm0
1734 ; X64-LABEL: test_mm_min_epi16:
1736 ; X64-NEXT: pminsw %xmm1, %xmm0
1738 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1739 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1740 %cmp = icmp slt <8 x i16> %arg0, %arg1
1741 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1742 %bc = bitcast <8 x i16> %sel to <2 x i64>
; Unsigned 8-bit minimum written as generic IR: icmp ult on <16 x i8> followed
; by a select of the smaller operand, then a bitcast to <2 x i64>. Both runs
; expect pminub %xmm1, %xmm0.
1746 define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1747 ; X32-LABEL: test_mm_min_epu8:
1749 ; X32-NEXT: pminub %xmm1, %xmm0
1752 ; X64-LABEL: test_mm_min_epu8:
1754 ; X64-NEXT: pminub %xmm1, %xmm0
1756 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1757 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1758 %cmp = icmp ult <16 x i8> %arg0, %arg1
1759 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1760 %bc = bitcast <16 x i8> %sel to <2 x i64>
; Calls the llvm.x86.sse2.min.pd intrinsic on two <2 x double> vectors and
; returns the result. Both runs expect minpd %xmm1, %xmm0.
1764 define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1765 ; X32-LABEL: test_mm_min_pd:
1767 ; X32-NEXT: minpd %xmm1, %xmm0
1770 ; X64-LABEL: test_mm_min_pd:
1772 ; X64-NEXT: minpd %xmm1, %xmm0
1774 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1775 ret <2 x double> %res
1777 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
; Calls the llvm.x86.sse2.min.sd intrinsic on two <2 x double> vectors and
; returns the result. Both runs expect minsd %xmm1, %xmm0.
1779 define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1780 ; X32-LABEL: test_mm_min_sd:
1782 ; X32-NEXT: minsd %xmm1, %xmm0
1785 ; X64-LABEL: test_mm_min_sd:
1787 ; X64-NEXT: minsd %xmm1, %xmm0
1789 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1790 ret <2 x double> %res
1792 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
; shufflevector of %a0 with zeroinitializer using mask <0, 2>: element 0 comes
; from %a0 and element 1 is element 0 of the zero vector. Both runs expect a
; movq that keeps xmm0[0] and zeroes the upper element.
1794 define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1795 ; X32-LABEL: test_mm_move_epi64:
1797 ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1800 ; X64-LABEL: test_mm_move_epi64:
1802 ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1804 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
; Builds a <2 x double> from element 0 of %a1 and element 1 of %a0 using
; extractelement/insertelement pairs. Both runs expect a register movsd
; producing xmm1[0],xmm0[1].
1808 define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1809 ; X32-LABEL: test_mm_move_sd:
1811 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1814 ; X64-LABEL: test_mm_move_sd:
1816 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1818 %ext0 = extractelement <2 x double> %a1, i32 0
1819 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1820 %ext1 = extractelement <2 x double> %a0, i32 1
1821 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1822 ret <2 x double> %res1
; Reinterprets the argument as <16 x i8> and calls the
; llvm.x86.sse2.pmovmskb.128 intrinsic to produce an i32. Both runs expect
; pmovmskb %xmm0, %eax.
1825 define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1826 ; X32-LABEL: test_mm_movemask_epi8:
1828 ; X32-NEXT: pmovmskb %xmm0, %eax
1831 ; X64-LABEL: test_mm_movemask_epi8:
1833 ; X64-NEXT: pmovmskb %xmm0, %eax
1835 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1836 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1839 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
; Calls the llvm.x86.sse2.movmsk.pd intrinsic on the <2 x double> argument to
; produce an i32. Both runs expect movmskpd %xmm0, %eax.
1841 define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1842 ; X32-LABEL: test_mm_movemask_pd:
1844 ; X32-NEXT: movmskpd %xmm0, %eax
1847 ; X64-LABEL: test_mm_movemask_pd:
1849 ; X64-NEXT: movmskpd %xmm0, %eax
1851 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1854 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
; Reinterprets both arguments as <4 x i32> and calls the
; llvm.x86.sse2.pmulu.dq intrinsic, which yields a <2 x i64>. Both runs expect
; pmuludq %xmm1, %xmm0.
1856 define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1857 ; X32-LABEL: test_mm_mul_epu32:
1859 ; X32-NEXT: pmuludq %xmm1, %xmm0
1862 ; X64-LABEL: test_mm_mul_epu32:
1864 ; X64-NEXT: pmuludq %xmm1, %xmm0
1866 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1867 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1868 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1871 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
; Element-wise fmul of two <2 x double> vectors. Both runs expect
; mulpd %xmm1, %xmm0.
1873 define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1874 ; X32-LABEL: test_mm_mul_pd:
1876 ; X32-NEXT: mulpd %xmm1, %xmm0
1879 ; X64-LABEL: test_mm_mul_pd:
1881 ; X64-NEXT: mulpd %xmm1, %xmm0
1883 %res = fmul <2 x double> %a0, %a1
1884 ret <2 x double> %res
; Scalar multiply written as extract/fmul/insert: element 0 of %a0 is
; multiplied by element 0 of %a1 and the product is written back into element 0
; of %a0, so element 1 of %a0 passes through. Both runs expect
; mulsd %xmm1, %xmm0.
1887 define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1888 ; X32-LABEL: test_mm_mul_sd:
1890 ; X32-NEXT: mulsd %xmm1, %xmm0
1893 ; X64-LABEL: test_mm_mul_sd:
1895 ; X64-NEXT: mulsd %xmm1, %xmm0
1897 %ext0 = extractelement <2 x double> %a0, i32 0
1898 %ext1 = extractelement <2 x double> %a1, i32 0
1899 %fmul = fmul double %ext0, %ext1
1900 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1901 ret <2 x double> %res
; Reinterprets both arguments as <8 x i16>, calls the llvm.x86.sse2.pmulh.w
; intrinsic and bitcasts the <8 x i16> result to <2 x i64>. Both runs expect
; pmulhw %xmm1, %xmm0.
1904 define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1905 ; X32-LABEL: test_mm_mulhi_epi16:
1907 ; X32-NEXT: pmulhw %xmm1, %xmm0
1910 ; X64-LABEL: test_mm_mulhi_epi16:
1912 ; X64-NEXT: pmulhw %xmm1, %xmm0
1914 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1915 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1916 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1917 %bc = bitcast <8 x i16> %res to <2 x i64>
1920 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
; Reinterprets both arguments as <8 x i16>, calls the llvm.x86.sse2.pmulhu.w
; intrinsic and bitcasts the <8 x i16> result to <2 x i64>. Both runs expect
; pmulhuw %xmm1, %xmm0.
1922 define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1923 ; X32-LABEL: test_mm_mulhi_epu16:
1925 ; X32-NEXT: pmulhuw %xmm1, %xmm0
1928 ; X64-LABEL: test_mm_mulhi_epu16:
1930 ; X64-NEXT: pmulhuw %xmm1, %xmm0
1932 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1933 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1934 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1935 %bc = bitcast <8 x i16> %res to <2 x i64>
1938 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
; Reinterprets both arguments as <8 x i16>, multiplies them with a plain IR
; mul and bitcasts the result to <2 x i64>. Both runs expect
; pmullw %xmm1, %xmm0.
1940 define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1941 ; X32-LABEL: test_mm_mullo_epi16:
1943 ; X32-NEXT: pmullw %xmm1, %xmm0
1946 ; X64-LABEL: test_mm_mullo_epi16:
1948 ; X64-NEXT: pmullw %xmm1, %xmm0
1950 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1951 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1952 %res = mul <8 x i16> %arg0, %arg1
1953 %bc = bitcast <8 x i16> %res to <2 x i64>
; Bitwise OR of two <2 x double> vectors, performed on their <4 x i32> bit
; patterns and bitcast back to <2 x double>. Both runs expect
; orps %xmm1, %xmm0.
1957 define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1958 ; X32-LABEL: test_mm_or_pd:
1960 ; X32-NEXT: orps %xmm1, %xmm0
1963 ; X64-LABEL: test_mm_or_pd:
1965 ; X64-NEXT: orps %xmm1, %xmm0
1967 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1968 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1969 %res = or <4 x i32> %arg0, %arg1
1970 %bc = bitcast <4 x i32> %res to <2 x double>
1971 ret <2 x double> %bc
; Bitwise OR of two <2 x i64> vectors. Both runs expect orps %xmm1, %xmm0,
; the same instruction as for the floating-point variant test_mm_or_pd.
1974 define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1975 ; X32-LABEL: test_mm_or_si128:
1977 ; X32-NEXT: orps %xmm1, %xmm0
1980 ; X64-LABEL: test_mm_or_si128:
1982 ; X64-NEXT: orps %xmm1, %xmm0
1984 %res = or <2 x i64> %a0, %a1
; Reinterprets both arguments as <8 x i16>, calls the
; llvm.x86.sse2.packsswb.128 intrinsic and bitcasts its <16 x i8> result to
; <2 x i64>. Both runs expect packsswb %xmm1, %xmm0.
1988 define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1989 ; X32-LABEL: test_mm_packs_epi16:
1991 ; X32-NEXT: packsswb %xmm1, %xmm0
1994 ; X64-LABEL: test_mm_packs_epi16:
1996 ; X64-NEXT: packsswb %xmm1, %xmm0
1998 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1999 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2000 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2001 %bc = bitcast <16 x i8> %res to <2 x i64>
2004 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Reinterprets both arguments as <4 x i32>, calls the
; llvm.x86.sse2.packssdw.128 intrinsic and bitcasts its <8 x i16> result to
; <2 x i64>. Both runs expect packssdw %xmm1, %xmm0.
2006 define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2007 ; X32-LABEL: test_mm_packs_epi32:
2009 ; X32-NEXT: packssdw %xmm1, %xmm0
2012 ; X64-LABEL: test_mm_packs_epi32:
2014 ; X64-NEXT: packssdw %xmm1, %xmm0
2016 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2017 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2018 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
2019 %bc = bitcast <8 x i16> %res to <2 x i64>
2022 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
; Reinterprets both arguments as <8 x i16>, calls the
; llvm.x86.sse2.packuswb.128 intrinsic and bitcasts its <16 x i8> result to
; <2 x i64>. Both runs expect packuswb %xmm1, %xmm0.
2024 define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2025 ; X32-LABEL: test_mm_packus_epi16:
2027 ; X32-NEXT: packuswb %xmm1, %xmm0
2030 ; X64-LABEL: test_mm_packus_epi16:
2032 ; X64-NEXT: packuswb %xmm1, %xmm0
2034 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2035 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2036 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2037 %bc = bitcast <16 x i8> %res to <2 x i64>
2040 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Calls the argument-less llvm.x86.sse2.pause intrinsic from a void function.
2042 define void @test_mm_pause() nounwind {
2043 ; X32-LABEL: test_mm_pause:
2048 ; X64-LABEL: test_mm_pause:
2052 call void @llvm.x86.sse2.pause()
2055 declare void @llvm.x86.sse2.pause() nounwind readnone
; Reinterprets both arguments as <16 x i8> and calls the
; llvm.x86.sse2.psad.bw intrinsic, which yields a <2 x i64>. Both runs expect
; psadbw %xmm1, %xmm0.
2057 define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2058 ; X32-LABEL: test_mm_sad_epu8:
2060 ; X32-NEXT: psadbw %xmm1, %xmm0
2063 ; X64-LABEL: test_mm_sad_epu8:
2065 ; X64-NEXT: psadbw %xmm1, %xmm0
2067 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2068 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2069 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2072 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
; Builds a <16 x i8> from sixteen i8 arguments with a chain of insertelement
; instructions and bitcasts it to <2 x i64>. The arguments are inserted in
; reverse order: %a15 goes to element 0 and %a0 to element 15.
; The expected code zero-extends each byte with movzbl, moves it into an XMM
; register with movd, and merges the sixteen values pairwise through
; punpcklbw, punpcklwd, punpckldq and a final punpcklqdq.
; On i386 every byte is read from the stack; on x86_64 the first six come from
; %dil, %sil, %dl, %cl, %r8b and %r9b and the remaining ten from the stack.
2074 define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2075 ; X32-LABEL: test_mm_set_epi8:
2077 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2078 ; X32-NEXT: movd %eax, %xmm0
2079 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2080 ; X32-NEXT: movd %eax, %xmm1
2081 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2082 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2083 ; X32-NEXT: movd %eax, %xmm0
2084 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2085 ; X32-NEXT: movd %eax, %xmm2
2086 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2087 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2088 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2089 ; X32-NEXT: movd %eax, %xmm0
2090 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2091 ; X32-NEXT: movd %eax, %xmm3
2092 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2093 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2094 ; X32-NEXT: movd %eax, %xmm0
2095 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2096 ; X32-NEXT: movd %eax, %xmm1
2097 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2098 ; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2099 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2100 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2101 ; X32-NEXT: movd %eax, %xmm0
2102 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2103 ; X32-NEXT: movd %eax, %xmm2
2104 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2105 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2106 ; X32-NEXT: movd %eax, %xmm0
2107 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2108 ; X32-NEXT: movd %eax, %xmm3
2109 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2110 ; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2111 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2112 ; X32-NEXT: movd %eax, %xmm0
2113 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2114 ; X32-NEXT: movd %eax, %xmm2
2115 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2116 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2117 ; X32-NEXT: movd %eax, %xmm4
2118 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2119 ; X32-NEXT: movd %eax, %xmm0
2120 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2121 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2122 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2123 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2126 ; X64-LABEL: test_mm_set_epi8:
2128 ; X64-NEXT: movzbl %dil, %eax
2129 ; X64-NEXT: movd %eax, %xmm0
2130 ; X64-NEXT: movzbl %sil, %eax
2131 ; X64-NEXT: movd %eax, %xmm1
2132 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2133 ; X64-NEXT: movzbl %dl, %eax
2134 ; X64-NEXT: movd %eax, %xmm0
2135 ; X64-NEXT: movzbl %cl, %eax
2136 ; X64-NEXT: movd %eax, %xmm2
2137 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2138 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2139 ; X64-NEXT: movzbl %r8b, %eax
2140 ; X64-NEXT: movd %eax, %xmm0
2141 ; X64-NEXT: movzbl %r9b, %eax
2142 ; X64-NEXT: movd %eax, %xmm3
2143 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2144 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2145 ; X64-NEXT: movd %eax, %xmm0
2146 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2147 ; X64-NEXT: movd %eax, %xmm1
2148 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2149 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2150 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2151 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2152 ; X64-NEXT: movd %eax, %xmm0
2153 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2154 ; X64-NEXT: movd %eax, %xmm2
2155 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2156 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2157 ; X64-NEXT: movd %eax, %xmm0
2158 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2159 ; X64-NEXT: movd %eax, %xmm3
2160 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2161 ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2162 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2163 ; X64-NEXT: movd %eax, %xmm0
2164 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2165 ; X64-NEXT: movd %eax, %xmm2
2166 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2167 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2168 ; X64-NEXT: movd %eax, %xmm4
2169 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2170 ; X64-NEXT: movd %eax, %xmm0
2171 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2172 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2173 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2174 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2176 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2177 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2178 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2179 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2180 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2181 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2182 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2183 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2184 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2185 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2186 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2187 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2188 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2189 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2190 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2191 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2192 %res = bitcast <16 x i8> %res15 to <2 x i64>
; Builds an <8 x i16> from eight i16 arguments with a chain of insertelement
; instructions and bitcasts it to <2 x i64>. The arguments are inserted in
; reverse order: %a7 goes to element 0 and %a0 to element 7.
; On i386 the checks expect all eight values to be loaded from the stack with
; movzwl and moved into %xmm0-%xmm7 before being merged. On x86_64 six values
; are moved straight from %edi, %esi, %edx, %ecx, %r8d and %r9d, and two are
; loaded from the stack into %r10d and %eax. Both targets merge with
; punpcklwd, punpckldq and a final punpcklqdq.
2196 define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2197 ; X32-LABEL: test_mm_set_epi16:
2199 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2200 ; X32-NEXT: movd %eax, %xmm1
2201 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2202 ; X32-NEXT: movd %eax, %xmm2
2203 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2204 ; X32-NEXT: movd %eax, %xmm3
2205 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2206 ; X32-NEXT: movd %eax, %xmm4
2207 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2208 ; X32-NEXT: movd %eax, %xmm5
2209 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2210 ; X32-NEXT: movd %eax, %xmm6
2211 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2212 ; X32-NEXT: movd %eax, %xmm7
2213 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2214 ; X32-NEXT: movd %eax, %xmm0
2215 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2216 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2217 ; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2218 ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2219 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2220 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
2221 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
2224 ; X64-LABEL: test_mm_set_epi16:
2226 ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
2227 ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
2228 ; X64-NEXT: movd %edi, %xmm0
2229 ; X64-NEXT: movd %esi, %xmm1
2230 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2231 ; X64-NEXT: movd %edx, %xmm0
2232 ; X64-NEXT: movd %ecx, %xmm2
2233 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2234 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2235 ; X64-NEXT: movd %r8d, %xmm0
2236 ; X64-NEXT: movd %r9d, %xmm1
2237 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2238 ; X64-NEXT: movd %eax, %xmm3
2239 ; X64-NEXT: movd %r10d, %xmm0
2240 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2241 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2242 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2244 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2245 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2246 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2247 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2248 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2249 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2250 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2251 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2252 %res = bitcast <8 x i16> %res7 to <2 x i64>
; _mm_set_epi32 pattern: the four i32 arguments are inserted in reverse order
; (%a3 into lane 0 up to %a0 into lane 3) and the <4 x i32> is bitcast to
; <2 x i64>. The i386 checks expect movss loads merged with unpcklps/movlhps;
; the x86_64 checks expect movd from the argument registers merged with
; punpckldq/punpcklqdq.
2256 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2257 ; X32-LABEL: test_mm_set_epi32:
2259 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2260 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2261 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2262 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2263 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2264 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2265 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2268 ; X64-LABEL: test_mm_set_epi32:
2270 ; X64-NEXT: movd %edi, %xmm0
2271 ; X64-NEXT: movd %esi, %xmm1
2272 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2273 ; X64-NEXT: movd %edx, %xmm2
2274 ; X64-NEXT: movd %ecx, %xmm0
2275 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2276 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2278 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2279 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2280 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2281 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2282 %res = bitcast <4 x i32> %res3 to <2 x i64>
2286 ; TODO test_mm_set_epi64
; _mm_set_epi64x pattern: %a1 is inserted into lane 0 and %a0 into lane 1 of
; a <2 x i64>. The i386 checks expect four 32-bit movss loads merged with
; unpcklps/movlhps; the x86_64 checks expect two movq transfers from
; %rdi/%rsi combined with punpcklqdq.
2288 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2289 ; X32-LABEL: test_mm_set_epi64x:
2291 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2292 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2293 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2294 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2295 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2296 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2297 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2300 ; X64-LABEL: test_mm_set_epi64x:
2302 ; X64-NEXT: movq %rdi, %xmm1
2303 ; X64-NEXT: movq %rsi, %xmm0
2304 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2306 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2307 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
; _mm_set_pd pattern: %a1 is inserted into lane 0 and %a0 into lane 1 of a
; <2 x double>. Both runs expect the two halves to be combined with movlhps;
; the i386 run first loads both doubles with movsd, and the x86_64 run needs
; an extra movaps to place the result in %xmm0.
2311 define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2312 ; X32-LABEL: test_mm_set_pd:
2314 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2315 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2316 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2319 ; X64-LABEL: test_mm_set_pd:
2321 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2322 ; X64-NEXT: movaps %xmm1, %xmm0
2324 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2325 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2326 ret <2 x double> %res1
; _mm_set_pd1 pattern: the single double %a0 is inserted into both lanes of a
; <2 x double>. The checks expect a movlhps of %xmm0 with itself to duplicate
; lane 0 (preceded by a movsd load of the argument on the i386 run).
2329 define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
2330 ; X32-LABEL: test_mm_set_pd1:
2332 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2333 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2336 ; X64-LABEL: test_mm_set_pd1:
2338 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2340 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2341 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2342 ret <2 x double> %res1
; _mm_set_sd pattern: %a0 is placed in lane 0 and the constant 0.0 in lane 1
; of a <2 x double>. The checks expect a register-to-register movq that keeps
; lane 0 and zeroes the upper lane (after a movq load on the i386 run).
2345 define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2346 ; X32-LABEL: test_mm_set_sd:
2348 ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2349 ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2352 ; X64-LABEL: test_mm_set_sd:
2354 ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2356 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2357 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2358 ret <2 x double> %res1
; _mm_set1_epi8 pattern: the single i8 %a0 is inserted into all 16 lanes of a
; <16 x i8>, which is then bitcast to <2 x i64>. The checks expect the byte to
; be zero-extended (movzbl), moved into %xmm0 with movd, and broadcast with a
; punpcklbw + pshuflw + pshufd sequence.
2361 define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2362 ; X32-LABEL: test_mm_set1_epi8:
2364 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2365 ; X32-NEXT: movd %eax, %xmm0
2366 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2367 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2368 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2371 ; X64-LABEL: test_mm_set1_epi8:
2373 ; X64-NEXT: movzbl %dil, %eax
2374 ; X64-NEXT: movd %eax, %xmm0
2375 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2376 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2377 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2379 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2380 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2381 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2382 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2383 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2384 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2385 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2386 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2387 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2388 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2389 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2390 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2391 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2392 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2393 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2394 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2395 %res = bitcast <16 x i8> %res15 to <2 x i64>
; _mm_set1_epi16 pattern: the single i16 %a0 is inserted into all 8 lanes of
; an <8 x i16>, which is then bitcast to <2 x i64>. The checks expect a movd
; into %xmm0 followed by pshuflw + pshufd to broadcast the low word; the i386
; run additionally loads the argument from the stack with movzwl.
2399 define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2400 ; X32-LABEL: test_mm_set1_epi16:
2402 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2403 ; X32-NEXT: movd %eax, %xmm0
2404 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2405 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2408 ; X64-LABEL: test_mm_set1_epi16:
2410 ; X64-NEXT: movd %edi, %xmm0
2411 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2412 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2414 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2415 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2416 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2417 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2418 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2419 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2420 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2421 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2422 %res = bitcast <8 x i16> %res7 to <2 x i64>
; _mm_set1_epi32 pattern: the single i32 %a0 is inserted into all 4 lanes of a
; <4 x i32>, which is then bitcast to <2 x i64>. The checks expect a movd into
; %xmm0 followed by a pshufd that replicates lane 0 into every lane.
2426 define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2427 ; X32-LABEL: test_mm_set1_epi32:
2429 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2430 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2433 ; X64-LABEL: test_mm_set1_epi32:
2435 ; X64-NEXT: movd %edi, %xmm0
2436 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2438 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2439 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2440 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2441 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2442 %res = bitcast <4 x i32> %res3 to <2 x i64>
2446 ; TODO test_mm_set1_epi64
; _mm_set1_epi64x pattern: the single i64 %a0 is inserted into both lanes of a
; <2 x i64>. The i386 checks expect the value to be assembled from two 32-bit
; movd loads with punpckldq before the pshufd broadcast; the x86_64 checks
; expect a movq from %rdi followed by the same pshufd.
2448 define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2449 ; X32-LABEL: test_mm_set1_epi64x:
2451 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2452 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2453 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2454 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2457 ; X64-LABEL: test_mm_set1_epi64x:
2459 ; X64-NEXT: movq %rdi, %xmm0
2460 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2462 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2463 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
; _mm_set1_pd pattern: the single double %a0 is inserted into both lanes of a
; <2 x double>. The checks expect a movlhps of %xmm0 with itself to duplicate
; lane 0 (preceded by a movsd load of the argument on the i386 run).
2467 define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2468 ; X32-LABEL: test_mm_set1_pd:
2470 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2471 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2474 ; X64-LABEL: test_mm_set1_pd:
2476 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2478 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2479 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2480 ret <2 x double> %res1
; _mm_setr_epi8 pattern: the sixteen i8 arguments are inserted in argument
; order (%a0 into lane 0 up to %a15 into lane 15) and the <16 x i8> is bitcast
; to <2 x i64>. The checks expect every byte to be zero-extended with movzbl,
; moved into an XMM register with movd, and merged pairwise by a
; punpcklbw / punpcklwd / punpckldq / punpcklqdq tree.
2483 define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2484 ; X32-LABEL: test_mm_setr_epi8:
2486 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2487 ; X32-NEXT: movd %eax, %xmm0
2488 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2489 ; X32-NEXT: movd %eax, %xmm1
2490 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2491 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2492 ; X32-NEXT: movd %eax, %xmm0
2493 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2494 ; X32-NEXT: movd %eax, %xmm2
2495 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2496 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2497 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2498 ; X32-NEXT: movd %eax, %xmm0
2499 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2500 ; X32-NEXT: movd %eax, %xmm3
2501 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2502 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2503 ; X32-NEXT: movd %eax, %xmm0
2504 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2505 ; X32-NEXT: movd %eax, %xmm1
2506 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2507 ; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2508 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2509 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2510 ; X32-NEXT: movd %eax, %xmm0
2511 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2512 ; X32-NEXT: movd %eax, %xmm2
2513 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2514 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2515 ; X32-NEXT: movd %eax, %xmm0
2516 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2517 ; X32-NEXT: movd %eax, %xmm3
2518 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2519 ; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2520 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2521 ; X32-NEXT: movd %eax, %xmm0
2522 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2523 ; X32-NEXT: movd %eax, %xmm2
2524 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2525 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2526 ; X32-NEXT: movd %eax, %xmm4
2527 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2528 ; X32-NEXT: movd %eax, %xmm0
2529 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2530 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2531 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2532 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2535 ; X64-LABEL: test_mm_setr_epi8:
2537 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2538 ; X64-NEXT: movd %eax, %xmm0
2539 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2540 ; X64-NEXT: movd %eax, %xmm1
2541 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2542 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2543 ; X64-NEXT: movd %eax, %xmm0
2544 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2545 ; X64-NEXT: movd %eax, %xmm2
2546 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2547 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2548 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2549 ; X64-NEXT: movd %eax, %xmm0
2550 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2551 ; X64-NEXT: movd %eax, %xmm3
2552 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2553 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2554 ; X64-NEXT: movd %eax, %xmm0
2555 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2556 ; X64-NEXT: movd %eax, %xmm1
2557 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2558 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2559 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2560 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2561 ; X64-NEXT: movd %eax, %xmm0
2562 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2563 ; X64-NEXT: movd %eax, %xmm2
2564 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2565 ; X64-NEXT: movzbl %r9b, %eax
2566 ; X64-NEXT: movd %eax, %xmm0
2567 ; X64-NEXT: movzbl %r8b, %eax
2568 ; X64-NEXT: movd %eax, %xmm3
2569 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2570 ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2571 ; X64-NEXT: movzbl %cl, %eax
2572 ; X64-NEXT: movd %eax, %xmm0
2573 ; X64-NEXT: movzbl %dl, %eax
2574 ; X64-NEXT: movd %eax, %xmm2
2575 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2576 ; X64-NEXT: movzbl %sil, %eax
2577 ; X64-NEXT: movd %eax, %xmm4
2578 ; X64-NEXT: movzbl %dil, %eax
2579 ; X64-NEXT: movd %eax, %xmm0
2580 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2581 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2582 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2583 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2585 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2586 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2587 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2588 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2589 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2590 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2591 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2592 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2593 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2594 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2595 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2596 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2597 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2598 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2599 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2600 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2601 %res = bitcast <16 x i8> %res15 to <2 x i64>
; _mm_setr_epi16 pattern: the eight i16 arguments are inserted in argument
; order (%a0 into lane 0 up to %a7 into lane 7) and the <8 x i16> is bitcast
; to <2 x i64>. The checks expect every scalar to be moved into an XMM
; register with movd and merged by a punpcklwd / punpckldq / punpcklqdq tree.
2605 define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2606 ; X32-LABEL: test_mm_setr_epi16:
2608 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2609 ; X32-NEXT: movd %eax, %xmm1
2610 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2611 ; X32-NEXT: movd %eax, %xmm2
2612 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2613 ; X32-NEXT: movd %eax, %xmm3
2614 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2615 ; X32-NEXT: movd %eax, %xmm4
2616 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2617 ; X32-NEXT: movd %eax, %xmm5
2618 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2619 ; X32-NEXT: movd %eax, %xmm6
2620 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2621 ; X32-NEXT: movd %eax, %xmm7
2622 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2623 ; X32-NEXT: movd %eax, %xmm0
2624 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2625 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2626 ; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2627 ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2628 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2629 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
2630 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
2633 ; X64-LABEL: test_mm_setr_epi16:
2635 ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
2636 ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
2637 ; X64-NEXT: movd %eax, %xmm0
2638 ; X64-NEXT: movd %r10d, %xmm1
2639 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2640 ; X64-NEXT: movd %r9d, %xmm0
2641 ; X64-NEXT: movd %r8d, %xmm2
2642 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2643 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2644 ; X64-NEXT: movd %ecx, %xmm0
2645 ; X64-NEXT: movd %edx, %xmm1
2646 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2647 ; X64-NEXT: movd %esi, %xmm3
2648 ; X64-NEXT: movd %edi, %xmm0
2649 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2650 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2651 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2653 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2654 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2655 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2656 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2657 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2658 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2659 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2660 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2661 %res = bitcast <8 x i16> %res7 to <2 x i64>
; _mm_setr_epi32 pattern: the four i32 arguments are inserted in argument
; order (%a0 into lane 0 up to %a3 into lane 3) and the <4 x i32> is bitcast
; to <2 x i64>. The i386 checks expect movss loads merged with
; unpcklps/movlhps; the x86_64 checks expect movd from the argument registers
; merged with punpckldq/punpcklqdq.
2665 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2666 ; X32-LABEL: test_mm_setr_epi32:
2668 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2669 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2670 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2671 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2672 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2673 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2674 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2677 ; X64-LABEL: test_mm_setr_epi32:
2679 ; X64-NEXT: movd %ecx, %xmm0
2680 ; X64-NEXT: movd %edx, %xmm1
2681 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2682 ; X64-NEXT: movd %esi, %xmm2
2683 ; X64-NEXT: movd %edi, %xmm0
2684 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2685 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2687 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2688 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2689 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2690 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2691 %res = bitcast <4 x i32> %res3 to <2 x i64>
2695 ; TODO test_mm_setr_epi64
; _mm_setr_epi64x pattern: %a0 is inserted into lane 0 and %a1 into lane 1 of
; a <2 x i64>. The i386 checks expect four 32-bit movss loads merged with
; unpcklps/movlhps; the x86_64 checks expect two movq transfers from
; %rsi/%rdi combined with punpcklqdq.
2697 define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2698 ; X32-LABEL: test_mm_setr_epi64x:
2700 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2701 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2702 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2703 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2704 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2705 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2706 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2709 ; X64-LABEL: test_mm_setr_epi64x:
2711 ; X64-NEXT: movq %rsi, %xmm1
2712 ; X64-NEXT: movq %rdi, %xmm0
2713 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2715 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2716 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
; _mm_setr_pd pattern: %a0 is inserted into lane 0 and %a1 into lane 1 of a
; <2 x double>. Both runs expect a single movlhps that appends lane 0 of
; %xmm1 to lane 0 of %xmm0; the i386 run first loads both doubles with movsd.
2720 define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2721 ; X32-LABEL: test_mm_setr_pd:
2723 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2724 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2725 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2728 ; X64-LABEL: test_mm_setr_pd:
2730 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2732 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2733 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2734 ret <2 x double> %res1
; _mm_setzero_pd pattern: returns an all-zero <2 x double>. Both runs expect
; the zero vector to be materialised with xorps %xmm0, %xmm0.
2737 define <2 x double> @test_mm_setzero_pd() {
2738 ; X32-LABEL: test_mm_setzero_pd:
2740 ; X32-NEXT: xorps %xmm0, %xmm0
2743 ; X64-LABEL: test_mm_setzero_pd:
2745 ; X64-NEXT: xorps %xmm0, %xmm0
2747 ret <2 x double> zeroinitializer
; _mm_setzero_si128 pattern: returns an all-zero <2 x i64>. Both runs expect
; the zero vector to be materialised with xorps %xmm0, %xmm0.
2750 define <2 x i64> @test_mm_setzero_si128() {
2751 ; X32-LABEL: test_mm_setzero_si128:
2753 ; X32-NEXT: xorps %xmm0, %xmm0
2756 ; X64-LABEL: test_mm_setzero_si128:
2758 ; X64-NEXT: xorps %xmm0, %xmm0
2760 ret <2 x i64> zeroinitializer
; _mm_shuffle_epi32 pattern: %a0 is viewed as <4 x i32> and shuffled with an
; all-zero mask, i.e. lane 0 is replicated into every lane. Both runs expect
; a single pshufd with selector [0,0,0,0].
2763 define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2764 ; X32-LABEL: test_mm_shuffle_epi32:
2766 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2769 ; X64-LABEL: test_mm_shuffle_epi32:
2771 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2773 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2774 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2775 %bc = bitcast <4 x i32> %res to <2 x i64>
; _mm_shuffle_pd pattern: the shuffle mask <1, 2> selects lane 1 of %a0 and
; lane 0 of %a1 (index 2 addresses the first lane of the second operand).
; Both runs expect a single shufpd producing xmm0[1],xmm1[0].
2779 define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2780 ; X32-LABEL: test_mm_shuffle_pd:
2782 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2785 ; X64-LABEL: test_mm_shuffle_pd:
2787 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2789 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2790 ret <2 x double> %res
; _mm_shufflehi_epi16 pattern: %a0 is viewed as <8 x i16>; the mask keeps
; lanes 0-3 unchanged and replicates lane 4 into lanes 4-7. Both runs expect
; a single pshufhw.
2793 define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2794 ; X32-LABEL: test_mm_shufflehi_epi16:
2796 ; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2799 ; X64-LABEL: test_mm_shufflehi_epi16:
2801 ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2803 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2804 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2805 %bc = bitcast <8 x i16> %res to <2 x i64>
; _mm_shufflelo_epi16 pattern: %a0 is viewed as <8 x i16>; the mask
; replicates lane 0 into lanes 0-3 and keeps lanes 4-7 unchanged. Both runs
; expect a single pshuflw.
2809 define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2810 ; X32-LABEL: test_mm_shufflelo_epi16:
2812 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2815 ; X64-LABEL: test_mm_shufflelo_epi16:
2817 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2819 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2820 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2821 %bc = bitcast <8 x i16> %res to <2 x i64>
; _mm_sll_epi16 pattern: both operands are bitcast to <8 x i16> and passed to
; the llvm.x86.sse2.psll.w intrinsic (value, then shift-count vector); the
; result is bitcast back to <2 x i64>. Both runs expect psllw %xmm1, %xmm0.
2825 define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2826 ; X32-LABEL: test_mm_sll_epi16:
2828 ; X32-NEXT: psllw %xmm1, %xmm0
2831 ; X64-LABEL: test_mm_sll_epi16:
2833 ; X64-NEXT: psllw %xmm1, %xmm0
2835 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2836 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2837 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2838 %bc = bitcast <8 x i16> %res to <2 x i64>
2841 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
; _mm_sll_epi32 pattern: both operands are bitcast to <4 x i32> and passed to
; the llvm.x86.sse2.psll.d intrinsic (value, then shift-count vector); the
; result is bitcast back to <2 x i64>. Both runs expect pslld %xmm1, %xmm0.
2843 define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2844 ; X32-LABEL: test_mm_sll_epi32:
2846 ; X32-NEXT: pslld %xmm1, %xmm0
2849 ; X64-LABEL: test_mm_sll_epi32:
2851 ; X64-NEXT: pslld %xmm1, %xmm0
2853 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2854 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2855 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2856 %bc = bitcast <4 x i32> %res to <2 x i64>
2859 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
; _mm_sll_epi64 pattern: calls the llvm.x86.sse2.psll.q intrinsic directly on
; the <2 x i64> operands (value, then shift-count vector), so no bitcasts are
; needed. Both runs expect psllq %xmm1, %xmm0.
2861 define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2862 ; X32-LABEL: test_mm_sll_epi64:
2864 ; X32-NEXT: psllq %xmm1, %xmm0
2867 ; X64-LABEL: test_mm_sll_epi64:
2869 ; X64-NEXT: psllq %xmm1, %xmm0
2871 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2874 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
; _mm_slli_epi16 pattern: %a0 is bitcast to <8 x i16> and passed to the
; llvm.x86.sse2.pslli.w intrinsic with the constant shift count 1; the result
; is bitcast back to <2 x i64>. Both runs expect psllw $1, %xmm0.
2876 define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2877 ; X32-LABEL: test_mm_slli_epi16:
2879 ; X32-NEXT: psllw $1, %xmm0
2882 ; X64-LABEL: test_mm_slli_epi16:
2884 ; X64-NEXT: psllw $1, %xmm0
2886 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2887 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2888 %bc = bitcast <8 x i16> %res to <2 x i64>
2891 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
; _mm_slli_epi32 pattern: %a0 is bitcast to <4 x i32> and passed to the
; llvm.x86.sse2.pslli.d intrinsic with the constant shift count 1; the result
; is bitcast back to <2 x i64>. Both runs expect pslld $1, %xmm0.
2893 define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2894 ; X32-LABEL: test_mm_slli_epi32:
2896 ; X32-NEXT: pslld $1, %xmm0
2899 ; X64-LABEL: test_mm_slli_epi32:
2901 ; X64-NEXT: pslld $1, %xmm0
2903 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2904 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2905 %bc = bitcast <4 x i32> %res to <2 x i64>
2908 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
; _mm_slli_epi64 pattern: calls the llvm.x86.sse2.pslli.q intrinsic directly
; on the <2 x i64> operand with the constant shift count 1. Both runs expect
; psllq $1, %xmm0.
2910 define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2911 ; X32-LABEL: test_mm_slli_epi64:
2913 ; X32-NEXT: psllq $1, %xmm0
2916 ; X64-LABEL: test_mm_slli_epi64:
2918 ; X64-NEXT: psllq $1, %xmm0
2920 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2923 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
; _mm_slli_si128 pattern: a whole-register byte shift expressed as a shuffle
; of a zero vector (first operand) with %a0 viewed as <16 x i8> (second
; operand). Mask indices 11-15 pick five zero bytes and indices 16-26 pick
; bytes 0-10 of %a0, i.e. a left shift by 5 bytes. Both runs expect pslldq.
2925 define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2926 ; X32-LABEL: test_mm_slli_si128:
2928 ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2931 ; X64-LABEL: test_mm_slli_si128:
2933 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2935 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2936 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2937 %bc = bitcast <16 x i8> %res to <2 x i64>
; _mm_sqrt_pd pattern: calls the llvm.x86.sse2.sqrt.pd intrinsic on %a0 and
; returns its result. Both runs expect sqrtpd %xmm0, %xmm0.
2941 define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2942 ; X32-LABEL: test_mm_sqrt_pd:
2944 ; X32-NEXT: sqrtpd %xmm0, %xmm0
2947 ; X64-LABEL: test_mm_sqrt_pd:
2949 ; X64-NEXT: sqrtpd %xmm0, %xmm0
2951 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2952 ret <2 x double> %res
2954 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
; _mm_sqrt_sd pattern: calls the llvm.x86.sse2.sqrt.sd intrinsic on %a0, takes
; lane 0 of that result, and combines it with lane 1 of %a1 to form the
; returned vector. Both runs expect sqrtsd %xmm0, %xmm1 (which writes only the
; low lane of %xmm1) followed by a movapd copy of %xmm1 into %xmm0.
2956 define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2957 ; X32-LABEL: test_mm_sqrt_sd:
2959 ; X32-NEXT: sqrtsd %xmm0, %xmm1
2960 ; X32-NEXT: movapd %xmm1, %xmm0
2963 ; X64-LABEL: test_mm_sqrt_sd:
2965 ; X64-NEXT: sqrtsd %xmm0, %xmm1
2966 ; X64-NEXT: movapd %xmm1, %xmm0
2968 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2969 %ext0 = extractelement <2 x double> %call, i32 0
2970 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2971 %ext1 = extractelement <2 x double> %a1, i32 1
2972 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2973 ret <2 x double> %ins1
2975 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
; _mm_sra_epi16 pattern: both operands are bitcast to <8 x i16> and passed to
; the llvm.x86.sse2.psra.w intrinsic (value, then shift-count vector); the
; result is bitcast back to <2 x i64>. Both runs expect psraw %xmm1, %xmm0.
2977 define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2978 ; X32-LABEL: test_mm_sra_epi16:
2980 ; X32-NEXT: psraw %xmm1, %xmm0
2983 ; X64-LABEL: test_mm_sra_epi16:
2985 ; X64-NEXT: psraw %xmm1, %xmm0
2987 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2988 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2989 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2990 %bc = bitcast <8 x i16> %res to <2 x i64>
2993 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
; _mm_sra_epi32 pattern: both operands are bitcast to <4 x i32> and passed to
; the llvm.x86.sse2.psra.d intrinsic (value, then shift-count vector); the
; result is bitcast back to <2 x i64>. Both runs expect psrad %xmm1, %xmm0.
2995 define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2996 ; X32-LABEL: test_mm_sra_epi32:
2998 ; X32-NEXT: psrad %xmm1, %xmm0
3001 ; X64-LABEL: test_mm_sra_epi32:
3003 ; X64-NEXT: psrad %xmm1, %xmm0
3005 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3006 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3007 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
3008 %bc = bitcast <4 x i32> %res to <2 x i64>
3011 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
; _mm_srai_epi16 pattern: %a0 is bitcast to <8 x i16> and passed to the
; llvm.x86.sse2.psrai.w intrinsic with the constant shift count 1; the result
; is bitcast back to <2 x i64>. Both runs expect psraw $1, %xmm0.
3013 define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
3014 ; X32-LABEL: test_mm_srai_epi16:
3016 ; X32-NEXT: psraw $1, %xmm0
3019 ; X64-LABEL: test_mm_srai_epi16:
3021 ; X64-NEXT: psraw $1, %xmm0
3023 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3024 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
3025 %bc = bitcast <8 x i16> %res to <2 x i64>
3028 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
; _mm_srai_epi32 pattern: %a0 is bitcast to <4 x i32> and passed to the
; llvm.x86.sse2.psrai.d intrinsic with the constant shift count 1; the result
; is bitcast back to <2 x i64>. Both runs expect psrad $1, %xmm0.
3030 define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
3031 ; X32-LABEL: test_mm_srai_epi32:
3033 ; X32-NEXT: psrad $1, %xmm0
3036 ; X64-LABEL: test_mm_srai_epi32:
3038 ; X64-NEXT: psrad $1, %xmm0
3040 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3041 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
3042 %bc = bitcast <4 x i32> %res to <2 x i64>
3045 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
; _mm_srl_epi16 pattern: both operands are bitcast to <8 x i16> and passed to
; the llvm.x86.sse2.psrl.w intrinsic (value, then shift-count vector); the
; result is bitcast back to <2 x i64>. Both runs expect psrlw %xmm1, %xmm0.
3047 define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3048 ; X32-LABEL: test_mm_srl_epi16:
3050 ; X32-NEXT: psrlw %xmm1, %xmm0
3053 ; X64-LABEL: test_mm_srl_epi16:
3055 ; X64-NEXT: psrlw %xmm1, %xmm0
3057 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3058 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3059 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
3060 %bc = bitcast <8 x i16> %res to <2 x i64>
3063 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.psrl.d with both operands reinterpreted as <4 x i32>.
; The i386 and x86_64 fast-isel runs both expect `psrld %xmm1, %xmm0`.
3065 define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3066 ; X32-LABEL: test_mm_srl_epi32:
3068 ; X32-NEXT: psrld %xmm1, %xmm0
3071 ; X64-LABEL: test_mm_srl_epi32:
3073 ; X64-NEXT: psrld %xmm1, %xmm0
3075 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3076 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3077 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3078 %bc = bitcast <4 x i32> %res to <2 x i64>
3081 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.psrl.q directly on the <2 x i64> arguments (no bitcasts
; needed); both runs expect `psrlq %xmm1, %xmm0`.
3083 define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3084 ; X32-LABEL: test_mm_srl_epi64:
3086 ; X32-NEXT: psrlq %xmm1, %xmm0
3089 ; X64-LABEL: test_mm_srl_epi64:
3091 ; X64-NEXT: psrlq %xmm1, %xmm0
3093 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3096 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.sse2.psrli.w on %a0 (viewed as <8 x i16>) with the constant
; count `i32 1`; both runs expect the immediate form `psrlw $1, %xmm0`.
3098 define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3099 ; X32-LABEL: test_mm_srli_epi16:
3101 ; X32-NEXT: psrlw $1, %xmm0
3104 ; X64-LABEL: test_mm_srli_epi16:
3106 ; X64-NEXT: psrlw $1, %xmm0
3108 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3109 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3110 %bc = bitcast <8 x i16> %res to <2 x i64>
3113 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrli.d on %a0 (viewed as <4 x i32>) with the constant
; count `i32 1`; both runs expect the immediate form `psrld $1, %xmm0`.
3115 define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3116 ; X32-LABEL: test_mm_srli_epi32:
3118 ; X32-NEXT: psrld $1, %xmm0
3121 ; X64-LABEL: test_mm_srli_epi32:
3123 ; X64-NEXT: psrld $1, %xmm0
3125 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3126 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3127 %bc = bitcast <4 x i32> %res to <2 x i64>
3130 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrli.q directly on the <2 x i64> argument with the
; constant count `i32 1`; both runs expect `psrlq $1, %xmm0`.
3132 define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3133 ; X32-LABEL: test_mm_srli_epi64:
3135 ; X32-NEXT: psrlq $1, %xmm0
3138 ; X64-LABEL: test_mm_srli_epi64:
3140 ; X64-NEXT: psrlq $1, %xmm0
3142 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3145 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
; Whole-register byte shift written as a shufflevector rather than an intrinsic.
; Mask entries 5..15 take bytes from %arg0 and entries 16..20 take bytes from
; the zeroinitializer operand, which is why the expected psrldq pattern ends in
; five `zero` lanes.
3147 define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3148 ; X32-LABEL: test_mm_srli_si128:
3150 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3153 ; X64-LABEL: test_mm_srli_si128:
3155 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3157 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3158 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3159 %bc = bitcast <16 x i8> %res to <2 x i64>
; Stores %a1 through %a0 (cast to a <2 x double> pointer) with `align 16`.
; Both runs expect a `movaps` store; the i386 run first loads the pointer
; argument from the stack into %eax.
3163 define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3164 ; X32-LABEL: test_mm_store_pd:
3166 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3167 ; X32-NEXT: movaps %xmm0, (%eax)
3170 ; X64-LABEL: test_mm_store_pd:
3172 ; X64-NEXT: movaps %xmm0, (%rdi)
3174 %arg0 = bitcast double* %a0 to <2 x double>*
3175 store <2 x double> %a1, <2 x double>* %arg0, align 16
; Splats element 0 of %a1 (shufflevector with an all-zero mask) and stores the
; result with `align 16`. Both runs expect `movlhps` producing xmm0[0,0]
; followed by a `movaps` store. The IR is the same as in @test_mm_store1_pd.
3179 define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
3180 ; X32-LABEL: test_mm_store_pd1:
3182 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3183 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3184 ; X32-NEXT: movaps %xmm0, (%eax)
3187 ; X64-LABEL: test_mm_store_pd1:
3189 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3190 ; X64-NEXT: movaps %xmm0, (%rdi)
3192 %arg0 = bitcast double * %a0 to <2 x double>*
3193 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3194 store <2 x double> %shuf, <2 x double>* %arg0, align 16
; Extracts element 0 of %a1 and stores it as a scalar double with `align 1`
; (compare @test_mm_storel_sd, which uses `align 8`). Both runs expect a
; `movsd` store.
3198 define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3199 ; X32-LABEL: test_mm_store_sd:
3201 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3202 ; X32-NEXT: movsd %xmm0, (%eax)
3205 ; X64-LABEL: test_mm_store_sd:
3207 ; X64-NEXT: movsd %xmm0, (%rdi)
3209 %ext = extractelement <2 x double> %a1, i32 0
3210 store double %ext, double* %a0, align 1
; Stores the <2 x i64> value %a1 through %a0 with `align 16`. Both runs expect
; a `movaps` store even though the stored value is an integer vector.
3214 define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3215 ; X32-LABEL: test_mm_store_si128:
3217 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3218 ; X32-NEXT: movaps %xmm0, (%eax)
3221 ; X64-LABEL: test_mm_store_si128:
3223 ; X64-NEXT: movaps %xmm0, (%rdi)
3225 store <2 x i64> %a1, <2 x i64>* %a0, align 16
; Splats element 0 of %a1 (shufflevector with an all-zero mask) and stores the
; result with `align 16`. Both runs expect `movlhps` producing xmm0[0,0]
; followed by a `movaps` store. The IR is the same as in @test_mm_store_pd1.
3229 define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3230 ; X32-LABEL: test_mm_store1_pd:
3232 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3233 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3234 ; X32-NEXT: movaps %xmm0, (%eax)
3237 ; X64-LABEL: test_mm_store1_pd:
3239 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3240 ; X64-NEXT: movaps %xmm0, (%rdi)
3242 %arg0 = bitcast double * %a0 to <2 x double>*
3243 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3244 store <2 x double> %shuf, <2 x double>* %arg0, align 16
; Extracts element 1 (the upper double) of %a1 and stores it with `align 8`.
; Both runs expect `movhlps` to bring the upper element down (xmm0[1,1]) before
; a scalar `movsd` store.
3248 define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3249 ; X32-LABEL: test_mm_storeh_sd:
3251 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3252 ; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
3253 ; X32-NEXT: movsd %xmm0, (%eax)
3256 ; X64-LABEL: test_mm_storeh_sd:
3258 ; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
3259 ; X64-NEXT: movsd %xmm0, (%rdi)
3261 %ext = extractelement <2 x double> %a1, i32 1
3262 store double %ext, double* %a0, align 8
; Extracts element 0 of the <2 x i64> value and stores it as an i64 with
; `align 8`. The expected code differs per target: the i386 run stores straight
; from the vector register with `movlps`, while the x86_64 run moves the value
; through %rax (`movq %xmm0, %rax` then `movq %rax, (%rdi)`).
3266 define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3267 ; X32-LABEL: test_mm_storel_epi64:
3269 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3270 ; X32-NEXT: movlps %xmm0, (%eax)
3273 ; X64-LABEL: test_mm_storel_epi64:
3275 ; X64-NEXT: movq %xmm0, %rax
3276 ; X64-NEXT: movq %rax, (%rdi)
3278 %ext = extractelement <2 x i64> %a1, i32 0
3279 %bc = bitcast <2 x i64> *%a0 to i64*
3280 store i64 %ext, i64* %bc, align 8
; Extracts element 0 of %a1 and stores it as a scalar double with `align 8`.
; Both runs expect a `movsd` store.
3284 define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3285 ; X32-LABEL: test_mm_storel_sd:
3287 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3288 ; X32-NEXT: movsd %xmm0, (%eax)
3291 ; X64-LABEL: test_mm_storel_sd:
3293 ; X64-NEXT: movsd %xmm0, (%rdi)
3295 %ext = extractelement <2 x double> %a1, i32 0
3296 store double %ext, double* %a0, align 8
; Swaps the two doubles of %a1 (shuffle mask <1, 0>) and stores the result with
; `align 16`. Both runs expect `shufpd` producing xmm0[1,0] followed by a
; `movapd` store.
3300 define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3301 ; X32-LABEL: test_mm_storer_pd:
3303 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3304 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3305 ; X32-NEXT: movapd %xmm0, (%eax)
3308 ; X64-LABEL: test_mm_storer_pd:
3310 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3311 ; X64-NEXT: movapd %xmm0, (%rdi)
3313 %arg0 = bitcast double* %a0 to <2 x double>*
3314 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3315 store <2 x double> %shuf, <2 x double>* %arg0, align 16
; Same store as @test_mm_store_pd but with `align 1`; both runs expect the
; unaligned `movups` store instead of `movaps`.
3319 define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3320 ; X32-LABEL: test_mm_storeu_pd:
3322 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3323 ; X32-NEXT: movups %xmm0, (%eax)
3326 ; X64-LABEL: test_mm_storeu_pd:
3328 ; X64-NEXT: movups %xmm0, (%rdi)
3330 %arg0 = bitcast double* %a0 to <2 x double>*
3331 store <2 x double> %a1, <2 x double>* %arg0, align 1
; Same store as @test_mm_store_si128 but with `align 1`; both runs expect the
; unaligned `movups` store instead of `movaps`.
3335 define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3336 ; X32-LABEL: test_mm_storeu_si128:
3338 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3339 ; X32-NEXT: movups %xmm0, (%eax)
3342 ; X64-LABEL: test_mm_storeu_si128:
3344 ; X64-NEXT: movups %xmm0, (%rdi)
3346 store <2 x i64> %a1, <2 x i64>* %a0, align 1
; 16-byte-aligned vector store tagged with `!nontemporal !0`; both runs expect
; the non-temporal `movntps` store. The !0 metadata node is defined outside
; this function.
3350 define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3351 ; X32-LABEL: test_mm_stream_pd:
3353 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3354 ; X32-NEXT: movntps %xmm0, (%eax)
3357 ; X64-LABEL: test_mm_stream_pd:
3359 ; X64-NEXT: movntps %xmm0, (%rdi)
3361 %arg0 = bitcast double* %a0 to <2 x double>*
3362 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
; Scalar i32 store tagged with `!nontemporal !0` (and `align 1`); both runs
; expect `movntil`. On i386 both arguments are first loaded from the stack
; (value into %eax, pointer into %ecx); on x86_64 they arrive in %esi and %rdi.
3366 define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3367 ; X32-LABEL: test_mm_stream_si32:
3369 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3370 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3371 ; X32-NEXT: movntil %eax, (%ecx)
3374 ; X64-LABEL: test_mm_stream_si32:
3376 ; X64-NEXT: movntil %esi, (%rdi)
3378 store i32 %a1, i32* %a0, align 1, !nontemporal !0
; 16-byte-aligned <2 x i64> store tagged with `!nontemporal !0`; both runs
; expect the non-temporal `movntps` store. The !0 metadata node is defined
; outside this function.
3382 define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3383 ; X32-LABEL: test_mm_stream_si128:
3385 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3386 ; X32-NEXT: movntps %xmm0, (%eax)
3389 ; X64-LABEL: test_mm_stream_si128:
3391 ; X64-NEXT: movntps %xmm0, (%rdi)
3393 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
; Plain IR `sub` on the operands viewed as <16 x i8> (no intrinsic call).
; Both runs expect `psubb %xmm1, %xmm0`.
3397 define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3398 ; X32-LABEL: test_mm_sub_epi8:
3400 ; X32-NEXT: psubb %xmm1, %xmm0
3403 ; X64-LABEL: test_mm_sub_epi8:
3405 ; X64-NEXT: psubb %xmm1, %xmm0
3407 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3408 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3409 %res = sub <16 x i8> %arg0, %arg1
3410 %bc = bitcast <16 x i8> %res to <2 x i64>
; Plain IR `sub` on the operands viewed as <8 x i16> (no intrinsic call).
; Both runs expect `psubw %xmm1, %xmm0`.
3414 define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3415 ; X32-LABEL: test_mm_sub_epi16:
3417 ; X32-NEXT: psubw %xmm1, %xmm0
3420 ; X64-LABEL: test_mm_sub_epi16:
3422 ; X64-NEXT: psubw %xmm1, %xmm0
3424 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3425 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3426 %res = sub <8 x i16> %arg0, %arg1
3427 %bc = bitcast <8 x i16> %res to <2 x i64>
; Plain IR `sub` on the operands viewed as <4 x i32> (no intrinsic call).
; Both runs expect `psubd %xmm1, %xmm0`.
3431 define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3432 ; X32-LABEL: test_mm_sub_epi32:
3434 ; X32-NEXT: psubd %xmm1, %xmm0
3437 ; X64-LABEL: test_mm_sub_epi32:
3439 ; X64-NEXT: psubd %xmm1, %xmm0
3441 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3442 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3443 %res = sub <4 x i32> %arg0, %arg1
3444 %bc = bitcast <4 x i32> %res to <2 x i64>
; Plain IR `sub` directly on the <2 x i64> arguments (no bitcasts needed).
; Both runs expect `psubq %xmm1, %xmm0`.
3448 define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3449 ; X32-LABEL: test_mm_sub_epi64:
3451 ; X32-NEXT: psubq %xmm1, %xmm0
3454 ; X64-LABEL: test_mm_sub_epi64:
3456 ; X64-NEXT: psubq %xmm1, %xmm0
3458 %res = sub <2 x i64> %a0, %a1
; Vector `fsub` on <2 x double>; both runs expect `subpd %xmm1, %xmm0`.
3462 define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3463 ; X32-LABEL: test_mm_sub_pd:
3465 ; X32-NEXT: subpd %xmm1, %xmm0
3468 ; X64-LABEL: test_mm_sub_pd:
3470 ; X64-NEXT: subpd %xmm1, %xmm0
3472 %res = fsub <2 x double> %a0, %a1
3473 ret <2 x double> %res
; Scalar subtract written as extract / fsub / insert: element 0 of each operand
; is subtracted and the difference is inserted back into element 0 of %a0, so
; element 1 of %a0 passes through. Both runs expect `subsd %xmm1, %xmm0`.
3476 define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3477 ; X32-LABEL: test_mm_sub_sd:
3479 ; X32-NEXT: subsd %xmm1, %xmm0
3482 ; X64-LABEL: test_mm_sub_sd:
3484 ; X64-NEXT: subsd %xmm1, %xmm0
3486 %ext0 = extractelement <2 x double> %a0, i32 0
3487 %ext1 = extractelement <2 x double> %a1, i32 0
3488 %fsub = fsub double %ext0, %ext1
3489 %res = insertelement <2 x double> %a0, double %fsub, i32 0
3490 ret <2 x double> %res
; Calls @llvm.x86.sse2.psubs.b with both operands reinterpreted as <16 x i8>.
; Both runs expect `psubsb %xmm1, %xmm0`.
3493 define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3494 ; X32-LABEL: test_mm_subs_epi8:
3496 ; X32-NEXT: psubsb %xmm1, %xmm0
3499 ; X64-LABEL: test_mm_subs_epi8:
3501 ; X64-NEXT: psubsb %xmm1, %xmm0
3503 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3504 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3505 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
3506 %bc = bitcast <16 x i8> %res to <2 x i64>
3509 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.psubs.w with both operands reinterpreted as <8 x i16>.
; Both runs expect `psubsw %xmm1, %xmm0`.
3511 define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3512 ; X32-LABEL: test_mm_subs_epi16:
3514 ; X32-NEXT: psubsw %xmm1, %xmm0
3517 ; X64-LABEL: test_mm_subs_epi16:
3519 ; X64-NEXT: psubsw %xmm1, %xmm0
3521 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3522 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3523 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
3524 %bc = bitcast <8 x i16> %res to <2 x i64>
3527 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.psubus.b with both operands reinterpreted as <16 x i8>.
; Both runs expect `psubusb %xmm1, %xmm0`.
3529 define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3530 ; X32-LABEL: test_mm_subs_epu8:
3532 ; X32-NEXT: psubusb %xmm1, %xmm0
3535 ; X64-LABEL: test_mm_subs_epu8:
3537 ; X64-NEXT: psubusb %xmm1, %xmm0
3539 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3540 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3541 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
3542 %bc = bitcast <16 x i8> %res to <2 x i64>
3545 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.psubus.w with both operands reinterpreted as <8 x i16>.
; Both runs expect `psubusw %xmm1, %xmm0`.
3547 define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3548 ; X32-LABEL: test_mm_subs_epu16:
3550 ; X32-NEXT: psubusw %xmm1, %xmm0
3553 ; X64-LABEL: test_mm_subs_epu16:
3555 ; X64-NEXT: psubusw %xmm1, %xmm0
3557 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3558 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3559 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
3560 %bc = bitcast <8 x i16> %res to <2 x i64>
3563 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.ucomieq.sd. Both runs expect `ucomisd %xmm1, %xmm0`
; followed by a two-flag result: `setnp` and `sete` are combined with `andb`
; and zero-extended into %eax with `movzbl`.
3565 define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3566 ; X32-LABEL: test_mm_ucomieq_sd:
3568 ; X32-NEXT: ucomisd %xmm1, %xmm0
3569 ; X32-NEXT: setnp %al
3570 ; X32-NEXT: sete %cl
3571 ; X32-NEXT: andb %al, %cl
3572 ; X32-NEXT: movzbl %cl, %eax
3575 ; X64-LABEL: test_mm_ucomieq_sd:
3577 ; X64-NEXT: ucomisd %xmm1, %xmm0
3578 ; X64-NEXT: setnp %al
3579 ; X64-NEXT: sete %cl
3580 ; X64-NEXT: andb %al, %cl
3581 ; X64-NEXT: movzbl %cl, %eax
3583 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
3586 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomige.sd. Both runs expect %eax to be zeroed with
; `xorl` first, then `ucomisd %xmm1, %xmm0` and `setae %al`.
3588 define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3589 ; X32-LABEL: test_mm_ucomige_sd:
3591 ; X32-NEXT: xorl %eax, %eax
3592 ; X32-NEXT: ucomisd %xmm1, %xmm0
3593 ; X32-NEXT: setae %al
3596 ; X64-LABEL: test_mm_ucomige_sd:
3598 ; X64-NEXT: xorl %eax, %eax
3599 ; X64-NEXT: ucomisd %xmm1, %xmm0
3600 ; X64-NEXT: setae %al
3602 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
3605 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomigt.sd. Both runs expect %eax to be zeroed with
; `xorl` first, then `ucomisd %xmm1, %xmm0` and `seta %al`.
3607 define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3608 ; X32-LABEL: test_mm_ucomigt_sd:
3610 ; X32-NEXT: xorl %eax, %eax
3611 ; X32-NEXT: ucomisd %xmm1, %xmm0
3612 ; X32-NEXT: seta %al
3615 ; X64-LABEL: test_mm_ucomigt_sd:
3617 ; X64-NEXT: xorl %eax, %eax
3618 ; X64-NEXT: ucomisd %xmm1, %xmm0
3619 ; X64-NEXT: seta %al
3621 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
3624 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomile.sd. The expected compare has its operands
; swapped relative to @test_mm_ucomige_sd (`ucomisd %xmm0, %xmm1`) and reuses
; the same `setae %al`, after %eax is zeroed with `xorl`.
3626 define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3627 ; X32-LABEL: test_mm_ucomile_sd:
3629 ; X32-NEXT: xorl %eax, %eax
3630 ; X32-NEXT: ucomisd %xmm0, %xmm1
3631 ; X32-NEXT: setae %al
3634 ; X64-LABEL: test_mm_ucomile_sd:
3636 ; X64-NEXT: xorl %eax, %eax
3637 ; X64-NEXT: ucomisd %xmm0, %xmm1
3638 ; X64-NEXT: setae %al
3640 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
3643 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomilt.sd. The expected compare has its operands
; swapped relative to @test_mm_ucomigt_sd (`ucomisd %xmm0, %xmm1`) and reuses
; the same `seta %al`, after %eax is zeroed with `xorl`.
3645 define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3646 ; X32-LABEL: test_mm_ucomilt_sd:
3648 ; X32-NEXT: xorl %eax, %eax
3649 ; X32-NEXT: ucomisd %xmm0, %xmm1
3650 ; X32-NEXT: seta %al
3653 ; X64-LABEL: test_mm_ucomilt_sd:
3655 ; X64-NEXT: xorl %eax, %eax
3656 ; X64-NEXT: ucomisd %xmm0, %xmm1
3657 ; X64-NEXT: seta %al
3659 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
3662 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomineq.sd. Both runs expect `ucomisd %xmm1, %xmm0`
; followed by a two-flag result: `setp` and `setne` are combined with `orb`
; and zero-extended into %eax with `movzbl`.
3664 define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3665 ; X32-LABEL: test_mm_ucomineq_sd:
3667 ; X32-NEXT: ucomisd %xmm1, %xmm0
3668 ; X32-NEXT: setp %al
3669 ; X32-NEXT: setne %cl
3670 ; X32-NEXT: orb %al, %cl
3671 ; X32-NEXT: movzbl %cl, %eax
3674 ; X64-LABEL: test_mm_ucomineq_sd:
3676 ; X64-NEXT: ucomisd %xmm1, %xmm0
3677 ; X64-NEXT: setp %al
3678 ; X64-NEXT: setne %cl
3679 ; X64-NEXT: orb %al, %cl
3680 ; X64-NEXT: movzbl %cl, %eax
3682 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
3685 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Returns an `undef` <2 x double>. Only the function labels are asserted in the
; lines shown here; no value-producing instruction is expected.
3687 define <2 x double> @test_mm_undefined_pd() {
3688 ; X32-LABEL: test_mm_undefined_pd:
3692 ; X64-LABEL: test_mm_undefined_pd:
3695 ret <2 x double> undef
; <2 x i64> counterpart of @test_mm_undefined_pd.
; NOTE(review): presumably returns `undef` as well - confirm against the body.
3698 define <2 x i64> @test_mm_undefined_si128() {
3699 ; X32-LABEL: test_mm_undefined_si128:
3703 ; X64-LABEL: test_mm_undefined_si128:
; Interleaves the upper eight bytes of both operands with a shufflevector:
; mask entries 8..15 pick from %arg0 and 24..31 pick from %arg1, alternating.
; Both runs expect a single `punpckhbw` with the matching lane pattern.
3709 define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3710 ; X32-LABEL: test_mm_unpackhi_epi8:
3712 ; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3715 ; X64-LABEL: test_mm_unpackhi_epi8:
3717 ; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3719 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3720 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3721 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
3722 %bc = bitcast <16 x i8> %res to <2 x i64>
; Interleaves the upper four i16 lanes of both operands with a shufflevector:
; mask entries 4..7 pick from %arg0 and 12..15 pick from %arg1, alternating.
; Both runs expect a single `punpckhwd` with the matching lane pattern.
3726 define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3727 ; X32-LABEL: test_mm_unpackhi_epi16:
3729 ; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3732 ; X64-LABEL: test_mm_unpackhi_epi16:
3734 ; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3736 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3737 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3738 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
3739 %bc = bitcast <8 x i16> %res to <2 x i64>
; Interleaves the upper two i32 lanes of both operands (mask <2, 6, 3, 7>).
; Although the IR operates on <4 x i32>, both runs expect `unpckhps` rather
; than an integer unpack mnemonic.
3743 define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3744 ; X32-LABEL: test_mm_unpackhi_epi32:
3746 ; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3749 ; X64-LABEL: test_mm_unpackhi_epi32:
3751 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3753 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3754 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3755 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3756 %bc = bitcast <4 x i32> %res to <2 x i64>
; Selects the upper i64 lane of each operand (mask <1, 3>). Both runs expect
; `unpckhpd`, the same instruction expected for @test_mm_unpackhi_pd.
3760 define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3761 ; X32-LABEL: test_mm_unpackhi_epi64:
3763 ; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3766 ; X64-LABEL: test_mm_unpackhi_epi64:
3768 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3770 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
; Selects the upper double of each operand (mask <1, 3>). Both runs expect
; `unpckhpd` producing xmm0[1],xmm1[1].
3774 define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
3775 ; X32-LABEL: test_mm_unpackhi_pd:
3777 ; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3780 ; X64-LABEL: test_mm_unpackhi_pd:
3782 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3784 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
3785 ret <2 x double> %res
; Interleaves the lower eight bytes of both operands with a shufflevector:
; mask entries 0..7 pick from %arg0 and 16..23 pick from %arg1, alternating.
; Both runs expect a single `punpcklbw` with the matching lane pattern.
3788 define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3789 ; X32-LABEL: test_mm_unpacklo_epi8:
3791 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3794 ; X64-LABEL: test_mm_unpacklo_epi8:
3796 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3798 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3799 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3800 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
3801 %bc = bitcast <16 x i8> %res to <2 x i64>
; Interleaves the lower four i16 lanes of both operands with a shufflevector:
; mask entries 0..3 pick from %arg0 and 8..11 pick from %arg1, alternating.
; Both runs expect a single `punpcklwd` with the matching lane pattern.
3805 define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3806 ; X32-LABEL: test_mm_unpacklo_epi16:
3808 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3811 ; X64-LABEL: test_mm_unpacklo_epi16:
3813 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3815 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3816 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3817 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
3818 %bc = bitcast <8 x i16> %res to <2 x i64>
; Interleaves the lower two i32 lanes of both operands (mask <0, 4, 1, 5>).
; Although the IR operates on <4 x i32>, both runs expect `unpcklps` rather
; than an integer unpack mnemonic.
3822 define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3823 ; X32-LABEL: test_mm_unpacklo_epi32:
3825 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3828 ; X64-LABEL: test_mm_unpacklo_epi32:
3830 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3832 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3833 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3834 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3835 %bc = bitcast <4 x i32> %res to <2 x i64>
; Selects the lower i64 lane of each operand (mask <0, 2>). Both runs expect
; `movlhps` producing xmm0[0],xmm1[0], the same instruction expected for
; @test_mm_unpacklo_pd.
3839 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3840 ; X32-LABEL: test_mm_unpacklo_epi64:
3842 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3845 ; X64-LABEL: test_mm_unpacklo_epi64:
3847 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3849 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
; Selects the lower double of each operand (mask <0, 2>). Both runs expect
; `movlhps` producing xmm0[0],xmm1[0].
3853 define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
3854 ; X32-LABEL: test_mm_unpacklo_pd:
3856 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3859 ; X64-LABEL: test_mm_unpacklo_pd:
3861 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3863 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
3864 ret <2 x double> %res
; Bitwise XOR of two <2 x double> values, done on their <4 x i32> bit patterns
; and cast back. Both runs expect `xorps %xmm1, %xmm0`.
3867 define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3868 ; X32-LABEL: test_mm_xor_pd:
3870 ; X32-NEXT: xorps %xmm1, %xmm0
3873 ; X64-LABEL: test_mm_xor_pd:
3875 ; X64-NEXT: xorps %xmm1, %xmm0
3877 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
3878 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
3879 %res = xor <4 x i32> %arg0, %arg1
3880 %bc = bitcast <4 x i32> %res to <2 x double>
3881 ret <2 x double> %bc
; Plain IR `xor` on the <2 x i64> arguments. Both runs expect
; `xorps %xmm1, %xmm0`, the same instruction expected for @test_mm_xor_pd.
3884 define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3885 ; X32-LABEL: test_mm_xor_si128:
3887 ; X32-NEXT: xorps %xmm1, %xmm0
3890 ; X64-LABEL: test_mm_xor_si128:
3892 ; X64-NEXT: xorps %xmm1, %xmm0
3894 %res = xor <2 x i64> %a0, %a1