1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X86
3 ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X86
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64
5 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64
7 declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone
; SSSE3 phaddw via MMX: i686 marshals the <1 x i64> args through the stack,
; x86-64 moves GPR<->MMX directly.
define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test1:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: phaddw %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}
52 declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpgtd lowering for the <2 x i32> comparison intrinsic.
define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test88:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test88:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpgtd %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
97 declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpgtw lowering for the <4 x i16> comparison intrinsic.
define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test87:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test87:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpgtw %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
142 declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpgtb lowering for the <8 x i8> comparison intrinsic.
define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test86:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test86:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpgtb %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
187 declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpeqd lowering for the <2 x i32> equality intrinsic.
define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test85:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test85:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpeqd %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
232 declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpeqw lowering for the <4 x i16> equality intrinsic.
define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test84:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test84:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpeqw %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
277 declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone
; MMX pcmpeqb lowering for the <8 x i8> equality intrinsic.
define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test83:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test83:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: pcmpeqb %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
322 declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpckldq lowering (interleave low dwords).
define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test82:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test82:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
367 declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpcklwd lowering (interleave low words).
define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test81:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test81:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
412 declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpcklbw lowering (interleave low bytes).
define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test80:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test80:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
457 declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpckhdq lowering (interleave high dwords).
define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test79:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test79:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpckhdq %mm0, %mm1 # mm1 = mm1[1],mm0[1]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
502 declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpckhwd lowering (interleave high words).
define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test78:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test78:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpckhwd %mm0, %mm1 # mm1 = mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
547 declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone
; MMX punpckhbw lowering (interleave high bytes).
define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test77:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test77:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: punpckhbw %mm0, %mm1 # mm1 = mm1[4],mm0[4],mm1[5],mm0[5],mm1[6],mm0[6],mm1[7],mm0[7]
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
592 declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone
; MMX packuswb lowering (pack words to unsigned-saturated bytes).
define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test76:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test76:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: packuswb %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
637 declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone
; MMX packssdw lowering (pack dwords to signed-saturated words).
define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test75:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test75:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: packssdw %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
682 declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone
; MMX packsswb lowering (pack words to signed-saturated bytes).
define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test74:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $24, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test74:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsi, %mm0
; X64-NEXT: movq %rdi, %mm1
; X64-NEXT: packsswb %mm0, %mm1
; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
727 declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone
; MMX psrad-by-immediate lowering (arithmetic shift right, dwords).
define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test73:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: psrad $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test73:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %mm0
; X64-NEXT: psrad $3, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
765 declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone
; MMX psraw-by-immediate lowering (arithmetic shift right, words).
define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: psraw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test72:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %mm0
; X64-NEXT: psraw $3, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
; psraw by 0 is a no-op: the shift folds away on both targets.
define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72_2:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test72_2:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
836 declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
; MMX psrlq-by-immediate lowering; the i64 operand loads straight into %mm0 on i686.
define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test71:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrlq $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test71:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %mm0
; X64-NEXT: psrlq $3, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}
868 declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone
; MMX psrld-by-immediate lowering (logical shift right, dwords).
define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: psrld $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test70:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %mm0
; X64-NEXT: psrld $3, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
; psrld by 0 is a no-op: the shift folds away on both targets.
define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70_2:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test70_2:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
939 declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone
; MMX psrlw-by-immediate lowering (logical shift right, words).
define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test69:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: psrlw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test69:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %mm0
; X64-NEXT: psrlw $3, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}
977 declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
; test68: llvm.x86.mmx.pslli.q with immediate 3 must select "psllq $3, %mm0".
; Note the i64 argument is loaded straight into %mm0 on X86 (movq 8(%ebp)),
; with no <2 x i32>/<4 x i16> bitcast shuffle like the narrower-element tests.
979 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
981 ; X86: # %bb.0: # %entry
982 ; X86-NEXT: pushl %ebp
983 ; X86-NEXT: movl %esp, %ebp
984 ; X86-NEXT: andl $-8, %esp
985 ; X86-NEXT: subl $8, %esp
986 ; X86-NEXT: movq 8(%ebp), %mm0
987 ; X86-NEXT: psllq $3, %mm0
988 ; X86-NEXT: movq %mm0, (%esp)
989 ; X86-NEXT: movl (%esp), %eax
990 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
991 ; X86-NEXT: movl %ebp, %esp
992 ; X86-NEXT: popl %ebp
996 ; X64: # %bb.0: # %entry
997 ; X64-NEXT: movq %rdi, %mm0
998 ; X64-NEXT: psllq $3, %mm0
999 ; X64-NEXT: movq %mm0, %rax
1002 %0 = extractelement <1 x i64> %a, i32 0
1003 %mmx_var.i = bitcast i64 %0 to <1 x i64>
1004 %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
1005 %2 = bitcast <1 x i64> %1 to i64
1009 declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone
; test67: llvm.x86.mmx.pslli.d with immediate 3 must select "pslld $3, %mm0"
; on both targets; the <2 x i32> bitcasts around the call are no-ops at the
; MMX register level and must not generate extra instructions.
1011 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
1012 ; X86-LABEL: test67:
1013 ; X86: # %bb.0: # %entry
1014 ; X86-NEXT: pushl %ebp
1015 ; X86-NEXT: movl %esp, %ebp
1016 ; X86-NEXT: andl $-8, %esp
1017 ; X86-NEXT: subl $16, %esp
1018 ; X86-NEXT: movl 8(%ebp), %eax
1019 ; X86-NEXT: movl 12(%ebp), %ecx
1020 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1021 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1022 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1023 ; X86-NEXT: pslld $3, %mm0
1024 ; X86-NEXT: movq %mm0, (%esp)
1025 ; X86-NEXT: movl (%esp), %eax
1026 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1027 ; X86-NEXT: movl %ebp, %esp
1028 ; X86-NEXT: popl %ebp
1031 ; X64-LABEL: test67:
1032 ; X64: # %bb.0: # %entry
1033 ; X64-NEXT: movq %rdi, %mm0
1034 ; X64-NEXT: pslld $3, %mm0
1035 ; X64-NEXT: movq %mm0, %rax
1038 %0 = bitcast <1 x i64> %a to <2 x i32>
1039 %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
1040 %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
1041 %2 = bitcast <1 x i64> %1 to <2 x i32>
1042 %3 = bitcast <2 x i32> %2 to <1 x i64>
1043 %4 = extractelement <1 x i64> %3, i32 0
1047 declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone
; test66: llvm.x86.mmx.pslli.w with immediate 3 must select "psllw $3, %mm0"
; on both targets.
1049 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
1050 ; X86-LABEL: test66:
1051 ; X86: # %bb.0: # %entry
1052 ; X86-NEXT: pushl %ebp
1053 ; X86-NEXT: movl %esp, %ebp
1054 ; X86-NEXT: andl $-8, %esp
1055 ; X86-NEXT: subl $16, %esp
1056 ; X86-NEXT: movl 8(%ebp), %eax
1057 ; X86-NEXT: movl 12(%ebp), %ecx
1058 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1059 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1060 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1061 ; X86-NEXT: psllw $3, %mm0
1062 ; X86-NEXT: movq %mm0, (%esp)
1063 ; X86-NEXT: movl (%esp), %eax
1064 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1065 ; X86-NEXT: movl %ebp, %esp
1066 ; X86-NEXT: popl %ebp
1069 ; X64-LABEL: test66:
1070 ; X64: # %bb.0: # %entry
1071 ; X64-NEXT: movq %rdi, %mm0
1072 ; X64-NEXT: psllw $3, %mm0
1073 ; X64-NEXT: movq %mm0, %rax
1076 %0 = bitcast <1 x i64> %a to <4 x i16>
1077 %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1078 %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
1079 %2 = bitcast <1 x i64> %1 to <4 x i16>
1080 %3 = bitcast <4 x i16> %2 to <1 x i64>
1081 %4 = extractelement <1 x i64> %3, i32 0
; test66_2: llvm.x86.mmx.pslli.w with a shift count of 0 is an identity
; operation — the checks confirm no psllw is emitted at all (X64 collapses
; to a single "movq %rdi, %rax"; X86 is just the arg round-trip).
1085 define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
1086 ; X86-LABEL: test66_2:
1087 ; X86: # %bb.0: # %entry
1088 ; X86-NEXT: pushl %ebp
1089 ; X86-NEXT: movl %esp, %ebp
1090 ; X86-NEXT: andl $-8, %esp
1091 ; X86-NEXT: subl $16, %esp
1092 ; X86-NEXT: movl 8(%ebp), %eax
1093 ; X86-NEXT: movl 12(%ebp), %ecx
1094 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1095 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1096 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1097 ; X86-NEXT: movq %mm0, (%esp)
1098 ; X86-NEXT: movl (%esp), %eax
1099 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1100 ; X86-NEXT: movl %ebp, %esp
1101 ; X86-NEXT: popl %ebp
1104 ; X64-LABEL: test66_2:
1105 ; X64: # %bb.0: # %entry
1106 ; X64-NEXT: movq %rdi, %rax
1109 %0 = bitcast <1 x i64> %a to <4 x i16>
1110 %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1111 %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
1112 %2 = bitcast <1 x i64> %1 to <4 x i16>
1113 %3 = bitcast <4 x i16> %2 to <1 x i64>
1114 %4 = extractelement <1 x i64> %3, i32 0
1118 declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone
; test65: llvm.x86.mmx.psra.d with a variable (register/memory) count must
; select psrad — X86 folds the count from its stack slot (16(%ebp)), X64
; uses the two-register form.
1120 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1121 ; X86-LABEL: test65:
1122 ; X86: # %bb.0: # %entry
1123 ; X86-NEXT: pushl %ebp
1124 ; X86-NEXT: movl %esp, %ebp
1125 ; X86-NEXT: andl $-8, %esp
1126 ; X86-NEXT: subl $16, %esp
1127 ; X86-NEXT: movl 8(%ebp), %eax
1128 ; X86-NEXT: movl 12(%ebp), %ecx
1129 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1130 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1131 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1132 ; X86-NEXT: psrad 16(%ebp), %mm0
1133 ; X86-NEXT: movq %mm0, (%esp)
1134 ; X86-NEXT: movl (%esp), %eax
1135 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1136 ; X86-NEXT: movl %ebp, %esp
1137 ; X86-NEXT: popl %ebp
1140 ; X64-LABEL: test65:
1141 ; X64: # %bb.0: # %entry
1142 ; X64-NEXT: movq %rdi, %mm0
1143 ; X64-NEXT: movq %rsi, %mm1
1144 ; X64-NEXT: psrad %mm1, %mm0
1145 ; X64-NEXT: movq %mm0, %rax
1148 %0 = bitcast <1 x i64> %a to <2 x i32>
1149 %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
1150 %1 = extractelement <1 x i64> %b, i32 0
1151 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1152 %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1153 %3 = bitcast <1 x i64> %2 to <2 x i32>
1154 %4 = bitcast <2 x i32> %3 to <1 x i64>
1155 %5 = extractelement <1 x i64> %4, i32 0
1159 declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone
; test64: llvm.x86.mmx.psra.w with a variable count must select psraw
; (memory-count form on X86, register form on X64).
1161 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1162 ; X86-LABEL: test64:
1163 ; X86: # %bb.0: # %entry
1164 ; X86-NEXT: pushl %ebp
1165 ; X86-NEXT: movl %esp, %ebp
1166 ; X86-NEXT: andl $-8, %esp
1167 ; X86-NEXT: subl $16, %esp
1168 ; X86-NEXT: movl 8(%ebp), %eax
1169 ; X86-NEXT: movl 12(%ebp), %ecx
1170 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1171 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1172 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1173 ; X86-NEXT: psraw 16(%ebp), %mm0
1174 ; X86-NEXT: movq %mm0, (%esp)
1175 ; X86-NEXT: movl (%esp), %eax
1176 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1177 ; X86-NEXT: movl %ebp, %esp
1178 ; X86-NEXT: popl %ebp
1181 ; X64-LABEL: test64:
1182 ; X64: # %bb.0: # %entry
1183 ; X64-NEXT: movq %rdi, %mm0
1184 ; X64-NEXT: movq %rsi, %mm1
1185 ; X64-NEXT: psraw %mm1, %mm0
1186 ; X64-NEXT: movq %mm0, %rax
1189 %0 = bitcast <1 x i64> %a to <4 x i16>
1190 %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1191 %1 = extractelement <1 x i64> %b, i32 0
1192 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1193 %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1194 %3 = bitcast <1 x i64> %2 to <4 x i16>
1195 %4 = bitcast <4 x i16> %3 to <1 x i64>
1196 %5 = extractelement <1 x i64> %4, i32 0
1200 declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone
; test63: llvm.x86.mmx.psrl.q with a variable count must select psrlq; both
; i64 operands load directly into MMX registers (no element bitcasts needed).
1202 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1203 ; X86-LABEL: test63:
1204 ; X86: # %bb.0: # %entry
1205 ; X86-NEXT: pushl %ebp
1206 ; X86-NEXT: movl %esp, %ebp
1207 ; X86-NEXT: andl $-8, %esp
1208 ; X86-NEXT: subl $8, %esp
1209 ; X86-NEXT: movq 8(%ebp), %mm0
1210 ; X86-NEXT: psrlq 16(%ebp), %mm0
1211 ; X86-NEXT: movq %mm0, (%esp)
1212 ; X86-NEXT: movl (%esp), %eax
1213 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1214 ; X86-NEXT: movl %ebp, %esp
1215 ; X86-NEXT: popl %ebp
1218 ; X64-LABEL: test63:
1219 ; X64: # %bb.0: # %entry
1220 ; X64-NEXT: movq %rdi, %mm0
1221 ; X64-NEXT: movq %rsi, %mm1
1222 ; X64-NEXT: psrlq %mm1, %mm0
1223 ; X64-NEXT: movq %mm0, %rax
1226 %0 = extractelement <1 x i64> %a, i32 0
1227 %mmx_var.i = bitcast i64 %0 to <1 x i64>
1228 %1 = extractelement <1 x i64> %b, i32 0
1229 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1230 %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1231 %3 = bitcast <1 x i64> %2 to i64
1235 declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone
; test62: llvm.x86.mmx.psrl.d with a variable count must select psrld.
1237 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1238 ; X86-LABEL: test62:
1239 ; X86: # %bb.0: # %entry
1240 ; X86-NEXT: pushl %ebp
1241 ; X86-NEXT: movl %esp, %ebp
1242 ; X86-NEXT: andl $-8, %esp
1243 ; X86-NEXT: subl $16, %esp
1244 ; X86-NEXT: movl 8(%ebp), %eax
1245 ; X86-NEXT: movl 12(%ebp), %ecx
1246 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1247 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1248 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1249 ; X86-NEXT: psrld 16(%ebp), %mm0
1250 ; X86-NEXT: movq %mm0, (%esp)
1251 ; X86-NEXT: movl (%esp), %eax
1252 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1253 ; X86-NEXT: movl %ebp, %esp
1254 ; X86-NEXT: popl %ebp
1257 ; X64-LABEL: test62:
1258 ; X64: # %bb.0: # %entry
1259 ; X64-NEXT: movq %rdi, %mm0
1260 ; X64-NEXT: movq %rsi, %mm1
1261 ; X64-NEXT: psrld %mm1, %mm0
1262 ; X64-NEXT: movq %mm0, %rax
1265 %0 = bitcast <1 x i64> %a to <2 x i32>
1266 %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
1267 %1 = extractelement <1 x i64> %b, i32 0
1268 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1269 %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1270 %3 = bitcast <1 x i64> %2 to <2 x i32>
1271 %4 = bitcast <2 x i32> %3 to <1 x i64>
1272 %5 = extractelement <1 x i64> %4, i32 0
1276 declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone
; test61: llvm.x86.mmx.psrl.w with a variable count must select psrlw.
1278 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1279 ; X86-LABEL: test61:
1280 ; X86: # %bb.0: # %entry
1281 ; X86-NEXT: pushl %ebp
1282 ; X86-NEXT: movl %esp, %ebp
1283 ; X86-NEXT: andl $-8, %esp
1284 ; X86-NEXT: subl $16, %esp
1285 ; X86-NEXT: movl 8(%ebp), %eax
1286 ; X86-NEXT: movl 12(%ebp), %ecx
1287 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1288 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1289 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1290 ; X86-NEXT: psrlw 16(%ebp), %mm0
1291 ; X86-NEXT: movq %mm0, (%esp)
1292 ; X86-NEXT: movl (%esp), %eax
1293 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1294 ; X86-NEXT: movl %ebp, %esp
1295 ; X86-NEXT: popl %ebp
1298 ; X64-LABEL: test61:
1299 ; X64: # %bb.0: # %entry
1300 ; X64-NEXT: movq %rdi, %mm0
1301 ; X64-NEXT: movq %rsi, %mm1
1302 ; X64-NEXT: psrlw %mm1, %mm0
1303 ; X64-NEXT: movq %mm0, %rax
1306 %0 = bitcast <1 x i64> %a to <4 x i16>
1307 %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1308 %1 = extractelement <1 x i64> %b, i32 0
1309 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1310 %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1311 %3 = bitcast <1 x i64> %2 to <4 x i16>
1312 %4 = bitcast <4 x i16> %3 to <1 x i64>
1313 %5 = extractelement <1 x i64> %4, i32 0
1317 declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
; test60: llvm.x86.mmx.psll.q with a variable count must select psllq.
1319 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1320 ; X86-LABEL: test60:
1321 ; X86: # %bb.0: # %entry
1322 ; X86-NEXT: pushl %ebp
1323 ; X86-NEXT: movl %esp, %ebp
1324 ; X86-NEXT: andl $-8, %esp
1325 ; X86-NEXT: subl $8, %esp
1326 ; X86-NEXT: movq 8(%ebp), %mm0
1327 ; X86-NEXT: psllq 16(%ebp), %mm0
1328 ; X86-NEXT: movq %mm0, (%esp)
1329 ; X86-NEXT: movl (%esp), %eax
1330 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1331 ; X86-NEXT: movl %ebp, %esp
1332 ; X86-NEXT: popl %ebp
1335 ; X64-LABEL: test60:
1336 ; X64: # %bb.0: # %entry
1337 ; X64-NEXT: movq %rdi, %mm0
1338 ; X64-NEXT: movq %rsi, %mm1
1339 ; X64-NEXT: psllq %mm1, %mm0
1340 ; X64-NEXT: movq %mm0, %rax
1343 %0 = extractelement <1 x i64> %a, i32 0
1344 %mmx_var.i = bitcast i64 %0 to <1 x i64>
1345 %1 = extractelement <1 x i64> %b, i32 0
1346 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1347 %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1348 %3 = bitcast <1 x i64> %2 to i64
1352 declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone
; test59: llvm.x86.mmx.psll.d with a variable count must select pslld.
1354 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1355 ; X86-LABEL: test59:
1356 ; X86: # %bb.0: # %entry
1357 ; X86-NEXT: pushl %ebp
1358 ; X86-NEXT: movl %esp, %ebp
1359 ; X86-NEXT: andl $-8, %esp
1360 ; X86-NEXT: subl $16, %esp
1361 ; X86-NEXT: movl 8(%ebp), %eax
1362 ; X86-NEXT: movl 12(%ebp), %ecx
1363 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1364 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1365 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1366 ; X86-NEXT: pslld 16(%ebp), %mm0
1367 ; X86-NEXT: movq %mm0, (%esp)
1368 ; X86-NEXT: movl (%esp), %eax
1369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1370 ; X86-NEXT: movl %ebp, %esp
1371 ; X86-NEXT: popl %ebp
1374 ; X64-LABEL: test59:
1375 ; X64: # %bb.0: # %entry
1376 ; X64-NEXT: movq %rdi, %mm0
1377 ; X64-NEXT: movq %rsi, %mm1
1378 ; X64-NEXT: pslld %mm1, %mm0
1379 ; X64-NEXT: movq %mm0, %rax
1382 %0 = bitcast <1 x i64> %a to <2 x i32>
1383 %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
1384 %1 = extractelement <1 x i64> %b, i32 0
1385 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1386 %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1387 %3 = bitcast <1 x i64> %2 to <2 x i32>
1388 %4 = bitcast <2 x i32> %3 to <1 x i64>
1389 %5 = extractelement <1 x i64> %4, i32 0
1393 declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone
; test58: llvm.x86.mmx.psll.w with a variable count must select psllw.
1395 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1396 ; X86-LABEL: test58:
1397 ; X86: # %bb.0: # %entry
1398 ; X86-NEXT: pushl %ebp
1399 ; X86-NEXT: movl %esp, %ebp
1400 ; X86-NEXT: andl $-8, %esp
1401 ; X86-NEXT: subl $16, %esp
1402 ; X86-NEXT: movl 8(%ebp), %eax
1403 ; X86-NEXT: movl 12(%ebp), %ecx
1404 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1405 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1406 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1407 ; X86-NEXT: psllw 16(%ebp), %mm0
1408 ; X86-NEXT: movq %mm0, (%esp)
1409 ; X86-NEXT: movl (%esp), %eax
1410 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1411 ; X86-NEXT: movl %ebp, %esp
1412 ; X86-NEXT: popl %ebp
1415 ; X64-LABEL: test58:
1416 ; X64: # %bb.0: # %entry
1417 ; X64-NEXT: movq %rdi, %mm0
1418 ; X64-NEXT: movq %rsi, %mm1
1419 ; X64-NEXT: psllw %mm1, %mm0
1420 ; X64-NEXT: movq %mm0, %rax
1423 %0 = bitcast <1 x i64> %a to <4 x i16>
1424 %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1425 %1 = extractelement <1 x i64> %b, i32 0
1426 %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1427 %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1428 %3 = bitcast <1 x i64> %2 to <4 x i16>
1429 %4 = bitcast <4 x i16> %3 to <1 x i64>
1430 %5 = extractelement <1 x i64> %4, i32 0
1434 declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone
; test56: llvm.x86.mmx.pxor must select pxor; on X86 the second operand is
; folded from its stack slot, on X64 both args move through MMX registers.
1436 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1437 ; X86-LABEL: test56:
1438 ; X86: # %bb.0: # %entry
1439 ; X86-NEXT: pushl %ebp
1440 ; X86-NEXT: movl %esp, %ebp
1441 ; X86-NEXT: andl $-8, %esp
1442 ; X86-NEXT: subl $24, %esp
1443 ; X86-NEXT: movl 8(%ebp), %eax
1444 ; X86-NEXT: movl 12(%ebp), %ecx
1445 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1446 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1447 ; X86-NEXT: movl 16(%ebp), %eax
1448 ; X86-NEXT: movl 20(%ebp), %ecx
1449 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1450 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1451 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1452 ; X86-NEXT: pxor {{[0-9]+}}(%esp), %mm0
1453 ; X86-NEXT: movq %mm0, (%esp)
1454 ; X86-NEXT: movl (%esp), %eax
1455 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1456 ; X86-NEXT: movl %ebp, %esp
1457 ; X86-NEXT: popl %ebp
1460 ; X64-LABEL: test56:
1461 ; X64: # %bb.0: # %entry
1462 ; X64-NEXT: movq %rsi, %mm0
1463 ; X64-NEXT: movq %rdi, %mm1
1464 ; X64-NEXT: pxor %mm0, %mm1
1465 ; X64-NEXT: movq %mm1, %rax
1468 %0 = bitcast <1 x i64> %b to <2 x i32>
1469 %1 = bitcast <1 x i64> %a to <2 x i32>
1470 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1471 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1472 %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1473 %3 = bitcast <1 x i64> %2 to <2 x i32>
1474 %4 = bitcast <2 x i32> %3 to <1 x i64>
1475 %5 = extractelement <1 x i64> %4, i32 0
1479 declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone
; test55: llvm.x86.mmx.por must select por.
1481 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1482 ; X86-LABEL: test55:
1483 ; X86: # %bb.0: # %entry
1484 ; X86-NEXT: pushl %ebp
1485 ; X86-NEXT: movl %esp, %ebp
1486 ; X86-NEXT: andl $-8, %esp
1487 ; X86-NEXT: subl $24, %esp
1488 ; X86-NEXT: movl 8(%ebp), %eax
1489 ; X86-NEXT: movl 12(%ebp), %ecx
1490 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1491 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1492 ; X86-NEXT: movl 16(%ebp), %eax
1493 ; X86-NEXT: movl 20(%ebp), %ecx
1494 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1495 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1496 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1497 ; X86-NEXT: por {{[0-9]+}}(%esp), %mm0
1498 ; X86-NEXT: movq %mm0, (%esp)
1499 ; X86-NEXT: movl (%esp), %eax
1500 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1501 ; X86-NEXT: movl %ebp, %esp
1502 ; X86-NEXT: popl %ebp
1505 ; X64-LABEL: test55:
1506 ; X64: # %bb.0: # %entry
1507 ; X64-NEXT: movq %rsi, %mm0
1508 ; X64-NEXT: movq %rdi, %mm1
1509 ; X64-NEXT: por %mm0, %mm1
1510 ; X64-NEXT: movq %mm1, %rax
1513 %0 = bitcast <1 x i64> %b to <2 x i32>
1514 %1 = bitcast <1 x i64> %a to <2 x i32>
1515 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1516 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1517 %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1518 %3 = bitcast <1 x i64> %2 to <2 x i32>
1519 %4 = bitcast <2 x i32> %3 to <1 x i64>
1520 %5 = extractelement <1 x i64> %4, i32 0
1524 declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
; test54: llvm.x86.mmx.pandn must select pandn. Note pandn is not
; commutative (it complements the destination operand), so operand order
; in the checks matters: %a (first arg) is the complemented side.
1526 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1527 ; X86-LABEL: test54:
1528 ; X86: # %bb.0: # %entry
1529 ; X86-NEXT: pushl %ebp
1530 ; X86-NEXT: movl %esp, %ebp
1531 ; X86-NEXT: andl $-8, %esp
1532 ; X86-NEXT: subl $24, %esp
1533 ; X86-NEXT: movl 8(%ebp), %eax
1534 ; X86-NEXT: movl 12(%ebp), %ecx
1535 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1536 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1537 ; X86-NEXT: movl 16(%ebp), %eax
1538 ; X86-NEXT: movl 20(%ebp), %ecx
1539 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1540 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1541 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1542 ; X86-NEXT: pandn {{[0-9]+}}(%esp), %mm0
1543 ; X86-NEXT: movq %mm0, (%esp)
1544 ; X86-NEXT: movl (%esp), %eax
1545 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1546 ; X86-NEXT: movl %ebp, %esp
1547 ; X86-NEXT: popl %ebp
1550 ; X64-LABEL: test54:
1551 ; X64: # %bb.0: # %entry
1552 ; X64-NEXT: movq %rsi, %mm0
1553 ; X64-NEXT: movq %rdi, %mm1
1554 ; X64-NEXT: pandn %mm0, %mm1
1555 ; X64-NEXT: movq %mm1, %rax
1558 %0 = bitcast <1 x i64> %b to <2 x i32>
1559 %1 = bitcast <1 x i64> %a to <2 x i32>
1560 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1561 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1562 %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1563 %3 = bitcast <1 x i64> %2 to <2 x i32>
1564 %4 = bitcast <2 x i32> %3 to <1 x i64>
1565 %5 = extractelement <1 x i64> %4, i32 0
1569 declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone
; test53: llvm.x86.mmx.pand must select pand.
1571 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1572 ; X86-LABEL: test53:
1573 ; X86: # %bb.0: # %entry
1574 ; X86-NEXT: pushl %ebp
1575 ; X86-NEXT: movl %esp, %ebp
1576 ; X86-NEXT: andl $-8, %esp
1577 ; X86-NEXT: subl $24, %esp
1578 ; X86-NEXT: movl 8(%ebp), %eax
1579 ; X86-NEXT: movl 12(%ebp), %ecx
1580 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1581 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1582 ; X86-NEXT: movl 16(%ebp), %eax
1583 ; X86-NEXT: movl 20(%ebp), %ecx
1584 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1585 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1586 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1587 ; X86-NEXT: pand {{[0-9]+}}(%esp), %mm0
1588 ; X86-NEXT: movq %mm0, (%esp)
1589 ; X86-NEXT: movl (%esp), %eax
1590 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1591 ; X86-NEXT: movl %ebp, %esp
1592 ; X86-NEXT: popl %ebp
1595 ; X64-LABEL: test53:
1596 ; X64: # %bb.0: # %entry
1597 ; X64-NEXT: movq %rsi, %mm0
1598 ; X64-NEXT: movq %rdi, %mm1
1599 ; X64-NEXT: pand %mm0, %mm1
1600 ; X64-NEXT: movq %mm1, %rax
1603 %0 = bitcast <1 x i64> %b to <2 x i32>
1604 %1 = bitcast <1 x i64> %a to <2 x i32>
1605 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1606 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1607 %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1608 %3 = bitcast <1 x i64> %2 to <2 x i32>
1609 %4 = bitcast <2 x i32> %3 to <1 x i64>
1610 %5 = extractelement <1 x i64> %4, i32 0
1614 declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone
; test52: llvm.x86.mmx.pmull.w must select pmullw (low 16 bits of the
; 16x16 products).
1616 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1617 ; X86-LABEL: test52:
1618 ; X86: # %bb.0: # %entry
1619 ; X86-NEXT: pushl %ebp
1620 ; X86-NEXT: movl %esp, %ebp
1621 ; X86-NEXT: andl $-8, %esp
1622 ; X86-NEXT: subl $24, %esp
1623 ; X86-NEXT: movl 8(%ebp), %eax
1624 ; X86-NEXT: movl 12(%ebp), %ecx
1625 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1626 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1627 ; X86-NEXT: movl 16(%ebp), %eax
1628 ; X86-NEXT: movl 20(%ebp), %ecx
1629 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1630 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1631 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1632 ; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0
1633 ; X86-NEXT: movq %mm0, (%esp)
1634 ; X86-NEXT: movl (%esp), %eax
1635 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1636 ; X86-NEXT: movl %ebp, %esp
1637 ; X86-NEXT: popl %ebp
1640 ; X64-LABEL: test52:
1641 ; X64: # %bb.0: # %entry
1642 ; X64-NEXT: movq %rsi, %mm0
1643 ; X64-NEXT: movq %rdi, %mm1
1644 ; X64-NEXT: pmullw %mm0, %mm1
1645 ; X64-NEXT: movq %mm1, %rax
1648 %0 = bitcast <1 x i64> %b to <4 x i16>
1649 %1 = bitcast <1 x i64> %a to <4 x i16>
1650 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1651 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1652 %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1653 %3 = bitcast <1 x i64> %2 to <4 x i16>
1654 %4 = bitcast <4 x i16> %3 to <1 x i64>
1655 %5 = extractelement <1 x i64> %4, i32 0
; test51: identical IR to test52 (llvm.x86.mmx.pmull.w -> pmullw); kept as
; a separate autogenerated case from the original test suite.
1659 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1660 ; X86-LABEL: test51:
1661 ; X86: # %bb.0: # %entry
1662 ; X86-NEXT: pushl %ebp
1663 ; X86-NEXT: movl %esp, %ebp
1664 ; X86-NEXT: andl $-8, %esp
1665 ; X86-NEXT: subl $24, %esp
1666 ; X86-NEXT: movl 8(%ebp), %eax
1667 ; X86-NEXT: movl 12(%ebp), %ecx
1668 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1669 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1670 ; X86-NEXT: movl 16(%ebp), %eax
1671 ; X86-NEXT: movl 20(%ebp), %ecx
1672 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1673 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1674 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1675 ; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0
1676 ; X86-NEXT: movq %mm0, (%esp)
1677 ; X86-NEXT: movl (%esp), %eax
1678 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1679 ; X86-NEXT: movl %ebp, %esp
1680 ; X86-NEXT: popl %ebp
1683 ; X64-LABEL: test51:
1684 ; X64: # %bb.0: # %entry
1685 ; X64-NEXT: movq %rsi, %mm0
1686 ; X64-NEXT: movq %rdi, %mm1
1687 ; X64-NEXT: pmullw %mm0, %mm1
1688 ; X64-NEXT: movq %mm1, %rax
1691 %0 = bitcast <1 x i64> %b to <4 x i16>
1692 %1 = bitcast <1 x i64> %a to <4 x i16>
1693 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1694 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1695 %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1696 %3 = bitcast <1 x i64> %2 to <4 x i16>
1697 %4 = bitcast <4 x i16> %3 to <1 x i64>
1698 %5 = extractelement <1 x i64> %4, i32 0
1702 declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone
; test50: llvm.x86.mmx.pmulh.w must select pmulhw (signed high 16 bits of
; the 16x16 products).
1704 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1705 ; X86-LABEL: test50:
1706 ; X86: # %bb.0: # %entry
1707 ; X86-NEXT: pushl %ebp
1708 ; X86-NEXT: movl %esp, %ebp
1709 ; X86-NEXT: andl $-8, %esp
1710 ; X86-NEXT: subl $24, %esp
1711 ; X86-NEXT: movl 8(%ebp), %eax
1712 ; X86-NEXT: movl 12(%ebp), %ecx
1713 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1714 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1715 ; X86-NEXT: movl 16(%ebp), %eax
1716 ; X86-NEXT: movl 20(%ebp), %ecx
1717 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1718 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1719 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1720 ; X86-NEXT: pmulhw {{[0-9]+}}(%esp), %mm0
1721 ; X86-NEXT: movq %mm0, (%esp)
1722 ; X86-NEXT: movl (%esp), %eax
1723 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1724 ; X86-NEXT: movl %ebp, %esp
1725 ; X86-NEXT: popl %ebp
1728 ; X64-LABEL: test50:
1729 ; X64: # %bb.0: # %entry
1730 ; X64-NEXT: movq %rsi, %mm0
1731 ; X64-NEXT: movq %rdi, %mm1
1732 ; X64-NEXT: pmulhw %mm0, %mm1
1733 ; X64-NEXT: movq %mm1, %rax
1736 %0 = bitcast <1 x i64> %b to <4 x i16>
1737 %1 = bitcast <1 x i64> %a to <4 x i16>
1738 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1739 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1740 %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1741 %3 = bitcast <1 x i64> %2 to <4 x i16>
1742 %4 = bitcast <4 x i16> %3 to <1 x i64>
1743 %5 = extractelement <1 x i64> %4, i32 0
1747 declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone
; test49: llvm.x86.mmx.pmadd.wd must select pmaddwd; note the result is
; viewed as <2 x i32> (word multiplies accumulated into dwords).
1749 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1750 ; X86-LABEL: test49:
1751 ; X86: # %bb.0: # %entry
1752 ; X86-NEXT: pushl %ebp
1753 ; X86-NEXT: movl %esp, %ebp
1754 ; X86-NEXT: andl $-8, %esp
1755 ; X86-NEXT: subl $24, %esp
1756 ; X86-NEXT: movl 8(%ebp), %eax
1757 ; X86-NEXT: movl 12(%ebp), %ecx
1758 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1759 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1760 ; X86-NEXT: movl 16(%ebp), %eax
1761 ; X86-NEXT: movl 20(%ebp), %ecx
1762 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1763 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1764 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1765 ; X86-NEXT: pmaddwd {{[0-9]+}}(%esp), %mm0
1766 ; X86-NEXT: movq %mm0, (%esp)
1767 ; X86-NEXT: movl (%esp), %eax
1768 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1769 ; X86-NEXT: movl %ebp, %esp
1770 ; X86-NEXT: popl %ebp
1773 ; X64-LABEL: test49:
1774 ; X64: # %bb.0: # %entry
1775 ; X64-NEXT: movq %rsi, %mm0
1776 ; X64-NEXT: movq %rdi, %mm1
1777 ; X64-NEXT: pmaddwd %mm0, %mm1
1778 ; X64-NEXT: movq %mm1, %rax
1781 %0 = bitcast <1 x i64> %b to <4 x i16>
1782 %1 = bitcast <1 x i64> %a to <4 x i16>
1783 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1784 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1785 %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1786 %3 = bitcast <1 x i64> %2 to <2 x i32>
1787 %4 = bitcast <2 x i32> %3 to <1 x i64>
1788 %5 = extractelement <1 x i64> %4, i32 0
1792 declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone
; test48: llvm.x86.mmx.psubus.w must select psubusw (unsigned saturating
; word subtract); not commutative, so %a must be the destination operand.
1794 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1795 ; X86-LABEL: test48:
1796 ; X86: # %bb.0: # %entry
1797 ; X86-NEXT: pushl %ebp
1798 ; X86-NEXT: movl %esp, %ebp
1799 ; X86-NEXT: andl $-8, %esp
1800 ; X86-NEXT: subl $24, %esp
1801 ; X86-NEXT: movl 8(%ebp), %eax
1802 ; X86-NEXT: movl 12(%ebp), %ecx
1803 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1804 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1805 ; X86-NEXT: movl 16(%ebp), %eax
1806 ; X86-NEXT: movl 20(%ebp), %ecx
1807 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1808 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1809 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1810 ; X86-NEXT: psubusw {{[0-9]+}}(%esp), %mm0
1811 ; X86-NEXT: movq %mm0, (%esp)
1812 ; X86-NEXT: movl (%esp), %eax
1813 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1814 ; X86-NEXT: movl %ebp, %esp
1815 ; X86-NEXT: popl %ebp
1818 ; X64-LABEL: test48:
1819 ; X64: # %bb.0: # %entry
1820 ; X64-NEXT: movq %rsi, %mm0
1821 ; X64-NEXT: movq %rdi, %mm1
1822 ; X64-NEXT: psubusw %mm0, %mm1
1823 ; X64-NEXT: movq %mm1, %rax
1826 %0 = bitcast <1 x i64> %b to <4 x i16>
1827 %1 = bitcast <1 x i64> %a to <4 x i16>
1828 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1829 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1830 %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1831 %3 = bitcast <1 x i64> %2 to <4 x i16>
1832 %4 = bitcast <4 x i16> %3 to <1 x i64>
1833 %5 = extractelement <1 x i64> %4, i32 0
1837 declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone
; test47: llvm.x86.mmx.psubus.b must select psubusb (unsigned saturating
; byte subtract).
1839 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1840 ; X86-LABEL: test47:
1841 ; X86: # %bb.0: # %entry
1842 ; X86-NEXT: pushl %ebp
1843 ; X86-NEXT: movl %esp, %ebp
1844 ; X86-NEXT: andl $-8, %esp
1845 ; X86-NEXT: subl $24, %esp
1846 ; X86-NEXT: movl 8(%ebp), %eax
1847 ; X86-NEXT: movl 12(%ebp), %ecx
1848 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1849 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1850 ; X86-NEXT: movl 16(%ebp), %eax
1851 ; X86-NEXT: movl 20(%ebp), %ecx
1852 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1853 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1854 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1855 ; X86-NEXT: psubusb {{[0-9]+}}(%esp), %mm0
1856 ; X86-NEXT: movq %mm0, (%esp)
1857 ; X86-NEXT: movl (%esp), %eax
1858 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1859 ; X86-NEXT: movl %ebp, %esp
1860 ; X86-NEXT: popl %ebp
1863 ; X64-LABEL: test47:
1864 ; X64: # %bb.0: # %entry
1865 ; X64-NEXT: movq %rsi, %mm0
1866 ; X64-NEXT: movq %rdi, %mm1
1867 ; X64-NEXT: psubusb %mm0, %mm1
1868 ; X64-NEXT: movq %mm1, %rax
1871 %0 = bitcast <1 x i64> %b to <8 x i8>
1872 %1 = bitcast <1 x i64> %a to <8 x i8>
1873 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
1874 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
1875 %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1876 %3 = bitcast <1 x i64> %2 to <8 x i8>
1877 %4 = bitcast <8 x i8> %3 to <1 x i64>
1878 %5 = extractelement <1 x i64> %4, i32 0
1882 declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone
; test46: llvm.x86.mmx.psubs.w must select psubsw (signed saturating
; word subtract).
1884 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1885 ; X86-LABEL: test46:
1886 ; X86: # %bb.0: # %entry
1887 ; X86-NEXT: pushl %ebp
1888 ; X86-NEXT: movl %esp, %ebp
1889 ; X86-NEXT: andl $-8, %esp
1890 ; X86-NEXT: subl $24, %esp
1891 ; X86-NEXT: movl 8(%ebp), %eax
1892 ; X86-NEXT: movl 12(%ebp), %ecx
1893 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1894 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1895 ; X86-NEXT: movl 16(%ebp), %eax
1896 ; X86-NEXT: movl 20(%ebp), %ecx
1897 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1898 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1899 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1900 ; X86-NEXT: psubsw {{[0-9]+}}(%esp), %mm0
1901 ; X86-NEXT: movq %mm0, (%esp)
1902 ; X86-NEXT: movl (%esp), %eax
1903 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1904 ; X86-NEXT: movl %ebp, %esp
1905 ; X86-NEXT: popl %ebp
1908 ; X64-LABEL: test46:
1909 ; X64: # %bb.0: # %entry
1910 ; X64-NEXT: movq %rsi, %mm0
1911 ; X64-NEXT: movq %rdi, %mm1
1912 ; X64-NEXT: psubsw %mm0, %mm1
1913 ; X64-NEXT: movq %mm1, %rax
1916 %0 = bitcast <1 x i64> %b to <4 x i16>
1917 %1 = bitcast <1 x i64> %a to <4 x i16>
1918 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1919 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1920 %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1921 %3 = bitcast <1 x i64> %2 to <4 x i16>
1922 %4 = bitcast <4 x i16> %3 to <1 x i64>
1923 %5 = extractelement <1 x i64> %4, i32 0
1927 declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.psubs.b (saturating i8 subtract); expected lowering is
; a single psubsb. Autogenerated check lines -- regenerate, do not hand-edit.
1929 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1930 ; X86-LABEL: test45:
1931 ; X86: # %bb.0: # %entry
1932 ; X86-NEXT: pushl %ebp
1933 ; X86-NEXT: movl %esp, %ebp
1934 ; X86-NEXT: andl $-8, %esp
1935 ; X86-NEXT: subl $24, %esp
1936 ; X86-NEXT: movl 8(%ebp), %eax
1937 ; X86-NEXT: movl 12(%ebp), %ecx
1938 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1939 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1940 ; X86-NEXT: movl 16(%ebp), %eax
1941 ; X86-NEXT: movl 20(%ebp), %ecx
1942 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1943 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1944 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
1945 ; X86-NEXT: psubsb {{[0-9]+}}(%esp), %mm0
1946 ; X86-NEXT: movq %mm0, (%esp)
1947 ; X86-NEXT: movl (%esp), %eax
1948 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1949 ; X86-NEXT: movl %ebp, %esp
1950 ; X86-NEXT: popl %ebp
1953 ; X64-LABEL: test45:
1954 ; X64: # %bb.0: # %entry
1955 ; X64-NEXT: movq %rsi, %mm0
1956 ; X64-NEXT: movq %rdi, %mm1
1957 ; X64-NEXT: psubsb %mm0, %mm1
1958 ; X64-NEXT: movq %mm1, %rax
1961 %0 = bitcast <1 x i64> %b to <8 x i8>
1962 %1 = bitcast <1 x i64> %a to <8 x i8>
1963 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
1964 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
1965 %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1966 %3 = bitcast <1 x i64> %2 to <8 x i8>
1967 %4 = bitcast <8 x i8> %3 to <1 x i64>
1968 %5 = extractelement <1 x i64> %4, i32 0
; Exercises llvm.x86.mmx.psub.q (i64 subtract). Unlike the element-wise tests,
; operands reach the intrinsic via extractelement + scalar i64 bitcast, so the
; i686 path can load the quadwords directly from the argument area. Check
; lines are autogenerated -- regenerate, do not hand-edit.
1972 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
1973 ; X86-LABEL: test44:
1974 ; X86: # %bb.0: # %entry
1975 ; X86-NEXT: pushl %ebp
1976 ; X86-NEXT: movl %esp, %ebp
1977 ; X86-NEXT: andl $-8, %esp
1978 ; X86-NEXT: subl $8, %esp
1979 ; X86-NEXT: movq 8(%ebp), %mm0
1980 ; X86-NEXT: psubq 16(%ebp), %mm0
1981 ; X86-NEXT: movq %mm0, (%esp)
1982 ; X86-NEXT: movl (%esp), %eax
1983 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1984 ; X86-NEXT: movl %ebp, %esp
1985 ; X86-NEXT: popl %ebp
1988 ; X64-LABEL: test44:
1989 ; X64: # %bb.0: # %entry
1990 ; X64-NEXT: movq %rdi, %mm0
1991 ; X64-NEXT: movq %rsi, %mm1
1992 ; X64-NEXT: psubq %mm1, %mm0
1993 ; X64-NEXT: movq %mm0, %rax
1996 %0 = extractelement <1 x i64> %a, i32 0
1997 %mmx_var = bitcast i64 %0 to <1 x i64>
1998 %1 = extractelement <1 x i64> %b, i32 0
1999 %mmx_var1 = bitcast i64 %1 to <1 x i64>
2000 %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
2001 %3 = bitcast <1 x i64> %2 to i64
2005 declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
2007 declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.psub.d (i32 subtract); expected lowering is a single
; psubd. Autogenerated check lines -- regenerate, do not hand-edit.
2009 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2010 ; X86-LABEL: test43:
2011 ; X86: # %bb.0: # %entry
2012 ; X86-NEXT: pushl %ebp
2013 ; X86-NEXT: movl %esp, %ebp
2014 ; X86-NEXT: andl $-8, %esp
2015 ; X86-NEXT: subl $24, %esp
2016 ; X86-NEXT: movl 8(%ebp), %eax
2017 ; X86-NEXT: movl 12(%ebp), %ecx
2018 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2019 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2020 ; X86-NEXT: movl 16(%ebp), %eax
2021 ; X86-NEXT: movl 20(%ebp), %ecx
2022 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2023 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2024 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2025 ; X86-NEXT: psubd {{[0-9]+}}(%esp), %mm0
2026 ; X86-NEXT: movq %mm0, (%esp)
2027 ; X86-NEXT: movl (%esp), %eax
2028 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2029 ; X86-NEXT: movl %ebp, %esp
2030 ; X86-NEXT: popl %ebp
2033 ; X64-LABEL: test43:
2034 ; X64: # %bb.0: # %entry
2035 ; X64-NEXT: movq %rsi, %mm0
2036 ; X64-NEXT: movq %rdi, %mm1
2037 ; X64-NEXT: psubd %mm0, %mm1
2038 ; X64-NEXT: movq %mm1, %rax
2041 %0 = bitcast <1 x i64> %b to <2 x i32>
2042 %1 = bitcast <1 x i64> %a to <2 x i32>
2043 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
2044 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
2045 %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2046 %3 = bitcast <1 x i64> %2 to <2 x i32>
2047 %4 = bitcast <2 x i32> %3 to <1 x i64>
2048 %5 = extractelement <1 x i64> %4, i32 0
2052 declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.psub.w (i16 subtract); expected lowering is a single
; psubw. Autogenerated check lines -- regenerate, do not hand-edit.
2054 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2055 ; X86-LABEL: test42:
2056 ; X86: # %bb.0: # %entry
2057 ; X86-NEXT: pushl %ebp
2058 ; X86-NEXT: movl %esp, %ebp
2059 ; X86-NEXT: andl $-8, %esp
2060 ; X86-NEXT: subl $24, %esp
2061 ; X86-NEXT: movl 8(%ebp), %eax
2062 ; X86-NEXT: movl 12(%ebp), %ecx
2063 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2064 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2065 ; X86-NEXT: movl 16(%ebp), %eax
2066 ; X86-NEXT: movl 20(%ebp), %ecx
2067 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2068 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2069 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2070 ; X86-NEXT: psubw {{[0-9]+}}(%esp), %mm0
2071 ; X86-NEXT: movq %mm0, (%esp)
2072 ; X86-NEXT: movl (%esp), %eax
2073 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2074 ; X86-NEXT: movl %ebp, %esp
2075 ; X86-NEXT: popl %ebp
2078 ; X64-LABEL: test42:
2079 ; X64: # %bb.0: # %entry
2080 ; X64-NEXT: movq %rsi, %mm0
2081 ; X64-NEXT: movq %rdi, %mm1
2082 ; X64-NEXT: psubw %mm0, %mm1
2083 ; X64-NEXT: movq %mm1, %rax
2086 %0 = bitcast <1 x i64> %b to <4 x i16>
2087 %1 = bitcast <1 x i64> %a to <4 x i16>
2088 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2089 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2090 %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2091 %3 = bitcast <1 x i64> %2 to <4 x i16>
2092 %4 = bitcast <4 x i16> %3 to <1 x i64>
2093 %5 = extractelement <1 x i64> %4, i32 0
2097 declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.psub.b (i8 subtract); expected lowering is a single
; psubb. Autogenerated check lines -- regenerate, do not hand-edit.
2099 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2100 ; X86-LABEL: test41:
2101 ; X86: # %bb.0: # %entry
2102 ; X86-NEXT: pushl %ebp
2103 ; X86-NEXT: movl %esp, %ebp
2104 ; X86-NEXT: andl $-8, %esp
2105 ; X86-NEXT: subl $24, %esp
2106 ; X86-NEXT: movl 8(%ebp), %eax
2107 ; X86-NEXT: movl 12(%ebp), %ecx
2108 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2109 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2110 ; X86-NEXT: movl 16(%ebp), %eax
2111 ; X86-NEXT: movl 20(%ebp), %ecx
2112 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2113 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2114 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2115 ; X86-NEXT: psubb {{[0-9]+}}(%esp), %mm0
2116 ; X86-NEXT: movq %mm0, (%esp)
2117 ; X86-NEXT: movl (%esp), %eax
2118 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2119 ; X86-NEXT: movl %ebp, %esp
2120 ; X86-NEXT: popl %ebp
2123 ; X64-LABEL: test41:
2124 ; X64: # %bb.0: # %entry
2125 ; X64-NEXT: movq %rsi, %mm0
2126 ; X64-NEXT: movq %rdi, %mm1
2127 ; X64-NEXT: psubb %mm0, %mm1
2128 ; X64-NEXT: movq %mm1, %rax
2131 %0 = bitcast <1 x i64> %b to <8 x i8>
2132 %1 = bitcast <1 x i64> %a to <8 x i8>
2133 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2134 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2135 %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2136 %3 = bitcast <1 x i64> %2 to <8 x i8>
2137 %4 = bitcast <8 x i8> %3 to <1 x i64>
2138 %5 = extractelement <1 x i64> %4, i32 0
2142 declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.paddus.w (unsigned saturating i16 add); expected
; lowering is a single paddusw. Autogenerated check lines -- do not hand-edit.
2144 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2145 ; X86-LABEL: test40:
2146 ; X86: # %bb.0: # %entry
2147 ; X86-NEXT: pushl %ebp
2148 ; X86-NEXT: movl %esp, %ebp
2149 ; X86-NEXT: andl $-8, %esp
2150 ; X86-NEXT: subl $24, %esp
2151 ; X86-NEXT: movl 8(%ebp), %eax
2152 ; X86-NEXT: movl 12(%ebp), %ecx
2153 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2154 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2155 ; X86-NEXT: movl 16(%ebp), %eax
2156 ; X86-NEXT: movl 20(%ebp), %ecx
2157 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2158 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2159 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2160 ; X86-NEXT: paddusw {{[0-9]+}}(%esp), %mm0
2161 ; X86-NEXT: movq %mm0, (%esp)
2162 ; X86-NEXT: movl (%esp), %eax
2163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2164 ; X86-NEXT: movl %ebp, %esp
2165 ; X86-NEXT: popl %ebp
2168 ; X64-LABEL: test40:
2169 ; X64: # %bb.0: # %entry
2170 ; X64-NEXT: movq %rsi, %mm0
2171 ; X64-NEXT: movq %rdi, %mm1
2172 ; X64-NEXT: paddusw %mm0, %mm1
2173 ; X64-NEXT: movq %mm1, %rax
2176 %0 = bitcast <1 x i64> %b to <4 x i16>
2177 %1 = bitcast <1 x i64> %a to <4 x i16>
2178 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2179 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2180 %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2181 %3 = bitcast <1 x i64> %2 to <4 x i16>
2182 %4 = bitcast <4 x i16> %3 to <1 x i64>
2183 %5 = extractelement <1 x i64> %4, i32 0
2187 declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.paddus.b (unsigned saturating i8 add); expected
; lowering is a single paddusb. Autogenerated check lines -- do not hand-edit.
2189 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2190 ; X86-LABEL: test39:
2191 ; X86: # %bb.0: # %entry
2192 ; X86-NEXT: pushl %ebp
2193 ; X86-NEXT: movl %esp, %ebp
2194 ; X86-NEXT: andl $-8, %esp
2195 ; X86-NEXT: subl $24, %esp
2196 ; X86-NEXT: movl 8(%ebp), %eax
2197 ; X86-NEXT: movl 12(%ebp), %ecx
2198 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2199 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2200 ; X86-NEXT: movl 16(%ebp), %eax
2201 ; X86-NEXT: movl 20(%ebp), %ecx
2202 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2203 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2204 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2205 ; X86-NEXT: paddusb {{[0-9]+}}(%esp), %mm0
2206 ; X86-NEXT: movq %mm0, (%esp)
2207 ; X86-NEXT: movl (%esp), %eax
2208 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2209 ; X86-NEXT: movl %ebp, %esp
2210 ; X86-NEXT: popl %ebp
2213 ; X64-LABEL: test39:
2214 ; X64: # %bb.0: # %entry
2215 ; X64-NEXT: movq %rsi, %mm0
2216 ; X64-NEXT: movq %rdi, %mm1
2217 ; X64-NEXT: paddusb %mm0, %mm1
2218 ; X64-NEXT: movq %mm1, %rax
2221 %0 = bitcast <1 x i64> %b to <8 x i8>
2222 %1 = bitcast <1 x i64> %a to <8 x i8>
2223 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2224 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2225 %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2226 %3 = bitcast <1 x i64> %2 to <8 x i8>
2227 %4 = bitcast <8 x i8> %3 to <1 x i64>
2228 %5 = extractelement <1 x i64> %4, i32 0
2232 declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padds.w (signed saturating i16 add); expected
; lowering is a single paddsw. Autogenerated check lines -- do not hand-edit.
2234 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2235 ; X86-LABEL: test38:
2236 ; X86: # %bb.0: # %entry
2237 ; X86-NEXT: pushl %ebp
2238 ; X86-NEXT: movl %esp, %ebp
2239 ; X86-NEXT: andl $-8, %esp
2240 ; X86-NEXT: subl $24, %esp
2241 ; X86-NEXT: movl 8(%ebp), %eax
2242 ; X86-NEXT: movl 12(%ebp), %ecx
2243 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2244 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2245 ; X86-NEXT: movl 16(%ebp), %eax
2246 ; X86-NEXT: movl 20(%ebp), %ecx
2247 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2248 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2249 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2250 ; X86-NEXT: paddsw {{[0-9]+}}(%esp), %mm0
2251 ; X86-NEXT: movq %mm0, (%esp)
2252 ; X86-NEXT: movl (%esp), %eax
2253 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2254 ; X86-NEXT: movl %ebp, %esp
2255 ; X86-NEXT: popl %ebp
2258 ; X64-LABEL: test38:
2259 ; X64: # %bb.0: # %entry
2260 ; X64-NEXT: movq %rsi, %mm0
2261 ; X64-NEXT: movq %rdi, %mm1
2262 ; X64-NEXT: paddsw %mm0, %mm1
2263 ; X64-NEXT: movq %mm1, %rax
2266 %0 = bitcast <1 x i64> %b to <4 x i16>
2267 %1 = bitcast <1 x i64> %a to <4 x i16>
2268 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2269 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2270 %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2271 %3 = bitcast <1 x i64> %2 to <4 x i16>
2272 %4 = bitcast <4 x i16> %3 to <1 x i64>
2273 %5 = extractelement <1 x i64> %4, i32 0
2277 declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padds.b (signed saturating i8 add); expected
; lowering is a single paddsb. Autogenerated check lines -- do not hand-edit.
2279 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2280 ; X86-LABEL: test37:
2281 ; X86: # %bb.0: # %entry
2282 ; X86-NEXT: pushl %ebp
2283 ; X86-NEXT: movl %esp, %ebp
2284 ; X86-NEXT: andl $-8, %esp
2285 ; X86-NEXT: subl $24, %esp
2286 ; X86-NEXT: movl 8(%ebp), %eax
2287 ; X86-NEXT: movl 12(%ebp), %ecx
2288 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2289 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2290 ; X86-NEXT: movl 16(%ebp), %eax
2291 ; X86-NEXT: movl 20(%ebp), %ecx
2292 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2293 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2294 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2295 ; X86-NEXT: paddsb {{[0-9]+}}(%esp), %mm0
2296 ; X86-NEXT: movq %mm0, (%esp)
2297 ; X86-NEXT: movl (%esp), %eax
2298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2299 ; X86-NEXT: movl %ebp, %esp
2300 ; X86-NEXT: popl %ebp
2303 ; X64-LABEL: test37:
2304 ; X64: # %bb.0: # %entry
2305 ; X64-NEXT: movq %rsi, %mm0
2306 ; X64-NEXT: movq %rdi, %mm1
2307 ; X64-NEXT: paddsb %mm0, %mm1
2308 ; X64-NEXT: movq %mm1, %rax
2311 %0 = bitcast <1 x i64> %b to <8 x i8>
2312 %1 = bitcast <1 x i64> %a to <8 x i8>
2313 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2314 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2315 %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2316 %3 = bitcast <1 x i64> %2 to <8 x i8>
2317 %4 = bitcast <8 x i8> %3 to <1 x i64>
2318 %5 = extractelement <1 x i64> %4, i32 0
2322 declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padd.q (i64 add). Like test44, operands reach the
; intrinsic through extractelement + scalar i64 bitcast, so the i686 path
; loads the quadwords straight from the argument area. Autogenerated check
; lines -- regenerate, do not hand-edit.
2324 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2325 ; X86-LABEL: test36:
2326 ; X86: # %bb.0: # %entry
2327 ; X86-NEXT: pushl %ebp
2328 ; X86-NEXT: movl %esp, %ebp
2329 ; X86-NEXT: andl $-8, %esp
2330 ; X86-NEXT: subl $8, %esp
2331 ; X86-NEXT: movq 8(%ebp), %mm0
2332 ; X86-NEXT: paddq 16(%ebp), %mm0
2333 ; X86-NEXT: movq %mm0, (%esp)
2334 ; X86-NEXT: movl (%esp), %eax
2335 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2336 ; X86-NEXT: movl %ebp, %esp
2337 ; X86-NEXT: popl %ebp
2340 ; X64-LABEL: test36:
2341 ; X64: # %bb.0: # %entry
2342 ; X64-NEXT: movq %rdi, %mm0
2343 ; X64-NEXT: movq %rsi, %mm1
2344 ; X64-NEXT: paddq %mm0, %mm1
2345 ; X64-NEXT: movq %mm1, %rax
2348 %0 = extractelement <1 x i64> %a, i32 0
2349 %mmx_var = bitcast i64 %0 to <1 x i64>
2350 %1 = extractelement <1 x i64> %b, i32 0
2351 %mmx_var1 = bitcast i64 %1 to <1 x i64>
2352 %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
2353 %3 = bitcast <1 x i64> %2 to i64
2357 declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padd.d (i32 add); expected lowering is a single
; paddd. Autogenerated check lines -- regenerate, do not hand-edit.
2359 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2360 ; X86-LABEL: test35:
2361 ; X86: # %bb.0: # %entry
2362 ; X86-NEXT: pushl %ebp
2363 ; X86-NEXT: movl %esp, %ebp
2364 ; X86-NEXT: andl $-8, %esp
2365 ; X86-NEXT: subl $24, %esp
2366 ; X86-NEXT: movl 8(%ebp), %eax
2367 ; X86-NEXT: movl 12(%ebp), %ecx
2368 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2369 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2370 ; X86-NEXT: movl 16(%ebp), %eax
2371 ; X86-NEXT: movl 20(%ebp), %ecx
2372 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2373 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2374 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2375 ; X86-NEXT: paddd {{[0-9]+}}(%esp), %mm0
2376 ; X86-NEXT: movq %mm0, (%esp)
2377 ; X86-NEXT: movl (%esp), %eax
2378 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2379 ; X86-NEXT: movl %ebp, %esp
2380 ; X86-NEXT: popl %ebp
2383 ; X64-LABEL: test35:
2384 ; X64: # %bb.0: # %entry
2385 ; X64-NEXT: movq %rsi, %mm0
2386 ; X64-NEXT: movq %rdi, %mm1
2387 ; X64-NEXT: paddd %mm0, %mm1
2388 ; X64-NEXT: movq %mm1, %rax
2391 %0 = bitcast <1 x i64> %b to <2 x i32>
2392 %1 = bitcast <1 x i64> %a to <2 x i32>
2393 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
2394 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
2395 %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2396 %3 = bitcast <1 x i64> %2 to <2 x i32>
2397 %4 = bitcast <2 x i32> %3 to <1 x i64>
2398 %5 = extractelement <1 x i64> %4, i32 0
2402 declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padd.w (i16 add); expected lowering is a single
; paddw. Autogenerated check lines -- regenerate, do not hand-edit.
2404 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2405 ; X86-LABEL: test34:
2406 ; X86: # %bb.0: # %entry
2407 ; X86-NEXT: pushl %ebp
2408 ; X86-NEXT: movl %esp, %ebp
2409 ; X86-NEXT: andl $-8, %esp
2410 ; X86-NEXT: subl $24, %esp
2411 ; X86-NEXT: movl 8(%ebp), %eax
2412 ; X86-NEXT: movl 12(%ebp), %ecx
2413 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2414 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2415 ; X86-NEXT: movl 16(%ebp), %eax
2416 ; X86-NEXT: movl 20(%ebp), %ecx
2417 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2418 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2419 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2420 ; X86-NEXT: paddw {{[0-9]+}}(%esp), %mm0
2421 ; X86-NEXT: movq %mm0, (%esp)
2422 ; X86-NEXT: movl (%esp), %eax
2423 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2424 ; X86-NEXT: movl %ebp, %esp
2425 ; X86-NEXT: popl %ebp
2428 ; X64-LABEL: test34:
2429 ; X64: # %bb.0: # %entry
2430 ; X64-NEXT: movq %rsi, %mm0
2431 ; X64-NEXT: movq %rdi, %mm1
2432 ; X64-NEXT: paddw %mm0, %mm1
2433 ; X64-NEXT: movq %mm1, %rax
2436 %0 = bitcast <1 x i64> %b to <4 x i16>
2437 %1 = bitcast <1 x i64> %a to <4 x i16>
2438 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2439 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2440 %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2441 %3 = bitcast <1 x i64> %2 to <4 x i16>
2442 %4 = bitcast <4 x i16> %3 to <1 x i64>
2443 %5 = extractelement <1 x i64> %4, i32 0
2447 declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.padd.b (i8 add); expected lowering is a single
; paddb. Autogenerated check lines -- regenerate, do not hand-edit.
2449 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2450 ; X86-LABEL: test33:
2451 ; X86: # %bb.0: # %entry
2452 ; X86-NEXT: pushl %ebp
2453 ; X86-NEXT: movl %esp, %ebp
2454 ; X86-NEXT: andl $-8, %esp
2455 ; X86-NEXT: subl $24, %esp
2456 ; X86-NEXT: movl 8(%ebp), %eax
2457 ; X86-NEXT: movl 12(%ebp), %ecx
2458 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2459 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2460 ; X86-NEXT: movl 16(%ebp), %eax
2461 ; X86-NEXT: movl 20(%ebp), %ecx
2462 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2463 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2464 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2465 ; X86-NEXT: paddb {{[0-9]+}}(%esp), %mm0
2466 ; X86-NEXT: movq %mm0, (%esp)
2467 ; X86-NEXT: movl (%esp), %eax
2468 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2469 ; X86-NEXT: movl %ebp, %esp
2470 ; X86-NEXT: popl %ebp
2473 ; X64-LABEL: test33:
2474 ; X64: # %bb.0: # %entry
2475 ; X64-NEXT: movq %rsi, %mm0
2476 ; X64-NEXT: movq %rdi, %mm1
2477 ; X64-NEXT: paddb %mm0, %mm1
2478 ; X64-NEXT: movq %mm1, %rax
2481 %0 = bitcast <1 x i64> %b to <8 x i8>
2482 %1 = bitcast <1 x i64> %a to <8 x i8>
2483 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2484 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2485 %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2486 %3 = bitcast <1 x i64> %2 to <8 x i8>
2487 %4 = bitcast <8 x i8> %3 to <1 x i64>
2488 %5 = extractelement <1 x i64> %4, i32 0
2492 declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.psad.bw (sum of absolute byte differences); expected
; lowering is a single psadbw. The result is already scalar i64, so no
; round-trip vector bitcasts follow the call. Autogenerated check lines --
; regenerate, do not hand-edit.
2494 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2495 ; X86-LABEL: test32:
2496 ; X86: # %bb.0: # %entry
2497 ; X86-NEXT: pushl %ebp
2498 ; X86-NEXT: movl %esp, %ebp
2499 ; X86-NEXT: andl $-8, %esp
2500 ; X86-NEXT: subl $24, %esp
2501 ; X86-NEXT: movl 8(%ebp), %eax
2502 ; X86-NEXT: movl 12(%ebp), %ecx
2503 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2504 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2505 ; X86-NEXT: movl 16(%ebp), %eax
2506 ; X86-NEXT: movl 20(%ebp), %ecx
2507 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2508 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2509 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2510 ; X86-NEXT: psadbw {{[0-9]+}}(%esp), %mm0
2511 ; X86-NEXT: movq %mm0, (%esp)
2512 ; X86-NEXT: movl (%esp), %eax
2513 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2514 ; X86-NEXT: movl %ebp, %esp
2515 ; X86-NEXT: popl %ebp
2518 ; X64-LABEL: test32:
2519 ; X64: # %bb.0: # %entry
2520 ; X64-NEXT: movq %rsi, %mm0
2521 ; X64-NEXT: movq %rdi, %mm1
2522 ; X64-NEXT: psadbw %mm0, %mm1
2523 ; X64-NEXT: movq %mm1, %rax
2526 %0 = bitcast <1 x i64> %b to <8 x i8>
2527 %1 = bitcast <1 x i64> %a to <8 x i8>
2528 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2529 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2530 %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2531 %3 = bitcast <1 x i64> %2 to i64
2535 declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pmins.w (signed i16 minimum); expected lowering is a
; single pminsw. Autogenerated check lines -- regenerate, do not hand-edit.
2537 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2538 ; X86-LABEL: test31:
2539 ; X86: # %bb.0: # %entry
2540 ; X86-NEXT: pushl %ebp
2541 ; X86-NEXT: movl %esp, %ebp
2542 ; X86-NEXT: andl $-8, %esp
2543 ; X86-NEXT: subl $24, %esp
2544 ; X86-NEXT: movl 8(%ebp), %eax
2545 ; X86-NEXT: movl 12(%ebp), %ecx
2546 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2547 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2548 ; X86-NEXT: movl 16(%ebp), %eax
2549 ; X86-NEXT: movl 20(%ebp), %ecx
2550 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2551 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2552 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2553 ; X86-NEXT: pminsw {{[0-9]+}}(%esp), %mm0
2554 ; X86-NEXT: movq %mm0, (%esp)
2555 ; X86-NEXT: movl (%esp), %eax
2556 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2557 ; X86-NEXT: movl %ebp, %esp
2558 ; X86-NEXT: popl %ebp
2561 ; X64-LABEL: test31:
2562 ; X64: # %bb.0: # %entry
2563 ; X64-NEXT: movq %rsi, %mm0
2564 ; X64-NEXT: movq %rdi, %mm1
2565 ; X64-NEXT: pminsw %mm0, %mm1
2566 ; X64-NEXT: movq %mm1, %rax
2569 %0 = bitcast <1 x i64> %b to <4 x i16>
2570 %1 = bitcast <1 x i64> %a to <4 x i16>
2571 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2572 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2573 %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2574 %3 = bitcast <1 x i64> %2 to <4 x i16>
2575 %4 = bitcast <4 x i16> %3 to <1 x i64>
2576 %5 = extractelement <1 x i64> %4, i32 0
2580 declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pminu.b (unsigned i8 minimum); expected lowering is a
; single pminub. Autogenerated check lines -- regenerate, do not hand-edit.
2582 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2583 ; X86-LABEL: test30:
2584 ; X86: # %bb.0: # %entry
2585 ; X86-NEXT: pushl %ebp
2586 ; X86-NEXT: movl %esp, %ebp
2587 ; X86-NEXT: andl $-8, %esp
2588 ; X86-NEXT: subl $24, %esp
2589 ; X86-NEXT: movl 8(%ebp), %eax
2590 ; X86-NEXT: movl 12(%ebp), %ecx
2591 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2592 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2593 ; X86-NEXT: movl 16(%ebp), %eax
2594 ; X86-NEXT: movl 20(%ebp), %ecx
2595 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2596 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2597 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2598 ; X86-NEXT: pminub {{[0-9]+}}(%esp), %mm0
2599 ; X86-NEXT: movq %mm0, (%esp)
2600 ; X86-NEXT: movl (%esp), %eax
2601 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2602 ; X86-NEXT: movl %ebp, %esp
2603 ; X86-NEXT: popl %ebp
2606 ; X64-LABEL: test30:
2607 ; X64: # %bb.0: # %entry
2608 ; X64-NEXT: movq %rsi, %mm0
2609 ; X64-NEXT: movq %rdi, %mm1
2610 ; X64-NEXT: pminub %mm0, %mm1
2611 ; X64-NEXT: movq %mm1, %rax
2614 %0 = bitcast <1 x i64> %b to <8 x i8>
2615 %1 = bitcast <1 x i64> %a to <8 x i8>
2616 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2617 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2618 %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2619 %3 = bitcast <1 x i64> %2 to <8 x i8>
2620 %4 = bitcast <8 x i8> %3 to <1 x i64>
2621 %5 = extractelement <1 x i64> %4, i32 0
2625 declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pmaxs.w (signed i16 maximum); expected lowering is a
; single pmaxsw. Autogenerated check lines -- regenerate, do not hand-edit.
2627 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2628 ; X86-LABEL: test29:
2629 ; X86: # %bb.0: # %entry
2630 ; X86-NEXT: pushl %ebp
2631 ; X86-NEXT: movl %esp, %ebp
2632 ; X86-NEXT: andl $-8, %esp
2633 ; X86-NEXT: subl $24, %esp
2634 ; X86-NEXT: movl 8(%ebp), %eax
2635 ; X86-NEXT: movl 12(%ebp), %ecx
2636 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2637 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2638 ; X86-NEXT: movl 16(%ebp), %eax
2639 ; X86-NEXT: movl 20(%ebp), %ecx
2640 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2641 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2642 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2643 ; X86-NEXT: pmaxsw {{[0-9]+}}(%esp), %mm0
2644 ; X86-NEXT: movq %mm0, (%esp)
2645 ; X86-NEXT: movl (%esp), %eax
2646 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2647 ; X86-NEXT: movl %ebp, %esp
2648 ; X86-NEXT: popl %ebp
2651 ; X64-LABEL: test29:
2652 ; X64: # %bb.0: # %entry
2653 ; X64-NEXT: movq %rsi, %mm0
2654 ; X64-NEXT: movq %rdi, %mm1
2655 ; X64-NEXT: pmaxsw %mm0, %mm1
2656 ; X64-NEXT: movq %mm1, %rax
2659 %0 = bitcast <1 x i64> %b to <4 x i16>
2660 %1 = bitcast <1 x i64> %a to <4 x i16>
2661 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2662 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2663 %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2664 %3 = bitcast <1 x i64> %2 to <4 x i16>
2665 %4 = bitcast <4 x i16> %3 to <1 x i64>
2666 %5 = extractelement <1 x i64> %4, i32 0
2670 declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pmaxu.b (unsigned i8 maximum); expected lowering is a
; single pmaxub. Autogenerated check lines -- regenerate, do not hand-edit.
2672 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2673 ; X86-LABEL: test28:
2674 ; X86: # %bb.0: # %entry
2675 ; X86-NEXT: pushl %ebp
2676 ; X86-NEXT: movl %esp, %ebp
2677 ; X86-NEXT: andl $-8, %esp
2678 ; X86-NEXT: subl $24, %esp
2679 ; X86-NEXT: movl 8(%ebp), %eax
2680 ; X86-NEXT: movl 12(%ebp), %ecx
2681 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2682 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2683 ; X86-NEXT: movl 16(%ebp), %eax
2684 ; X86-NEXT: movl 20(%ebp), %ecx
2685 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2686 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2687 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2688 ; X86-NEXT: pmaxub {{[0-9]+}}(%esp), %mm0
2689 ; X86-NEXT: movq %mm0, (%esp)
2690 ; X86-NEXT: movl (%esp), %eax
2691 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2692 ; X86-NEXT: movl %ebp, %esp
2693 ; X86-NEXT: popl %ebp
2696 ; X64-LABEL: test28:
2697 ; X64: # %bb.0: # %entry
2698 ; X64-NEXT: movq %rsi, %mm0
2699 ; X64-NEXT: movq %rdi, %mm1
2700 ; X64-NEXT: pmaxub %mm0, %mm1
2701 ; X64-NEXT: movq %mm1, %rax
2704 %0 = bitcast <1 x i64> %b to <8 x i8>
2705 %1 = bitcast <1 x i64> %a to <8 x i8>
2706 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2707 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2708 %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2709 %3 = bitcast <1 x i64> %2 to <8 x i8>
2710 %4 = bitcast <8 x i8> %3 to <1 x i64>
2711 %5 = extractelement <1 x i64> %4, i32 0
2715 declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pavg.w (unsigned i16 rounded average); expected
; lowering is a single pavgw. Autogenerated check lines -- do not hand-edit.
2717 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2718 ; X86-LABEL: test27:
2719 ; X86: # %bb.0: # %entry
2720 ; X86-NEXT: pushl %ebp
2721 ; X86-NEXT: movl %esp, %ebp
2722 ; X86-NEXT: andl $-8, %esp
2723 ; X86-NEXT: subl $24, %esp
2724 ; X86-NEXT: movl 8(%ebp), %eax
2725 ; X86-NEXT: movl 12(%ebp), %ecx
2726 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2727 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2728 ; X86-NEXT: movl 16(%ebp), %eax
2729 ; X86-NEXT: movl 20(%ebp), %ecx
2730 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2731 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2732 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2733 ; X86-NEXT: pavgw {{[0-9]+}}(%esp), %mm0
2734 ; X86-NEXT: movq %mm0, (%esp)
2735 ; X86-NEXT: movl (%esp), %eax
2736 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2737 ; X86-NEXT: movl %ebp, %esp
2738 ; X86-NEXT: popl %ebp
2741 ; X64-LABEL: test27:
2742 ; X64: # %bb.0: # %entry
2743 ; X64-NEXT: movq %rsi, %mm0
2744 ; X64-NEXT: movq %rdi, %mm1
2745 ; X64-NEXT: pavgw %mm0, %mm1
2746 ; X64-NEXT: movq %mm1, %rax
2749 %0 = bitcast <1 x i64> %b to <4 x i16>
2750 %1 = bitcast <1 x i64> %a to <4 x i16>
2751 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2752 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2753 %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2754 %3 = bitcast <1 x i64> %2 to <4 x i16>
2755 %4 = bitcast <4 x i16> %3 to <1 x i64>
2756 %5 = extractelement <1 x i64> %4, i32 0
2760 declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pavg.b (unsigned i8 rounded average); expected
; lowering is a single pavgb. Autogenerated check lines -- do not hand-edit.
2762 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2763 ; X86-LABEL: test26:
2764 ; X86: # %bb.0: # %entry
2765 ; X86-NEXT: pushl %ebp
2766 ; X86-NEXT: movl %esp, %ebp
2767 ; X86-NEXT: andl $-8, %esp
2768 ; X86-NEXT: subl $24, %esp
2769 ; X86-NEXT: movl 8(%ebp), %eax
2770 ; X86-NEXT: movl 12(%ebp), %ecx
2771 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2772 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2773 ; X86-NEXT: movl 16(%ebp), %eax
2774 ; X86-NEXT: movl 20(%ebp), %ecx
2775 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2776 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2777 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2778 ; X86-NEXT: pavgb {{[0-9]+}}(%esp), %mm0
2779 ; X86-NEXT: movq %mm0, (%esp)
2780 ; X86-NEXT: movl (%esp), %eax
2781 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2782 ; X86-NEXT: movl %ebp, %esp
2783 ; X86-NEXT: popl %ebp
2786 ; X64-LABEL: test26:
2787 ; X64: # %bb.0: # %entry
2788 ; X64-NEXT: movq %rsi, %mm0
2789 ; X64-NEXT: movq %rdi, %mm1
2790 ; X64-NEXT: pavgb %mm0, %mm1
2791 ; X64-NEXT: movq %mm1, %rax
2794 %0 = bitcast <1 x i64> %b to <8 x i8>
2795 %1 = bitcast <1 x i64> %a to <8 x i8>
2796 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2797 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2798 %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2799 %3 = bitcast <1 x i64> %2 to <8 x i8>
2800 %4 = bitcast <8 x i8> %3 to <1 x i64>
2801 %5 = extractelement <1 x i64> %4, i32 0
2805 declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind
; Exercises llvm.x86.mmx.movnt.dq (non-temporal 64-bit store); expected
; lowering is a single movntq through the destination pointer. Void return,
; so no stack spill of a result is needed on i686. Autogenerated check
; lines -- regenerate, do not hand-edit.
2807 define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp {
2808 ; X86-LABEL: test25:
2809 ; X86: # %bb.0: # %entry
2810 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2811 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2812 ; X86-NEXT: movntq %mm0, (%eax)
2815 ; X64-LABEL: test25:
2816 ; X64: # %bb.0: # %entry
2817 ; X64-NEXT: movq %rsi, %mm0
2818 ; X64-NEXT: movntq %mm0, (%rdi)
2821 %0 = extractelement <1 x i64> %a, i32 0
2822 %mmx_var.i = bitcast i64 %0 to <1 x i64>
2823 tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind
2827 declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone
; Exercises llvm.x86.mmx.pmovmskb (byte sign-bit mask to i32); expected
; lowering is a single pmovmskb with the mask returned in eax. Autogenerated
; check lines -- regenerate, do not hand-edit.
2829 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
2830 ; X86-LABEL: test24:
2831 ; X86: # %bb.0: # %entry
2832 ; X86-NEXT: pushl %ebp
2833 ; X86-NEXT: movl %esp, %ebp
2834 ; X86-NEXT: andl $-8, %esp
2835 ; X86-NEXT: subl $8, %esp
2836 ; X86-NEXT: movl 8(%ebp), %eax
2837 ; X86-NEXT: movl 12(%ebp), %ecx
2838 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2839 ; X86-NEXT: movl %eax, (%esp)
2840 ; X86-NEXT: movq (%esp), %mm0
2841 ; X86-NEXT: pmovmskb %mm0, %eax
2842 ; X86-NEXT: movl %ebp, %esp
2843 ; X86-NEXT: popl %ebp
2846 ; X64-LABEL: test24:
2847 ; X64: # %bb.0: # %entry
2848 ; X64-NEXT: movq %rdi, %mm0
2849 ; X64-NEXT: pmovmskb %mm0, %eax
2852 %0 = bitcast <1 x i64> %a to <8 x i8>
2853 %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64>
2854 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind
2858 declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind
; Exercises llvm.x86.mmx.maskmovq (byte-masked store of %d under mask %n to
; %p). The destination pointer is implicit in edi/rdi, which is why the i686
; path saves/restores edi and the x86-64 path copies rdx into rdi before the
; instruction. Autogenerated check lines -- regenerate, do not hand-edit.
2860 define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp {
2861 ; X86-LABEL: test23:
2862 ; X86: # %bb.0: # %entry
2863 ; X86-NEXT: pushl %ebp
2864 ; X86-NEXT: movl %esp, %ebp
2865 ; X86-NEXT: pushl %edi
2866 ; X86-NEXT: andl $-8, %esp
2867 ; X86-NEXT: subl $24, %esp
2868 ; X86-NEXT: movl 16(%ebp), %eax
2869 ; X86-NEXT: movl 20(%ebp), %ecx
2870 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2871 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2872 ; X86-NEXT: movl 8(%ebp), %eax
2873 ; X86-NEXT: movl 12(%ebp), %ecx
2874 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2875 ; X86-NEXT: movl %eax, (%esp)
2876 ; X86-NEXT: movl 24(%ebp), %edi
2877 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2878 ; X86-NEXT: movq (%esp), %mm1
2879 ; X86-NEXT: maskmovq %mm0, %mm1
2880 ; X86-NEXT: leal -4(%ebp), %esp
2881 ; X86-NEXT: popl %edi
2882 ; X86-NEXT: popl %ebp
2885 ; X64-LABEL: test23:
2886 ; X64: # %bb.0: # %entry
2887 ; X64-NEXT: movq %rsi, %mm0
2888 ; X64-NEXT: movq %rdi, %mm1
2889 ; X64-NEXT: movq %rdx, %rdi
2890 ; X64-NEXT: maskmovq %mm0, %mm1
2893 %0 = bitcast <1 x i64> %n to <8 x i8>
2894 %1 = bitcast <1 x i64> %d to <8 x i8>
2895 %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2896 %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2897 tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind
; MMX unsigned multiply-high intrinsic; lowered to `pmulhuw` (see CHECK lines of @test22 below).
2901 declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone
2903 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
2904 ; X86-LABEL: test22:
2905 ; X86: # %bb.0: # %entry
2906 ; X86-NEXT: pushl %ebp
2907 ; X86-NEXT: movl %esp, %ebp
2908 ; X86-NEXT: andl $-8, %esp
2909 ; X86-NEXT: subl $24, %esp
2910 ; X86-NEXT: movl 8(%ebp), %eax
2911 ; X86-NEXT: movl 12(%ebp), %ecx
2912 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2913 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2914 ; X86-NEXT: movl 16(%ebp), %eax
2915 ; X86-NEXT: movl 20(%ebp), %ecx
2916 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2917 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2918 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
2919 ; X86-NEXT: pmulhuw {{[0-9]+}}(%esp), %mm0
2920 ; X86-NEXT: movq %mm0, (%esp)
2921 ; X86-NEXT: movl (%esp), %eax
2922 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2923 ; X86-NEXT: movl %ebp, %esp
2924 ; X86-NEXT: popl %ebp
2927 ; X64-LABEL: test22:
2928 ; X64: # %bb.0: # %entry
2929 ; X64-NEXT: movq %rsi, %mm0
2930 ; X64-NEXT: movq %rdi, %mm1
2931 ; X64-NEXT: pmulhuw %mm0, %mm1
2932 ; X64-NEXT: movq %mm1, %rax
2935 %0 = bitcast <1 x i64> %b to <4 x i16>
2936 %1 = bitcast <1 x i64> %a to <4 x i16>
2937 %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2938 %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2939 %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2940 %3 = bitcast <1 x i64> %2 to <4 x i16>
2941 %4 = bitcast <4 x i16> %3 to <1 x i64>
2942 %5 = extractelement <1 x i64> %4, i32 0
; MMX word-shuffle intrinsic (imm control byte); lowered to `pshufw` (see CHECK lines of @test21/@test21_2 below).
2946 declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone
2948 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
2949 ; X86-LABEL: test21:
2950 ; X86: # %bb.0: # %entry
2951 ; X86-NEXT: pushl %ebp
2952 ; X86-NEXT: movl %esp, %ebp
2953 ; X86-NEXT: andl $-8, %esp
2954 ; X86-NEXT: subl $16, %esp
2955 ; X86-NEXT: movl 8(%ebp), %eax
2956 ; X86-NEXT: movl 12(%ebp), %ecx
2957 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2958 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
2959 ; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0]
2960 ; X86-NEXT: movq %mm0, (%esp)
2961 ; X86-NEXT: movl (%esp), %eax
2962 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2963 ; X86-NEXT: movl %ebp, %esp
2964 ; X86-NEXT: popl %ebp
2967 ; X64-LABEL: test21:
2968 ; X64: # %bb.0: # %entry
2969 ; X64-NEXT: movq %rdi, %mm0
2970 ; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
2971 ; X64-NEXT: movq %mm0, %rax
2974 %0 = bitcast <1 x i64> %a to <4 x i16>
2975 %1 = bitcast <4 x i16> %0 to <1 x i64>
2976 %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
2977 %3 = bitcast <1 x i64> %2 to <4 x i16>
2978 %4 = bitcast <4 x i16> %3 to <1 x i64>
2979 %5 = extractelement <1 x i64> %4, i32 0
2983 define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
2984 ; X86-LABEL: test21_2:
2985 ; X86: # %bb.0: # %entry
2986 ; X86-NEXT: pushl %ebp
2987 ; X86-NEXT: movl %esp, %ebp
2988 ; X86-NEXT: andl $-8, %esp
2989 ; X86-NEXT: subl $8, %esp
2990 ; X86-NEXT: movl 8(%ebp), %eax
2991 ; X86-NEXT: movl 12(%ebp), %ecx
2992 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2993 ; X86-NEXT: movl %eax, (%esp)
2994 ; X86-NEXT: pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0]
2995 ; X86-NEXT: movd %mm0, %eax
2996 ; X86-NEXT: movl %ebp, %esp
2997 ; X86-NEXT: popl %ebp
3000 ; X64-LABEL: test21_2:
3001 ; X64: # %bb.0: # %entry
3002 ; X64-NEXT: movq %rdi, %mm0
3003 ; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
3004 ; X64-NEXT: movd %mm0, %eax
3007 %0 = bitcast <1 x i64> %a to <4 x i16>
3008 %1 = bitcast <4 x i16> %0 to <1 x i64>
3009 %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
3010 %3 = bitcast <1 x i64> %2 to <4 x i16>
3011 %4 = bitcast <4 x i16> %3 to <2 x i32>
3012 %5 = extractelement <2 x i32> %4, i32 0
; MMX unsigned 32x32->64 multiply intrinsic; lowered to `pmuludq` (see CHECK lines of @test20 below).
3016 declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone
3018 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3019 ; X86-LABEL: test20:
3020 ; X86: # %bb.0: # %entry
3021 ; X86-NEXT: pushl %ebp
3022 ; X86-NEXT: movl %esp, %ebp
3023 ; X86-NEXT: andl $-8, %esp
3024 ; X86-NEXT: subl $24, %esp
3025 ; X86-NEXT: movl 8(%ebp), %eax
3026 ; X86-NEXT: movl 12(%ebp), %ecx
3027 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3028 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3029 ; X86-NEXT: movl 16(%ebp), %eax
3030 ; X86-NEXT: movl 20(%ebp), %ecx
3031 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3032 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3033 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3034 ; X86-NEXT: pmuludq {{[0-9]+}}(%esp), %mm0
3035 ; X86-NEXT: movq %mm0, (%esp)
3036 ; X86-NEXT: movl (%esp), %eax
3037 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3038 ; X86-NEXT: movl %ebp, %esp
3039 ; X86-NEXT: popl %ebp
3042 ; X64-LABEL: test20:
3043 ; X64: # %bb.0: # %entry
3044 ; X64-NEXT: movq %rsi, %mm0
3045 ; X64-NEXT: movq %rdi, %mm1
3046 ; X64-NEXT: pmuludq %mm0, %mm1
3047 ; X64-NEXT: movq %mm1, %rax
3050 %0 = bitcast <1 x i64> %b to <2 x i32>
3051 %1 = bitcast <1 x i64> %a to <2 x i32>
3052 %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
3053 %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
3054 %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
3055 %3 = bitcast <1 x i64> %2 to i64
; MMX->SSE2 int-to-double conversion intrinsic; lowered to `cvtpi2pd` (see CHECK lines of @test19 below).
3059 declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone
3061 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
3062 ; X86-LABEL: test19:
3063 ; X86: # %bb.0: # %entry
3064 ; X86-NEXT: pushl %ebp
3065 ; X86-NEXT: movl %esp, %ebp
3066 ; X86-NEXT: andl $-8, %esp
3067 ; X86-NEXT: subl $8, %esp
3068 ; X86-NEXT: movl 8(%ebp), %eax
3069 ; X86-NEXT: movl 12(%ebp), %ecx
3070 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3071 ; X86-NEXT: movl %eax, (%esp)
3072 ; X86-NEXT: cvtpi2pd (%esp), %xmm0
3073 ; X86-NEXT: movl %ebp, %esp
3074 ; X86-NEXT: popl %ebp
3077 ; X64-LABEL: test19:
3078 ; X64: # %bb.0: # %entry
3079 ; X64-NEXT: movq %rdi, %mm0
3080 ; X64-NEXT: cvtpi2pd %mm0, %xmm0
3083 %0 = bitcast <1 x i64> %a to <2 x i32>
3084 %1 = bitcast <2 x i32> %0 to <1 x i64>
3085 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone
; SSE2->MMX truncating double-to-int conversion intrinsic; lowered to `cvttpd2pi` (see CHECK lines of @test18 below).
3089 declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
3091 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
3092 ; X86-LABEL: test18:
3093 ; X86: # %bb.0: # %entry
3094 ; X86-NEXT: pushl %ebp
3095 ; X86-NEXT: movl %esp, %ebp
3096 ; X86-NEXT: andl $-8, %esp
3097 ; X86-NEXT: subl $8, %esp
3098 ; X86-NEXT: cvttpd2pi %xmm0, %mm0
3099 ; X86-NEXT: movq %mm0, (%esp)
3100 ; X86-NEXT: movl (%esp), %eax
3101 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3102 ; X86-NEXT: movl %ebp, %esp
3103 ; X86-NEXT: popl %ebp
3106 ; X64-LABEL: test18:
3107 ; X64: # %bb.0: # %entry
3108 ; X64-NEXT: cvttpd2pi %xmm0, %mm0
3109 ; X64-NEXT: movq %mm0, %rax
3112 %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
3113 %1 = bitcast <1 x i64> %0 to <2 x i32>
3114 %2 = bitcast <2 x i32> %1 to <1 x i64>
3115 %3 = extractelement <1 x i64> %2, i32 0
; SSE2->MMX rounding double-to-int conversion intrinsic; lowered to `cvtpd2pi` (see CHECK lines of @test17 below).
3119 declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
3121 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
3122 ; X86-LABEL: test17:
3123 ; X86: # %bb.0: # %entry
3124 ; X86-NEXT: pushl %ebp
3125 ; X86-NEXT: movl %esp, %ebp
3126 ; X86-NEXT: andl $-8, %esp
3127 ; X86-NEXT: subl $8, %esp
3128 ; X86-NEXT: cvtpd2pi %xmm0, %mm0
3129 ; X86-NEXT: movq %mm0, (%esp)
3130 ; X86-NEXT: movl (%esp), %eax
3131 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3132 ; X86-NEXT: movl %ebp, %esp
3133 ; X86-NEXT: popl %ebp
3136 ; X64-LABEL: test17:
3137 ; X64: # %bb.0: # %entry
3138 ; X64-NEXT: cvtpd2pi %xmm0, %mm0
3139 ; X64-NEXT: movq %mm0, %rax
3142 %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
3143 %1 = bitcast <1 x i64> %0 to <2 x i32>
3144 %2 = bitcast <2 x i32> %1 to <1 x i64>
3145 %3 = extractelement <1 x i64> %2, i32 0
; SSSE3 MMX byte-align intrinsic (imm shift count); lowered to `palignr` (see CHECK lines of @test16 below, which uses shift 16).
3149 declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone
3151 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3152 ; X86-LABEL: test16:
3153 ; X86: # %bb.0: # %entry
3154 ; X86-NEXT: pushl %ebp
3155 ; X86-NEXT: movl %esp, %ebp
3156 ; X86-NEXT: andl $-8, %esp
3157 ; X86-NEXT: subl $8, %esp
3158 ; X86-NEXT: movq 8(%ebp), %mm0
3159 ; X86-NEXT: palignr $16, 16(%ebp), %mm0
3160 ; X86-NEXT: movq %mm0, (%esp)
3161 ; X86-NEXT: movl (%esp), %eax
3162 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3163 ; X86-NEXT: movl %ebp, %esp
3164 ; X86-NEXT: popl %ebp
3167 ; X64-LABEL: test16:
3168 ; X64: # %bb.0: # %entry
3169 ; X64-NEXT: movq %rdi, %mm0
3170 ; X64-NEXT: movq %rsi, %mm1
3171 ; X64-NEXT: palignr $16, %mm1, %mm0
3172 ; X64-NEXT: movq %mm0, %rax
3175 %0 = extractelement <1 x i64> %a, i32 0
3176 %mmx_var = bitcast i64 %0 to <1 x i64>
3177 %1 = extractelement <1 x i64> %b, i32 0
3178 %mmx_var1 = bitcast i64 %1 to <1 x i64>
3179 %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16)
3180 %3 = bitcast <1 x i64> %2 to i64
; SSSE3 MMX absolute-value (dwords) intrinsic; lowered to `pabsd` (see CHECK lines of @test15 below).
3184 declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone
3186 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
3187 ; X86-LABEL: test15:
3188 ; X86: # %bb.0: # %entry
3189 ; X86-NEXT: pushl %ebp
3190 ; X86-NEXT: movl %esp, %ebp
3191 ; X86-NEXT: andl $-8, %esp
3192 ; X86-NEXT: subl $16, %esp
3193 ; X86-NEXT: movl 8(%ebp), %eax
3194 ; X86-NEXT: movl 12(%ebp), %ecx
3195 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3196 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3197 ; X86-NEXT: pabsd {{[0-9]+}}(%esp), %mm0
3198 ; X86-NEXT: movq %mm0, (%esp)
3199 ; X86-NEXT: movl (%esp), %eax
3200 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3201 ; X86-NEXT: movl %ebp, %esp
3202 ; X86-NEXT: popl %ebp
3205 ; X64-LABEL: test15:
3206 ; X64: # %bb.0: # %entry
3207 ; X64-NEXT: movq %rdi, %mm0
3208 ; X64-NEXT: pabsd %mm0, %mm0
3209 ; X64-NEXT: movq %mm0, %rax
3212 %0 = bitcast <1 x i64> %a to <2 x i32>
3213 %1 = bitcast <2 x i32> %0 to <1 x i64>
3214 %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone
3215 %3 = bitcast <1 x i64> %2 to <2 x i32>
3216 %4 = bitcast <2 x i32> %3 to <1 x i64>
3217 %5 = extractelement <1 x i64> %4, i32 0
; SSSE3 MMX absolute-value (words) intrinsic; lowered to `pabsw` (see CHECK lines of @test14 below).
3221 declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone
3223 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
3224 ; X86-LABEL: test14:
3225 ; X86: # %bb.0: # %entry
3226 ; X86-NEXT: pushl %ebp
3227 ; X86-NEXT: movl %esp, %ebp
3228 ; X86-NEXT: andl $-8, %esp
3229 ; X86-NEXT: subl $16, %esp
3230 ; X86-NEXT: movl 8(%ebp), %eax
3231 ; X86-NEXT: movl 12(%ebp), %ecx
3232 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3233 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3234 ; X86-NEXT: pabsw {{[0-9]+}}(%esp), %mm0
3235 ; X86-NEXT: movq %mm0, (%esp)
3236 ; X86-NEXT: movl (%esp), %eax
3237 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3238 ; X86-NEXT: movl %ebp, %esp
3239 ; X86-NEXT: popl %ebp
3242 ; X64-LABEL: test14:
3243 ; X64: # %bb.0: # %entry
3244 ; X64-NEXT: movq %rdi, %mm0
3245 ; X64-NEXT: pabsw %mm0, %mm0
3246 ; X64-NEXT: movq %mm0, %rax
3249 %0 = bitcast <1 x i64> %a to <4 x i16>
3250 %1 = bitcast <4 x i16> %0 to <1 x i64>
3251 %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone
3252 %3 = bitcast <1 x i64> %2 to <4 x i16>
3253 %4 = bitcast <4 x i16> %3 to <1 x i64>
3254 %5 = extractelement <1 x i64> %4, i32 0
; SSSE3 MMX absolute-value (bytes) intrinsic; lowered to `pabsb` (see CHECK lines of @test13 below).
3258 declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone
3260 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
3261 ; X86-LABEL: test13:
3262 ; X86: # %bb.0: # %entry
3263 ; X86-NEXT: pushl %ebp
3264 ; X86-NEXT: movl %esp, %ebp
3265 ; X86-NEXT: andl $-8, %esp
3266 ; X86-NEXT: subl $16, %esp
3267 ; X86-NEXT: movl 8(%ebp), %eax
3268 ; X86-NEXT: movl 12(%ebp), %ecx
3269 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3270 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3271 ; X86-NEXT: pabsb {{[0-9]+}}(%esp), %mm0
3272 ; X86-NEXT: movq %mm0, (%esp)
3273 ; X86-NEXT: movl (%esp), %eax
3274 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3275 ; X86-NEXT: movl %ebp, %esp
3276 ; X86-NEXT: popl %ebp
3279 ; X64-LABEL: test13:
3280 ; X64: # %bb.0: # %entry
3281 ; X64-NEXT: movq %rdi, %mm0
3282 ; X64-NEXT: pabsb %mm0, %mm0
3283 ; X64-NEXT: movq %mm0, %rax
3286 %0 = bitcast <1 x i64> %a to <8 x i8>
3287 %1 = bitcast <8 x i8> %0 to <1 x i64>
3288 %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone
3289 %3 = bitcast <1 x i64> %2 to <8 x i8>
3290 %4 = bitcast <8 x i8> %3 to <1 x i64>
3291 %5 = extractelement <1 x i64> %4, i32 0
; SSSE3 MMX sign-apply (dwords) intrinsic; lowered to `psignd` (see CHECK lines of @test12 below).
3295 declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone
3297 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3298 ; X86-LABEL: test12:
3299 ; X86: # %bb.0: # %entry
3300 ; X86-NEXT: pushl %ebp
3301 ; X86-NEXT: movl %esp, %ebp
3302 ; X86-NEXT: andl $-8, %esp
3303 ; X86-NEXT: subl $24, %esp
3304 ; X86-NEXT: movl 8(%ebp), %eax
3305 ; X86-NEXT: movl 12(%ebp), %ecx
3306 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3307 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3308 ; X86-NEXT: movl 16(%ebp), %eax
3309 ; X86-NEXT: movl 20(%ebp), %ecx
3310 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3311 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3312 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3313 ; X86-NEXT: psignd {{[0-9]+}}(%esp), %mm0
3314 ; X86-NEXT: movq %mm0, (%esp)
3315 ; X86-NEXT: movl (%esp), %eax
3316 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3317 ; X86-NEXT: movl %ebp, %esp
3318 ; X86-NEXT: popl %ebp
3321 ; X64-LABEL: test12:
3322 ; X64: # %bb.0: # %entry
3323 ; X64-NEXT: movq %rsi, %mm0
3324 ; X64-NEXT: movq %rdi, %mm1
3325 ; X64-NEXT: psignd %mm0, %mm1
3326 ; X64-NEXT: movq %mm1, %rax
3329 %0 = bitcast <1 x i64> %b to <2 x i32>
3330 %1 = bitcast <1 x i64> %a to <2 x i32>
3331 %2 = bitcast <2 x i32> %1 to <1 x i64>
3332 %3 = bitcast <2 x i32> %0 to <1 x i64>
3333 %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3334 %5 = bitcast <1 x i64> %4 to <2 x i32>
3335 %6 = bitcast <2 x i32> %5 to <1 x i64>
3336 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX sign-apply (words) intrinsic; lowered to `psignw` (see CHECK lines of @test11 below).
3340 declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone
3342 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3343 ; X86-LABEL: test11:
3344 ; X86: # %bb.0: # %entry
3345 ; X86-NEXT: pushl %ebp
3346 ; X86-NEXT: movl %esp, %ebp
3347 ; X86-NEXT: andl $-8, %esp
3348 ; X86-NEXT: subl $24, %esp
3349 ; X86-NEXT: movl 8(%ebp), %eax
3350 ; X86-NEXT: movl 12(%ebp), %ecx
3351 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3352 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3353 ; X86-NEXT: movl 16(%ebp), %eax
3354 ; X86-NEXT: movl 20(%ebp), %ecx
3355 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3356 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3357 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3358 ; X86-NEXT: psignw {{[0-9]+}}(%esp), %mm0
3359 ; X86-NEXT: movq %mm0, (%esp)
3360 ; X86-NEXT: movl (%esp), %eax
3361 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3362 ; X86-NEXT: movl %ebp, %esp
3363 ; X86-NEXT: popl %ebp
3366 ; X64-LABEL: test11:
3367 ; X64: # %bb.0: # %entry
3368 ; X64-NEXT: movq %rsi, %mm0
3369 ; X64-NEXT: movq %rdi, %mm1
3370 ; X64-NEXT: psignw %mm0, %mm1
3371 ; X64-NEXT: movq %mm1, %rax
3374 %0 = bitcast <1 x i64> %b to <4 x i16>
3375 %1 = bitcast <1 x i64> %a to <4 x i16>
3376 %2 = bitcast <4 x i16> %1 to <1 x i64>
3377 %3 = bitcast <4 x i16> %0 to <1 x i64>
3378 %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3379 %5 = bitcast <1 x i64> %4 to <4 x i16>
3380 %6 = bitcast <4 x i16> %5 to <1 x i64>
3381 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX sign-apply (bytes) intrinsic; lowered to `psignb` (see CHECK lines of @test10 below).
3385 declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone
3387 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3388 ; X86-LABEL: test10:
3389 ; X86: # %bb.0: # %entry
3390 ; X86-NEXT: pushl %ebp
3391 ; X86-NEXT: movl %esp, %ebp
3392 ; X86-NEXT: andl $-8, %esp
3393 ; X86-NEXT: subl $24, %esp
3394 ; X86-NEXT: movl 8(%ebp), %eax
3395 ; X86-NEXT: movl 12(%ebp), %ecx
3396 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3397 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3398 ; X86-NEXT: movl 16(%ebp), %eax
3399 ; X86-NEXT: movl 20(%ebp), %ecx
3400 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3401 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3402 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3403 ; X86-NEXT: psignb {{[0-9]+}}(%esp), %mm0
3404 ; X86-NEXT: movq %mm0, (%esp)
3405 ; X86-NEXT: movl (%esp), %eax
3406 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3407 ; X86-NEXT: movl %ebp, %esp
3408 ; X86-NEXT: popl %ebp
3411 ; X64-LABEL: test10:
3412 ; X64: # %bb.0: # %entry
3413 ; X64-NEXT: movq %rsi, %mm0
3414 ; X64-NEXT: movq %rdi, %mm1
3415 ; X64-NEXT: psignb %mm0, %mm1
3416 ; X64-NEXT: movq %mm1, %rax
3419 %0 = bitcast <1 x i64> %b to <8 x i8>
3420 %1 = bitcast <1 x i64> %a to <8 x i8>
3421 %2 = bitcast <8 x i8> %1 to <1 x i64>
3422 %3 = bitcast <8 x i8> %0 to <1 x i64>
3423 %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3424 %5 = bitcast <1 x i64> %4 to <8 x i8>
3425 %6 = bitcast <8 x i8> %5 to <1 x i64>
3426 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX byte-shuffle intrinsic; lowered to `pshufb` (see CHECK lines of @test9 below).
3430 declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone
3432 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3434 ; X86: # %bb.0: # %entry
3435 ; X86-NEXT: pushl %ebp
3436 ; X86-NEXT: movl %esp, %ebp
3437 ; X86-NEXT: andl $-8, %esp
3438 ; X86-NEXT: subl $24, %esp
3439 ; X86-NEXT: movl 8(%ebp), %eax
3440 ; X86-NEXT: movl 12(%ebp), %ecx
3441 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3442 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3443 ; X86-NEXT: movl 16(%ebp), %eax
3444 ; X86-NEXT: movl 20(%ebp), %ecx
3445 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3446 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3447 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3448 ; X86-NEXT: pshufb {{[0-9]+}}(%esp), %mm0
3449 ; X86-NEXT: movq %mm0, (%esp)
3450 ; X86-NEXT: movl (%esp), %eax
3451 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3452 ; X86-NEXT: movl %ebp, %esp
3453 ; X86-NEXT: popl %ebp
3457 ; X64: # %bb.0: # %entry
3458 ; X64-NEXT: movq %rsi, %mm0
3459 ; X64-NEXT: movq %rdi, %mm1
3460 ; X64-NEXT: pshufb %mm0, %mm1
3461 ; X64-NEXT: movq %mm1, %rax
3464 %0 = bitcast <1 x i64> %b to <8 x i8>
3465 %1 = bitcast <1 x i64> %a to <8 x i8>
3466 %2 = bitcast <8 x i8> %1 to <1 x i64>
3467 %3 = bitcast <8 x i8> %0 to <1 x i64>
3468 %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3469 %5 = bitcast <1 x i64> %4 to <8 x i8>
3470 %6 = bitcast <8 x i8> %5 to <1 x i64>
3471 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX rounded multiply-high intrinsic; lowered to `pmulhrsw` (see CHECK lines of @test8 below).
3475 declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone
3477 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3479 ; X86: # %bb.0: # %entry
3480 ; X86-NEXT: pushl %ebp
3481 ; X86-NEXT: movl %esp, %ebp
3482 ; X86-NEXT: andl $-8, %esp
3483 ; X86-NEXT: subl $24, %esp
3484 ; X86-NEXT: movl 8(%ebp), %eax
3485 ; X86-NEXT: movl 12(%ebp), %ecx
3486 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3487 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3488 ; X86-NEXT: movl 16(%ebp), %eax
3489 ; X86-NEXT: movl 20(%ebp), %ecx
3490 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3491 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3492 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3493 ; X86-NEXT: pmulhrsw {{[0-9]+}}(%esp), %mm0
3494 ; X86-NEXT: movq %mm0, (%esp)
3495 ; X86-NEXT: movl (%esp), %eax
3496 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3497 ; X86-NEXT: movl %ebp, %esp
3498 ; X86-NEXT: popl %ebp
3502 ; X64: # %bb.0: # %entry
3503 ; X64-NEXT: movq %rsi, %mm0
3504 ; X64-NEXT: movq %rdi, %mm1
3505 ; X64-NEXT: pmulhrsw %mm0, %mm1
3506 ; X64-NEXT: movq %mm1, %rax
3509 %0 = bitcast <1 x i64> %b to <4 x i16>
3510 %1 = bitcast <1 x i64> %a to <4 x i16>
3511 %2 = bitcast <4 x i16> %1 to <1 x i64>
3512 %3 = bitcast <4 x i16> %0 to <1 x i64>
3513 %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3514 %5 = bitcast <1 x i64> %4 to <4 x i16>
3515 %6 = bitcast <4 x i16> %5 to <1 x i64>
3516 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX multiply-and-horizontal-add intrinsic; lowered to `pmaddubsw` (see CHECK lines of @test7 below).
3520 declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone
3522 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3524 ; X86: # %bb.0: # %entry
3525 ; X86-NEXT: pushl %ebp
3526 ; X86-NEXT: movl %esp, %ebp
3527 ; X86-NEXT: andl $-8, %esp
3528 ; X86-NEXT: subl $24, %esp
3529 ; X86-NEXT: movl 8(%ebp), %eax
3530 ; X86-NEXT: movl 12(%ebp), %ecx
3531 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3532 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3533 ; X86-NEXT: movl 16(%ebp), %eax
3534 ; X86-NEXT: movl 20(%ebp), %ecx
3535 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3536 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3537 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3538 ; X86-NEXT: pmaddubsw {{[0-9]+}}(%esp), %mm0
3539 ; X86-NEXT: movq %mm0, (%esp)
3540 ; X86-NEXT: movl (%esp), %eax
3541 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3542 ; X86-NEXT: movl %ebp, %esp
3543 ; X86-NEXT: popl %ebp
3547 ; X64: # %bb.0: # %entry
3548 ; X64-NEXT: movq %rsi, %mm0
3549 ; X64-NEXT: movq %rdi, %mm1
3550 ; X64-NEXT: pmaddubsw %mm0, %mm1
3551 ; X64-NEXT: movq %mm1, %rax
3554 %0 = bitcast <1 x i64> %b to <8 x i8>
3555 %1 = bitcast <1 x i64> %a to <8 x i8>
3556 %2 = bitcast <8 x i8> %1 to <1 x i64>
3557 %3 = bitcast <8 x i8> %0 to <1 x i64>
3558 %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3559 %5 = bitcast <1 x i64> %4 to <8 x i8>
3560 %6 = bitcast <8 x i8> %5 to <1 x i64>
3561 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX saturating horizontal-subtract intrinsic; lowered to `phsubsw` (see CHECK lines of @test6 below).
3565 declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone
3567 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3569 ; X86: # %bb.0: # %entry
3570 ; X86-NEXT: pushl %ebp
3571 ; X86-NEXT: movl %esp, %ebp
3572 ; X86-NEXT: andl $-8, %esp
3573 ; X86-NEXT: subl $24, %esp
3574 ; X86-NEXT: movl 8(%ebp), %eax
3575 ; X86-NEXT: movl 12(%ebp), %ecx
3576 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3577 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3578 ; X86-NEXT: movl 16(%ebp), %eax
3579 ; X86-NEXT: movl 20(%ebp), %ecx
3580 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3581 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3582 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3583 ; X86-NEXT: phsubsw {{[0-9]+}}(%esp), %mm0
3584 ; X86-NEXT: movq %mm0, (%esp)
3585 ; X86-NEXT: movl (%esp), %eax
3586 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3587 ; X86-NEXT: movl %ebp, %esp
3588 ; X86-NEXT: popl %ebp
3592 ; X64: # %bb.0: # %entry
3593 ; X64-NEXT: movq %rsi, %mm0
3594 ; X64-NEXT: movq %rdi, %mm1
3595 ; X64-NEXT: phsubsw %mm0, %mm1
3596 ; X64-NEXT: movq %mm1, %rax
3599 %0 = bitcast <1 x i64> %b to <4 x i16>
3600 %1 = bitcast <1 x i64> %a to <4 x i16>
3601 %2 = bitcast <4 x i16> %1 to <1 x i64>
3602 %3 = bitcast <4 x i16> %0 to <1 x i64>
3603 %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3604 %5 = bitcast <1 x i64> %4 to <4 x i16>
3605 %6 = bitcast <4 x i16> %5 to <1 x i64>
3606 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX horizontal-subtract (dwords) intrinsic; lowered to `phsubd` (see CHECK lines of @test5 below).
3610 declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone
3612 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3614 ; X86: # %bb.0: # %entry
3615 ; X86-NEXT: pushl %ebp
3616 ; X86-NEXT: movl %esp, %ebp
3617 ; X86-NEXT: andl $-8, %esp
3618 ; X86-NEXT: subl $24, %esp
3619 ; X86-NEXT: movl 8(%ebp), %eax
3620 ; X86-NEXT: movl 12(%ebp), %ecx
3621 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3622 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3623 ; X86-NEXT: movl 16(%ebp), %eax
3624 ; X86-NEXT: movl 20(%ebp), %ecx
3625 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3626 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3627 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3628 ; X86-NEXT: phsubd {{[0-9]+}}(%esp), %mm0
3629 ; X86-NEXT: movq %mm0, (%esp)
3630 ; X86-NEXT: movl (%esp), %eax
3631 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3632 ; X86-NEXT: movl %ebp, %esp
3633 ; X86-NEXT: popl %ebp
3637 ; X64: # %bb.0: # %entry
3638 ; X64-NEXT: movq %rsi, %mm0
3639 ; X64-NEXT: movq %rdi, %mm1
3640 ; X64-NEXT: phsubd %mm0, %mm1
3641 ; X64-NEXT: movq %mm1, %rax
3644 %0 = bitcast <1 x i64> %b to <2 x i32>
3645 %1 = bitcast <1 x i64> %a to <2 x i32>
3646 %2 = bitcast <2 x i32> %1 to <1 x i64>
3647 %3 = bitcast <2 x i32> %0 to <1 x i64>
3648 %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3649 %5 = bitcast <1 x i64> %4 to <2 x i32>
3650 %6 = bitcast <2 x i32> %5 to <1 x i64>
3651 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX horizontal-subtract (words) intrinsic; lowered to `phsubw` (see CHECK lines of @test4 below).
3655 declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone
3657 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3659 ; X86: # %bb.0: # %entry
3660 ; X86-NEXT: pushl %ebp
3661 ; X86-NEXT: movl %esp, %ebp
3662 ; X86-NEXT: andl $-8, %esp
3663 ; X86-NEXT: subl $24, %esp
3664 ; X86-NEXT: movl 8(%ebp), %eax
3665 ; X86-NEXT: movl 12(%ebp), %ecx
3666 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3667 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3668 ; X86-NEXT: movl 16(%ebp), %eax
3669 ; X86-NEXT: movl 20(%ebp), %ecx
3670 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3671 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3672 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3673 ; X86-NEXT: phsubw {{[0-9]+}}(%esp), %mm0
3674 ; X86-NEXT: movq %mm0, (%esp)
3675 ; X86-NEXT: movl (%esp), %eax
3676 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3677 ; X86-NEXT: movl %ebp, %esp
3678 ; X86-NEXT: popl %ebp
3682 ; X64: # %bb.0: # %entry
3683 ; X64-NEXT: movq %rsi, %mm0
3684 ; X64-NEXT: movq %rdi, %mm1
3685 ; X64-NEXT: phsubw %mm0, %mm1
3686 ; X64-NEXT: movq %mm1, %rax
3689 %0 = bitcast <1 x i64> %b to <4 x i16>
3690 %1 = bitcast <1 x i64> %a to <4 x i16>
3691 %2 = bitcast <4 x i16> %1 to <1 x i64>
3692 %3 = bitcast <4 x i16> %0 to <1 x i64>
3693 %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3694 %5 = bitcast <1 x i64> %4 to <4 x i16>
3695 %6 = bitcast <4 x i16> %5 to <1 x i64>
3696 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX saturating horizontal-add intrinsic; lowered to `phaddsw` (see CHECK lines of @test3 below).
3700 declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone
3702 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3704 ; X86: # %bb.0: # %entry
3705 ; X86-NEXT: pushl %ebp
3706 ; X86-NEXT: movl %esp, %ebp
3707 ; X86-NEXT: andl $-8, %esp
3708 ; X86-NEXT: subl $24, %esp
3709 ; X86-NEXT: movl 8(%ebp), %eax
3710 ; X86-NEXT: movl 12(%ebp), %ecx
3711 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3712 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3713 ; X86-NEXT: movl 16(%ebp), %eax
3714 ; X86-NEXT: movl 20(%ebp), %ecx
3715 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3716 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3717 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3718 ; X86-NEXT: phaddsw {{[0-9]+}}(%esp), %mm0
3719 ; X86-NEXT: movq %mm0, (%esp)
3720 ; X86-NEXT: movl (%esp), %eax
3721 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3722 ; X86-NEXT: movl %ebp, %esp
3723 ; X86-NEXT: popl %ebp
3727 ; X64: # %bb.0: # %entry
3728 ; X64-NEXT: movq %rsi, %mm0
3729 ; X64-NEXT: movq %rdi, %mm1
3730 ; X64-NEXT: phaddsw %mm0, %mm1
3731 ; X64-NEXT: movq %mm1, %rax
3734 %0 = bitcast <1 x i64> %b to <4 x i16>
3735 %1 = bitcast <1 x i64> %a to <4 x i16>
3736 %2 = bitcast <4 x i16> %1 to <1 x i64>
3737 %3 = bitcast <4 x i16> %0 to <1 x i64>
3738 %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3739 %5 = bitcast <1 x i64> %4 to <4 x i16>
3740 %6 = bitcast <4 x i16> %5 to <1 x i64>
3741 %7 = extractelement <1 x i64> %6, i32 0
; SSSE3 MMX horizontal-add (dwords) intrinsic; lowered to `phaddd` (see CHECK lines of @test2 below).
3745 declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone
3747 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
3749 ; X86: # %bb.0: # %entry
3750 ; X86-NEXT: pushl %ebp
3751 ; X86-NEXT: movl %esp, %ebp
3752 ; X86-NEXT: andl $-8, %esp
3753 ; X86-NEXT: subl $24, %esp
3754 ; X86-NEXT: movl 8(%ebp), %eax
3755 ; X86-NEXT: movl 12(%ebp), %ecx
3756 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3757 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3758 ; X86-NEXT: movl 16(%ebp), %eax
3759 ; X86-NEXT: movl 20(%ebp), %ecx
3760 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3761 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3762 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3763 ; X86-NEXT: phaddd {{[0-9]+}}(%esp), %mm0
3764 ; X86-NEXT: movq %mm0, (%esp)
3765 ; X86-NEXT: movl (%esp), %eax
3766 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3767 ; X86-NEXT: movl %ebp, %esp
3768 ; X86-NEXT: popl %ebp
3772 ; X64: # %bb.0: # %entry
3773 ; X64-NEXT: movq %rsi, %mm0
3774 ; X64-NEXT: movq %rdi, %mm1
3775 ; X64-NEXT: phaddd %mm0, %mm1
3776 ; X64-NEXT: movq %mm1, %rax
3779 %0 = bitcast <1 x i64> %b to <2 x i32>
3780 %1 = bitcast <1 x i64> %a to <2 x i32>
3781 %2 = bitcast <2 x i32> %1 to <1 x i64>
3782 %3 = bitcast <2 x i32> %0 to <1 x i64>
3783 %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3784 %5 = bitcast <1 x i64> %4 to <2 x i32>
3785 %6 = bitcast <2 x i32> %5 to <1 x i64>
3786 %7 = extractelement <1 x i64> %6, i32 0
3790 define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind {
3791 ; X86-LABEL: test89:
3793 ; X86-NEXT: pushl %ebp
3794 ; X86-NEXT: movl %esp, %ebp
3795 ; X86-NEXT: andl $-8, %esp
3796 ; X86-NEXT: subl $8, %esp
3797 ; X86-NEXT: movl 8(%ebp), %eax
3798 ; X86-NEXT: movl 12(%ebp), %ecx
3799 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3800 ; X86-NEXT: movl %eax, (%esp)
3801 ; X86-NEXT: cvtpi2ps (%esp), %xmm0
3802 ; X86-NEXT: movl %ebp, %esp
3803 ; X86-NEXT: popl %ebp
3806 ; X64-LABEL: test89:
3808 ; X64-NEXT: movq %rdi, %mm0
3809 ; X64-NEXT: cvtpi2ps %mm0, %xmm0
3811 %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b)
; MMX->SSE int-to-float conversion intrinsic; lowered to `cvtpi2ps` (see CHECK lines of @test89 above).
3815 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone
3817 define void @test90() {
3818 ; ALL-LABEL: test90:
3821 ; ALL-NEXT: ret{{[l|q]}}
3822 call void @llvm.x86.mmx.emms()
; MMX state-clear intrinsic; exercised by @test90 above (with -avx the X86/X64 checks there reduce to a bare return in this chunk).
3826 declare void @llvm.x86.mmx.emms()
3828 define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
3829 ; X86-LABEL: test_mm_insert_pi16:
3830 ; X86: # %bb.0: # %entry
3831 ; X86-NEXT: pushl %ebp
3832 ; X86-NEXT: movl %esp, %ebp
3833 ; X86-NEXT: andl $-8, %esp
3834 ; X86-NEXT: subl $16, %esp
3835 ; X86-NEXT: movl 8(%ebp), %eax
3836 ; X86-NEXT: movl 12(%ebp), %ecx
3837 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3838 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
3839 ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
3840 ; X86-NEXT: pinsrw $2, 16(%ebp), %mm0
3841 ; X86-NEXT: movq %mm0, (%esp)
3842 ; X86-NEXT: movl (%esp), %eax
3843 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
3844 ; X86-NEXT: movl %ebp, %esp
3845 ; X86-NEXT: popl %ebp
3848 ; X64-LABEL: test_mm_insert_pi16:
3849 ; X64: # %bb.0: # %entry
3850 ; X64-NEXT: movq %rdi, %mm0
3851 ; X64-NEXT: pinsrw $2, %esi, %mm0
3852 ; X64-NEXT: movq %mm0, %rax
3855 %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2)
; MMX word-insert intrinsic (immarg lane index); lowered to `pinsrw` (see CHECK lines of @test_mm_insert_pi16 above).
3859 declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg)
3861 define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
3862 ; X86-LABEL: test_mm_extract_pi16:
3863 ; X86: # %bb.0: # %entry
3864 ; X86-NEXT: pushl %ebp
3865 ; X86-NEXT: movl %esp, %ebp
3866 ; X86-NEXT: andl $-8, %esp
3867 ; X86-NEXT: subl $8, %esp
3868 ; X86-NEXT: movl 8(%ebp), %eax
3869 ; X86-NEXT: movl 12(%ebp), %ecx
3870 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3871 ; X86-NEXT: movl %eax, (%esp)
3872 ; X86-NEXT: movq (%esp), %mm0
3873 ; X86-NEXT: pextrw $2, %mm0, %eax
3874 ; X86-NEXT: movl %ebp, %esp
3875 ; X86-NEXT: popl %ebp
3878 ; X64-LABEL: test_mm_extract_pi16:
3879 ; X64: # %bb.0: # %entry
3880 ; X64-NEXT: movq %rdi, %mm0
3881 ; X64-NEXT: pextrw $2, %mm0, %eax
3884 %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2)
; MMX word-extract intrinsic (immarg lane index); lowered to `pextrw` (see CHECK lines of @test_mm_extract_pi16 above).
3888 declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg)