1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X64
; test_pavgusb: round-trips both x86_mmx arguments through <8 x i8> bitcasts,
; calls @llvm.x86.3dnow.pavgusb on them, and bitcasts the result to <8 x i8>.
; Both the i686 and the x86-64 expectations look for a register-register
; pavgusb on %mm1, %mm0.
5 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
6 ; X86-LABEL: test_pavgusb:
7 ; X86: # %bb.0: # %entry
8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9 ; X86-NEXT: pavgusb %mm1, %mm0
10 ; X86-NEXT: movq %mm0, (%eax)
13 ; X64-LABEL: test_pavgusb:
14 ; X64: # %bb.0: # %entry
15 ; X64-NEXT: pavgusb %mm1, %mm0
16 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
17 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
18 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
21 %0 = bitcast x86_mmx %a.coerce to <8 x i8>
22 %1 = bitcast x86_mmx %b.coerce to <8 x i8>
23 %2 = bitcast <8 x i8> %0 to x86_mmx
24 %3 = bitcast <8 x i8> %1 to x86_mmx
25 %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
26 %5 = bitcast x86_mmx %4 to <8 x i8>
30 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
; test_pf2id: bitcasts the <2 x float> argument to x86_mmx, calls
; @llvm.x86.3dnow.pf2id, and bitcasts the result to <2 x i32>.
; The i686 expectations build the MMX operand from two stack dwords
; (movd + punpckldq); the x86-64 expectations move it from %xmm0 with movdq2q.
32 define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
33 ; X86-LABEL: test_pf2id:
34 ; X86: # %bb.0: # %entry
35 ; X86-NEXT: pushl %ebp
36 ; X86-NEXT: movl %esp, %ebp
37 ; X86-NEXT: andl $-8, %esp
38 ; X86-NEXT: subl $8, %esp
39 ; X86-NEXT: movd 12(%ebp), %mm0
40 ; X86-NEXT: movd 8(%ebp), %mm1
41 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
42 ; X86-NEXT: pf2id %mm1, %mm0
43 ; X86-NEXT: movq %mm0, (%esp)
44 ; X86-NEXT: movl (%esp), %eax
45 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
46 ; X86-NEXT: movl %ebp, %esp
50 ; X64-LABEL: test_pf2id:
51 ; X64: # %bb.0: # %entry
52 ; X64-NEXT: movdq2q %xmm0, %mm0
53 ; X64-NEXT: pf2id %mm0, %mm0
54 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
55 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
56 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
59 %0 = bitcast <2 x float> %a to x86_mmx
60 %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
61 %2 = bitcast x86_mmx %1 to <2 x i32>
65 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
; test_pfacc: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfacc on them, and bitcasts the result to <2 x float>.
; The i686 expectations build each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
67 define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
68 ; X86-LABEL: test_pfacc:
69 ; X86: # %bb.0: # %entry
70 ; X86-NEXT: pushl %ebp
71 ; X86-NEXT: movl %esp, %ebp
72 ; X86-NEXT: andl $-8, %esp
73 ; X86-NEXT: subl $8, %esp
74 ; X86-NEXT: movd 20(%ebp), %mm0
75 ; X86-NEXT: movd 16(%ebp), %mm1
76 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
77 ; X86-NEXT: movd 12(%ebp), %mm0
78 ; X86-NEXT: movd 8(%ebp), %mm2
79 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
80 ; X86-NEXT: pfacc %mm1, %mm2
81 ; X86-NEXT: movq %mm2, (%esp)
82 ; X86-NEXT: flds {{[0-9]+}}(%esp)
83 ; X86-NEXT: flds (%esp)
84 ; X86-NEXT: movl %ebp, %esp
88 ; X64-LABEL: test_pfacc:
89 ; X64: # %bb.0: # %entry
90 ; X64-NEXT: movdq2q %xmm1, %mm0
91 ; X64-NEXT: movdq2q %xmm0, %mm1
92 ; X64-NEXT: pfacc %mm0, %mm1
93 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
94 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
97 %0 = bitcast <2 x float> %a to x86_mmx
98 %1 = bitcast <2 x float> %b to x86_mmx
99 %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
100 %3 = bitcast x86_mmx %2 to <2 x float>
104 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
; test_pfadd: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfadd on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
106 define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
107 ; X86-LABEL: test_pfadd:
108 ; X86: # %bb.0: # %entry
109 ; X86-NEXT: pushl %ebp
110 ; X86-NEXT: movl %esp, %ebp
111 ; X86-NEXT: andl $-8, %esp
112 ; X86-NEXT: subl $8, %esp
113 ; X86-NEXT: movd 20(%ebp), %mm0
114 ; X86-NEXT: movd 16(%ebp), %mm1
115 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
116 ; X86-NEXT: movd 12(%ebp), %mm0
117 ; X86-NEXT: movd 8(%ebp), %mm2
118 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
119 ; X86-NEXT: pfadd %mm1, %mm2
120 ; X86-NEXT: movq %mm2, (%esp)
121 ; X86-NEXT: flds {{[0-9]+}}(%esp)
122 ; X86-NEXT: flds (%esp)
123 ; X86-NEXT: movl %ebp, %esp
124 ; X86-NEXT: popl %ebp
127 ; X64-LABEL: test_pfadd:
128 ; X64: # %bb.0: # %entry
129 ; X64-NEXT: movdq2q %xmm1, %mm0
130 ; X64-NEXT: movdq2q %xmm0, %mm1
131 ; X64-NEXT: pfadd %mm0, %mm1
132 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
133 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
136 %0 = bitcast <2 x float> %a to x86_mmx
137 %1 = bitcast <2 x float> %b to x86_mmx
138 %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
139 %3 = bitcast x86_mmx %2 to <2 x float>
143 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
; test_pfcmpeq: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfcmpeq on them, and bitcasts the result to <2 x i32>.
; The i686 expectations read the result back into %eax/%edx after spilling
; the MMX register; the x86-64 expectations reload it into %xmm0.
145 define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
146 ; X86-LABEL: test_pfcmpeq:
147 ; X86: # %bb.0: # %entry
148 ; X86-NEXT: pushl %ebp
149 ; X86-NEXT: movl %esp, %ebp
150 ; X86-NEXT: andl $-8, %esp
151 ; X86-NEXT: subl $8, %esp
152 ; X86-NEXT: movd 20(%ebp), %mm0
153 ; X86-NEXT: movd 16(%ebp), %mm1
154 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
155 ; X86-NEXT: movd 12(%ebp), %mm0
156 ; X86-NEXT: movd 8(%ebp), %mm2
157 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
158 ; X86-NEXT: pfcmpeq %mm1, %mm2
159 ; X86-NEXT: movq %mm2, (%esp)
160 ; X86-NEXT: movl (%esp), %eax
161 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
162 ; X86-NEXT: movl %ebp, %esp
163 ; X86-NEXT: popl %ebp
166 ; X64-LABEL: test_pfcmpeq:
167 ; X64: # %bb.0: # %entry
168 ; X64-NEXT: movdq2q %xmm1, %mm0
169 ; X64-NEXT: movdq2q %xmm0, %mm1
170 ; X64-NEXT: pfcmpeq %mm0, %mm1
171 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
172 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
173 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
176 %0 = bitcast <2 x float> %a to x86_mmx
177 %1 = bitcast <2 x float> %b to x86_mmx
178 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
179 %3 = bitcast x86_mmx %2 to <2 x i32>
183 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
; test_pfcmpge: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfcmpge on them, and bitcasts the result to <2 x i32>.
; The i686 expectations read the result back into %eax/%edx after spilling
; the MMX register; the x86-64 expectations reload it into %xmm0.
185 define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
186 ; X86-LABEL: test_pfcmpge:
187 ; X86: # %bb.0: # %entry
188 ; X86-NEXT: pushl %ebp
189 ; X86-NEXT: movl %esp, %ebp
190 ; X86-NEXT: andl $-8, %esp
191 ; X86-NEXT: subl $8, %esp
192 ; X86-NEXT: movd 20(%ebp), %mm0
193 ; X86-NEXT: movd 16(%ebp), %mm1
194 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
195 ; X86-NEXT: movd 12(%ebp), %mm0
196 ; X86-NEXT: movd 8(%ebp), %mm2
197 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
198 ; X86-NEXT: pfcmpge %mm1, %mm2
199 ; X86-NEXT: movq %mm2, (%esp)
200 ; X86-NEXT: movl (%esp), %eax
201 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
202 ; X86-NEXT: movl %ebp, %esp
203 ; X86-NEXT: popl %ebp
206 ; X64-LABEL: test_pfcmpge:
207 ; X64: # %bb.0: # %entry
208 ; X64-NEXT: movdq2q %xmm1, %mm0
209 ; X64-NEXT: movdq2q %xmm0, %mm1
210 ; X64-NEXT: pfcmpge %mm0, %mm1
211 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
212 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
213 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
216 %0 = bitcast <2 x float> %a to x86_mmx
217 %1 = bitcast <2 x float> %b to x86_mmx
218 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
219 %3 = bitcast x86_mmx %2 to <2 x i32>
223 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
; test_pfcmpgt: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfcmpgt on them, and bitcasts the result to <2 x i32>.
; The i686 expectations read the result back into %eax/%edx after spilling
; the MMX register; the x86-64 expectations reload it into %xmm0.
225 define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
226 ; X86-LABEL: test_pfcmpgt:
227 ; X86: # %bb.0: # %entry
228 ; X86-NEXT: pushl %ebp
229 ; X86-NEXT: movl %esp, %ebp
230 ; X86-NEXT: andl $-8, %esp
231 ; X86-NEXT: subl $8, %esp
232 ; X86-NEXT: movd 20(%ebp), %mm0
233 ; X86-NEXT: movd 16(%ebp), %mm1
234 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
235 ; X86-NEXT: movd 12(%ebp), %mm0
236 ; X86-NEXT: movd 8(%ebp), %mm2
237 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
238 ; X86-NEXT: pfcmpgt %mm1, %mm2
239 ; X86-NEXT: movq %mm2, (%esp)
240 ; X86-NEXT: movl (%esp), %eax
241 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
242 ; X86-NEXT: movl %ebp, %esp
243 ; X86-NEXT: popl %ebp
246 ; X64-LABEL: test_pfcmpgt:
247 ; X64: # %bb.0: # %entry
248 ; X64-NEXT: movdq2q %xmm1, %mm0
249 ; X64-NEXT: movdq2q %xmm0, %mm1
250 ; X64-NEXT: pfcmpgt %mm0, %mm1
251 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
252 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
253 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
256 %0 = bitcast <2 x float> %a to x86_mmx
257 %1 = bitcast <2 x float> %b to x86_mmx
258 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
259 %3 = bitcast x86_mmx %2 to <2 x i32>
263 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
; test_pfmax: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfmax on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
265 define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
266 ; X86-LABEL: test_pfmax:
267 ; X86: # %bb.0: # %entry
268 ; X86-NEXT: pushl %ebp
269 ; X86-NEXT: movl %esp, %ebp
270 ; X86-NEXT: andl $-8, %esp
271 ; X86-NEXT: subl $8, %esp
272 ; X86-NEXT: movd 20(%ebp), %mm0
273 ; X86-NEXT: movd 16(%ebp), %mm1
274 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
275 ; X86-NEXT: movd 12(%ebp), %mm0
276 ; X86-NEXT: movd 8(%ebp), %mm2
277 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
278 ; X86-NEXT: pfmax %mm1, %mm2
279 ; X86-NEXT: movq %mm2, (%esp)
280 ; X86-NEXT: flds {{[0-9]+}}(%esp)
281 ; X86-NEXT: flds (%esp)
282 ; X86-NEXT: movl %ebp, %esp
283 ; X86-NEXT: popl %ebp
286 ; X64-LABEL: test_pfmax:
287 ; X64: # %bb.0: # %entry
288 ; X64-NEXT: movdq2q %xmm1, %mm0
289 ; X64-NEXT: movdq2q %xmm0, %mm1
290 ; X64-NEXT: pfmax %mm0, %mm1
291 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
292 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
295 %0 = bitcast <2 x float> %a to x86_mmx
296 %1 = bitcast <2 x float> %b to x86_mmx
297 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
298 %3 = bitcast x86_mmx %2 to <2 x float>
302 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
; test_pfmin: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfmin on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
304 define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
305 ; X86-LABEL: test_pfmin:
306 ; X86: # %bb.0: # %entry
307 ; X86-NEXT: pushl %ebp
308 ; X86-NEXT: movl %esp, %ebp
309 ; X86-NEXT: andl $-8, %esp
310 ; X86-NEXT: subl $8, %esp
311 ; X86-NEXT: movd 20(%ebp), %mm0
312 ; X86-NEXT: movd 16(%ebp), %mm1
313 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
314 ; X86-NEXT: movd 12(%ebp), %mm0
315 ; X86-NEXT: movd 8(%ebp), %mm2
316 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
317 ; X86-NEXT: pfmin %mm1, %mm2
318 ; X86-NEXT: movq %mm2, (%esp)
319 ; X86-NEXT: flds {{[0-9]+}}(%esp)
320 ; X86-NEXT: flds (%esp)
321 ; X86-NEXT: movl %ebp, %esp
322 ; X86-NEXT: popl %ebp
325 ; X64-LABEL: test_pfmin:
326 ; X64: # %bb.0: # %entry
327 ; X64-NEXT: movdq2q %xmm1, %mm0
328 ; X64-NEXT: movdq2q %xmm0, %mm1
329 ; X64-NEXT: pfmin %mm0, %mm1
330 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
331 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
334 %0 = bitcast <2 x float> %a to x86_mmx
335 %1 = bitcast <2 x float> %b to x86_mmx
336 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
337 %3 = bitcast x86_mmx %2 to <2 x float>
341 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
; test_pfmul: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfmul on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
343 define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
344 ; X86-LABEL: test_pfmul:
345 ; X86: # %bb.0: # %entry
346 ; X86-NEXT: pushl %ebp
347 ; X86-NEXT: movl %esp, %ebp
348 ; X86-NEXT: andl $-8, %esp
349 ; X86-NEXT: subl $8, %esp
350 ; X86-NEXT: movd 20(%ebp), %mm0
351 ; X86-NEXT: movd 16(%ebp), %mm1
352 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
353 ; X86-NEXT: movd 12(%ebp), %mm0
354 ; X86-NEXT: movd 8(%ebp), %mm2
355 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
356 ; X86-NEXT: pfmul %mm1, %mm2
357 ; X86-NEXT: movq %mm2, (%esp)
358 ; X86-NEXT: flds {{[0-9]+}}(%esp)
359 ; X86-NEXT: flds (%esp)
360 ; X86-NEXT: movl %ebp, %esp
361 ; X86-NEXT: popl %ebp
364 ; X64-LABEL: test_pfmul:
365 ; X64: # %bb.0: # %entry
366 ; X64-NEXT: movdq2q %xmm1, %mm0
367 ; X64-NEXT: movdq2q %xmm0, %mm1
368 ; X64-NEXT: pfmul %mm0, %mm1
369 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
370 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
373 %0 = bitcast <2 x float> %a to x86_mmx
374 %1 = bitcast <2 x float> %b to x86_mmx
375 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
376 %3 = bitcast x86_mmx %2 to <2 x float>
380 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
; test_pfrcp: bitcasts the <2 x float> argument to x86_mmx, calls
; @llvm.x86.3dnow.pfrcp, and bitcasts the result to <2 x float>.
; The i686 expectations build the MMX operand from two stack dwords
; (movd + punpckldq); the x86-64 expectations move it from %xmm0 with movdq2q.
382 define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
383 ; X86-LABEL: test_pfrcp:
384 ; X86: # %bb.0: # %entry
385 ; X86-NEXT: pushl %ebp
386 ; X86-NEXT: movl %esp, %ebp
387 ; X86-NEXT: andl $-8, %esp
388 ; X86-NEXT: subl $8, %esp
389 ; X86-NEXT: movd 12(%ebp), %mm0
390 ; X86-NEXT: movd 8(%ebp), %mm1
391 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
392 ; X86-NEXT: pfrcp %mm1, %mm0
393 ; X86-NEXT: movq %mm0, (%esp)
394 ; X86-NEXT: flds {{[0-9]+}}(%esp)
395 ; X86-NEXT: flds (%esp)
396 ; X86-NEXT: movl %ebp, %esp
397 ; X86-NEXT: popl %ebp
400 ; X64-LABEL: test_pfrcp:
401 ; X64: # %bb.0: # %entry
402 ; X64-NEXT: movdq2q %xmm0, %mm0
403 ; X64-NEXT: pfrcp %mm0, %mm0
404 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
405 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
408 %0 = bitcast <2 x float> %a to x86_mmx
409 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
410 %2 = bitcast x86_mmx %1 to <2 x float>
414 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
; test_pfrcpit1: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfrcpit1 on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
416 define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
417 ; X86-LABEL: test_pfrcpit1:
418 ; X86: # %bb.0: # %entry
419 ; X86-NEXT: pushl %ebp
420 ; X86-NEXT: movl %esp, %ebp
421 ; X86-NEXT: andl $-8, %esp
422 ; X86-NEXT: subl $8, %esp
423 ; X86-NEXT: movd 20(%ebp), %mm0
424 ; X86-NEXT: movd 16(%ebp), %mm1
425 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
426 ; X86-NEXT: movd 12(%ebp), %mm0
427 ; X86-NEXT: movd 8(%ebp), %mm2
428 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
429 ; X86-NEXT: pfrcpit1 %mm1, %mm2
430 ; X86-NEXT: movq %mm2, (%esp)
431 ; X86-NEXT: flds {{[0-9]+}}(%esp)
432 ; X86-NEXT: flds (%esp)
433 ; X86-NEXT: movl %ebp, %esp
434 ; X86-NEXT: popl %ebp
437 ; X64-LABEL: test_pfrcpit1:
438 ; X64: # %bb.0: # %entry
439 ; X64-NEXT: movdq2q %xmm1, %mm0
440 ; X64-NEXT: movdq2q %xmm0, %mm1
441 ; X64-NEXT: pfrcpit1 %mm0, %mm1
442 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
443 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
446 %0 = bitcast <2 x float> %a to x86_mmx
447 %1 = bitcast <2 x float> %b to x86_mmx
448 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
449 %3 = bitcast x86_mmx %2 to <2 x float>
453 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
; test_pfrcpit2: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfrcpit2 on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
455 define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
456 ; X86-LABEL: test_pfrcpit2:
457 ; X86: # %bb.0: # %entry
458 ; X86-NEXT: pushl %ebp
459 ; X86-NEXT: movl %esp, %ebp
460 ; X86-NEXT: andl $-8, %esp
461 ; X86-NEXT: subl $8, %esp
462 ; X86-NEXT: movd 20(%ebp), %mm0
463 ; X86-NEXT: movd 16(%ebp), %mm1
464 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
465 ; X86-NEXT: movd 12(%ebp), %mm0
466 ; X86-NEXT: movd 8(%ebp), %mm2
467 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
468 ; X86-NEXT: pfrcpit2 %mm1, %mm2
469 ; X86-NEXT: movq %mm2, (%esp)
470 ; X86-NEXT: flds {{[0-9]+}}(%esp)
471 ; X86-NEXT: flds (%esp)
472 ; X86-NEXT: movl %ebp, %esp
473 ; X86-NEXT: popl %ebp
476 ; X64-LABEL: test_pfrcpit2:
477 ; X64: # %bb.0: # %entry
478 ; X64-NEXT: movdq2q %xmm1, %mm0
479 ; X64-NEXT: movdq2q %xmm0, %mm1
480 ; X64-NEXT: pfrcpit2 %mm0, %mm1
481 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
482 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
485 %0 = bitcast <2 x float> %a to x86_mmx
486 %1 = bitcast <2 x float> %b to x86_mmx
487 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
488 %3 = bitcast x86_mmx %2 to <2 x float>
492 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
; test_pfrsqrt: bitcasts the <2 x float> argument to x86_mmx, calls
; @llvm.x86.3dnow.pfrsqrt, and bitcasts the result to <2 x float>.
; The i686 expectations build the MMX operand from two stack dwords
; (movd + punpckldq); the x86-64 expectations move it from %xmm0 with movdq2q.
494 define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
495 ; X86-LABEL: test_pfrsqrt:
496 ; X86: # %bb.0: # %entry
497 ; X86-NEXT: pushl %ebp
498 ; X86-NEXT: movl %esp, %ebp
499 ; X86-NEXT: andl $-8, %esp
500 ; X86-NEXT: subl $8, %esp
501 ; X86-NEXT: movd 12(%ebp), %mm0
502 ; X86-NEXT: movd 8(%ebp), %mm1
503 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
504 ; X86-NEXT: pfrsqrt %mm1, %mm0
505 ; X86-NEXT: movq %mm0, (%esp)
506 ; X86-NEXT: flds {{[0-9]+}}(%esp)
507 ; X86-NEXT: flds (%esp)
508 ; X86-NEXT: movl %ebp, %esp
509 ; X86-NEXT: popl %ebp
512 ; X64-LABEL: test_pfrsqrt:
513 ; X64: # %bb.0: # %entry
514 ; X64-NEXT: movdq2q %xmm0, %mm0
515 ; X64-NEXT: pfrsqrt %mm0, %mm0
516 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
517 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
520 %0 = bitcast <2 x float> %a to x86_mmx
521 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
522 %2 = bitcast x86_mmx %1 to <2 x float>
526 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
; test_pfrsqit1: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfrsqit1 on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
528 define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
529 ; X86-LABEL: test_pfrsqit1:
530 ; X86: # %bb.0: # %entry
531 ; X86-NEXT: pushl %ebp
532 ; X86-NEXT: movl %esp, %ebp
533 ; X86-NEXT: andl $-8, %esp
534 ; X86-NEXT: subl $8, %esp
535 ; X86-NEXT: movd 20(%ebp), %mm0
536 ; X86-NEXT: movd 16(%ebp), %mm1
537 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
538 ; X86-NEXT: movd 12(%ebp), %mm0
539 ; X86-NEXT: movd 8(%ebp), %mm2
540 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
541 ; X86-NEXT: pfrsqit1 %mm1, %mm2
542 ; X86-NEXT: movq %mm2, (%esp)
543 ; X86-NEXT: flds {{[0-9]+}}(%esp)
544 ; X86-NEXT: flds (%esp)
545 ; X86-NEXT: movl %ebp, %esp
546 ; X86-NEXT: popl %ebp
549 ; X64-LABEL: test_pfrsqit1:
550 ; X64: # %bb.0: # %entry
551 ; X64-NEXT: movdq2q %xmm1, %mm0
552 ; X64-NEXT: movdq2q %xmm0, %mm1
553 ; X64-NEXT: pfrsqit1 %mm0, %mm1
554 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
555 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
558 %0 = bitcast <2 x float> %a to x86_mmx
559 %1 = bitcast <2 x float> %b to x86_mmx
560 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
561 %3 = bitcast x86_mmx %2 to <2 x float>
565 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
; test_pfsub: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfsub on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
567 define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
568 ; X86-LABEL: test_pfsub:
569 ; X86: # %bb.0: # %entry
570 ; X86-NEXT: pushl %ebp
571 ; X86-NEXT: movl %esp, %ebp
572 ; X86-NEXT: andl $-8, %esp
573 ; X86-NEXT: subl $8, %esp
574 ; X86-NEXT: movd 20(%ebp), %mm0
575 ; X86-NEXT: movd 16(%ebp), %mm1
576 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
577 ; X86-NEXT: movd 12(%ebp), %mm0
578 ; X86-NEXT: movd 8(%ebp), %mm2
579 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
580 ; X86-NEXT: pfsub %mm1, %mm2
581 ; X86-NEXT: movq %mm2, (%esp)
582 ; X86-NEXT: flds {{[0-9]+}}(%esp)
583 ; X86-NEXT: flds (%esp)
584 ; X86-NEXT: movl %ebp, %esp
585 ; X86-NEXT: popl %ebp
588 ; X64-LABEL: test_pfsub:
589 ; X64: # %bb.0: # %entry
590 ; X64-NEXT: movdq2q %xmm1, %mm0
591 ; X64-NEXT: movdq2q %xmm0, %mm1
592 ; X64-NEXT: pfsub %mm0, %mm1
593 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
594 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
597 %0 = bitcast <2 x float> %a to x86_mmx
598 %1 = bitcast <2 x float> %b to x86_mmx
599 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
600 %3 = bitcast x86_mmx %2 to <2 x float>
604 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
; test_pfsubr: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnow.pfsubr on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
606 define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
607 ; X86-LABEL: test_pfsubr:
608 ; X86: # %bb.0: # %entry
609 ; X86-NEXT: pushl %ebp
610 ; X86-NEXT: movl %esp, %ebp
611 ; X86-NEXT: andl $-8, %esp
612 ; X86-NEXT: subl $8, %esp
613 ; X86-NEXT: movd 20(%ebp), %mm0
614 ; X86-NEXT: movd 16(%ebp), %mm1
615 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
616 ; X86-NEXT: movd 12(%ebp), %mm0
617 ; X86-NEXT: movd 8(%ebp), %mm2
618 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
619 ; X86-NEXT: pfsubr %mm1, %mm2
620 ; X86-NEXT: movq %mm2, (%esp)
621 ; X86-NEXT: flds {{[0-9]+}}(%esp)
622 ; X86-NEXT: flds (%esp)
623 ; X86-NEXT: movl %ebp, %esp
624 ; X86-NEXT: popl %ebp
627 ; X64-LABEL: test_pfsubr:
628 ; X64: # %bb.0: # %entry
629 ; X64-NEXT: movdq2q %xmm1, %mm0
630 ; X64-NEXT: movdq2q %xmm0, %mm1
631 ; X64-NEXT: pfsubr %mm0, %mm1
632 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
633 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
636 %0 = bitcast <2 x float> %a to x86_mmx
637 %1 = bitcast <2 x float> %b to x86_mmx
638 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
639 %3 = bitcast x86_mmx %2 to <2 x float>
643 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
; test_pi2fd: round-trips the x86_mmx argument through a <2 x i32> bitcast,
; calls @llvm.x86.3dnow.pi2fd, and bitcasts the result to <2 x float>.
; Both the i686 and the x86-64 expectations look for pi2fd operating
; directly on %mm0 as source and destination.
645 define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
646 ; X86-LABEL: test_pi2fd:
647 ; X86: # %bb.0: # %entry
648 ; X86-NEXT: pushl %ebp
649 ; X86-NEXT: movl %esp, %ebp
650 ; X86-NEXT: andl $-8, %esp
651 ; X86-NEXT: subl $8, %esp
652 ; X86-NEXT: pi2fd %mm0, %mm0
653 ; X86-NEXT: movq %mm0, (%esp)
654 ; X86-NEXT: flds {{[0-9]+}}(%esp)
655 ; X86-NEXT: flds (%esp)
656 ; X86-NEXT: movl %ebp, %esp
657 ; X86-NEXT: popl %ebp
660 ; X64-LABEL: test_pi2fd:
661 ; X64: # %bb.0: # %entry
662 ; X64-NEXT: pi2fd %mm0, %mm0
663 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
664 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
667 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
668 %1 = bitcast <2 x i32> %0 to x86_mmx
669 %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
670 %3 = bitcast x86_mmx %2 to <2 x float>
674 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
; test_pmulhrw: round-trips both x86_mmx arguments through <4 x i16> bitcasts,
; calls @llvm.x86.3dnow.pmulhrw on them, and bitcasts the result to <4 x i16>.
; Both the i686 and the x86-64 expectations look for a register-register
; pmulhrw on %mm1, %mm0.
676 define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
677 ; X86-LABEL: test_pmulhrw:
678 ; X86: # %bb.0: # %entry
679 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
680 ; X86-NEXT: pmulhrw %mm1, %mm0
681 ; X86-NEXT: movq %mm0, (%eax)
684 ; X64-LABEL: test_pmulhrw:
685 ; X64: # %bb.0: # %entry
686 ; X64-NEXT: pmulhrw %mm1, %mm0
687 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
688 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
689 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
692 %0 = bitcast x86_mmx %a.coerce to <4 x i16>
693 %1 = bitcast x86_mmx %b.coerce to <4 x i16>
694 %2 = bitcast <4 x i16> %0 to x86_mmx
695 %3 = bitcast <4 x i16> %1 to x86_mmx
696 %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
697 %5 = bitcast x86_mmx %4 to <4 x i16>
701 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
; test_pf2iw: bitcasts the <2 x float> argument to x86_mmx, calls
; @llvm.x86.3dnowa.pf2iw, and bitcasts the result to <2 x i32>.
; The i686 expectations build the MMX operand from two stack dwords
; (movd + punpckldq); the x86-64 expectations move it from %xmm0 with movdq2q.
; NOTE(review): this intrinsic lives in the llvm.x86.3dnowa namespace while the
; RUN lines pass -mattr=+3dnow; TODO confirm that attribute is the intended one.
703 define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
704 ; X86-LABEL: test_pf2iw:
705 ; X86: # %bb.0: # %entry
706 ; X86-NEXT: pushl %ebp
707 ; X86-NEXT: movl %esp, %ebp
708 ; X86-NEXT: andl $-8, %esp
709 ; X86-NEXT: subl $8, %esp
710 ; X86-NEXT: movd 12(%ebp), %mm0
711 ; X86-NEXT: movd 8(%ebp), %mm1
712 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
713 ; X86-NEXT: pf2iw %mm1, %mm0
714 ; X86-NEXT: movq %mm0, (%esp)
715 ; X86-NEXT: movl (%esp), %eax
716 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
717 ; X86-NEXT: movl %ebp, %esp
718 ; X86-NEXT: popl %ebp
721 ; X64-LABEL: test_pf2iw:
722 ; X64: # %bb.0: # %entry
723 ; X64-NEXT: movdq2q %xmm0, %mm0
724 ; X64-NEXT: pf2iw %mm0, %mm0
725 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
726 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
727 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
730 %0 = bitcast <2 x float> %a to x86_mmx
731 %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
732 %2 = bitcast x86_mmx %1 to <2 x i32>
736 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
; test_pfnacc: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnowa.pfnacc on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
738 define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
739 ; X86-LABEL: test_pfnacc:
740 ; X86: # %bb.0: # %entry
741 ; X86-NEXT: pushl %ebp
742 ; X86-NEXT: movl %esp, %ebp
743 ; X86-NEXT: andl $-8, %esp
744 ; X86-NEXT: subl $8, %esp
745 ; X86-NEXT: movd 20(%ebp), %mm0
746 ; X86-NEXT: movd 16(%ebp), %mm1
747 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
748 ; X86-NEXT: movd 12(%ebp), %mm0
749 ; X86-NEXT: movd 8(%ebp), %mm2
750 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
751 ; X86-NEXT: pfnacc %mm1, %mm2
752 ; X86-NEXT: movq %mm2, (%esp)
753 ; X86-NEXT: flds {{[0-9]+}}(%esp)
754 ; X86-NEXT: flds (%esp)
755 ; X86-NEXT: movl %ebp, %esp
756 ; X86-NEXT: popl %ebp
759 ; X64-LABEL: test_pfnacc:
760 ; X64: # %bb.0: # %entry
761 ; X64-NEXT: movdq2q %xmm1, %mm0
762 ; X64-NEXT: movdq2q %xmm0, %mm1
763 ; X64-NEXT: pfnacc %mm0, %mm1
764 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
765 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
768 %0 = bitcast <2 x float> %a to x86_mmx
769 %1 = bitcast <2 x float> %b to x86_mmx
770 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
771 %3 = bitcast x86_mmx %2 to <2 x float>
775 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
; test_pfpnacc: bitcasts both <2 x float> arguments to x86_mmx, calls
; @llvm.x86.3dnowa.pfpnacc on them, and bitcasts the result to <2 x float>.
; The i686 expectations assemble each MMX operand from stack dwords
; (movd + punpckldq); the x86-64 expectations use movdq2q from %xmm0/%xmm1.
777 define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
778 ; X86-LABEL: test_pfpnacc:
779 ; X86: # %bb.0: # %entry
780 ; X86-NEXT: pushl %ebp
781 ; X86-NEXT: movl %esp, %ebp
782 ; X86-NEXT: andl $-8, %esp
783 ; X86-NEXT: subl $8, %esp
784 ; X86-NEXT: movd 20(%ebp), %mm0
785 ; X86-NEXT: movd 16(%ebp), %mm1
786 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
787 ; X86-NEXT: movd 12(%ebp), %mm0
788 ; X86-NEXT: movd 8(%ebp), %mm2
789 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
790 ; X86-NEXT: pfpnacc %mm1, %mm2
791 ; X86-NEXT: movq %mm2, (%esp)
792 ; X86-NEXT: flds {{[0-9]+}}(%esp)
793 ; X86-NEXT: flds (%esp)
794 ; X86-NEXT: movl %ebp, %esp
795 ; X86-NEXT: popl %ebp
798 ; X64-LABEL: test_pfpnacc:
799 ; X64: # %bb.0: # %entry
800 ; X64-NEXT: movdq2q %xmm1, %mm0
801 ; X64-NEXT: movdq2q %xmm0, %mm1
802 ; X64-NEXT: pfpnacc %mm0, %mm1
803 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
804 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
807 %0 = bitcast <2 x float> %a to x86_mmx
808 %1 = bitcast <2 x float> %b to x86_mmx
809 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
810 %3 = bitcast x86_mmx %2 to <2 x float>
814 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
; test_pi2fw: round-trips the x86_mmx argument through a <2 x i32> bitcast,
; calls @llvm.x86.3dnowa.pi2fw, and bitcasts the result to <2 x float>.
; Both the i686 and the x86-64 expectations look for pi2fw operating
; directly on %mm0 as source and destination.
816 define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
817 ; X86-LABEL: test_pi2fw:
818 ; X86: # %bb.0: # %entry
819 ; X86-NEXT: pushl %ebp
820 ; X86-NEXT: movl %esp, %ebp
821 ; X86-NEXT: andl $-8, %esp
822 ; X86-NEXT: subl $8, %esp
823 ; X86-NEXT: pi2fw %mm0, %mm0
824 ; X86-NEXT: movq %mm0, (%esp)
825 ; X86-NEXT: flds {{[0-9]+}}(%esp)
826 ; X86-NEXT: flds (%esp)
827 ; X86-NEXT: movl %ebp, %esp
828 ; X86-NEXT: popl %ebp
831 ; X64-LABEL: test_pi2fw:
832 ; X64: # %bb.0: # %entry
833 ; X64-NEXT: pi2fw %mm0, %mm0
834 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
835 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
838 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
839 %1 = bitcast <2 x i32> %0 to x86_mmx
840 %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
841 %3 = bitcast x86_mmx %2 to <2 x float>
845 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
; test_pswapdsf: bitcasts the <2 x float> argument to x86_mmx, calls
; @llvm.x86.3dnowa.pswapd, and bitcasts the result to <2 x float>.
; The expected pswapd carries an asm comment describing the result as the
; source's elements in [1,0] order.
847 define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
848 ; X86-LABEL: test_pswapdsf:
849 ; X86: # %bb.0: # %entry
850 ; X86-NEXT: pushl %ebp
851 ; X86-NEXT: movl %esp, %ebp
852 ; X86-NEXT: andl $-8, %esp
853 ; X86-NEXT: subl $8, %esp
854 ; X86-NEXT: movd 12(%ebp), %mm0
855 ; X86-NEXT: movd 8(%ebp), %mm1
856 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
857 ; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0]
858 ; X86-NEXT: movq %mm0, (%esp)
859 ; X86-NEXT: flds {{[0-9]+}}(%esp)
860 ; X86-NEXT: flds (%esp)
861 ; X86-NEXT: movl %ebp, %esp
862 ; X86-NEXT: popl %ebp
865 ; X64-LABEL: test_pswapdsf:
866 ; X64: # %bb.0: # %entry
867 ; X64-NEXT: movdq2q %xmm0, %mm0
868 ; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0]
869 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
870 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
873 %0 = bitcast <2 x float> %a to x86_mmx
874 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
875 %2 = bitcast x86_mmx %1 to <2 x float>
; test_pswapdsi: bitcasts the <2 x i32> argument to x86_mmx, calls
; @llvm.x86.3dnowa.pswapd, and bitcasts the result to <2 x i32>.
; The x86-64 expectations include a pshufd on %xmm0 before the movdq2q and
; another after the result is reloaded; the i686 expectations return the
; result in %eax/%edx.
879 define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
880 ; X86-LABEL: test_pswapdsi:
881 ; X86: # %bb.0: # %entry
882 ; X86-NEXT: pushl %ebp
883 ; X86-NEXT: movl %esp, %ebp
884 ; X86-NEXT: andl $-8, %esp
885 ; X86-NEXT: subl $8, %esp
886 ; X86-NEXT: movd 12(%ebp), %mm0
887 ; X86-NEXT: movd 8(%ebp), %mm1
888 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
889 ; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0]
890 ; X86-NEXT: movq %mm0, (%esp)
891 ; X86-NEXT: movl (%esp), %eax
892 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
893 ; X86-NEXT: movl %ebp, %esp
894 ; X86-NEXT: popl %ebp
897 ; X64-LABEL: test_pswapdsi:
898 ; X64: # %bb.0: # %entry
899 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
900 ; X64-NEXT: movdq2q %xmm0, %mm0
901 ; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0]
902 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
903 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
904 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
907 %0 = bitcast <2 x i32> %a to x86_mmx
908 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
909 %2 = bitcast x86_mmx %1 to <2 x i32>
913 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone