1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; The tests below call both llvm.x86.3dnow.* and llvm.x86.3dnowa.* intrinsics,
; so the extended feature (+3dnowa, which implies +3dnow) must be enabled for
; instruction selection of the 3dnowa intrinsics to succeed.
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X64
; Codegen test for llvm.x86.3dnow.pavgusb. Both x86_mmx arguments are
; round-tripped through <8 x i8> bitcasts, passed to the intrinsic, and the
; result is bitcast to <8 x i8>. The checks expect a `pavgusb %mm1, %mm0`
; on both targets; X86 stores the result through %eax, X64 moves it to %xmm0.
5 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
6 ; X86-LABEL: test_pavgusb:
7 ; X86: # %bb.0: # %entry
8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9 ; X86-NEXT: pavgusb %mm1, %mm0
10 ; X86-NEXT: movq %mm0, (%eax)
13 ; X64-LABEL: test_pavgusb:
14 ; X64: # %bb.0: # %entry
15 ; X64-NEXT: pavgusb %mm1, %mm0
16 ; X64-NEXT: movq2dq %mm0, %xmm0
19 %0 = bitcast x86_mmx %a.coerce to <8 x i8>
20 %1 = bitcast x86_mmx %b.coerce to <8 x i8>
21 %2 = bitcast <8 x i8> %0 to x86_mmx
22 %3 = bitcast <8 x i8> %1 to x86_mmx
23 %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
24 %5 = bitcast x86_mmx %4 to <8 x i8>
; Declaration of the intrinsic under test.
28 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pf2id. The <2 x float> argument is bitcast to
; x86_mmx, passed to the intrinsic, and the result is bitcast to <2 x i32>.
; X86 checks: the operand is assembled from two stack slots (movd + punpckldq)
; and the result is read back into %eax/%edx. X64 checks: movdq2q in,
; movq2dq out.
30 define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
31 ; X86-LABEL: test_pf2id:
32 ; X86: # %bb.0: # %entry
33 ; X86-NEXT: pushl %ebp
34 ; X86-NEXT: movl %esp, %ebp
35 ; X86-NEXT: andl $-8, %esp
36 ; X86-NEXT: subl $8, %esp
37 ; X86-NEXT: movd 12(%ebp), %mm0
38 ; X86-NEXT: movd 8(%ebp), %mm1
39 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
40 ; X86-NEXT: pf2id %mm1, %mm0
41 ; X86-NEXT: movq %mm0, (%esp)
42 ; X86-NEXT: movl (%esp), %eax
43 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
44 ; X86-NEXT: movl %ebp, %esp
48 ; X64-LABEL: test_pf2id:
49 ; X64: # %bb.0: # %entry
50 ; X64-NEXT: movdq2q %xmm0, %mm0
51 ; X64-NEXT: pf2id %mm0, %mm0
52 ; X64-NEXT: movq2dq %mm0, %xmm0
55 %0 = bitcast <2 x float> %a to x86_mmx
56 %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
57 %2 = bitcast x86_mmx %1 to <2 x i32>
; Declaration of the intrinsic under test.
61 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfacc. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
63 define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
64 ; X86-LABEL: test_pfacc:
65 ; X86: # %bb.0: # %entry
66 ; X86-NEXT: pushl %ebp
67 ; X86-NEXT: movl %esp, %ebp
68 ; X86-NEXT: andl $-8, %esp
69 ; X86-NEXT: subl $8, %esp
70 ; X86-NEXT: movd 20(%ebp), %mm0
71 ; X86-NEXT: movd 16(%ebp), %mm1
72 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
73 ; X86-NEXT: movd 12(%ebp), %mm0
74 ; X86-NEXT: movd 8(%ebp), %mm2
75 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
76 ; X86-NEXT: pfacc %mm1, %mm2
77 ; X86-NEXT: movq %mm2, (%esp)
78 ; X86-NEXT: flds {{[0-9]+}}(%esp)
79 ; X86-NEXT: flds (%esp)
80 ; X86-NEXT: movl %ebp, %esp
84 ; X64-LABEL: test_pfacc:
85 ; X64: # %bb.0: # %entry
86 ; X64-NEXT: movdq2q %xmm1, %mm0
87 ; X64-NEXT: movdq2q %xmm0, %mm1
88 ; X64-NEXT: pfacc %mm0, %mm1
89 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
90 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
93 %0 = bitcast <2 x float> %a to x86_mmx
94 %1 = bitcast <2 x float> %b to x86_mmx
95 %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
96 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
100 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfadd. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
102 define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
103 ; X86-LABEL: test_pfadd:
104 ; X86: # %bb.0: # %entry
105 ; X86-NEXT: pushl %ebp
106 ; X86-NEXT: movl %esp, %ebp
107 ; X86-NEXT: andl $-8, %esp
108 ; X86-NEXT: subl $8, %esp
109 ; X86-NEXT: movd 20(%ebp), %mm0
110 ; X86-NEXT: movd 16(%ebp), %mm1
111 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
112 ; X86-NEXT: movd 12(%ebp), %mm0
113 ; X86-NEXT: movd 8(%ebp), %mm2
114 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
115 ; X86-NEXT: pfadd %mm1, %mm2
116 ; X86-NEXT: movq %mm2, (%esp)
117 ; X86-NEXT: flds {{[0-9]+}}(%esp)
118 ; X86-NEXT: flds (%esp)
119 ; X86-NEXT: movl %ebp, %esp
120 ; X86-NEXT: popl %ebp
123 ; X64-LABEL: test_pfadd:
124 ; X64: # %bb.0: # %entry
125 ; X64-NEXT: movdq2q %xmm1, %mm0
126 ; X64-NEXT: movdq2q %xmm0, %mm1
127 ; X64-NEXT: pfadd %mm0, %mm1
128 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
129 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
132 %0 = bitcast <2 x float> %a to x86_mmx
133 %1 = bitcast <2 x float> %b to x86_mmx
134 %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
135 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
139 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfcmpeq. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast to
; <2 x i32>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is read back into %eax/%edx. X64 checks:
; movdq2q in, movq2dq out.
141 define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
142 ; X86-LABEL: test_pfcmpeq:
143 ; X86: # %bb.0: # %entry
144 ; X86-NEXT: pushl %ebp
145 ; X86-NEXT: movl %esp, %ebp
146 ; X86-NEXT: andl $-8, %esp
147 ; X86-NEXT: subl $8, %esp
148 ; X86-NEXT: movd 20(%ebp), %mm0
149 ; X86-NEXT: movd 16(%ebp), %mm1
150 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
151 ; X86-NEXT: movd 12(%ebp), %mm0
152 ; X86-NEXT: movd 8(%ebp), %mm2
153 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
154 ; X86-NEXT: pfcmpeq %mm1, %mm2
155 ; X86-NEXT: movq %mm2, (%esp)
156 ; X86-NEXT: movl (%esp), %eax
157 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
158 ; X86-NEXT: movl %ebp, %esp
159 ; X86-NEXT: popl %ebp
162 ; X64-LABEL: test_pfcmpeq:
163 ; X64: # %bb.0: # %entry
164 ; X64-NEXT: movdq2q %xmm1, %mm0
165 ; X64-NEXT: movdq2q %xmm0, %mm1
166 ; X64-NEXT: pfcmpeq %mm0, %mm1
167 ; X64-NEXT: movq2dq %mm1, %xmm0
170 %0 = bitcast <2 x float> %a to x86_mmx
171 %1 = bitcast <2 x float> %b to x86_mmx
172 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
173 %3 = bitcast x86_mmx %2 to <2 x i32>
; Declaration of the intrinsic under test.
177 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfcmpge. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast to
; <2 x i32>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is read back into %eax/%edx. X64 checks:
; movdq2q in, movq2dq out.
179 define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
180 ; X86-LABEL: test_pfcmpge:
181 ; X86: # %bb.0: # %entry
182 ; X86-NEXT: pushl %ebp
183 ; X86-NEXT: movl %esp, %ebp
184 ; X86-NEXT: andl $-8, %esp
185 ; X86-NEXT: subl $8, %esp
186 ; X86-NEXT: movd 20(%ebp), %mm0
187 ; X86-NEXT: movd 16(%ebp), %mm1
188 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
189 ; X86-NEXT: movd 12(%ebp), %mm0
190 ; X86-NEXT: movd 8(%ebp), %mm2
191 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
192 ; X86-NEXT: pfcmpge %mm1, %mm2
193 ; X86-NEXT: movq %mm2, (%esp)
194 ; X86-NEXT: movl (%esp), %eax
195 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
196 ; X86-NEXT: movl %ebp, %esp
197 ; X86-NEXT: popl %ebp
200 ; X64-LABEL: test_pfcmpge:
201 ; X64: # %bb.0: # %entry
202 ; X64-NEXT: movdq2q %xmm1, %mm0
203 ; X64-NEXT: movdq2q %xmm0, %mm1
204 ; X64-NEXT: pfcmpge %mm0, %mm1
205 ; X64-NEXT: movq2dq %mm1, %xmm0
208 %0 = bitcast <2 x float> %a to x86_mmx
209 %1 = bitcast <2 x float> %b to x86_mmx
210 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
211 %3 = bitcast x86_mmx %2 to <2 x i32>
; Declaration of the intrinsic under test.
215 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfcmpgt. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast to
; <2 x i32>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is read back into %eax/%edx. X64 checks:
; movdq2q in, movq2dq out.
217 define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
218 ; X86-LABEL: test_pfcmpgt:
219 ; X86: # %bb.0: # %entry
220 ; X86-NEXT: pushl %ebp
221 ; X86-NEXT: movl %esp, %ebp
222 ; X86-NEXT: andl $-8, %esp
223 ; X86-NEXT: subl $8, %esp
224 ; X86-NEXT: movd 20(%ebp), %mm0
225 ; X86-NEXT: movd 16(%ebp), %mm1
226 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
227 ; X86-NEXT: movd 12(%ebp), %mm0
228 ; X86-NEXT: movd 8(%ebp), %mm2
229 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
230 ; X86-NEXT: pfcmpgt %mm1, %mm2
231 ; X86-NEXT: movq %mm2, (%esp)
232 ; X86-NEXT: movl (%esp), %eax
233 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
234 ; X86-NEXT: movl %ebp, %esp
235 ; X86-NEXT: popl %ebp
238 ; X64-LABEL: test_pfcmpgt:
239 ; X64: # %bb.0: # %entry
240 ; X64-NEXT: movdq2q %xmm1, %mm0
241 ; X64-NEXT: movdq2q %xmm0, %mm1
242 ; X64-NEXT: pfcmpgt %mm0, %mm1
243 ; X64-NEXT: movq2dq %mm1, %xmm0
246 %0 = bitcast <2 x float> %a to x86_mmx
247 %1 = bitcast <2 x float> %b to x86_mmx
248 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
249 %3 = bitcast x86_mmx %2 to <2 x i32>
; Declaration of the intrinsic under test.
253 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfmax. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
255 define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
256 ; X86-LABEL: test_pfmax:
257 ; X86: # %bb.0: # %entry
258 ; X86-NEXT: pushl %ebp
259 ; X86-NEXT: movl %esp, %ebp
260 ; X86-NEXT: andl $-8, %esp
261 ; X86-NEXT: subl $8, %esp
262 ; X86-NEXT: movd 20(%ebp), %mm0
263 ; X86-NEXT: movd 16(%ebp), %mm1
264 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
265 ; X86-NEXT: movd 12(%ebp), %mm0
266 ; X86-NEXT: movd 8(%ebp), %mm2
267 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
268 ; X86-NEXT: pfmax %mm1, %mm2
269 ; X86-NEXT: movq %mm2, (%esp)
270 ; X86-NEXT: flds {{[0-9]+}}(%esp)
271 ; X86-NEXT: flds (%esp)
272 ; X86-NEXT: movl %ebp, %esp
273 ; X86-NEXT: popl %ebp
276 ; X64-LABEL: test_pfmax:
277 ; X64: # %bb.0: # %entry
278 ; X64-NEXT: movdq2q %xmm1, %mm0
279 ; X64-NEXT: movdq2q %xmm0, %mm1
280 ; X64-NEXT: pfmax %mm0, %mm1
281 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
282 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
285 %0 = bitcast <2 x float> %a to x86_mmx
286 %1 = bitcast <2 x float> %b to x86_mmx
287 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
288 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
292 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfmin. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
294 define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
295 ; X86-LABEL: test_pfmin:
296 ; X86: # %bb.0: # %entry
297 ; X86-NEXT: pushl %ebp
298 ; X86-NEXT: movl %esp, %ebp
299 ; X86-NEXT: andl $-8, %esp
300 ; X86-NEXT: subl $8, %esp
301 ; X86-NEXT: movd 20(%ebp), %mm0
302 ; X86-NEXT: movd 16(%ebp), %mm1
303 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
304 ; X86-NEXT: movd 12(%ebp), %mm0
305 ; X86-NEXT: movd 8(%ebp), %mm2
306 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
307 ; X86-NEXT: pfmin %mm1, %mm2
308 ; X86-NEXT: movq %mm2, (%esp)
309 ; X86-NEXT: flds {{[0-9]+}}(%esp)
310 ; X86-NEXT: flds (%esp)
311 ; X86-NEXT: movl %ebp, %esp
312 ; X86-NEXT: popl %ebp
315 ; X64-LABEL: test_pfmin:
316 ; X64: # %bb.0: # %entry
317 ; X64-NEXT: movdq2q %xmm1, %mm0
318 ; X64-NEXT: movdq2q %xmm0, %mm1
319 ; X64-NEXT: pfmin %mm0, %mm1
320 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
321 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
324 %0 = bitcast <2 x float> %a to x86_mmx
325 %1 = bitcast <2 x float> %b to x86_mmx
326 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
327 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
331 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfmul. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
333 define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
334 ; X86-LABEL: test_pfmul:
335 ; X86: # %bb.0: # %entry
336 ; X86-NEXT: pushl %ebp
337 ; X86-NEXT: movl %esp, %ebp
338 ; X86-NEXT: andl $-8, %esp
339 ; X86-NEXT: subl $8, %esp
340 ; X86-NEXT: movd 20(%ebp), %mm0
341 ; X86-NEXT: movd 16(%ebp), %mm1
342 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
343 ; X86-NEXT: movd 12(%ebp), %mm0
344 ; X86-NEXT: movd 8(%ebp), %mm2
345 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
346 ; X86-NEXT: pfmul %mm1, %mm2
347 ; X86-NEXT: movq %mm2, (%esp)
348 ; X86-NEXT: flds {{[0-9]+}}(%esp)
349 ; X86-NEXT: flds (%esp)
350 ; X86-NEXT: movl %ebp, %esp
351 ; X86-NEXT: popl %ebp
354 ; X64-LABEL: test_pfmul:
355 ; X64: # %bb.0: # %entry
356 ; X64-NEXT: movdq2q %xmm1, %mm0
357 ; X64-NEXT: movdq2q %xmm0, %mm1
358 ; X64-NEXT: pfmul %mm0, %mm1
359 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
360 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
363 %0 = bitcast <2 x float> %a to x86_mmx
364 %1 = bitcast <2 x float> %b to x86_mmx
365 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
366 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
370 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfrcp. The <2 x float> argument is bitcast to
; x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: the operand is assembled from two stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
372 define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
373 ; X86-LABEL: test_pfrcp:
374 ; X86: # %bb.0: # %entry
375 ; X86-NEXT: pushl %ebp
376 ; X86-NEXT: movl %esp, %ebp
377 ; X86-NEXT: andl $-8, %esp
378 ; X86-NEXT: subl $8, %esp
379 ; X86-NEXT: movd 12(%ebp), %mm0
380 ; X86-NEXT: movd 8(%ebp), %mm1
381 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
382 ; X86-NEXT: pfrcp %mm1, %mm0
383 ; X86-NEXT: movq %mm0, (%esp)
384 ; X86-NEXT: flds {{[0-9]+}}(%esp)
385 ; X86-NEXT: flds (%esp)
386 ; X86-NEXT: movl %ebp, %esp
387 ; X86-NEXT: popl %ebp
390 ; X64-LABEL: test_pfrcp:
391 ; X64: # %bb.0: # %entry
392 ; X64-NEXT: movdq2q %xmm0, %mm0
393 ; X64-NEXT: pfrcp %mm0, %mm0
394 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
395 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
398 %0 = bitcast <2 x float> %a to x86_mmx
399 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
400 %2 = bitcast x86_mmx %1 to <2 x float>
; Declaration of the intrinsic under test.
404 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfrcpit1. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast back
; to <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
406 define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
407 ; X86-LABEL: test_pfrcpit1:
408 ; X86: # %bb.0: # %entry
409 ; X86-NEXT: pushl %ebp
410 ; X86-NEXT: movl %esp, %ebp
411 ; X86-NEXT: andl $-8, %esp
412 ; X86-NEXT: subl $8, %esp
413 ; X86-NEXT: movd 20(%ebp), %mm0
414 ; X86-NEXT: movd 16(%ebp), %mm1
415 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
416 ; X86-NEXT: movd 12(%ebp), %mm0
417 ; X86-NEXT: movd 8(%ebp), %mm2
418 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
419 ; X86-NEXT: pfrcpit1 %mm1, %mm2
420 ; X86-NEXT: movq %mm2, (%esp)
421 ; X86-NEXT: flds {{[0-9]+}}(%esp)
422 ; X86-NEXT: flds (%esp)
423 ; X86-NEXT: movl %ebp, %esp
424 ; X86-NEXT: popl %ebp
427 ; X64-LABEL: test_pfrcpit1:
428 ; X64: # %bb.0: # %entry
429 ; X64-NEXT: movdq2q %xmm1, %mm0
430 ; X64-NEXT: movdq2q %xmm0, %mm1
431 ; X64-NEXT: pfrcpit1 %mm0, %mm1
432 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
433 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
436 %0 = bitcast <2 x float> %a to x86_mmx
437 %1 = bitcast <2 x float> %b to x86_mmx
438 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
439 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
443 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfrcpit2. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast back
; to <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
445 define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
446 ; X86-LABEL: test_pfrcpit2:
447 ; X86: # %bb.0: # %entry
448 ; X86-NEXT: pushl %ebp
449 ; X86-NEXT: movl %esp, %ebp
450 ; X86-NEXT: andl $-8, %esp
451 ; X86-NEXT: subl $8, %esp
452 ; X86-NEXT: movd 20(%ebp), %mm0
453 ; X86-NEXT: movd 16(%ebp), %mm1
454 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
455 ; X86-NEXT: movd 12(%ebp), %mm0
456 ; X86-NEXT: movd 8(%ebp), %mm2
457 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
458 ; X86-NEXT: pfrcpit2 %mm1, %mm2
459 ; X86-NEXT: movq %mm2, (%esp)
460 ; X86-NEXT: flds {{[0-9]+}}(%esp)
461 ; X86-NEXT: flds (%esp)
462 ; X86-NEXT: movl %ebp, %esp
463 ; X86-NEXT: popl %ebp
466 ; X64-LABEL: test_pfrcpit2:
467 ; X64: # %bb.0: # %entry
468 ; X64-NEXT: movdq2q %xmm1, %mm0
469 ; X64-NEXT: movdq2q %xmm0, %mm1
470 ; X64-NEXT: pfrcpit2 %mm0, %mm1
471 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
472 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
475 %0 = bitcast <2 x float> %a to x86_mmx
476 %1 = bitcast <2 x float> %b to x86_mmx
477 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
478 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
482 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfrsqrt. The <2 x float> argument is bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: the operand is assembled from two stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
484 define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
485 ; X86-LABEL: test_pfrsqrt:
486 ; X86: # %bb.0: # %entry
487 ; X86-NEXT: pushl %ebp
488 ; X86-NEXT: movl %esp, %ebp
489 ; X86-NEXT: andl $-8, %esp
490 ; X86-NEXT: subl $8, %esp
491 ; X86-NEXT: movd 12(%ebp), %mm0
492 ; X86-NEXT: movd 8(%ebp), %mm1
493 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
494 ; X86-NEXT: pfrsqrt %mm1, %mm0
495 ; X86-NEXT: movq %mm0, (%esp)
496 ; X86-NEXT: flds {{[0-9]+}}(%esp)
497 ; X86-NEXT: flds (%esp)
498 ; X86-NEXT: movl %ebp, %esp
499 ; X86-NEXT: popl %ebp
502 ; X64-LABEL: test_pfrsqrt:
503 ; X64: # %bb.0: # %entry
504 ; X64-NEXT: movdq2q %xmm0, %mm0
505 ; X64-NEXT: pfrsqrt %mm0, %mm0
506 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
507 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
510 %0 = bitcast <2 x float> %a to x86_mmx
511 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
512 %2 = bitcast x86_mmx %1 to <2 x float>
; Declaration of the intrinsic under test.
516 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfrsqit1. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast back
; to <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
518 define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
519 ; X86-LABEL: test_pfrsqit1:
520 ; X86: # %bb.0: # %entry
521 ; X86-NEXT: pushl %ebp
522 ; X86-NEXT: movl %esp, %ebp
523 ; X86-NEXT: andl $-8, %esp
524 ; X86-NEXT: subl $8, %esp
525 ; X86-NEXT: movd 20(%ebp), %mm0
526 ; X86-NEXT: movd 16(%ebp), %mm1
527 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
528 ; X86-NEXT: movd 12(%ebp), %mm0
529 ; X86-NEXT: movd 8(%ebp), %mm2
530 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
531 ; X86-NEXT: pfrsqit1 %mm1, %mm2
532 ; X86-NEXT: movq %mm2, (%esp)
533 ; X86-NEXT: flds {{[0-9]+}}(%esp)
534 ; X86-NEXT: flds (%esp)
535 ; X86-NEXT: movl %ebp, %esp
536 ; X86-NEXT: popl %ebp
539 ; X64-LABEL: test_pfrsqit1:
540 ; X64: # %bb.0: # %entry
541 ; X64-NEXT: movdq2q %xmm1, %mm0
542 ; X64-NEXT: movdq2q %xmm0, %mm1
543 ; X64-NEXT: pfrsqit1 %mm0, %mm1
544 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
545 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
548 %0 = bitcast <2 x float> %a to x86_mmx
549 %1 = bitcast <2 x float> %b to x86_mmx
550 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
551 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
555 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfsub. Both <2 x float> arguments are bitcast
; to x86_mmx, passed to the intrinsic, and the result is bitcast back to
; <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
557 define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
558 ; X86-LABEL: test_pfsub:
559 ; X86: # %bb.0: # %entry
560 ; X86-NEXT: pushl %ebp
561 ; X86-NEXT: movl %esp, %ebp
562 ; X86-NEXT: andl $-8, %esp
563 ; X86-NEXT: subl $8, %esp
564 ; X86-NEXT: movd 20(%ebp), %mm0
565 ; X86-NEXT: movd 16(%ebp), %mm1
566 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
567 ; X86-NEXT: movd 12(%ebp), %mm0
568 ; X86-NEXT: movd 8(%ebp), %mm2
569 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
570 ; X86-NEXT: pfsub %mm1, %mm2
571 ; X86-NEXT: movq %mm2, (%esp)
572 ; X86-NEXT: flds {{[0-9]+}}(%esp)
573 ; X86-NEXT: flds (%esp)
574 ; X86-NEXT: movl %ebp, %esp
575 ; X86-NEXT: popl %ebp
578 ; X64-LABEL: test_pfsub:
579 ; X64: # %bb.0: # %entry
580 ; X64-NEXT: movdq2q %xmm1, %mm0
581 ; X64-NEXT: movdq2q %xmm0, %mm1
582 ; X64-NEXT: pfsub %mm0, %mm1
583 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
584 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
587 %0 = bitcast <2 x float> %a to x86_mmx
588 %1 = bitcast <2 x float> %b to x86_mmx
589 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
590 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
594 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pfsubr. Both <2 x float> arguments are
; bitcast to x86_mmx, passed to the intrinsic, and the result is bitcast back
; to <2 x float>. X86 checks: operands are assembled from stack slots
; (movd + punpckldq) and the result is reloaded with flds. X64 checks:
; movdq2q in, result spilled and reloaded with movaps.
596 define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
597 ; X86-LABEL: test_pfsubr:
598 ; X86: # %bb.0: # %entry
599 ; X86-NEXT: pushl %ebp
600 ; X86-NEXT: movl %esp, %ebp
601 ; X86-NEXT: andl $-8, %esp
602 ; X86-NEXT: subl $8, %esp
603 ; X86-NEXT: movd 20(%ebp), %mm0
604 ; X86-NEXT: movd 16(%ebp), %mm1
605 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
606 ; X86-NEXT: movd 12(%ebp), %mm0
607 ; X86-NEXT: movd 8(%ebp), %mm2
608 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
609 ; X86-NEXT: pfsubr %mm1, %mm2
610 ; X86-NEXT: movq %mm2, (%esp)
611 ; X86-NEXT: flds {{[0-9]+}}(%esp)
612 ; X86-NEXT: flds (%esp)
613 ; X86-NEXT: movl %ebp, %esp
614 ; X86-NEXT: popl %ebp
617 ; X64-LABEL: test_pfsubr:
618 ; X64: # %bb.0: # %entry
619 ; X64-NEXT: movdq2q %xmm1, %mm0
620 ; X64-NEXT: movdq2q %xmm0, %mm1
621 ; X64-NEXT: pfsubr %mm0, %mm1
622 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
623 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
626 %0 = bitcast <2 x float> %a to x86_mmx
627 %1 = bitcast <2 x float> %b to x86_mmx
628 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
629 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
633 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pi2fd. The x86_mmx argument is round-tripped
; through a <2 x i32> bitcast, passed to the intrinsic, and the result is
; bitcast to <2 x float>. The checks expect `pi2fd %mm0, %mm0` operating
; directly on %mm0 on both targets; X86 reloads the result with flds, X64
; with movaps.
635 define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
636 ; X86-LABEL: test_pi2fd:
637 ; X86: # %bb.0: # %entry
638 ; X86-NEXT: pushl %ebp
639 ; X86-NEXT: movl %esp, %ebp
640 ; X86-NEXT: andl $-8, %esp
641 ; X86-NEXT: subl $8, %esp
642 ; X86-NEXT: pi2fd %mm0, %mm0
643 ; X86-NEXT: movq %mm0, (%esp)
644 ; X86-NEXT: flds {{[0-9]+}}(%esp)
645 ; X86-NEXT: flds (%esp)
646 ; X86-NEXT: movl %ebp, %esp
647 ; X86-NEXT: popl %ebp
650 ; X64-LABEL: test_pi2fd:
651 ; X64: # %bb.0: # %entry
652 ; X64-NEXT: pi2fd %mm0, %mm0
653 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
654 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
657 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
658 %1 = bitcast <2 x i32> %0 to x86_mmx
659 %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
660 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
664 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnow.pmulhrw. Both x86_mmx arguments are
; round-tripped through <4 x i16> bitcasts, passed to the intrinsic, and the
; result is bitcast to <4 x i16>. The checks expect `pmulhrw %mm1, %mm0` on
; both targets; X86 stores the result through %eax, X64 moves it to %xmm0.
666 define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
667 ; X86-LABEL: test_pmulhrw:
668 ; X86: # %bb.0: # %entry
669 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
670 ; X86-NEXT: pmulhrw %mm1, %mm0
671 ; X86-NEXT: movq %mm0, (%eax)
674 ; X64-LABEL: test_pmulhrw:
675 ; X64: # %bb.0: # %entry
676 ; X64-NEXT: pmulhrw %mm1, %mm0
677 ; X64-NEXT: movq2dq %mm0, %xmm0
680 %0 = bitcast x86_mmx %a.coerce to <4 x i16>
681 %1 = bitcast x86_mmx %b.coerce to <4 x i16>
682 %2 = bitcast <4 x i16> %0 to x86_mmx
683 %3 = bitcast <4 x i16> %1 to x86_mmx
684 %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
685 %5 = bitcast x86_mmx %4 to <4 x i16>
; Declaration of the intrinsic under test.
689 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnowa.pf2iw (note the 3dnowa intrinsic
; namespace). The <2 x float> argument is bitcast to x86_mmx, passed to the
; intrinsic, and the result is bitcast to <2 x i32>. X86 checks: the operand
; is assembled from two stack slots (movd + punpckldq) and the result is read
; back into %eax/%edx. X64 checks: movdq2q in, movq2dq out.
691 define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
692 ; X86-LABEL: test_pf2iw:
693 ; X86: # %bb.0: # %entry
694 ; X86-NEXT: pushl %ebp
695 ; X86-NEXT: movl %esp, %ebp
696 ; X86-NEXT: andl $-8, %esp
697 ; X86-NEXT: subl $8, %esp
698 ; X86-NEXT: movd 12(%ebp), %mm0
699 ; X86-NEXT: movd 8(%ebp), %mm1
700 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
701 ; X86-NEXT: pf2iw %mm1, %mm0
702 ; X86-NEXT: movq %mm0, (%esp)
703 ; X86-NEXT: movl (%esp), %eax
704 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
705 ; X86-NEXT: movl %ebp, %esp
706 ; X86-NEXT: popl %ebp
709 ; X64-LABEL: test_pf2iw:
710 ; X64: # %bb.0: # %entry
711 ; X64-NEXT: movdq2q %xmm0, %mm0
712 ; X64-NEXT: pf2iw %mm0, %mm0
713 ; X64-NEXT: movq2dq %mm0, %xmm0
716 %0 = bitcast <2 x float> %a to x86_mmx
717 %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
718 %2 = bitcast x86_mmx %1 to <2 x i32>
; Declaration of the intrinsic under test.
722 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnowa.pfnacc (note the 3dnowa intrinsic
; namespace). Both <2 x float> arguments are bitcast to x86_mmx, passed to the
; intrinsic, and the result is bitcast back to <2 x float>. X86 checks:
; operands are assembled from stack slots (movd + punpckldq) and the result
; is reloaded with flds. X64 checks: movdq2q in, result reloaded with movaps.
724 define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
725 ; X86-LABEL: test_pfnacc:
726 ; X86: # %bb.0: # %entry
727 ; X86-NEXT: pushl %ebp
728 ; X86-NEXT: movl %esp, %ebp
729 ; X86-NEXT: andl $-8, %esp
730 ; X86-NEXT: subl $8, %esp
731 ; X86-NEXT: movd 20(%ebp), %mm0
732 ; X86-NEXT: movd 16(%ebp), %mm1
733 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
734 ; X86-NEXT: movd 12(%ebp), %mm0
735 ; X86-NEXT: movd 8(%ebp), %mm2
736 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
737 ; X86-NEXT: pfnacc %mm1, %mm2
738 ; X86-NEXT: movq %mm2, (%esp)
739 ; X86-NEXT: flds {{[0-9]+}}(%esp)
740 ; X86-NEXT: flds (%esp)
741 ; X86-NEXT: movl %ebp, %esp
742 ; X86-NEXT: popl %ebp
745 ; X64-LABEL: test_pfnacc:
746 ; X64: # %bb.0: # %entry
747 ; X64-NEXT: movdq2q %xmm1, %mm0
748 ; X64-NEXT: movdq2q %xmm0, %mm1
749 ; X64-NEXT: pfnacc %mm0, %mm1
750 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
751 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
754 %0 = bitcast <2 x float> %a to x86_mmx
755 %1 = bitcast <2 x float> %b to x86_mmx
756 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
757 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
761 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnowa.pfpnacc (note the 3dnowa intrinsic
; namespace). Both <2 x float> arguments are bitcast to x86_mmx, passed to the
; intrinsic, and the result is bitcast back to <2 x float>. X86 checks:
; operands are assembled from stack slots (movd + punpckldq) and the result
; is reloaded with flds. X64 checks: movdq2q in, result reloaded with movaps.
763 define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
764 ; X86-LABEL: test_pfpnacc:
765 ; X86: # %bb.0: # %entry
766 ; X86-NEXT: pushl %ebp
767 ; X86-NEXT: movl %esp, %ebp
768 ; X86-NEXT: andl $-8, %esp
769 ; X86-NEXT: subl $8, %esp
770 ; X86-NEXT: movd 20(%ebp), %mm0
771 ; X86-NEXT: movd 16(%ebp), %mm1
772 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
773 ; X86-NEXT: movd 12(%ebp), %mm0
774 ; X86-NEXT: movd 8(%ebp), %mm2
775 ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
776 ; X86-NEXT: pfpnacc %mm1, %mm2
777 ; X86-NEXT: movq %mm2, (%esp)
778 ; X86-NEXT: flds {{[0-9]+}}(%esp)
779 ; X86-NEXT: flds (%esp)
780 ; X86-NEXT: movl %ebp, %esp
781 ; X86-NEXT: popl %ebp
784 ; X64-LABEL: test_pfpnacc:
785 ; X64: # %bb.0: # %entry
786 ; X64-NEXT: movdq2q %xmm1, %mm0
787 ; X64-NEXT: movdq2q %xmm0, %mm1
788 ; X64-NEXT: pfpnacc %mm0, %mm1
789 ; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp)
790 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
793 %0 = bitcast <2 x float> %a to x86_mmx
794 %1 = bitcast <2 x float> %b to x86_mmx
795 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
796 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
800 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnowa.pi2fw (note the 3dnowa intrinsic
; namespace). The x86_mmx argument is round-tripped through a <2 x i32>
; bitcast, passed to the intrinsic, and the result is bitcast to <2 x float>.
; The checks expect `pi2fw %mm0, %mm0` on both targets; X86 reloads the
; result with flds, X64 with movaps.
802 define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
803 ; X86-LABEL: test_pi2fw:
804 ; X86: # %bb.0: # %entry
805 ; X86-NEXT: pushl %ebp
806 ; X86-NEXT: movl %esp, %ebp
807 ; X86-NEXT: andl $-8, %esp
808 ; X86-NEXT: subl $8, %esp
809 ; X86-NEXT: pi2fw %mm0, %mm0
810 ; X86-NEXT: movq %mm0, (%esp)
811 ; X86-NEXT: flds {{[0-9]+}}(%esp)
812 ; X86-NEXT: flds (%esp)
813 ; X86-NEXT: movl %ebp, %esp
814 ; X86-NEXT: popl %ebp
817 ; X64-LABEL: test_pi2fw:
818 ; X64: # %bb.0: # %entry
819 ; X64-NEXT: pi2fw %mm0, %mm0
820 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
821 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
824 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
825 %1 = bitcast <2 x i32> %0 to x86_mmx
826 %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
827 %3 = bitcast x86_mmx %2 to <2 x float>
; Declaration of the intrinsic under test.
831 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
; Codegen test for llvm.x86.3dnowa.pswapd with a <2 x float> payload. The
; argument is bitcast to x86_mmx, passed to the intrinsic, and the result is
; bitcast back to <2 x float>. The checks expect a `pswapd` whose asm comment
; shows the lanes as [1,0]; X86 reloads the result with flds, X64 with
; movaps.
833 define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
834 ; X86-LABEL: test_pswapdsf:
835 ; X86: # %bb.0: # %entry
836 ; X86-NEXT: pushl %ebp
837 ; X86-NEXT: movl %esp, %ebp
838 ; X86-NEXT: andl $-8, %esp
839 ; X86-NEXT: subl $8, %esp
840 ; X86-NEXT: movd 12(%ebp), %mm0
841 ; X86-NEXT: movd 8(%ebp), %mm1
842 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
843 ; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0]
844 ; X86-NEXT: movq %mm0, (%esp)
845 ; X86-NEXT: flds {{[0-9]+}}(%esp)
846 ; X86-NEXT: flds (%esp)
847 ; X86-NEXT: movl %ebp, %esp
848 ; X86-NEXT: popl %ebp
851 ; X64-LABEL: test_pswapdsf:
852 ; X64: # %bb.0: # %entry
853 ; X64-NEXT: movdq2q %xmm0, %mm0
854 ; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0]
855 ; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
856 ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
859 %0 = bitcast <2 x float> %a to x86_mmx
860 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
861 %2 = bitcast x86_mmx %1 to <2 x float>
; Codegen test for llvm.x86.3dnowa.pswapd with a <2 x i32> payload. The
; argument is bitcast to x86_mmx, passed to the intrinsic, and the result is
; bitcast back to <2 x i32>. The checks expect a `pswapd` whose asm comment
; shows the lanes as [1,0]; X86 reads the result back into %eax/%edx, X64
; moves it to %xmm0 with movq2dq.
865 define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
866 ; X86-LABEL: test_pswapdsi:
867 ; X86: # %bb.0: # %entry
868 ; X86-NEXT: pushl %ebp
869 ; X86-NEXT: movl %esp, %ebp
870 ; X86-NEXT: andl $-8, %esp
871 ; X86-NEXT: subl $8, %esp
872 ; X86-NEXT: movd 12(%ebp), %mm0
873 ; X86-NEXT: movd 8(%ebp), %mm1
874 ; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
875 ; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0]
876 ; X86-NEXT: movq %mm0, (%esp)
877 ; X86-NEXT: movl (%esp), %eax
878 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
879 ; X86-NEXT: movl %ebp, %esp
880 ; X86-NEXT: popl %ebp
883 ; X64-LABEL: test_pswapdsi:
884 ; X64: # %bb.0: # %entry
885 ; X64-NEXT: movdq2q %xmm0, %mm0
886 ; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0]
887 ; X64-NEXT: movq2dq %mm0, %xmm0
890 %0 = bitcast <2 x i32> %a to x86_mmx
891 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
892 %2 = bitcast x86_mmx %1 to <2 x i32>
; Declaration of the pswapd intrinsic called by the pswapd tests in this file.
896 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone