test/CodeGen/X86/3dnow-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X64
   4
   ; Checks that @llvm.x86.3dnow.pavgusb on two x86_mmx arguments is emitted as a
   ; single `pavgusb %mm1, %mm0` under both the i686 (X86) and x86_64 (X64) RUN lines.
   ; X86 stores the <8 x i8> result through a pointer loaded from the stack and
   ; returns with `retl $4`; X64 moves it into %xmm0 with movq2dq.
   5 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
   6 ; X86-LABEL: test_pavgusb:
   7 ; X86:       # %bb.0: # %entry
   8 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   9 ; X86-NEXT:    pavgusb %mm1, %mm0
  10 ; X86-NEXT:    movq %mm0, (%eax)
  11 ; X86-NEXT:    retl $4
  12 ;
  13 ; X64-LABEL: test_pavgusb:
  14 ; X64:       # %bb.0: # %entry
  15 ; X64-NEXT:    pavgusb %mm1, %mm0
  16 ; X64-NEXT:    movq2dq %mm0, %xmm0
  17 ; X64-NEXT:    retq
  18 entry:
  19   %0 = bitcast x86_mmx %a.coerce to <8 x i8>
  20   %1 = bitcast x86_mmx %b.coerce to <8 x i8>
  21   %2 = bitcast <8 x i8> %0 to x86_mmx ; %2/%3 cast %0/%1 straight back to x86_mmx
  22   %3 = bitcast <8 x i8> %1 to x86_mmx
  23   %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3) ; intrinsic under test
  24   %5 = bitcast x86_mmx %4 to <8 x i8>
  25   ret <8 x i8> %5
  26 }
  27
  28 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
  29
  ; Checks that @llvm.x86.3dnow.pf2id on a <2 x float> argument is emitted as a
  ; single `pf2id` under both the i686 (X86) and x86_64 (X64) RUN lines.
  ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
  ; result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
  ; X64 moves %xmm0 into %mm0 with movdq2q and returns via movq2dq.
  30 define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
  31 ; X86-LABEL: test_pf2id:
  32 ; X86:       # %bb.0: # %entry
  33 ; X86-NEXT:    pushl %ebp
  34 ; X86-NEXT:    movl %esp, %ebp
  35 ; X86-NEXT:    andl $-8, %esp
  36 ; X86-NEXT:    subl $8, %esp
  37 ; X86-NEXT:    movd 12(%ebp), %mm0
  38 ; X86-NEXT:    movd 8(%ebp), %mm1
  39 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
  40 ; X86-NEXT:    pf2id %mm1, %mm0
  41 ; X86-NEXT:    movq %mm0, (%esp)
  42 ; X86-NEXT:    movl (%esp), %eax
  43 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
  44 ; X86-NEXT:    movl %ebp, %esp
  45 ; X86-NEXT:    popl %ebp
  46 ; X86-NEXT:    retl
  47 ;
  48 ; X64-LABEL: test_pf2id:
  49 ; X64:       # %bb.0: # %entry
  50 ; X64-NEXT:    movdq2q %xmm0, %mm0
  51 ; X64-NEXT:    pf2id %mm0, %mm0
  52 ; X64-NEXT:    movq2dq %mm0, %xmm0
  53 ; X64-NEXT:    retq
  54 entry:
  55   %0 = bitcast <2 x float> %a to x86_mmx
  56   %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0) ; intrinsic under test
  57   %2 = bitcast x86_mmx %1 to <2 x i32>
  58   ret <2 x i32> %2
  59 }
  60
  61 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
  62
  ; Checks that @llvm.x86.3dnow.pfacc on two <2 x float> arguments is emitted as
  ; a single `pfacc` under both the i686 (X86) and x86_64 (X64) RUN lines.
  ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
  ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
  ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
  ; result through a stack slot loaded into %xmm0 with movaps.
  63 define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
  64 ; X86-LABEL: test_pfacc:
  65 ; X86:       # %bb.0: # %entry
  66 ; X86-NEXT:    pushl %ebp
  67 ; X86-NEXT:    movl %esp, %ebp
  68 ; X86-NEXT:    andl $-8, %esp
  69 ; X86-NEXT:    subl $8, %esp
  70 ; X86-NEXT:    movd 20(%ebp), %mm0
  71 ; X86-NEXT:    movd 16(%ebp), %mm1
  72 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
  73 ; X86-NEXT:    movd 12(%ebp), %mm0
  74 ; X86-NEXT:    movd 8(%ebp), %mm2
  75 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
  76 ; X86-NEXT:    pfacc %mm1, %mm2
  77 ; X86-NEXT:    movq %mm2, (%esp)
  78 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
  79 ; X86-NEXT:    flds (%esp)
  80 ; X86-NEXT:    movl %ebp, %esp
  81 ; X86-NEXT:    popl %ebp
  82 ; X86-NEXT:    retl
  83 ;
  84 ; X64-LABEL: test_pfacc:
  85 ; X64:       # %bb.0: # %entry
  86 ; X64-NEXT:    movdq2q %xmm1, %mm0
  87 ; X64-NEXT:    movdq2q %xmm0, %mm1
  88 ; X64-NEXT:    pfacc %mm0, %mm1
  89 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
  90 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
  91 ; X64-NEXT:    retq
  92 entry:
  93   %0 = bitcast <2 x float> %a to x86_mmx
  94   %1 = bitcast <2 x float> %b to x86_mmx
  95   %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1) ; intrinsic under test
  96   %3 = bitcast x86_mmx %2 to <2 x float>
  97   ret <2 x float> %3
  98 }
  99
 100 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
 101
 ; Checks that @llvm.x86.3dnow.pfadd on two <2 x float> arguments is emitted as
 ; a single `pfadd` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 102 define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
 103 ; X86-LABEL: test_pfadd:
 104 ; X86:       # %bb.0: # %entry
 105 ; X86-NEXT:    pushl %ebp
 106 ; X86-NEXT:    movl %esp, %ebp
 107 ; X86-NEXT:    andl $-8, %esp
 108 ; X86-NEXT:    subl $8, %esp
 109 ; X86-NEXT:    movd 20(%ebp), %mm0
 110 ; X86-NEXT:    movd 16(%ebp), %mm1
 111 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 112 ; X86-NEXT:    movd 12(%ebp), %mm0
 113 ; X86-NEXT:    movd 8(%ebp), %mm2
 114 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 115 ; X86-NEXT:    pfadd %mm1, %mm2
 116 ; X86-NEXT:    movq %mm2, (%esp)
 117 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 118 ; X86-NEXT:    flds (%esp)
 119 ; X86-NEXT:    movl %ebp, %esp
 120 ; X86-NEXT:    popl %ebp
 121 ; X86-NEXT:    retl
 122 ;
 123 ; X64-LABEL: test_pfadd:
 124 ; X64:       # %bb.0: # %entry
 125 ; X64-NEXT:    movdq2q %xmm1, %mm0
 126 ; X64-NEXT:    movdq2q %xmm0, %mm1
 127 ; X64-NEXT:    pfadd %mm0, %mm1
 128 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 129 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 130 ; X64-NEXT:    retq
 131 entry:
 132   %0 = bitcast <2 x float> %a to x86_mmx
 133   %1 = bitcast <2 x float> %b to x86_mmx
 134   %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 135   %3 = bitcast x86_mmx %2 to <2 x float>
 136   ret <2 x float> %3
 137 }
 138
 139 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
 140
 ; Checks that @llvm.x86.3dnow.pfcmpeq on two <2 x float> arguments is emitted
 ; as a single `pfcmpeq` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns via movq2dq.
 141 define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
 142 ; X86-LABEL: test_pfcmpeq:
 143 ; X86:       # %bb.0: # %entry
 144 ; X86-NEXT:    pushl %ebp
 145 ; X86-NEXT:    movl %esp, %ebp
 146 ; X86-NEXT:    andl $-8, %esp
 147 ; X86-NEXT:    subl $8, %esp
 148 ; X86-NEXT:    movd 20(%ebp), %mm0
 149 ; X86-NEXT:    movd 16(%ebp), %mm1
 150 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 151 ; X86-NEXT:    movd 12(%ebp), %mm0
 152 ; X86-NEXT:    movd 8(%ebp), %mm2
 153 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 154 ; X86-NEXT:    pfcmpeq %mm1, %mm2
 155 ; X86-NEXT:    movq %mm2, (%esp)
 156 ; X86-NEXT:    movl (%esp), %eax
 157 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 158 ; X86-NEXT:    movl %ebp, %esp
 159 ; X86-NEXT:    popl %ebp
 160 ; X86-NEXT:    retl
 161 ;
 162 ; X64-LABEL: test_pfcmpeq:
 163 ; X64:       # %bb.0: # %entry
 164 ; X64-NEXT:    movdq2q %xmm1, %mm0
 165 ; X64-NEXT:    movdq2q %xmm0, %mm1
 166 ; X64-NEXT:    pfcmpeq %mm0, %mm1
 167 ; X64-NEXT:    movq2dq %mm1, %xmm0
 168 ; X64-NEXT:    retq
 169 entry:
 170   %0 = bitcast <2 x float> %a to x86_mmx
 171   %1 = bitcast <2 x float> %b to x86_mmx
 172   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 173   %3 = bitcast x86_mmx %2 to <2 x i32>
 174   ret <2 x i32> %3
 175 }
 176
 177 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
 178
 ; Checks that @llvm.x86.3dnow.pfcmpge on two <2 x float> arguments is emitted
 ; as a single `pfcmpge` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns via movq2dq.
 179 define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
 180 ; X86-LABEL: test_pfcmpge:
 181 ; X86:       # %bb.0: # %entry
 182 ; X86-NEXT:    pushl %ebp
 183 ; X86-NEXT:    movl %esp, %ebp
 184 ; X86-NEXT:    andl $-8, %esp
 185 ; X86-NEXT:    subl $8, %esp
 186 ; X86-NEXT:    movd 20(%ebp), %mm0
 187 ; X86-NEXT:    movd 16(%ebp), %mm1
 188 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 189 ; X86-NEXT:    movd 12(%ebp), %mm0
 190 ; X86-NEXT:    movd 8(%ebp), %mm2
 191 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 192 ; X86-NEXT:    pfcmpge %mm1, %mm2
 193 ; X86-NEXT:    movq %mm2, (%esp)
 194 ; X86-NEXT:    movl (%esp), %eax
 195 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 196 ; X86-NEXT:    movl %ebp, %esp
 197 ; X86-NEXT:    popl %ebp
 198 ; X86-NEXT:    retl
 199 ;
 200 ; X64-LABEL: test_pfcmpge:
 201 ; X64:       # %bb.0: # %entry
 202 ; X64-NEXT:    movdq2q %xmm1, %mm0
 203 ; X64-NEXT:    movdq2q %xmm0, %mm1
 204 ; X64-NEXT:    pfcmpge %mm0, %mm1
 205 ; X64-NEXT:    movq2dq %mm1, %xmm0
 206 ; X64-NEXT:    retq
 207 entry:
 208   %0 = bitcast <2 x float> %a to x86_mmx
 209   %1 = bitcast <2 x float> %b to x86_mmx
 210   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 211   %3 = bitcast x86_mmx %2 to <2 x i32>
 212   ret <2 x i32> %3
 213 }
 214
 215 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
 216
 ; Checks that @llvm.x86.3dnow.pfcmpgt on two <2 x float> arguments is emitted
 ; as a single `pfcmpgt` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns via movq2dq.
 217 define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
 218 ; X86-LABEL: test_pfcmpgt:
 219 ; X86:       # %bb.0: # %entry
 220 ; X86-NEXT:    pushl %ebp
 221 ; X86-NEXT:    movl %esp, %ebp
 222 ; X86-NEXT:    andl $-8, %esp
 223 ; X86-NEXT:    subl $8, %esp
 224 ; X86-NEXT:    movd 20(%ebp), %mm0
 225 ; X86-NEXT:    movd 16(%ebp), %mm1
 226 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 227 ; X86-NEXT:    movd 12(%ebp), %mm0
 228 ; X86-NEXT:    movd 8(%ebp), %mm2
 229 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 230 ; X86-NEXT:    pfcmpgt %mm1, %mm2
 231 ; X86-NEXT:    movq %mm2, (%esp)
 232 ; X86-NEXT:    movl (%esp), %eax
 233 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 234 ; X86-NEXT:    movl %ebp, %esp
 235 ; X86-NEXT:    popl %ebp
 236 ; X86-NEXT:    retl
 237 ;
 238 ; X64-LABEL: test_pfcmpgt:
 239 ; X64:       # %bb.0: # %entry
 240 ; X64-NEXT:    movdq2q %xmm1, %mm0
 241 ; X64-NEXT:    movdq2q %xmm0, %mm1
 242 ; X64-NEXT:    pfcmpgt %mm0, %mm1
 243 ; X64-NEXT:    movq2dq %mm1, %xmm0
 244 ; X64-NEXT:    retq
 245 entry:
 246   %0 = bitcast <2 x float> %a to x86_mmx
 247   %1 = bitcast <2 x float> %b to x86_mmx
 248   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 249   %3 = bitcast x86_mmx %2 to <2 x i32>
 250   ret <2 x i32> %3
 251 }
 252
 253 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
 254
 ; Checks that @llvm.x86.3dnow.pfmax on two <2 x float> arguments is emitted as
 ; a single `pfmax` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 255 define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
 256 ; X86-LABEL: test_pfmax:
 257 ; X86:       # %bb.0: # %entry
 258 ; X86-NEXT:    pushl %ebp
 259 ; X86-NEXT:    movl %esp, %ebp
 260 ; X86-NEXT:    andl $-8, %esp
 261 ; X86-NEXT:    subl $8, %esp
 262 ; X86-NEXT:    movd 20(%ebp), %mm0
 263 ; X86-NEXT:    movd 16(%ebp), %mm1
 264 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 265 ; X86-NEXT:    movd 12(%ebp), %mm0
 266 ; X86-NEXT:    movd 8(%ebp), %mm2
 267 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 268 ; X86-NEXT:    pfmax %mm1, %mm2
 269 ; X86-NEXT:    movq %mm2, (%esp)
 270 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 271 ; X86-NEXT:    flds (%esp)
 272 ; X86-NEXT:    movl %ebp, %esp
 273 ; X86-NEXT:    popl %ebp
 274 ; X86-NEXT:    retl
 275 ;
 276 ; X64-LABEL: test_pfmax:
 277 ; X64:       # %bb.0: # %entry
 278 ; X64-NEXT:    movdq2q %xmm1, %mm0
 279 ; X64-NEXT:    movdq2q %xmm0, %mm1
 280 ; X64-NEXT:    pfmax %mm0, %mm1
 281 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 282 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 283 ; X64-NEXT:    retq
 284 entry:
 285   %0 = bitcast <2 x float> %a to x86_mmx
 286   %1 = bitcast <2 x float> %b to x86_mmx
 287   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 288   %3 = bitcast x86_mmx %2 to <2 x float>
 289   ret <2 x float> %3
 290 }
 291
 292 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
 293
 ; Checks that @llvm.x86.3dnow.pfmin on two <2 x float> arguments is emitted as
 ; a single `pfmin` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 294 define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
 295 ; X86-LABEL: test_pfmin:
 296 ; X86:       # %bb.0: # %entry
 297 ; X86-NEXT:    pushl %ebp
 298 ; X86-NEXT:    movl %esp, %ebp
 299 ; X86-NEXT:    andl $-8, %esp
 300 ; X86-NEXT:    subl $8, %esp
 301 ; X86-NEXT:    movd 20(%ebp), %mm0
 302 ; X86-NEXT:    movd 16(%ebp), %mm1
 303 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 304 ; X86-NEXT:    movd 12(%ebp), %mm0
 305 ; X86-NEXT:    movd 8(%ebp), %mm2
 306 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 307 ; X86-NEXT:    pfmin %mm1, %mm2
 308 ; X86-NEXT:    movq %mm2, (%esp)
 309 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 310 ; X86-NEXT:    flds (%esp)
 311 ; X86-NEXT:    movl %ebp, %esp
 312 ; X86-NEXT:    popl %ebp
 313 ; X86-NEXT:    retl
 314 ;
 315 ; X64-LABEL: test_pfmin:
 316 ; X64:       # %bb.0: # %entry
 317 ; X64-NEXT:    movdq2q %xmm1, %mm0
 318 ; X64-NEXT:    movdq2q %xmm0, %mm1
 319 ; X64-NEXT:    pfmin %mm0, %mm1
 320 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 321 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 322 ; X64-NEXT:    retq
 323 entry:
 324   %0 = bitcast <2 x float> %a to x86_mmx
 325   %1 = bitcast <2 x float> %b to x86_mmx
 326   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 327   %3 = bitcast x86_mmx %2 to <2 x float>
 328   ret <2 x float> %3
 329 }
 330
 331 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
 332
 ; Checks that @llvm.x86.3dnow.pfmul on two <2 x float> arguments is emitted as
 ; a single `pfmul` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 333 define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
 334 ; X86-LABEL: test_pfmul:
 335 ; X86:       # %bb.0: # %entry
 336 ; X86-NEXT:    pushl %ebp
 337 ; X86-NEXT:    movl %esp, %ebp
 338 ; X86-NEXT:    andl $-8, %esp
 339 ; X86-NEXT:    subl $8, %esp
 340 ; X86-NEXT:    movd 20(%ebp), %mm0
 341 ; X86-NEXT:    movd 16(%ebp), %mm1
 342 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 343 ; X86-NEXT:    movd 12(%ebp), %mm0
 344 ; X86-NEXT:    movd 8(%ebp), %mm2
 345 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 346 ; X86-NEXT:    pfmul %mm1, %mm2
 347 ; X86-NEXT:    movq %mm2, (%esp)
 348 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 349 ; X86-NEXT:    flds (%esp)
 350 ; X86-NEXT:    movl %ebp, %esp
 351 ; X86-NEXT:    popl %ebp
 352 ; X86-NEXT:    retl
 353 ;
 354 ; X64-LABEL: test_pfmul:
 355 ; X64:       # %bb.0: # %entry
 356 ; X64-NEXT:    movdq2q %xmm1, %mm0
 357 ; X64-NEXT:    movdq2q %xmm0, %mm1
 358 ; X64-NEXT:    pfmul %mm0, %mm1
 359 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 360 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 361 ; X64-NEXT:    retq
 362 entry:
 363   %0 = bitcast <2 x float> %a to x86_mmx
 364   %1 = bitcast <2 x float> %b to x86_mmx
 365   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 366   %3 = bitcast x86_mmx %2 to <2 x float>
 367   ret <2 x float> %3
 368 }
 369
 370 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
 371
 ; Checks that @llvm.x86.3dnow.pfrcp on a <2 x float> argument is emitted as a
 ; single `pfrcp` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
 ; result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm0 into %mm0 with movdq2q and returns the result through a
 ; stack slot loaded into %xmm0 with movaps.
 372 define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
 373 ; X86-LABEL: test_pfrcp:
 374 ; X86:       # %bb.0: # %entry
 375 ; X86-NEXT:    pushl %ebp
 376 ; X86-NEXT:    movl %esp, %ebp
 377 ; X86-NEXT:    andl $-8, %esp
 378 ; X86-NEXT:    subl $8, %esp
 379 ; X86-NEXT:    movd 12(%ebp), %mm0
 380 ; X86-NEXT:    movd 8(%ebp), %mm1
 381 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 382 ; X86-NEXT:    pfrcp %mm1, %mm0
 383 ; X86-NEXT:    movq %mm0, (%esp)
 384 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 385 ; X86-NEXT:    flds (%esp)
 386 ; X86-NEXT:    movl %ebp, %esp
 387 ; X86-NEXT:    popl %ebp
 388 ; X86-NEXT:    retl
 389 ;
 390 ; X64-LABEL: test_pfrcp:
 391 ; X64:       # %bb.0: # %entry
 392 ; X64-NEXT:    movdq2q %xmm0, %mm0
 393 ; X64-NEXT:    pfrcp %mm0, %mm0
 394 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 395 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 396 ; X64-NEXT:    retq
 397 entry:
 398   %0 = bitcast <2 x float> %a to x86_mmx
 399   %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0) ; intrinsic under test
 400   %2 = bitcast x86_mmx %1 to <2 x float>
 401   ret <2 x float> %2
 402 }
 403
 404 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
 405
 ; Checks that @llvm.x86.3dnow.pfrcpit1 on two <2 x float> arguments is emitted
 ; as a single `pfrcpit1` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 406 define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
 407 ; X86-LABEL: test_pfrcpit1:
 408 ; X86:       # %bb.0: # %entry
 409 ; X86-NEXT:    pushl %ebp
 410 ; X86-NEXT:    movl %esp, %ebp
 411 ; X86-NEXT:    andl $-8, %esp
 412 ; X86-NEXT:    subl $8, %esp
 413 ; X86-NEXT:    movd 20(%ebp), %mm0
 414 ; X86-NEXT:    movd 16(%ebp), %mm1
 415 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 416 ; X86-NEXT:    movd 12(%ebp), %mm0
 417 ; X86-NEXT:    movd 8(%ebp), %mm2
 418 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 419 ; X86-NEXT:    pfrcpit1 %mm1, %mm2
 420 ; X86-NEXT:    movq %mm2, (%esp)
 421 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 422 ; X86-NEXT:    flds (%esp)
 423 ; X86-NEXT:    movl %ebp, %esp
 424 ; X86-NEXT:    popl %ebp
 425 ; X86-NEXT:    retl
 426 ;
 427 ; X64-LABEL: test_pfrcpit1:
 428 ; X64:       # %bb.0: # %entry
 429 ; X64-NEXT:    movdq2q %xmm1, %mm0
 430 ; X64-NEXT:    movdq2q %xmm0, %mm1
 431 ; X64-NEXT:    pfrcpit1 %mm0, %mm1
 432 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 433 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 434 ; X64-NEXT:    retq
 435 entry:
 436   %0 = bitcast <2 x float> %a to x86_mmx
 437   %1 = bitcast <2 x float> %b to x86_mmx
 438   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 439   %3 = bitcast x86_mmx %2 to <2 x float>
 440   ret <2 x float> %3
 441 }
 442
 443 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
 444
 ; Checks that @llvm.x86.3dnow.pfrcpit2 on two <2 x float> arguments is emitted
 ; as a single `pfrcpit2` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 445 define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
 446 ; X86-LABEL: test_pfrcpit2:
 447 ; X86:       # %bb.0: # %entry
 448 ; X86-NEXT:    pushl %ebp
 449 ; X86-NEXT:    movl %esp, %ebp
 450 ; X86-NEXT:    andl $-8, %esp
 451 ; X86-NEXT:    subl $8, %esp
 452 ; X86-NEXT:    movd 20(%ebp), %mm0
 453 ; X86-NEXT:    movd 16(%ebp), %mm1
 454 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 455 ; X86-NEXT:    movd 12(%ebp), %mm0
 456 ; X86-NEXT:    movd 8(%ebp), %mm2
 457 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 458 ; X86-NEXT:    pfrcpit2 %mm1, %mm2
 459 ; X86-NEXT:    movq %mm2, (%esp)
 460 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 461 ; X86-NEXT:    flds (%esp)
 462 ; X86-NEXT:    movl %ebp, %esp
 463 ; X86-NEXT:    popl %ebp
 464 ; X86-NEXT:    retl
 465 ;
 466 ; X64-LABEL: test_pfrcpit2:
 467 ; X64:       # %bb.0: # %entry
 468 ; X64-NEXT:    movdq2q %xmm1, %mm0
 469 ; X64-NEXT:    movdq2q %xmm0, %mm1
 470 ; X64-NEXT:    pfrcpit2 %mm0, %mm1
 471 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 472 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 473 ; X64-NEXT:    retq
 474 entry:
 475   %0 = bitcast <2 x float> %a to x86_mmx
 476   %1 = bitcast <2 x float> %b to x86_mmx
 477   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 478   %3 = bitcast x86_mmx %2 to <2 x float>
 479   ret <2 x float> %3
 480 }
 481
 482 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
 483
 ; Checks that @llvm.x86.3dnow.pfrsqrt on a <2 x float> argument is emitted as
 ; a single `pfrsqrt` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
 ; result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm0 into %mm0 with movdq2q and returns the result through a
 ; stack slot loaded into %xmm0 with movaps.
 484 define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
 485 ; X86-LABEL: test_pfrsqrt:
 486 ; X86:       # %bb.0: # %entry
 487 ; X86-NEXT:    pushl %ebp
 488 ; X86-NEXT:    movl %esp, %ebp
 489 ; X86-NEXT:    andl $-8, %esp
 490 ; X86-NEXT:    subl $8, %esp
 491 ; X86-NEXT:    movd 12(%ebp), %mm0
 492 ; X86-NEXT:    movd 8(%ebp), %mm1
 493 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 494 ; X86-NEXT:    pfrsqrt %mm1, %mm0
 495 ; X86-NEXT:    movq %mm0, (%esp)
 496 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 497 ; X86-NEXT:    flds (%esp)
 498 ; X86-NEXT:    movl %ebp, %esp
 499 ; X86-NEXT:    popl %ebp
 500 ; X86-NEXT:    retl
 501 ;
 502 ; X64-LABEL: test_pfrsqrt:
 503 ; X64:       # %bb.0: # %entry
 504 ; X64-NEXT:    movdq2q %xmm0, %mm0
 505 ; X64-NEXT:    pfrsqrt %mm0, %mm0
 506 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 507 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 508 ; X64-NEXT:    retq
 509 entry:
 510   %0 = bitcast <2 x float> %a to x86_mmx
 511   %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0) ; intrinsic under test
 512   %2 = bitcast x86_mmx %1 to <2 x float>
 513   ret <2 x float> %2
 514 }
 515
 516 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
 517
 ; Checks that @llvm.x86.3dnow.pfrsqit1 on two <2 x float> arguments is emitted
 ; as a single `pfrsqit1` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 518 define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
 519 ; X86-LABEL: test_pfrsqit1:
 520 ; X86:       # %bb.0: # %entry
 521 ; X86-NEXT:    pushl %ebp
 522 ; X86-NEXT:    movl %esp, %ebp
 523 ; X86-NEXT:    andl $-8, %esp
 524 ; X86-NEXT:    subl $8, %esp
 525 ; X86-NEXT:    movd 20(%ebp), %mm0
 526 ; X86-NEXT:    movd 16(%ebp), %mm1
 527 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 528 ; X86-NEXT:    movd 12(%ebp), %mm0
 529 ; X86-NEXT:    movd 8(%ebp), %mm2
 530 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 531 ; X86-NEXT:    pfrsqit1 %mm1, %mm2
 532 ; X86-NEXT:    movq %mm2, (%esp)
 533 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 534 ; X86-NEXT:    flds (%esp)
 535 ; X86-NEXT:    movl %ebp, %esp
 536 ; X86-NEXT:    popl %ebp
 537 ; X86-NEXT:    retl
 538 ;
 539 ; X64-LABEL: test_pfrsqit1:
 540 ; X64:       # %bb.0: # %entry
 541 ; X64-NEXT:    movdq2q %xmm1, %mm0
 542 ; X64-NEXT:    movdq2q %xmm0, %mm1
 543 ; X64-NEXT:    pfrsqit1 %mm0, %mm1
 544 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 545 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 546 ; X64-NEXT:    retq
 547 entry:
 548   %0 = bitcast <2 x float> %a to x86_mmx
 549   %1 = bitcast <2 x float> %b to x86_mmx
 550   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 551   %3 = bitcast x86_mmx %2 to <2 x float>
 552   ret <2 x float> %3
 553 }
 554
 555 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
 556
 ; Checks that @llvm.x86.3dnow.pfsub on two <2 x float> arguments is emitted as
 ; a single `pfsub` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 557 define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
 558 ; X86-LABEL: test_pfsub:
 559 ; X86:       # %bb.0: # %entry
 560 ; X86-NEXT:    pushl %ebp
 561 ; X86-NEXT:    movl %esp, %ebp
 562 ; X86-NEXT:    andl $-8, %esp
 563 ; X86-NEXT:    subl $8, %esp
 564 ; X86-NEXT:    movd 20(%ebp), %mm0
 565 ; X86-NEXT:    movd 16(%ebp), %mm1
 566 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 567 ; X86-NEXT:    movd 12(%ebp), %mm0
 568 ; X86-NEXT:    movd 8(%ebp), %mm2
 569 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 570 ; X86-NEXT:    pfsub %mm1, %mm2
 571 ; X86-NEXT:    movq %mm2, (%esp)
 572 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 573 ; X86-NEXT:    flds (%esp)
 574 ; X86-NEXT:    movl %ebp, %esp
 575 ; X86-NEXT:    popl %ebp
 576 ; X86-NEXT:    retl
 577 ;
 578 ; X64-LABEL: test_pfsub:
 579 ; X64:       # %bb.0: # %entry
 580 ; X64-NEXT:    movdq2q %xmm1, %mm0
 581 ; X64-NEXT:    movdq2q %xmm0, %mm1
 582 ; X64-NEXT:    pfsub %mm0, %mm1
 583 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 584 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 585 ; X64-NEXT:    retq
 586 entry:
 587   %0 = bitcast <2 x float> %a to x86_mmx
 588   %1 = bitcast <2 x float> %b to x86_mmx
 589   %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 590   %3 = bitcast x86_mmx %2 to <2 x float>
 591   ret <2 x float> %3
 592 }
 593
 594 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
 595
 ; Checks that @llvm.x86.3dnow.pfsubr on two <2 x float> arguments is emitted as
 ; a single `pfsubr` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 596 define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
 597 ; X86-LABEL: test_pfsubr:
 598 ; X86:       # %bb.0: # %entry
 599 ; X86-NEXT:    pushl %ebp
 600 ; X86-NEXT:    movl %esp, %ebp
 601 ; X86-NEXT:    andl $-8, %esp
 602 ; X86-NEXT:    subl $8, %esp
 603 ; X86-NEXT:    movd 20(%ebp), %mm0
 604 ; X86-NEXT:    movd 16(%ebp), %mm1
 605 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 606 ; X86-NEXT:    movd 12(%ebp), %mm0
 607 ; X86-NEXT:    movd 8(%ebp), %mm2
 608 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 609 ; X86-NEXT:    pfsubr %mm1, %mm2
 610 ; X86-NEXT:    movq %mm2, (%esp)
 611 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 612 ; X86-NEXT:    flds (%esp)
 613 ; X86-NEXT:    movl %ebp, %esp
 614 ; X86-NEXT:    popl %ebp
 615 ; X86-NEXT:    retl
 616 ;
 617 ; X64-LABEL: test_pfsubr:
 618 ; X64:       # %bb.0: # %entry
 619 ; X64-NEXT:    movdq2q %xmm1, %mm0
 620 ; X64-NEXT:    movdq2q %xmm0, %mm1
 621 ; X64-NEXT:    pfsubr %mm0, %mm1
 622 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 623 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 624 ; X64-NEXT:    retq
 625 entry:
 626   %0 = bitcast <2 x float> %a to x86_mmx
 627   %1 = bitcast <2 x float> %b to x86_mmx
 628   %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 629   %3 = bitcast x86_mmx %2 to <2 x float>
 630   ret <2 x float> %3
 631 }
 632
 633 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
 634
 ; Checks that @llvm.x86.3dnow.pi2fd on an x86_mmx argument is emitted as a
 ; single `pi2fd %mm0, %mm0` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; The argument is used directly from %mm0 on both targets. X86 spills the
 ; result to an 8-byte-aligned slot and reloads both lanes with flds; X64
 ; returns it through a stack slot loaded into %xmm0 with movaps.
 635 define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
 636 ; X86-LABEL: test_pi2fd:
 637 ; X86:       # %bb.0: # %entry
 638 ; X86-NEXT:    pushl %ebp
 639 ; X86-NEXT:    movl %esp, %ebp
 640 ; X86-NEXT:    andl $-8, %esp
 641 ; X86-NEXT:    subl $8, %esp
 642 ; X86-NEXT:    pi2fd %mm0, %mm0
 643 ; X86-NEXT:    movq %mm0, (%esp)
 644 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 645 ; X86-NEXT:    flds (%esp)
 646 ; X86-NEXT:    movl %ebp, %esp
 647 ; X86-NEXT:    popl %ebp
 648 ; X86-NEXT:    retl
 649 ;
 650 ; X64-LABEL: test_pi2fd:
 651 ; X64:       # %bb.0: # %entry
 652 ; X64-NEXT:    pi2fd %mm0, %mm0
 653 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 654 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 655 ; X64-NEXT:    retq
 656 entry:
 657   %0 = bitcast x86_mmx %a.coerce to <2 x i32>
 658   %1 = bitcast <2 x i32> %0 to x86_mmx ; casts %0 straight back to x86_mmx
 659   %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) ; intrinsic under test
 660   %3 = bitcast x86_mmx %2 to <2 x float>
 661   ret <2 x float> %3
 662 }
 663
 664 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
 665
 ; Checks that @llvm.x86.3dnow.pmulhrw on two x86_mmx arguments is emitted as a
 ; single `pmulhrw %mm1, %mm0` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 stores the <4 x i16> result through a pointer loaded from the stack and
 ; returns with `retl $4`; X64 moves it into %xmm0 with movq2dq.
 666 define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
 667 ; X86-LABEL: test_pmulhrw:
 668 ; X86:       # %bb.0: # %entry
 669 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 670 ; X86-NEXT:    pmulhrw %mm1, %mm0
 671 ; X86-NEXT:    movq %mm0, (%eax)
 672 ; X86-NEXT:    retl $4
 673 ;
 674 ; X64-LABEL: test_pmulhrw:
 675 ; X64:       # %bb.0: # %entry
 676 ; X64-NEXT:    pmulhrw %mm1, %mm0
 677 ; X64-NEXT:    movq2dq %mm0, %xmm0
 678 ; X64-NEXT:    retq
 679 entry:
 680   %0 = bitcast x86_mmx %a.coerce to <4 x i16>
 681   %1 = bitcast x86_mmx %b.coerce to <4 x i16>
 682   %2 = bitcast <4 x i16> %0 to x86_mmx ; %2/%3 cast %0/%1 straight back to x86_mmx
 683   %3 = bitcast <4 x i16> %1 to x86_mmx
 684   %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3) ; intrinsic under test
 685   %5 = bitcast x86_mmx %4 to <4 x i16>
 686   ret <4 x i16> %5
 687 }
 688
 689 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
 690
 ; Checks that @llvm.x86.3dnowa.pf2iw on a <2 x float> argument is emitted as a
 ; single `pf2iw` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
 ; result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
 ; X64 moves %xmm0 into %mm0 with movdq2q and returns via movq2dq.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 691 define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
 692 ; X86-LABEL: test_pf2iw:
 693 ; X86:       # %bb.0: # %entry
 694 ; X86-NEXT:    pushl %ebp
 695 ; X86-NEXT:    movl %esp, %ebp
 696 ; X86-NEXT:    andl $-8, %esp
 697 ; X86-NEXT:    subl $8, %esp
 698 ; X86-NEXT:    movd 12(%ebp), %mm0
 699 ; X86-NEXT:    movd 8(%ebp), %mm1
 700 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 701 ; X86-NEXT:    pf2iw %mm1, %mm0
 702 ; X86-NEXT:    movq %mm0, (%esp)
 703 ; X86-NEXT:    movl (%esp), %eax
 704 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 705 ; X86-NEXT:    movl %ebp, %esp
 706 ; X86-NEXT:    popl %ebp
 707 ; X86-NEXT:    retl
 708 ;
 709 ; X64-LABEL: test_pf2iw:
 710 ; X64:       # %bb.0: # %entry
 711 ; X64-NEXT:    movdq2q %xmm0, %mm0
 712 ; X64-NEXT:    pf2iw %mm0, %mm0
 713 ; X64-NEXT:    movq2dq %mm0, %xmm0
 714 ; X64-NEXT:    retq
 715 entry:
 716   %0 = bitcast <2 x float> %a to x86_mmx
 717   %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0) ; intrinsic under test
 718   %2 = bitcast x86_mmx %1 to <2 x i32>
 719   ret <2 x i32> %2
 720 }
 721
 722 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
 723
 ; Checks that @llvm.x86.3dnowa.pfnacc on two <2 x float> arguments is emitted
 ; as a single `pfnacc` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 724 define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
 725 ; X86-LABEL: test_pfnacc:
 726 ; X86:       # %bb.0: # %entry
 727 ; X86-NEXT:    pushl %ebp
 728 ; X86-NEXT:    movl %esp, %ebp
 729 ; X86-NEXT:    andl $-8, %esp
 730 ; X86-NEXT:    subl $8, %esp
 731 ; X86-NEXT:    movd 20(%ebp), %mm0
 732 ; X86-NEXT:    movd 16(%ebp), %mm1
 733 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 734 ; X86-NEXT:    movd 12(%ebp), %mm0
 735 ; X86-NEXT:    movd 8(%ebp), %mm2
 736 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 737 ; X86-NEXT:    pfnacc %mm1, %mm2
 738 ; X86-NEXT:    movq %mm2, (%esp)
 739 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 740 ; X86-NEXT:    flds (%esp)
 741 ; X86-NEXT:    movl %ebp, %esp
 742 ; X86-NEXT:    popl %ebp
 743 ; X86-NEXT:    retl
 744 ;
 745 ; X64-LABEL: test_pfnacc:
 746 ; X64:       # %bb.0: # %entry
 747 ; X64-NEXT:    movdq2q %xmm1, %mm0
 748 ; X64-NEXT:    movdq2q %xmm0, %mm1
 749 ; X64-NEXT:    pfnacc %mm0, %mm1
 750 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 751 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 752 ; X64-NEXT:    retq
 753 entry:
 754   %0 = bitcast <2 x float> %a to x86_mmx
 755   %1 = bitcast <2 x float> %b to x86_mmx
 756   %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 757   %3 = bitcast x86_mmx %2 to <2 x float>
 758   ret <2 x float> %3
 759 }
 760
 761 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
 762
 ; Checks that @llvm.x86.3dnowa.pfpnacc on two <2 x float> arguments is emitted
 ; as a single `pfpnacc` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; X86 rebuilds each operand from two stack dwords (movd + punpckldq), spills
 ; the result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm1/%xmm0 into MMX registers with movdq2q and returns the
 ; result through a stack slot loaded into %xmm0 with movaps.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 763 define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
 764 ; X86-LABEL: test_pfpnacc:
 765 ; X86:       # %bb.0: # %entry
 766 ; X86-NEXT:    pushl %ebp
 767 ; X86-NEXT:    movl %esp, %ebp
 768 ; X86-NEXT:    andl $-8, %esp
 769 ; X86-NEXT:    subl $8, %esp
 770 ; X86-NEXT:    movd 20(%ebp), %mm0
 771 ; X86-NEXT:    movd 16(%ebp), %mm1
 772 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 773 ; X86-NEXT:    movd 12(%ebp), %mm0
 774 ; X86-NEXT:    movd 8(%ebp), %mm2
 775 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
 776 ; X86-NEXT:    pfpnacc %mm1, %mm2
 777 ; X86-NEXT:    movq %mm2, (%esp)
 778 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 779 ; X86-NEXT:    flds (%esp)
 780 ; X86-NEXT:    movl %ebp, %esp
 781 ; X86-NEXT:    popl %ebp
 782 ; X86-NEXT:    retl
 783 ;
 784 ; X64-LABEL: test_pfpnacc:
 785 ; X64:       # %bb.0: # %entry
 786 ; X64-NEXT:    movdq2q %xmm1, %mm0
 787 ; X64-NEXT:    movdq2q %xmm0, %mm1
 788 ; X64-NEXT:    pfpnacc %mm0, %mm1
 789 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
 790 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 791 ; X64-NEXT:    retq
 792 entry:
 793   %0 = bitcast <2 x float> %a to x86_mmx
 794   %1 = bitcast <2 x float> %b to x86_mmx
 795   %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1) ; intrinsic under test
 796   %3 = bitcast x86_mmx %2 to <2 x float>
 797   ret <2 x float> %3
 798 }
 799
 800 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
 801
 ; Checks that @llvm.x86.3dnowa.pi2fw on an x86_mmx argument is emitted as a
 ; single `pi2fw %mm0, %mm0` under both the i686 (X86) and x86_64 (X64) RUN lines.
 ; The argument is used directly from %mm0 on both targets. X86 spills the
 ; result to an 8-byte-aligned slot and reloads both lanes with flds; X64
 ; returns it through a stack slot loaded into %xmm0 with movaps.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 802 define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
 803 ; X86-LABEL: test_pi2fw:
 804 ; X86:       # %bb.0: # %entry
 805 ; X86-NEXT:    pushl %ebp
 806 ; X86-NEXT:    movl %esp, %ebp
 807 ; X86-NEXT:    andl $-8, %esp
 808 ; X86-NEXT:    subl $8, %esp
 809 ; X86-NEXT:    pi2fw %mm0, %mm0
 810 ; X86-NEXT:    movq %mm0, (%esp)
 811 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 812 ; X86-NEXT:    flds (%esp)
 813 ; X86-NEXT:    movl %ebp, %esp
 814 ; X86-NEXT:    popl %ebp
 815 ; X86-NEXT:    retl
 816 ;
 817 ; X64-LABEL: test_pi2fw:
 818 ; X64:       # %bb.0: # %entry
 819 ; X64-NEXT:    pi2fw %mm0, %mm0
 820 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 821 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 822 ; X64-NEXT:    retq
 823 entry:
 824   %0 = bitcast x86_mmx %a.coerce to <2 x i32>
 825   %1 = bitcast <2 x i32> %0 to x86_mmx ; casts %0 straight back to x86_mmx
 826   %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) ; intrinsic under test
 827   %3 = bitcast x86_mmx %2 to <2 x float>
 828   ret <2 x float> %3
 829 }
 830
 831 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
 832
 ; Checks that @llvm.x86.3dnowa.pswapd applied to a <2 x float> argument is
 ; emitted as a single `pswapd` under both the i686 (X86) and x86_64 (X64) RUN
 ; lines; the expected asm comment records the lane order as [1,0].
 ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
 ; result to an 8-byte-aligned slot and reloads both lanes with flds.
 ; X64 moves %xmm0 into %mm0 with movdq2q and returns the result through a
 ; stack slot loaded into %xmm0 with movaps.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 833 define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
 834 ; X86-LABEL: test_pswapdsf:
 835 ; X86:       # %bb.0: # %entry
 836 ; X86-NEXT:    pushl %ebp
 837 ; X86-NEXT:    movl %esp, %ebp
 838 ; X86-NEXT:    andl $-8, %esp
 839 ; X86-NEXT:    subl $8, %esp
 840 ; X86-NEXT:    movd 12(%ebp), %mm0
 841 ; X86-NEXT:    movd 8(%ebp), %mm1
 842 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 843 ; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
 844 ; X86-NEXT:    movq %mm0, (%esp)
 845 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 846 ; X86-NEXT:    flds (%esp)
 847 ; X86-NEXT:    movl %ebp, %esp
 848 ; X86-NEXT:    popl %ebp
 849 ; X86-NEXT:    retl
 850 ;
 851 ; X64-LABEL: test_pswapdsf:
 852 ; X64:       # %bb.0: # %entry
 853 ; X64-NEXT:    movdq2q %xmm0, %mm0
 854 ; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
 855 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 856 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 857 ; X64-NEXT:    retq
 858 entry:
 859   %0 = bitcast <2 x float> %a to x86_mmx
 860   %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) ; intrinsic under test
 861   %2 = bitcast x86_mmx %1 to <2 x float>
 862   ret <2 x float> %2
 863 }
 864
 ; Checks that @llvm.x86.3dnowa.pswapd applied to a <2 x i32> argument is
 ; emitted as a single `pswapd` under both the i686 (X86) and x86_64 (X64) RUN
 ; lines; the expected asm comment records the lane order as [1,0].
 ; X86 builds the operand from two stack dwords (movd + punpckldq), spills the
 ; result to an 8-byte-aligned slot and returns the <2 x i32> in %eax/%edx.
 ; X64 moves %xmm0 into %mm0 with movdq2q and returns via movq2dq.
 ; NOTE(review): the intrinsic is in the `3dnowa` namespace while the RUN lines
 ; pass only -mattr=+3dnow; confirm that this is intended.
 865 define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
 866 ; X86-LABEL: test_pswapdsi:
 867 ; X86:       # %bb.0: # %entry
 868 ; X86-NEXT:    pushl %ebp
 869 ; X86-NEXT:    movl %esp, %ebp
 870 ; X86-NEXT:    andl $-8, %esp
 871 ; X86-NEXT:    subl $8, %esp
 872 ; X86-NEXT:    movd 12(%ebp), %mm0
 873 ; X86-NEXT:    movd 8(%ebp), %mm1
 874 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
 875 ; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
 876 ; X86-NEXT:    movq %mm0, (%esp)
 877 ; X86-NEXT:    movl (%esp), %eax
 878 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 879 ; X86-NEXT:    movl %ebp, %esp
 880 ; X86-NEXT:    popl %ebp
 881 ; X86-NEXT:    retl
 882 ;
 883 ; X64-LABEL: test_pswapdsi:
 884 ; X64:       # %bb.0: # %entry
 885 ; X64-NEXT:    movdq2q %xmm0, %mm0
 886 ; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
 887 ; X64-NEXT:    movq2dq %mm0, %xmm0
 888 ; X64-NEXT:    retq
 889 entry:
 890   %0 = bitcast <2 x i32> %a to x86_mmx
 891   %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) ; intrinsic under test
 892   %2 = bitcast x86_mmx %1 to <2 x i32>
 893   ret <2 x i32> %2
 894 }
 895
 896 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone