test/CodeGen/X86/x86-64-psub.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s
   3
   4 ; MMX packed sub opcodes were wrongly marked as commutative.
   5 ; This test checks that the operands of packed sub instructions are
   6 ; never interchanged by the "Two-Address instruction pass".
   7
   8 declare { i64, double } @getFirstParam() ; external producer of the first operand; the tests in this file read only element 0 (the i64)
   9 declare { i64, double } @getSecondParam() ; external producer of the second operand; the tests in this file read only element 0 (the i64)
  10
  11 define i64 @test_psubb() {
  12 ; CHECK-LABEL: test_psubb:
  13 ; CHECK:       # %bb.0: # %entry
  14 ; CHECK-NEXT:    pushq %rbx
  15 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  16 ; CHECK-NEXT:    .cfi_offset %rbx, -16
  17 ; CHECK-NEXT:    callq getFirstParam
  18 ; CHECK-NEXT:    movq %rax, %rbx
  19 ; CHECK-NEXT:    callq getSecondParam
  20 ; CHECK-NEXT:    movq %rbx, %mm0
  21 ; CHECK-NEXT:    movq %rax, %mm1
  22 ; CHECK-NEXT:    psubb %mm1, %mm0
  23 ; CHECK-NEXT:    movq %mm0, %rax
  24 ; CHECK-NEXT:    popq %rbx
  25 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  26 ; CHECK-NEXT:    retq
  27 entry: ; psub.b of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
  28   %first.ret = tail call { i64, double } @getFirstParam()
  29   %first.bits = extractvalue { i64, double } %first.ret, 0
  30   %second.ret = tail call { i64, double } @getSecondParam()
  31   %second.bits = extractvalue { i64, double } %second.ret, 0
  32   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
  33   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
  34   %lhs.v8i8 = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
  35   %lhs.mmx = bitcast <8 x i8> %lhs.v8i8 to x86_mmx
  36   %rhs.v8i8 = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
  37   %rhs.mmx = bitcast <8 x i8> %rhs.v8i8 to x86_mmx
  38   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
  39   %diff.v8i8 = bitcast x86_mmx %diff.mmx to <8 x i8>
  40   %diff.v1i64 = bitcast <8 x i8> %diff.v8i8 to <1 x i64>
  41   %result = extractelement <1 x i64> %diff.v1i64, i32 0
  42   ret i64 %result
  43 }
  44
  45 define i64 @test_psubw() {
  46 ; CHECK-LABEL: test_psubw:
  47 ; CHECK:       # %bb.0: # %entry
  48 ; CHECK-NEXT:    pushq %rbx
  49 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  50 ; CHECK-NEXT:    .cfi_offset %rbx, -16
  51 ; CHECK-NEXT:    callq getFirstParam
  52 ; CHECK-NEXT:    movq %rax, %rbx
  53 ; CHECK-NEXT:    callq getSecondParam
  54 ; CHECK-NEXT:    movq %rbx, %mm0
  55 ; CHECK-NEXT:    movq %rax, %mm1
  56 ; CHECK-NEXT:    psubw %mm1, %mm0
  57 ; CHECK-NEXT:    movq %mm0, %rax
  58 ; CHECK-NEXT:    popq %rbx
  59 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  60 ; CHECK-NEXT:    retq
  61 entry: ; psub.w of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
  62   %first.ret = tail call { i64, double } @getFirstParam()
  63   %first.bits = extractvalue { i64, double } %first.ret, 0
  64   %second.ret = tail call { i64, double } @getSecondParam()
  65   %second.bits = extractvalue { i64, double } %second.ret, 0
  66   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
  67   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
  68   %lhs.v4i16 = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
  69   %lhs.mmx = bitcast <4 x i16> %lhs.v4i16 to x86_mmx
  70   %rhs.v4i16 = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
  71   %rhs.mmx = bitcast <4 x i16> %rhs.v4i16 to x86_mmx
  72   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
  73   %diff.v4i16 = bitcast x86_mmx %diff.mmx to <4 x i16>
  74   %diff.v1i64 = bitcast <4 x i16> %diff.v4i16 to <1 x i64>
  75   %result = extractelement <1 x i64> %diff.v1i64, i32 0
  76   ret i64 %result
  77 }
  78
  79 define i64 @test_psubd() {
  80 ; CHECK-LABEL: test_psubd:
  81 ; CHECK:       # %bb.0: # %entry
  82 ; CHECK-NEXT:    pushq %rbx
  83 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  84 ; CHECK-NEXT:    .cfi_offset %rbx, -16
  85 ; CHECK-NEXT:    callq getFirstParam
  86 ; CHECK-NEXT:    movq %rax, %rbx
  87 ; CHECK-NEXT:    callq getSecondParam
  88 ; CHECK-NEXT:    movq %rbx, %mm0
  89 ; CHECK-NEXT:    movq %rax, %mm1
  90 ; CHECK-NEXT:    psubd %mm1, %mm0
  91 ; CHECK-NEXT:    movq %mm0, %rax
  92 ; CHECK-NEXT:    popq %rbx
  93 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  94 ; CHECK-NEXT:    retq
  95 entry: ; psub.d of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
  96   %first.ret = tail call { i64, double } @getFirstParam()
  97   %first.bits = extractvalue { i64, double } %first.ret, 0
  98   %second.ret = tail call { i64, double } @getSecondParam()
  99   %second.bits = extractvalue { i64, double } %second.ret, 0
 100   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
 101   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
 102   %lhs.v2i32 = bitcast <1 x i64> %lhs.v1i64 to <2 x i32>
 103   %lhs.mmx = bitcast <2 x i32> %lhs.v2i32 to x86_mmx
 104   %rhs.v2i32 = bitcast <1 x i64> %rhs.v1i64 to <2 x i32>
 105   %rhs.mmx = bitcast <2 x i32> %rhs.v2i32 to x86_mmx
 106   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
 107   %diff.v2i32 = bitcast x86_mmx %diff.mmx to <2 x i32>
 108   %diff.v1i64 = bitcast <2 x i32> %diff.v2i32 to <1 x i64>
 109   %result = extractelement <1 x i64> %diff.v1i64, i32 0
 110   ret i64 %result
 111 }
 112
 113 define i64 @test_psubsb() {
 114 ; CHECK-LABEL: test_psubsb:
 115 ; CHECK:       # %bb.0: # %entry
 116 ; CHECK-NEXT:    pushq %rbx
 117 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 118 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 119 ; CHECK-NEXT:    callq getFirstParam
 120 ; CHECK-NEXT:    movq %rax, %rbx
 121 ; CHECK-NEXT:    callq getSecondParam
 122 ; CHECK-NEXT:    movq %rbx, %mm0
 123 ; CHECK-NEXT:    movq %rax, %mm1
 124 ; CHECK-NEXT:    psubsb %mm1, %mm0
 125 ; CHECK-NEXT:    movq %mm0, %rax
 126 ; CHECK-NEXT:    popq %rbx
 127 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 128 ; CHECK-NEXT:    retq
 129 entry: ; psubs.b of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
 130   %first.ret = tail call { i64, double } @getFirstParam()
 131   %first.bits = extractvalue { i64, double } %first.ret, 0
 132   %second.ret = tail call { i64, double } @getSecondParam()
 133   %second.bits = extractvalue { i64, double } %second.ret, 0
 134   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
 135   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
 136   %lhs.v8i8 = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
 137   %lhs.mmx = bitcast <8 x i8> %lhs.v8i8 to x86_mmx
 138   %rhs.v8i8 = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
 139   %rhs.mmx = bitcast <8 x i8> %rhs.v8i8 to x86_mmx
 140   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
 141   %diff.v8i8 = bitcast x86_mmx %diff.mmx to <8 x i8>
 142   %diff.v1i64 = bitcast <8 x i8> %diff.v8i8 to <1 x i64>
 143   %result = extractelement <1 x i64> %diff.v1i64, i32 0
 144   ret i64 %result
 145 }
 146
 147 define i64 @test_psubswv() {
 148 ; CHECK-LABEL: test_psubswv:
 149 ; CHECK:       # %bb.0: # %entry
 150 ; CHECK-NEXT:    pushq %rbx
 151 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 152 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 153 ; CHECK-NEXT:    callq getFirstParam
 154 ; CHECK-NEXT:    movq %rax, %rbx
 155 ; CHECK-NEXT:    callq getSecondParam
 156 ; CHECK-NEXT:    movq %rbx, %mm0
 157 ; CHECK-NEXT:    movq %rax, %mm1
 158 ; CHECK-NEXT:    psubsw %mm1, %mm0
 159 ; CHECK-NEXT:    movq %mm0, %rax
 160 ; CHECK-NEXT:    popq %rbx
 161 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 162 ; CHECK-NEXT:    retq
 163 entry: ; psubs.w of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
 164   %first.ret = tail call { i64, double } @getFirstParam()
 165   %first.bits = extractvalue { i64, double } %first.ret, 0
 166   %second.ret = tail call { i64, double } @getSecondParam()
 167   %second.bits = extractvalue { i64, double } %second.ret, 0
 168   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
 169   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
 170   %lhs.v4i16 = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
 171   %lhs.mmx = bitcast <4 x i16> %lhs.v4i16 to x86_mmx
 172   %rhs.v4i16 = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
 173   %rhs.mmx = bitcast <4 x i16> %rhs.v4i16 to x86_mmx
 174   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
 175   %diff.v4i16 = bitcast x86_mmx %diff.mmx to <4 x i16>
 176   %diff.v1i64 = bitcast <4 x i16> %diff.v4i16 to <1 x i64>
 177   %result = extractelement <1 x i64> %diff.v1i64, i32 0
 178   ret i64 %result
 179 }
 180
 181 define i64 @test_psubusbv() {
 182 ; CHECK-LABEL: test_psubusbv:
 183 ; CHECK:       # %bb.0: # %entry
 184 ; CHECK-NEXT:    pushq %rbx
 185 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 186 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 187 ; CHECK-NEXT:    callq getFirstParam
 188 ; CHECK-NEXT:    movq %rax, %rbx
 189 ; CHECK-NEXT:    callq getSecondParam
 190 ; CHECK-NEXT:    movq %rbx, %mm0
 191 ; CHECK-NEXT:    movq %rax, %mm1
 192 ; CHECK-NEXT:    psubusb %mm1, %mm0
 193 ; CHECK-NEXT:    movq %mm0, %rax
 194 ; CHECK-NEXT:    popq %rbx
 195 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 196 ; CHECK-NEXT:    retq
 197 entry: ; psubus.b of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
 198   %first.ret = tail call { i64, double } @getFirstParam()
 199   %first.bits = extractvalue { i64, double } %first.ret, 0
 200   %second.ret = tail call { i64, double } @getSecondParam()
 201   %second.bits = extractvalue { i64, double } %second.ret, 0
 202   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
 203   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
 204   %lhs.v8i8 = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
 205   %lhs.mmx = bitcast <8 x i8> %lhs.v8i8 to x86_mmx
 206   %rhs.v8i8 = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
 207   %rhs.mmx = bitcast <8 x i8> %rhs.v8i8 to x86_mmx
 208   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
 209   %diff.v8i8 = bitcast x86_mmx %diff.mmx to <8 x i8>
 210   %diff.v1i64 = bitcast <8 x i8> %diff.v8i8 to <1 x i64>
 211   %result = extractelement <1 x i64> %diff.v1i64, i32 0
 212   ret i64 %result
 213 }
 214
 215 define i64 @test_psubuswv() {
 216 ; CHECK-LABEL: test_psubuswv:
 217 ; CHECK:       # %bb.0: # %entry
 218 ; CHECK-NEXT:    pushq %rbx
 219 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 220 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 221 ; CHECK-NEXT:    callq getFirstParam
 222 ; CHECK-NEXT:    movq %rax, %rbx
 223 ; CHECK-NEXT:    callq getSecondParam
 224 ; CHECK-NEXT:    movq %rbx, %mm0
 225 ; CHECK-NEXT:    movq %rax, %mm1
 226 ; CHECK-NEXT:    psubusw %mm1, %mm0
 227 ; CHECK-NEXT:    movq %mm0, %rax
 228 ; CHECK-NEXT:    popq %rbx
 229 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 230 ; CHECK-NEXT:    retq
 231 entry: ; psubus.w of (i64 from getFirstParam) by (i64 from getSecondParam); the operands must not be exchanged
 232   %first.ret = tail call { i64, double } @getFirstParam()
 233   %first.bits = extractvalue { i64, double } %first.ret, 0
 234   %second.ret = tail call { i64, double } @getSecondParam()
 235   %second.bits = extractvalue { i64, double } %second.ret, 0
 236   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.bits, i32 0
 237   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.bits, i32 0
 238   %lhs.v4i16 = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
 239   %lhs.mmx = bitcast <4 x i16> %lhs.v4i16 to x86_mmx
 240   %rhs.v4i16 = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
 241   %rhs.mmx = bitcast <4 x i16> %rhs.v4i16 to x86_mmx
 242   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind ; lhs comes from the first call, rhs from the second
 243   %diff.v4i16 = bitcast x86_mmx %diff.mmx to <4 x i16>
 244   %diff.v1i64 = bitcast <4 x i16> %diff.v4i16 to <1 x i64>
 245   %result = extractelement <1 x i64> %diff.v1i64, i32 0
 246   ret i64 %result
 247 }
 248
 249 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubuswv, which expects the psubusw mnemonic
 250
 251 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubusbv, which expects the psubusb mnemonic
 252
 253 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubswv, which expects the psubsw mnemonic
 254
 255 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubsb, which expects the psubsb mnemonic
 256
 257 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubd, which expects the psubd mnemonic
 258
 259 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubw, which expects the psubw mnemonic
 260
 261 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone ; called by test_psubb, which expects the psubb mnemonic