1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
5 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
6 ; single paddd instruction. At the moment we produce the sequence
7 ; shufps+paddd+pshufd. This is fixed with the widening legalization.
; test1: a <2 x i32> add smuggled through a double bitcast (payload lives in
; the low 64 bits of xmm0). Default legalization widens the v2i32 add with a
; shufps/pshufd pair around the paddd; with
; -x86-experimental-vector-widening-legalization a single paddd suffices.
9 define double @test1(double %A) {
12 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
13 ; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
14 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
17 ; CHECK-WIDE-LABEL: test1:
18 ; CHECK-WIDE: # %bb.0:
19 ; CHECK-WIDE-NEXT: paddd {{.*}}(%rip), %xmm0
20 ; CHECK-WIDE-NEXT: retq
21 %1 = bitcast double %A to <2 x i32>
22 %add = add <2 x i32> %1, <i32 3, i32 5>
23 %2 = bitcast <2 x i32> %add to double
; test2: same v2i32-through-double trick as @test1 but with two variable
; operands. No constant-pool shuffle dance is needed, so both legalization
; modes fold the whole body into a single register-register paddd.
27 define double @test2(double %A, double %B) {
30 ; CHECK-NEXT: paddd %xmm1, %xmm0
33 ; CHECK-WIDE-LABEL: test2:
34 ; CHECK-WIDE: # %bb.0:
35 ; CHECK-WIDE-NEXT: paddd %xmm1, %xmm0
36 ; CHECK-WIDE-NEXT: retq
37 %1 = bitcast double %A to <2 x i32>
38 %2 = bitcast double %B to <2 x i32>
39 %add = add <2 x i32> %1, %2
40 %3 = bitcast <2 x i32> %add to double
; test3: <2 x float> fadd viewed through an i64. The value round-trips
; GPR -> XMM -> GPR via movq; the fadd itself is a single addps against a
; constant-pool operand. Both legalization modes produce the same code.
44 define i64 @test3(i64 %A) {
47 ; CHECK-NEXT: movq %rdi, %xmm0
48 ; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
49 ; CHECK-NEXT: movq %xmm0, %rax
52 ; CHECK-WIDE-LABEL: test3:
53 ; CHECK-WIDE: # %bb.0:
54 ; CHECK-WIDE-NEXT: movq %rdi, %xmm0
55 ; CHECK-WIDE-NEXT: addps {{.*}}(%rip), %xmm0
56 ; CHECK-WIDE-NEXT: movq %xmm0, %rax
57 ; CHECK-WIDE-NEXT: retq
58 %1 = bitcast i64 %A to <2 x float>
59 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
60 %2 = bitcast <2 x float> %add to i64
64 ; FIXME: Ideally we should be able to fold the entire body of @test4 into a
65 ; single paddd instruction. This is fixed with the widening legalization.
; test4: integer twin of @test3 — a v2i32 add through an i64 bitcast.
; Default legalization brackets the paddd with a pshufd widen/narrow pair;
; the widening legalization needs only movq/paddd/movq.
67 define i64 @test4(i64 %A) {
70 ; CHECK-NEXT: movq %rdi, %xmm0
71 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
72 ; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
73 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
74 ; CHECK-NEXT: movq %xmm0, %rax
77 ; CHECK-WIDE-LABEL: test4:
78 ; CHECK-WIDE: # %bb.0:
79 ; CHECK-WIDE-NEXT: movq %rdi, %xmm0
80 ; CHECK-WIDE-NEXT: paddd {{.*}}(%rip), %xmm0
81 ; CHECK-WIDE-NEXT: movq %xmm0, %rax
82 ; CHECK-WIDE-NEXT: retq
83 %1 = bitcast i64 %A to <2 x i32>
84 %add = add <2 x i32> %1, <i32 3, i32 5>
85 %2 = bitcast <2 x i32> %add to i64
; test5: <2 x float> fadd through a double bitcast. The operand is already
; in xmm0, so both legalization modes reduce the whole body to a single
; addps against a constant-pool operand.
89 define double @test5(double %A) {
92 ; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
95 ; CHECK-WIDE-LABEL: test5:
96 ; CHECK-WIDE: # %bb.0:
97 ; CHECK-WIDE-NEXT: addps {{.*}}(%rip), %xmm0
98 ; CHECK-WIDE-NEXT: retq
99 %1 = bitcast double %A to <2 x float>
100 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
101 %2 = bitcast <2 x float> %add to double
105 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
106 ; single paddw instruction. This is fixed with the widening legalization.
; test6: <4 x i16> add through a double bitcast. Default legalization
; unpacks the halfwords (punpcklwd), adds, then repacks with pshufb; the
; widening legalization folds the body into a single paddw.
108 define double @test6(double %A) {
109 ; CHECK-LABEL: test6:
111 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
112 ; CHECK-NEXT: paddw {{.*}}(%rip), %xmm0
113 ; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
116 ; CHECK-WIDE-LABEL: test6:
117 ; CHECK-WIDE: # %bb.0:
118 ; CHECK-WIDE-NEXT: paddw {{.*}}(%rip), %xmm0
119 ; CHECK-WIDE-NEXT: retq
120 %1 = bitcast double %A to <4 x i16>
121 %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
122 %2 = bitcast <4 x i16> %add to double
; test7: variable-operand twin of @test6 — two doubles reinterpreted as
; <4 x i16>. With no constant operand to reshuffle, both legalization modes
; emit a single register-register paddw.
126 define double @test7(double %A, double %B) {
127 ; CHECK-LABEL: test7:
129 ; CHECK-NEXT: paddw %xmm1, %xmm0
132 ; CHECK-WIDE-LABEL: test7:
133 ; CHECK-WIDE: # %bb.0:
134 ; CHECK-WIDE-NEXT: paddw %xmm1, %xmm0
135 ; CHECK-WIDE-NEXT: retq
136 %1 = bitcast double %A to <4 x i16>
137 %2 = bitcast double %B to <4 x i16>
138 %add = add <4 x i16> %1, %2
139 %3 = bitcast <4 x i16> %add to double
143 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
144 ; single paddb instruction. At the moment we produce the sequence
145 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
; test8: <8 x i8> add through a double bitcast. Default legalization unpacks
; the bytes (punpcklbw), adds, then repacks the low bytes with pshufb; the
; widening legalization folds the body into a single paddb.
147 define double @test8(double %A) {
148 ; CHECK-LABEL: test8:
150 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
151 ; CHECK-NEXT: paddb {{.*}}(%rip), %xmm0
152 ; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
155 ; CHECK-WIDE-LABEL: test8:
156 ; CHECK-WIDE: # %bb.0:
157 ; CHECK-WIDE-NEXT: paddb {{.*}}(%rip), %xmm0
158 ; CHECK-WIDE-NEXT: retq
159 %1 = bitcast double %A to <8 x i8>
160 %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
161 %2 = bitcast <8 x i8> %add to double
165 define double @test9(double %A, double %B) {
166 ; CHECK-LABEL: test9:
168 ; CHECK-NEXT: paddb %xmm1, %xmm0
171 ; CHECK-WIDE-LABEL: test9:
172 ; CHECK-WIDE: # %bb.0:
173 ; CHECK-WIDE-NEXT: paddb %xmm1, %xmm0
174 ; CHECK-WIDE-NEXT: retq
175 %1 = bitcast double %A to <8 x i8>
176 %2 = bitcast double %B to <8 x i8>
177 %add = add <8 x i8> %1, %2
178 %3 = bitcast <8 x i8> %add to double