test/CodeGen/X86/sse2.ll

   1 ; Tests for SSE2 and below, without SSE3+.
   2 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
   3
   4 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  { ; *r = { B, (*A)[1] }
   5         %vec = load <2 x double>* %A, align 16
   6         %scalar = insertelement <2 x double> undef, double %B, i32 0    ; B in lane 0, lane 1 left undef
   7         %merged = shufflevector <2 x double> %vec, <2 x double> %scalar, <2 x i32> < i32 2, i32 1 >    ; lane 0 <- %scalar[0], lane 1 <- %vec[1]
   8         store <2 x double> %merged, <2 x double>* %r, align 16
   9         ret void
  10
  11 ; CHECK: test1:
  12 ; CHECK:        movl    8(%esp), %eax
  13 ; CHECK-NEXT:   movapd  (%eax), %xmm0
  14 ; CHECK-NEXT:   movlpd  12(%esp), %xmm0
  15 ; CHECK-NEXT:   movl    4(%esp), %eax
  16 ; CHECK-NEXT:   movapd  %xmm0, (%eax)
  17 ; CHECK-NEXT:   ret
  18 }
  19
  20 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  { ; *r = { (*A)[0], B }
  21         %vec = load <2 x double>* %A, align 16
  22         %scalar = insertelement <2 x double> undef, double %B, i32 0    ; B in lane 0, lane 1 left undef
  23         %merged = shufflevector <2 x double> %vec, <2 x double> %scalar, <2 x i32> < i32 0, i32 2 >    ; lane 0 <- %vec[0], lane 1 <- %scalar[0]
  24         store <2 x double> %merged, <2 x double>* %r, align 16
  25         ret void
  26
  27 ; CHECK: test2:
  28 ; CHECK:        movl    8(%esp), %eax
  29 ; CHECK-NEXT:   movapd  (%eax), %xmm0
  30 ; CHECK-NEXT:   movhpd  12(%esp), %xmm0
  31 ; CHECK-NEXT:   movl    4(%esp), %eax
  32 ; CHECK-NEXT:   movapd  %xmm0, (%eax)
  33 ; CHECK-NEXT:   ret
  34 }
  35
  36
  37 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind { ; *res = { A[0], B[0], A[1], B[1] }
  38         %tmp = load <4 x float>* %B             ; vector B
  39         %tmp3 = load <4 x float>* %A            ; vector A
  40         %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0          ; A[0]
  41         %tmp7 = extractelement <4 x float> %tmp, i32 0          ; B[0]
  42         %tmp8 = extractelement <4 x float> %tmp3, i32 1         ; A[1]
  43         %tmp9 = extractelement <4 x float> %tmp, i32 1          ; B[1]
  44         %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0             ; lane 0 = A[0]
  45         %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1           ; lane 1 = B[0]
  46         %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2           ; lane 2 = A[1]
  47         %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3           ; lane 3 = B[1]
  48         store <4 x float> %tmp13, <4 x float>* %res
  49         ret void
  50 ; CHECK: test3:
  51 ; CHECK:        unpcklps
  52 }
  53
  54 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { ; *res = { X[2], undef, X[3], undef }
  55         %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >               ; indices 6 and 7 select lanes of the undef operand
  56         store <4 x float> %tmp5, <4 x float>* %res
  57         ret void
  58 ; CHECK: test4:
  59 ; CHECK:        pshufd  $50, %xmm0, %xmm0
  60 }
  61
  62 define <4 x i32> @test5(i8** %ptr) nounwind { ; load one float via *ptr, pad with zeros, then widen bytes -> words -> dwords by interleaving with zero
  63 ; CHECK: test5:
  64 ; CHECK: pxor
  65 ; CHECK: punpcklbw
  66 ; CHECK: punpcklwd
  67
  68         %raw = load i8** %ptr
  69         %fptr = bitcast i8* %raw to float*
  70         %scalar = load float* %fptr
  71         %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  72         %lane1 = insertelement <4 x float> %lane0, float 0.000000e+00, i32 1
  73         %lane2 = insertelement <4 x float> %lane1, float 0.000000e+00, i32 2
  74         %padded = insertelement <4 x float> %lane2, float 0.000000e+00, i32 3          ; { scalar, 0.0, 0.0, 0.0 }
  75         %bytes = bitcast <4 x float> %padded to <16 x i8>
  76         %wide8 = shufflevector <16 x i8> %bytes, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >          ; low 8 bytes of %bytes interleaved with zero bytes
  77         %words = bitcast <16 x i8> %wide8 to <8 x i16>
  78         %wide16 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %words, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >          ; zero words interleaved with the low 4 words of %words
  79         %result = bitcast <8 x i16> %wide16 to <4 x i32>
  80         ret <4 x i32> %result
  81 }
  82
  83 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { ; only lane 0 of the shuffle is defined; CHECK expects a plain load + store
  84         %src = load <4 x float>* %A
  85         %keep = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; lane 0 <- %src[0]; indices 5-7 select the undef operand
  86         store <4 x float> %keep, <4 x float>* %res
  87         ret void
  88
  89 ; CHECK: test6:
  90 ; CHECK:        movaps  (%eax), %xmm0
  91 ; CHECK:        movaps  %xmm0, (%eax)
  92 }
  93
  94 define void @test7() nounwind { ; splat of a zero vector stored to address null; CHECK expects pxor + movaps
  95         %zero = bitcast <4 x i32> zeroinitializer to <4 x float>
  96         %splat = shufflevector <4 x float> %zero, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; all-zero mask: every lane takes %zero[0]
  97         store <4 x float> %splat, <4 x float>* null
  98         ret void
  99
 100 ; CHECK: test7:
 101 ; CHECK:        pxor    %xmm0, %xmm0
 102 ; CHECK:        movaps  %xmm0, 0
 103 }
 104
 105 @x = external global [4 x i32]          ; four i32s read element-by-element by @test8
 106
 107 define <2 x i64> @test8() nounwind { ; rebuild a <4 x i32> from x[0..3]; CHECK expects one movups load
 108         %e0 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)
 109         %e1 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)
 110         %e2 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)
 111         %e3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)
 112         %v0 = insertelement <4 x i32> undef, i32 %e0, i32 0
 113         %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
 114         %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
 115         %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3               ; { x[0], x[1], x[2], x[3] }
 116         %as64 = bitcast <4 x i32> %v3 to <2 x i64>
 117         ret <2 x i64> %as64
 118 ; CHECK: test8:
 119 ; CHECK: movups (%eax), %xmm0
 120 }
 121
 122 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind { ; returns { a, b, c, d }; %dummy is unused
 123         %v0 = insertelement <4 x float> undef, float %a, i32 0
 124         %v1 = insertelement <4 x float> %v0, float %b, i32 1
 125         %v2 = insertelement <4 x float> %v1, float %c, i32 2
 126         %v3 = insertelement <4 x float> %v2, float %d, i32 3
 127         ret <4 x float> %v3
 128 ; CHECK: test9:
 129 ; CHECK: movups 8(%esp), %xmm0
 130 }
 131
 132 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { ; returns { a, b, c, d }; CHECK expects an aligned movaps from 4(%esp)
 133         %v0 = insertelement <4 x float> undef, float %a, i32 0
 134         %v1 = insertelement <4 x float> %v0, float %b, i32 1
 135         %v2 = insertelement <4 x float> %v1, float %c, i32 2
 136         %v3 = insertelement <4 x float> %v2, float %d, i32 3
 137         ret <4 x float> %v3
 138 ; CHECK: test10:
 139 ; CHECK: movaps 4(%esp), %xmm0
 140 }
 141
 142 define <2 x double> @test11(double %a, double %b) nounwind { ; returns { a, b }; CHECK expects an aligned movapd from 4(%esp)
 143         %lo = insertelement <2 x double> undef, double %a, i32 0
 144         %both = insertelement <2 x double> %lo, double %b, i32 1
 145         ret <2 x double> %both
 146 ; CHECK: test11:
 147 ; CHECK: movapd 4(%esp), %xmm0
 148 }
 149
 150 define void @test12() nounwind { ; v = load null; store { v[0], v[1], 1.0, 1.0 } + { v[2], v[3], 0.0, 0.0 } back to null
 151         %v = load <4 x float>* null
 152         %lowOnes = shufflevector <4 x float> %v, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; { v[0], v[1], 1.0, 1.0 }
 153         %highZeros = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; { v[2], v[3], 0.0, 0.0 }
 154         %sum = fadd <4 x float> %lowOnes, %highZeros
 155         store <4 x float> %sum, <4 x float>* null
 156         ret void
 157 ; CHECK: test12:
 158 ; CHECK: movhlps
 159 ; CHECK: shufps
 160 }
 161
 162 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { ; *res = { B[1], C[0], B[1], C[1] }; %A is never read
 163         %tmp3 = load <4 x float>* %B            ; vector B
 164         %tmp5 = load <4 x float>* %C            ; vector C
 165         %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; indices 0-3 select B, 4-7 select C
 166         store <4 x float> %tmp11, <4 x float>* %res
 167         ret void
 168 ; CHECK: test13:
 169 ; CHECK: shufps $69, (%eax), %xmm0
 170 ; CHECK: pshufd $-40, %xmm0, %xmm0
 171 }
 172
 173 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { ; returns { (x+y)[0], (x+y)[1], (x-y)[0], (x-y)[1] }
 174         %yv = load <4 x float>* %y
 175         %xv = load <4 x float>* %x
 176         %sum = fadd <4 x float> %xv, %yv
 177         %diff = fsub <4 x float> %xv, %yv
 178         %lows = shufflevector <4 x float> %sum, <4 x float> %diff, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; low half of %sum followed by low half of %diff
 179         ret <4 x float> %lows
 180 ; CHECK: test14:
 181 ; CHECK:        addps   [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
 182 ; CHECK:        subps   [[X1]], [[X2:%xmm[0-9]+]]
 183 ; CHECK:        movlhps [[X2]], [[X0]]
 184 }
 185
 186 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind { ; returns { x[2], x[3], y[2], y[3] }
 187 entry:
 188         %yv = load <4 x float>* %y
 189         %xv = load <4 x float>* %x
 190         %highs = shufflevector <4 x float> %xv, <4 x float> %yv, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; high half of %xv followed by high half of %yv
 191         ret <4 x float> %highs
 192 ; CHECK: test15:
 193 ; CHECK:        movhlps %xmm1, %xmm0
 194 }
 195
 196 ; PR8900 - picks lanes 0 and 2 out of a loaded <4 x double>; the CHECK lines below expect an unpcklpd before the ret.
 197 ; CHECK: test16:
 198 ; CHECK: unpcklpd
 199 ; CHECK: ret
 200
 201 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
 202   %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3        ; address of the <4 x double> at index 3 from %srcA
 203   %i6 = load <4 x double>* %i5, align 32
 204   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>        ; { %i6[0], %i6[2] }
 205   ret <2 x double> %i7        ; %dst is never written
 206 }
 207
 208 ; PR9009 - no CHECK lines are attached; presumably this only has to compile without crashing (TODO confirm against the bug report).
 209 define fastcc void @test17() nounwind {
 210 entry:
 211   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
 212   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>        ; lanes 0-1 from %0, lanes 2-3 are the 32768 constants
 213   %2 = bitcast <4 x i32> %1 to <4 x float>
 214   store <4 x float> %2, <4 x float> * undef        ; destination pointer is undef
 215   ret void
 216 }
 217
 218 ; PR9210 - fptrunc of a <4 x double> to <4 x float>; no CHECK lines are attached, so presumably this only has to compile (TODO confirm against the bug report).
 219 define <4 x float> @f(<4 x double>) nounwind {
 220 entry:
 221  %double2float.i = fptrunc <4 x double> %0 to <4 x float>        ; %0 is the unnamed <4 x double> argument
 222  ret <4 x float> %double2float.i
 223 }
 224