test/CodeGen/X86/fdiv-combine-vec.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefix=SSE
   3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx  | FileCheck %s --check-prefix=AVX
   4
   5 define <2 x double> @splat_fdiv_v2f64(<2 x double> %x, double %y) { ; divides every lane of %x by the scalar %y (splatted), with 'fast' math flags
   6 ; SSE-LABEL: splat_fdiv_v2f64:
   7 ; SSE:       # %bb.0:
   8 ; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
   9 ; SSE-NEXT:    divsd %xmm1, %xmm2
  10 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0,0]
  11 ; SSE-NEXT:    mulpd %xmm2, %xmm0
  12 ; SSE-NEXT:    retq
  13 ;
  14 ; AVX-LABEL: splat_fdiv_v2f64:
  15 ; AVX:       # %bb.0:
  16 ; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
  17 ; AVX-NEXT:    vdivsd %xmm1, %xmm2, %xmm1
  18 ; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
  19 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %vy = insertelement <2 x double> undef, double %y, i32 0 ; place %y in lane 0; lane 1 stays undef
  22   %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
  23   %r = fdiv fast <2 x double> %x, %splaty ; expected asm: one scalar divide of a loaded constant (presumably 1.0 - confirm) by %y, then splat + vector multiply
  24   ret <2 x double> %r
  25 }
  26
  27 define <4 x double> @splat_fdiv_v4f64(<4 x double> %x, double %y) { ; divides every lane of %x by the scalar %y (splatted), with only the 'arcp' flag
  28 ; SSE-LABEL: splat_fdiv_v4f64:
  29 ; SSE:       # %bb.0:
  30 ; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
  31 ; SSE-NEXT:    divsd %xmm2, %xmm3
  32 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
  33 ; SSE-NEXT:    mulpd %xmm3, %xmm0
  34 ; SSE-NEXT:    mulpd %xmm3, %xmm1
  35 ; SSE-NEXT:    retq
  36 ;
  37 ; AVX-LABEL: splat_fdiv_v4f64:
  38 ; AVX:       # %bb.0:
  39 ; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
  40 ; AVX-NEXT:    vdivsd %xmm1, %xmm2, %xmm1
  41 ; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
  42 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
  43 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
  44 ; AVX-NEXT:    retq
  45   %vy = insertelement <4 x double> undef, double %y, i32 0 ; place %y in lane 0; remaining lanes stay undef
  46   %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
  47   %r = fdiv arcp <4 x double> %x, %splaty ; expected asm: one scalar divide, then two xmm mulpd in the sse2 run or one ymm vmulpd in the avx run
  48   ret <4 x double> %r
  49 }
  50
  51 define <4 x float> @splat_fdiv_v4f32(<4 x float> %x, float %y) { ; divides every lane of %x by the scalar %y (splatted), with 'arcp reassoc' flags
  52 ; SSE-LABEL: splat_fdiv_v4f32:
  53 ; SSE:       # %bb.0:
  54 ; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
  55 ; SSE-NEXT:    divss %xmm1, %xmm2
  56 ; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
  57 ; SSE-NEXT:    mulps %xmm2, %xmm0
  58 ; SSE-NEXT:    retq
  59 ;
  60 ; AVX-LABEL: splat_fdiv_v4f32:
  61 ; AVX:       # %bb.0:
  62 ; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
  63 ; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
  64 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
  65 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
  66 ; AVX-NEXT:    retq
  67   %vy = insertelement <4 x float> undef, float %y, i32 0 ; place %y in lane 0; remaining lanes stay undef
  68   %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
  69   %r = fdiv arcp reassoc <4 x float> %x, %splaty ; expected asm: one scalar divide of a loaded constant (presumably 1.0 - confirm) by %y, then splat + vector multiply
  70   ret <4 x float> %r
  71 }
  72
  73 define <8 x float> @splat_fdiv_v8f32(<8 x float> %x, float %y) { ; divides every lane of %x by the scalar %y (splatted), with 'fast' math flags
  74 ; SSE-LABEL: splat_fdiv_v8f32:
  75 ; SSE:       # %bb.0:
  76 ; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
  77 ; SSE-NEXT:    divss %xmm2, %xmm3
  78 ; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
  79 ; SSE-NEXT:    mulps %xmm3, %xmm0
  80 ; SSE-NEXT:    mulps %xmm3, %xmm1
  81 ; SSE-NEXT:    retq
  82 ;
  83 ; AVX-LABEL: splat_fdiv_v8f32:
  84 ; AVX:       # %bb.0:
  85 ; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
  86 ; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
  87 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
  88 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
  89 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
  90 ; AVX-NEXT:    retq
  91   %vy = insertelement <8 x float> undef, float %y, i32 0 ; place %y in lane 0; remaining lanes stay undef
  92   %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
  93   %r = fdiv fast <8 x float> %x, %splaty ; expected asm: one scalar divide, then two xmm mulps in the sse2 run or one ymm vmulps in the avx run
  94   ret <8 x float> %r
  95 }
  96
  97 define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 { ; same splat divide as splat_fdiv_v4f32, but compiled with attribute group #0 (reciprocal-estimates)
  98 ; SSE-LABEL: splat_fdiv_v4f32_estimate:
  99 ; SSE:       # %bb.0:
 100 ; SSE-NEXT:    rcpss %xmm1, %xmm2
 101 ; SSE-NEXT:    mulss %xmm2, %xmm1
 102 ; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 103 ; SSE-NEXT:    subss %xmm1, %xmm3
 104 ; SSE-NEXT:    mulss %xmm2, %xmm3
 105 ; SSE-NEXT:    addss %xmm2, %xmm3
 106 ; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
 107 ; SSE-NEXT:    mulps %xmm3, %xmm0
 108 ; SSE-NEXT:    retq
 109 ;
 110 ; AVX-LABEL: splat_fdiv_v4f32_estimate:
 111 ; AVX:       # %bb.0:
 112 ; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
 113 ; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
 114 ; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 115 ; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
 116 ; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
 117 ; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
 118 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 119 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 120 ; AVX-NEXT:    retq
 121   %vy = insertelement <4 x float> undef, float %y, i32 0 ; place %y in lane 0; remaining lanes stay undef
 122   %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
 123   %r = fdiv arcp reassoc <4 x float> %x, %splaty ; expected asm: scalar rcpss estimate plus a mul/sub/mul/add refinement (looks like one Newton-Raphson step - confirm) instead of a divide, then splat + vector multiply
 124   ret <4 x float> %r
 125 }
 126
 127 define <8 x float> @splat_fdiv_v8f32_estimate(<8 x float> %x, float %y) #0 { ; same splat divide as splat_fdiv_v8f32, but compiled with attribute group #0 (reciprocal-estimates)
 128 ; SSE-LABEL: splat_fdiv_v8f32_estimate:
 129 ; SSE:       # %bb.0:
 130 ; SSE-NEXT:    rcpss %xmm2, %xmm3
 131 ; SSE-NEXT:    mulss %xmm3, %xmm2
 132 ; SSE-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
 133 ; SSE-NEXT:    subss %xmm2, %xmm4
 134 ; SSE-NEXT:    mulss %xmm3, %xmm4
 135 ; SSE-NEXT:    addss %xmm3, %xmm4
 136 ; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
 137 ; SSE-NEXT:    mulps %xmm4, %xmm0
 138 ; SSE-NEXT:    mulps %xmm4, %xmm1
 139 ; SSE-NEXT:    retq
 140 ;
 141 ; AVX-LABEL: splat_fdiv_v8f32_estimate:
 142 ; AVX:       # %bb.0:
 143 ; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
 144 ; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
 145 ; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 146 ; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
 147 ; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
 148 ; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
 149 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 150 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
 151 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 152 ; AVX-NEXT:    retq
 153   %vy = insertelement <8 x float> undef, float %y, i32 0 ; place %y in lane 0; remaining lanes stay undef
 154   %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
 155   %r = fdiv fast <8 x float> %x, %splaty ; expected asm: scalar rcpss estimate plus a mul/sub/mul/add refinement (looks like one Newton-Raphson step - confirm) instead of a divide, then splat + vector multiply
 156   ret <8 x float> %r
 157 }
 158
 159 attributes #0 = { "reciprocal-estimates"="divf,vec-divf" } ; attached to the two *_estimate functions; presumably enables estimates for scalar (divf) and vector (vec-divf) float division - confirm against LLVM docs