llvm/test/CodeGen/AArch64/arm64-vsqrt.ll

   1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
   2
   3 define <2 x float> @frecps_2s(ptr %A, ptr %B) nounwind {
       ; Loads a <2 x float> from each of %A and %B and feeds both to the
       ; frecps.v2f32 intrinsic. The FileCheck directives below expect
       ; frecps.2s to appear after this function's label in the llc output.
   4 ;CHECK-LABEL: frecps_2s:
   5 ;CHECK: frecps.2s
   6   %lhs = load <2 x float>, ptr %A
   7   %rhs = load <2 x float>, ptr %B
   8   %step = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
   9   ret <2 x float> %step
  10 }
  11
  12 define <4 x float> @frecps_4s(ptr %A, ptr %B) nounwind {
       ; <4 x float> variant: both operands are loaded from memory and passed
       ; to the frecps.v4f32 intrinsic. The directives below expect frecps.4s.
  13 ;CHECK-LABEL: frecps_4s:
  14 ;CHECK: frecps.4s
  15   %lhs = load <4 x float>, ptr %A
  16   %rhs = load <4 x float>, ptr %B
  17   %step = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  18   ret <4 x float> %step
  19 }
  20
  21 define <2 x double> @frecps_2d(ptr %A, ptr %B) nounwind {
       ; <2 x double> variant: loads one vector from %A and one from %B, then
       ; calls the frecps.v2f64 intrinsic. The directives below expect frecps.2d.
  22 ;CHECK-LABEL: frecps_2d:
  23 ;CHECK: frecps.2d
  24   %lhs = load <2 x double>, ptr %A
  25   %rhs = load <2 x double>, ptr %B
  26   %step = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  27   ret <2 x double> %step
  28 }
  29
  30 declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
  31 declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
  32 declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
  33
  34
  35 define <2 x float> @frsqrts_2s(ptr %A, ptr %B) nounwind {
       ; Loads a <2 x float> from each pointer argument and hands both to the
       ; frsqrts.v2f32 intrinsic. The directives below expect frsqrts.2s after
       ; this function's label.
  36 ;CHECK-LABEL: frsqrts_2s:
  37 ;CHECK: frsqrts.2s
  38   %lhs = load <2 x float>, ptr %A
  39   %rhs = load <2 x float>, ptr %B
  40   %step = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  41   ret <2 x float> %step
  42 }
  43
  44 define <4 x float> @frsqrts_4s(ptr %A, ptr %B) nounwind {
       ; <4 x float> variant: two loaded operands go to the frsqrts.v4f32
       ; intrinsic. The directives below expect frsqrts.4s.
  45 ;CHECK-LABEL: frsqrts_4s:
  46 ;CHECK: frsqrts.4s
  47   %lhs = load <4 x float>, ptr %A
  48   %rhs = load <4 x float>, ptr %B
  49   %step = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  50   ret <4 x float> %step
  51 }
  52
  53 define <2 x double> @frsqrts_2d(ptr %A, ptr %B) nounwind {
       ; <2 x double> variant: two loaded operands go to the frsqrts.v2f64
       ; intrinsic. The directives below expect frsqrts.2d.
  54 ;CHECK-LABEL: frsqrts_2d:
  55 ;CHECK: frsqrts.2d
  56   %lhs = load <2 x double>, ptr %A
  57   %rhs = load <2 x double>, ptr %B
  58   %step = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  59   ret <2 x double> %step
  60 }
  61
  62 declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
  63 declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
  64 declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
  65
  66 define <2 x float> @frecpe_2s(ptr %A) nounwind {
       ; Loads a single <2 x float> from %A and passes it to the frecpe.v2f32
       ; intrinsic. The directives below expect frecpe.2s after this
       ; function's label.
  67 ;CHECK-LABEL: frecpe_2s:
  68 ;CHECK: frecpe.2s
  69   %src = load <2 x float>, ptr %A
  70   %est = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %src)
  71   ret <2 x float> %est
  72 }
  73
  74 define <4 x float> @frecpe_4s(ptr %A) nounwind {
       ; <4 x float> variant: one loaded operand goes to the frecpe.v4f32
       ; intrinsic. The directives below expect frecpe.4s.
  75 ;CHECK-LABEL: frecpe_4s:
  76 ;CHECK: frecpe.4s
  77   %src = load <4 x float>, ptr %A
  78   %est = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %src)
  79   ret <4 x float> %est
  80 }
  81
  82 define <2 x double> @frecpe_2d(ptr %A) nounwind {
       ; <2 x double> variant: one loaded operand goes to the frecpe.v2f64
       ; intrinsic. The directives below expect frecpe.2d.
  83 ;CHECK-LABEL: frecpe_2d:
  84 ;CHECK: frecpe.2d
  85   %src = load <2 x double>, ptr %A
  86   %est = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %src)
  87   ret <2 x double> %est
  88 }
  89
  90 define float @frecpe_s(ptr %A) nounwind {
       ; Scalar float form: loads one float from %A and calls the frecpe.f32
       ; intrinsic. The directive below expects frecpe writing s0 from an
       ; s-register source.
  91 ;CHECK-LABEL: frecpe_s:
  92 ;CHECK: frecpe s0, {{s[0-9]+}}
  93   %src = load float, ptr %A
  94   %est = call float @llvm.aarch64.neon.frecpe.f32(float %src)
  95   ret float %est
  96 }
  97
  98 define double @frecpe_d(ptr %A) nounwind {
       ; Scalar double form: loads one double from %A and calls the frecpe.f64
       ; intrinsic. The directive below expects frecpe writing d0 from a
       ; d-register source.
  99 ;CHECK-LABEL: frecpe_d:
 100 ;CHECK: frecpe d0, {{d[0-9]+}}
 101   %src = load double, ptr %A
 102   %est = call double @llvm.aarch64.neon.frecpe.f64(double %src)
 103   ret double %est
 104 }
 105
 106 declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
 107 declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
 108 declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
 109 declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
 110 declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
 111
 112 define float @frecpx_s(ptr %A) nounwind {
       ; Loads one float from %A and calls the frecpx.f32 intrinsic. The
       ; directive below expects frecpx writing s0 from an s-register source.
 113 ;CHECK-LABEL: frecpx_s:
 114 ;CHECK: frecpx s0, {{s[0-9]+}}
 115   %src = load float, ptr %A
 116   %res = call float @llvm.aarch64.neon.frecpx.f32(float %src)
 117   ret float %res
 118 }
 119
 120 define double @frecpx_d(ptr %A) nounwind {
       ; Loads one double from %A and calls the frecpx.f64 intrinsic. The
       ; directive below expects frecpx writing d0 from a d-register source.
 121 ;CHECK-LABEL: frecpx_d:
 122 ;CHECK: frecpx d0, {{d[0-9]+}}
 123   %src = load double, ptr %A
 124   %res = call double @llvm.aarch64.neon.frecpx.f64(double %src)
 125   ret double %res
 126 }
 127
 128 declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
 129 declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
 130
 131 define <2 x float> @frsqrte_2s(ptr %A) nounwind {
       ; Loads a single <2 x float> from %A and passes it to the
       ; frsqrte.v2f32 intrinsic. The directives below expect frsqrte.2s after
       ; this function's label.
 132 ;CHECK-LABEL: frsqrte_2s:
 133 ;CHECK: frsqrte.2s
 134   %src = load <2 x float>, ptr %A
 135   %est = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %src)
 136   ret <2 x float> %est
 137 }
 138
 139 define <4 x float> @frsqrte_4s(ptr %A) nounwind {
       ; <4 x float> variant: one loaded operand goes to the frsqrte.v4f32
       ; intrinsic. The directives below expect frsqrte.4s.
 140 ;CHECK-LABEL: frsqrte_4s:
 141 ;CHECK: frsqrte.4s
 142   %src = load <4 x float>, ptr %A
 143   %est = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %src)
 144   ret <4 x float> %est
 145 }
 146
 147 define <2 x double> @frsqrte_2d(ptr %A) nounwind {
       ; <2 x double> variant: one loaded operand goes to the frsqrte.v2f64
       ; intrinsic. The directives below expect frsqrte.2d.
 148 ;CHECK-LABEL: frsqrte_2d:
 149 ;CHECK: frsqrte.2d
 150   %src = load <2 x double>, ptr %A
 151   %est = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %src)
 152   ret <2 x double> %est
 153 }
 154
 155 define float @frsqrte_s(ptr %A) nounwind {
       ; Scalar float form: loads one float from %A and calls the frsqrte.f32
       ; intrinsic. The directive below expects frsqrte writing s0 from an
       ; s-register source.
 156 ;CHECK-LABEL: frsqrte_s:
 157 ;CHECK: frsqrte s0, {{s[0-9]+}}
 158   %src = load float, ptr %A
 159   %est = call float @llvm.aarch64.neon.frsqrte.f32(float %src)
 160   ret float %est
 161 }
 162
 163 define double @frsqrte_d(ptr %A) nounwind {
       ; Scalar double form: loads one double from %A and calls the
       ; frsqrte.f64 intrinsic. The directive below expects frsqrte writing d0
       ; from a d-register source.
 164 ;CHECK-LABEL: frsqrte_d:
 165 ;CHECK: frsqrte d0, {{d[0-9]+}}
 166   %src = load double, ptr %A
 167   %est = call double @llvm.aarch64.neon.frsqrte.f64(double %src)
 168   ret double %est
 169 }
 170
 171 declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
 172 declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
 173 declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
 174 declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
 175 declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
 176
 177 define <2 x i32> @urecpe_2s(ptr %A) nounwind {
       ; Integer variant: loads a <2 x i32> from %A and passes it to the
       ; urecpe.v2i32 intrinsic. The directives below expect urecpe.2s after
       ; this function's label.
 178 ;CHECK-LABEL: urecpe_2s:
 179 ;CHECK: urecpe.2s
 180   %src = load <2 x i32>, ptr %A
 181   %est = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %src)
 182   ret <2 x i32> %est
 183 }
 184
 185 define <4 x i32> @urecpe_4s(ptr %A) nounwind {
       ; <4 x i32> variant: one loaded operand goes to the urecpe.v4i32
       ; intrinsic. The directives below expect urecpe.4s.
 186 ;CHECK-LABEL: urecpe_4s:
 187 ;CHECK: urecpe.4s
 188   %src = load <4 x i32>, ptr %A
 189   %est = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %src)
 190   ret <4 x i32> %est
 191 }
 192
 193 declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
 194 declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
 195
 196 define <2 x i32> @ursqrte_2s(ptr %A) nounwind {
       ; Integer variant: loads a <2 x i32> from %A and passes it to the
       ; ursqrte.v2i32 intrinsic. The directives below expect ursqrte.2s after
       ; this function's label.
 197 ;CHECK-LABEL: ursqrte_2s:
 198 ;CHECK: ursqrte.2s
 199   %src = load <2 x i32>, ptr %A
 200   %est = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %src)
 201   ret <2 x i32> %est
 202 }
 203
 204 define <4 x i32> @ursqrte_4s(ptr %A) nounwind {
       ; <4 x i32> variant: one loaded operand goes to the ursqrte.v4i32
       ; intrinsic. The directives below expect ursqrte.4s.
 205 ;CHECK-LABEL: ursqrte_4s:
 206 ;CHECK: ursqrte.4s
 207   %src = load <4 x i32>, ptr %A
 208   %est = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %src)
 209   ret <4 x i32> %est
 210 }
 211
 212 declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
 213 declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
 214
 215 define float @f1(float %a, float %b) nounwind readnone optsize ssp {
       ; Scalar float frsqrts taking its operands directly as arguments (no
       ; loads). The directives below expect `frsqrts s0, s0, s1` with `ret`
       ; on the very next output line.
 216 ; CHECK-LABEL: f1:
 217 ; CHECK: frsqrts s0, s0, s1
 218 ; CHECK-NEXT: ret
 219   %step = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
 220   ret float %step
 221 }
 222
 223 define double @f2(double %a, double %b) nounwind readnone optsize ssp {
       ; Scalar double frsqrts taking its operands directly as arguments (no
       ; loads). The directives below expect `frsqrts d0, d0, d1` with `ret`
       ; on the very next output line.
 224 ; CHECK-LABEL: f2:
 225 ; CHECK: frsqrts d0, d0, d1
 226 ; CHECK-NEXT: ret
 227   %step = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
 228   ret double %step
 229 }
 230
 231 declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone
 232 declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone