llvm/test/CodeGen/NVPTX/sqrt-approx.ll

   1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
   2 ; RUN:   | FileCheck %s
   3 ; RUN: %if ptxas %{                                                                   \
   4 ; RUN:   llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
   5 ; RUN:   | %ptxas-verify                                                              \
   6 ; RUN: %}
   7
     ; NOTE(review): this layout declares 32-bit pointers (p:32:32:32) although
     ; llc is invoked with -march=nvptx64.  None of the visible test functions
     ; touches memory, so it looks harmless here -- confirm it is intentional.
   8 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
   9
     ; Square-root intrinsics (f32 and f64 overloads) called by the test
     ; functions below.
  10 declare float @llvm.sqrt.f32(float)
  11 declare double @llvm.sqrt.f64(double)
  12
  13 ; -- reciprocal sqrt --
  14
  15 ; CHECK-LABEL: test_rsqrt32
     ; f32 reciprocal square root, 1.0 / sqrt(a), in a function carrying only
     ; attribute group #0 ("unsafe-fp-math").  The expectation below requires
     ; the approximate rsqrt instruction to appear in the output.
  16 define float @test_rsqrt32(float %a) #0 {
  17 ; CHECK: rsqrt.approx.f32
  18   %val = tail call float @llvm.sqrt.f32(float %a)
       ; Dividing 1.0 by the sqrt result forms the reciprocal-sqrt pattern.
  19   %ret = fdiv float 1.0, %val
  20   ret float %ret
  21 }
  22
  23 ; CHECK-LABEL: test_rsqrt_ftz
     ; Same 1.0 / sqrt(a) pattern in f32, with attribute group #1
     ; ("denormal-fp-math-f32" = preserve-sign) added on top of #0.  The
     ; expected instruction gains the .ftz modifier.
  24 define float @test_rsqrt_ftz(float %a) #0 #1 {
  25 ; CHECK: rsqrt.approx.ftz.f32
  26   %val = tail call float @llvm.sqrt.f32(float %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
  27   %ret = fdiv float 1.0, %val
  28   ret float %ret
  29 }
  30
  31 ; CHECK-LABEL: test_rsqrt64
     ; f64 reciprocal square root, 1.0 / sqrt(a), under attribute group #0.
     ; The expectation below requires the approximate f64 rsqrt instruction.
  32 define double @test_rsqrt64(double %a) #0 {
  33 ; CHECK: rsqrt.approx.f64
  34   %val = tail call double @llvm.sqrt.f64(double %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
  35   %ret = fdiv double 1.0, %val
  36   ret double %ret
  37 }
  38
  39 ; CHECK-LABEL: test_rsqrt64_ftz
     ; f64 reciprocal square root, 1.0 / sqrt(a), under attribute groups #0
     ; and #1.  The expected instruction is identical to the one in
     ; test_rsqrt64 (no .ftz modifier).
  40 define double @test_rsqrt64_ftz(double %a) #0 #1 {
  41 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
  42 ; CHECK: rsqrt.approx.f64
  43   %val = tail call double @llvm.sqrt.f64(double %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
  44   %ret = fdiv double 1.0, %val
  45   ret double %ret
  46 }
  47
  48 ; -- sqrt --
  49
  50 ; CHECK-LABEL: test_sqrt32
     ; Plain f32 sqrt call with no fast-math flags on the call itself, in a
     ; function carrying attribute group #0.  The expectation below is the
     ; sqrt.rn form, not sqrt.approx, even though llc is invoked with
     ; -nvptx-prec-sqrtf32=0.
  51 define float @test_sqrt32(float %a) #0 {
  52 ; CHECK: sqrt.rn.f32
  53   %ret = tail call float @llvm.sqrt.f32(float %a)
  54   ret float %ret
  55 }
  56
  57 ; CHECK-LABEL: test_sqrt32_ninf
     ; f32 sqrt whose call carries the `ninf afn` fast-math flags.  Unlike
     ; test_sqrt32, the expectation below is the approximate sqrt instruction.
  58 define float @test_sqrt32_ninf(float %a) #0 {
  59 ; CHECK: sqrt.approx.f32
       ; `ninf afn` on the call is the only difference from test_sqrt32.
  60   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
  61   ret float %ret
  62 }
  63
  64 ; CHECK-LABEL: test_sqrt_ftz
     ; Plain f32 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0 and #1.  The expectation below is the sqrt.rn form with the
     ; .ftz modifier.
  65 define float @test_sqrt_ftz(float %a) #0 #1 {
  66 ; CHECK: sqrt.rn.ftz.f32
  67   %ret = tail call float @llvm.sqrt.f32(float %a)
  68   ret float %ret
  69 }
  70
  71 ; CHECK-LABEL: test_sqrt_ftz_ninf
     ; f32 sqrt whose call carries `ninf afn`, under attribute groups #0 and
     ; #1.  The expectation below is the approximate sqrt with the .ftz
     ; modifier.
  72 define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
  73 ; CHECK: sqrt.approx.ftz.f32
       ; `ninf afn` on the call is the only difference from test_sqrt_ftz.
  74   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
  75   ret float %ret
  76 }
  77
  78 ; CHECK-LABEL: test_sqrt64
     ; Plain f64 sqrt call (no fast-math flags on the call) under attribute
     ; group #0.  The expectation below is the sqrt.rn form.
  79 define double @test_sqrt64(double %a) #0 {
  80 ; CHECK: sqrt.rn.f64
  81   %ret = tail call double @llvm.sqrt.f64(double %a)
  82   ret double %ret
  83 }
  84
  85 ; CHECK-LABEL: test_sqrt64_ninf
     ; f64 sqrt whose call carries `ninf afn`, under attribute group #0.  Two
     ; instructions are expected, in this order: an approximate rsqrt followed
     ; by an approximate reciprocal.
  86 define double @test_sqrt64_ninf(double %a) #0 {
  87 ; There's no sqrt.approx.f64 instruction; we emit
  88 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
  89 ; so we just use the ftz version.
  90 ; CHECK: rsqrt.approx.f64
  91 ; CHECK: rcp.approx.ftz.f64
  92   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
  93   ret double %ret
  94 }
  95
  96 ; CHECK-LABEL: test_sqrt64_ftz
     ; Plain f64 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0 and #1.  The expected instruction is the same sqrt.rn.f64 as
     ; in test_sqrt64; no .ftz modifier is expected on the f64 form.
  97 define double @test_sqrt64_ftz(double %a) #0 #1 {
  98 ; CHECK: sqrt.rn.f64
  99   %ret = tail call double @llvm.sqrt.f64(double %a)
 100   ret double %ret
 101 }
 102
 103 ; CHECK-LABEL: test_sqrt64_ftz_ninf
     ; f64 sqrt whose call carries `ninf afn`, under attribute groups #0 and
     ; #1.  The expected instruction pair matches test_sqrt64_ninf.
 104 define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
 105 ; There's no sqrt.approx.ftz.f64 instruction; the expected sequence is the
     ; same rsqrt.approx.f64 + rcp.approx.ftz.f64 pair as in the non-ftz test
     ; (test_sqrt64_ninf).
 106 ; CHECK: rsqrt.approx.f64
 107 ; CHECK: rcp.approx.ftz.f64
 108   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 109   ret double %ret
 110 }
 111
 112 ; -- refined sqrt and rsqrt --
 113 ;
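 114 ; When refinement applies, both the sqrt and rsqrt algorithms emit an
 115 ; rsqrt.approx, followed by some math.  A sqrt call without the `ninf afn`
     ; flags is not refined: the tests below still expect it to lower to sqrt.rn.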
 116
 117 ; CHECK-LABEL: test_rsqrt32_refined
     ; f32 reciprocal square root, 1.0 / sqrt(a), under attribute groups #0
     ; and #2 ("reciprocal-estimates").  Only the rsqrt.approx mnemonic is
     ; verified; any arithmetic emitted after it is not pinned by this test.
 118 define float @test_rsqrt32_refined(float %a) #0 #2 {
 119 ; CHECK: rsqrt.approx.f32
 120   %val = tail call float @llvm.sqrt.f32(float %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
 121   %ret = fdiv float 1.0, %val
 122   ret float %ret
 123 }
 124
 125 ; CHECK-LABEL: test_sqrt32_refined
     ; Plain f32 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0 and #2.  The expectation below is still the sqrt.rn form,
     ; i.e. no rsqrt-based expansion is expected for this call.
 126 define float @test_sqrt32_refined(float %a) #0 #2 {
 127 ; CHECK: sqrt.rn.f32
 128   %ret = tail call float @llvm.sqrt.f32(float %a)
 129   ret float %ret
 130 }
 131
 132 ; CHECK-LABEL: test_sqrt32_refined_ninf
     ; f32 sqrt whose call carries `ninf afn`, under attribute groups #0 and
     ; #2.  The expectation below is an rsqrt.approx, i.e. the sqrt is expected
     ; to be built from a reciprocal-sqrt estimate.  Only that mnemonic is
     ; verified; the arithmetic that follows it is not pinned.
 133 define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
 134 ; CHECK: rsqrt.approx.f32
       ; `ninf afn` on the call is the only difference from test_sqrt32_refined.
 135   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
 136   ret float %ret
 137 }
 138
 139 ; CHECK-LABEL: test_rsqrt64_refined
     ; f64 reciprocal square root, 1.0 / sqrt(a), under attribute groups #0
     ; and #2.  Only the rsqrt.approx mnemonic is verified; any arithmetic
     ; emitted after it is not pinned by this test.
 140 define double @test_rsqrt64_refined(double %a) #0 #2 {
 141 ; CHECK: rsqrt.approx.f64
 142   %val = tail call double @llvm.sqrt.f64(double %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
 143   %ret = fdiv double 1.0, %val
 144   ret double %ret
 145 }
 146
 147 ; CHECK-LABEL: test_sqrt64_refined
     ; Plain f64 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0 and #2.  The expectation below is still the sqrt.rn form.
 148 define double @test_sqrt64_refined(double %a) #0 #2 {
 149 ; CHECK: sqrt.rn.f64
 150   %ret = tail call double @llvm.sqrt.f64(double %a)
 151   ret double %ret
 152 }
 153
 154 ; CHECK-LABEL: test_sqrt64_refined_ninf
     ; f64 sqrt whose call carries `ninf afn`, under attribute groups #0 and
     ; #2.  The expectation below is an rsqrt.approx.  Only that mnemonic is
     ; verified; the arithmetic that follows it is not pinned.
 155 define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
 156 ; CHECK: rsqrt.approx.f64
       ; `ninf afn` on the call is the only difference from test_sqrt64_refined.
 157   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 158   ret double %ret
 159 }
 160
 161 ; -- refined sqrt and rsqrt with ftz enabled --
 162
 163 ; CHECK-LABEL: test_rsqrt32_refined_ftz
     ; f32 reciprocal square root, 1.0 / sqrt(a), under attribute groups #0,
     ; #1 and #2.  The expected rsqrt.approx carries the .ftz modifier.  Only
     ; that mnemonic is verified; later arithmetic is not pinned.
 164 define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
 165 ; CHECK: rsqrt.approx.ftz.f32
 166   %val = tail call float @llvm.sqrt.f32(float %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
 167   %ret = fdiv float 1.0, %val
 168   ret float %ret
 169 }
 170
 171 ; CHECK-LABEL: test_sqrt32_refined_ftz
     ; Plain f32 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0, #1 and #2.  The expectation below is still the sqrt.rn form,
     ; with the .ftz modifier.
 172 define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
 173 ; CHECK: sqrt.rn.ftz.f32
 174   %ret = tail call float @llvm.sqrt.f32(float %a)
 175   ret float %ret
 176 }
 177
 178 ; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
     ; f32 sqrt whose call carries `ninf afn`, under attribute groups #0, #1
     ; and #2.  The expectation below is an rsqrt.approx with the .ftz
     ; modifier.  Only that mnemonic is verified; later arithmetic is not pinned.
 179 define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
 180 ; CHECK: rsqrt.approx.ftz.f32
       ; `ninf afn` on the call is the only difference from
       ; test_sqrt32_refined_ftz.
 181   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
 182   ret float %ret
 183 }
 184
 185 ; CHECK-LABEL: test_rsqrt64_refined_ftz
     ; f64 reciprocal square root, 1.0 / sqrt(a), under attribute groups #0,
     ; #1 and #2.  The expected mnemonic is the same as in
     ; test_rsqrt64_refined (no .ftz modifier).
 186 define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
 187 ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
 188 ; CHECK: rsqrt.approx.f64
 189   %val = tail call double @llvm.sqrt.f64(double %a)
       ; 1.0 / sqrt(a): the reciprocal-sqrt pattern.
 190   %ret = fdiv double 1.0, %val
 191   ret double %ret
 192 }
 193
 194 ; CHECK-LABEL: test_sqrt64_refined_ftz
     ; Plain f64 sqrt call (no fast-math flags on the call) under attribute
     ; groups #0, #1 and #2.  The expectation below is still sqrt.rn.f64, with
     ; no .ftz modifier on the f64 form.
 195 define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
 196 ; CHECK: sqrt.rn.f64
 197   %ret = tail call double @llvm.sqrt.f64(double %a)
 198   ret double %ret
 199 }
 200
 201 ; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
     ; f64 sqrt whose call carries `ninf afn`, under attribute groups #0, #1
     ; and #2.  The expectation below is the non-ftz rsqrt.approx.f64.  Only
     ; that mnemonic is verified; later arithmetic is not pinned.
 202 define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
 203 ; CHECK: rsqrt.approx.f64
       ; `ninf afn` on the call is the only difference from
       ; test_sqrt64_refined_ftz.
 204   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 205   ret double %ret
 206 }
 207
     ; Attribute groups referenced by the test functions above.
     ;
     ; #0: marks the function "unsafe-fp-math"; attached to every test function
     ;     above.
 208 attributes #0 = { "unsafe-fp-math" = "true" }
     ; #1: sets the f32 denormal mode to preserve-sign; attached to the *_ftz
     ;     tests, whose f32 expectations carry the .ftz modifier.
 209 attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" }
     ; #2: requests reciprocal estimates for rsqrtf, rsqrtd, sqrtf and sqrtd,
     ;     each with ":1" (presumably one refinement step -- confirm against
     ;     the LangRef); attached to the *_refined tests.
 210 attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }