llvm/test/CodeGen/NVPTX/sqrt-approx.ll

   1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
   2 ; RUN:   | FileCheck %s
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
   5
   6 declare float @llvm.sqrt.f32(float)
   7 declare double @llvm.sqrt.f64(double)
   8
   9 ; -- reciprocal sqrt --
  10
  11 ; CHECK-LABEL: test_rsqrt32
     ; 1.0 / sqrt(a) in f32 under "unsafe-fp-math" (#0): the output is expected to
     ; contain rsqrt.approx.f32.
  12 define float @test_rsqrt32(float %a) #0 {
  13 ; CHECK: rsqrt.approx.f32
  14   %val = tail call float @llvm.sqrt.f32(float %a)
  15   %ret = fdiv float 1.0, %val
  16   ret float %ret
  17 }
  18
  19 ; CHECK-LABEL: test_rsqrt_ftz
     ; 1.0 / sqrt(a) in f32 with #0 plus the f32 denormal mode of #1: the .ftz
     ; form, rsqrt.approx.ftz.f32, is expected.
  20 define float @test_rsqrt_ftz(float %a) #0 #1 {
  21 ; CHECK: rsqrt.approx.ftz.f32
  22   %val = tail call float @llvm.sqrt.f32(float %a)
  23   %ret = fdiv float 1.0, %val
  24   ret float %ret
  25 }
  26
  27 ; CHECK-LABEL: test_rsqrt64
     ; 1.0 / sqrt(a) in f64 under #0: the output is expected to contain
     ; rsqrt.approx.f64.
  28 define double @test_rsqrt64(double %a) #0 {
  29 ; CHECK: rsqrt.approx.f64
  30   %val = tail call double @llvm.sqrt.f64(double %a)
  31   %ret = fdiv double 1.0, %val
  32   ret double %ret
  33 }
  34
  35 ; CHECK-LABEL: test_rsqrt64_ftz
     ; 1.0 / sqrt(a) in f64 with #0 and #1.  Note that #1 only names the f32
     ; denormal mode ("denormal-fp-math-f32").
  36 define double @test_rsqrt64_ftz(double %a) #0 #1 {
  37 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
  38 ; CHECK: rsqrt.approx.f64
  39   %val = tail call double @llvm.sqrt.f64(double %a)
  40   %ret = fdiv double 1.0, %val
  41   ret double %ret
  42 }
  43
  44 ; -- sqrt --
  45
  46 ; CHECK-LABEL: test_sqrt32
     ; f32 sqrt with no fast-math flags on the call itself: even under #0 the
     ; sqrt.rn.f32 form is expected rather than an approximation.
  47 define float @test_sqrt32(float %a) #0 {
  48 ; CHECK: sqrt.rn.f32
  49   %ret = tail call float @llvm.sqrt.f32(float %a)
  50   ret float %ret
  51 }
  52
  53 ; CHECK-LABEL: test_sqrt32_ninf
     ; f32 sqrt carrying ninf+afn under #0, with no "reciprocal-estimates"
     ; attribute: the direct sqrt.approx.f32 instruction is expected.  The leading
     ; {{[^r]}} keeps the pattern from also matching inside "rsqrt.approx.f32",
     ; which is what test_sqrt32_refined_ninf expects for the refined lowering.
  54 define float @test_sqrt32_ninf(float %a) #0 {
  55 ; CHECK: {{[^r]}}sqrt.approx.f32
  56   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
  57   ret float %ret
  58 }
  59
  60 ; CHECK-LABEL: test_sqrt_ftz
     ; f32 sqrt with no fast-math flags on the call, with #0 and #1: the .ftz
     ; variant of the non-approximate instruction, sqrt.rn.ftz.f32, is expected.
  61 define float @test_sqrt_ftz(float %a) #0 #1 {
  62 ; CHECK: sqrt.rn.ftz.f32
  63   %ret = tail call float @llvm.sqrt.f32(float %a)
  64   ret float %ret
  65 }
  66
  67 ; CHECK-LABEL: test_sqrt_ftz_ninf
     ; f32 sqrt carrying ninf+afn, with #0 and #1 and no "reciprocal-estimates"
     ; attribute: the direct sqrt.approx.ftz.f32 instruction is expected.  The
     ; leading {{[^r]}} keeps the pattern from also matching inside
     ; "rsqrt.approx.ftz.f32", which test_sqrt32_refined_ftz_ninf expects instead.
  68 define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
  69 ; CHECK: {{[^r]}}sqrt.approx.ftz.f32
  70   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
  71   ret float %ret
  72 }
  73
  74 ; CHECK-LABEL: test_sqrt64
     ; f64 sqrt with no fast-math flags on the call itself: even under #0 the
     ; sqrt.rn.f64 form is expected.
  75 define double @test_sqrt64(double %a) #0 {
  76 ; CHECK: sqrt.rn.f64
  77   %ret = tail call double @llvm.sqrt.f64(double %a)
  78   ret double %ret
  79 }
  80
  81 ; CHECK-LABEL: test_sqrt64_ninf
     ; f64 sqrt carrying ninf+afn under #0.
  82 define double @test_sqrt64_ninf(double %a) #0 {
  83 ; There's no sqrt.approx.f64 instruction; we emit
  84 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
  85 ; so we just use the ftz version.
  86 ; CHECK: rsqrt.approx.f64
  87 ; CHECK: rcp.approx.ftz.f64
  88   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
  89   ret double %ret
  90 }
  91
  92 ; CHECK-LABEL: test_sqrt64_ftz
     ; f64 sqrt with no fast-math flags on the call, with #0 and #1: sqrt.rn.f64
     ; is expected, with no .ftz suffix (#1 only names the f32 denormal mode).
  93 define double @test_sqrt64_ftz(double %a) #0 #1 {
  94 ; CHECK: sqrt.rn.f64
  95   %ret = tail call double @llvm.sqrt.f64(double %a)
  96   ret double %ret
  97 }
  98
  99 ; CHECK-LABEL: test_sqrt64_ftz_ninf
     ; f64 sqrt carrying ninf+afn, with #0 and #1.
 100 define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
 101 ; There's no sqrt.approx.f64 instruction, with or without ftz, so the expected
     ; sequence is the same as in test_sqrt64_ninf: rsqrt.approx.f64 followed by
     ; rcp.approx.ftz.f64.
 102 ; CHECK: rsqrt.approx.f64
 103 ; CHECK: rcp.approx.ftz.f64
 104   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 105   ret double %ret
 106 }
 107
 108 ; -- refined sqrt and rsqrt --
 109 ;
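 110 ; With "reciprocal-estimates" set (#2), the approximate sqrt and rsqrt lowerings
 111 ; both emit an rsqrt.approx followed by some math; only the rsqrt.approx is
     ; matched below.  A standalone sqrt without ninf/afn still becomes sqrt.rn.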
 112
 113 ; CHECK-LABEL: test_rsqrt32_refined
     ; 1.0 / sqrt(a) in f32 with #0 and "reciprocal-estimates" (#2).  Only the
     ; rsqrt.approx.f32 is matched; no instruction after it is checked.
 114 define float @test_rsqrt32_refined(float %a) #0 #2 {
 115 ; CHECK: rsqrt.approx.f32
 116   %val = tail call float @llvm.sqrt.f32(float %a)
 117   %ret = fdiv float 1.0, %val
 118   ret float %ret
 119 }
 120
 121 ; CHECK-LABEL: test_sqrt32_refined
     ; f32 sqrt with no fast-math flags on the call, with #0 and #2: sqrt.rn.f32
     ; is still expected.
 122 define float @test_sqrt32_refined(float %a) #0 #2 {
 123 ; CHECK: sqrt.rn.f32
 124   %ret = tail call float @llvm.sqrt.f32(float %a)
 125   ret float %ret
 126 }
 127
 128 ; CHECK-LABEL: test_sqrt32_refined_ninf
     ; f32 sqrt carrying ninf+afn, with #0 and #2: the expansion is expected to
     ; start from rsqrt.approx.f32 (only that instruction is matched).
 129 define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
 130 ; CHECK: rsqrt.approx.f32
 131   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
 132   ret float %ret
 133 }
 134
 135 ; CHECK-LABEL: test_rsqrt64_refined
     ; 1.0 / sqrt(a) in f64 with #0 and "reciprocal-estimates" (#2).  Only the
     ; rsqrt.approx.f64 is matched; no instruction after it is checked.
 136 define double @test_rsqrt64_refined(double %a) #0 #2 {
 137 ; CHECK: rsqrt.approx.f64
 138   %val = tail call double @llvm.sqrt.f64(double %a)
 139   %ret = fdiv double 1.0, %val
 140   ret double %ret
 141 }
 142
 143 ; CHECK-LABEL: test_sqrt64_refined
     ; f64 sqrt with no fast-math flags on the call, with #0 and #2: sqrt.rn.f64
     ; is still expected.
 144 define double @test_sqrt64_refined(double %a) #0 #2 {
 145 ; CHECK: sqrt.rn.f64
 146   %ret = tail call double @llvm.sqrt.f64(double %a)
 147   ret double %ret
 148 }
 149
 150 ; CHECK-LABEL: test_sqrt64_refined_ninf
     ; f64 sqrt carrying ninf+afn, with #0 and #2: the expansion is expected to
     ; start from rsqrt.approx.f64 (only that instruction is matched).
 151 define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
 152 ; CHECK: rsqrt.approx.f64
 153   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 154   ret double %ret
 155 }
 156
 157 ; -- refined sqrt and rsqrt with ftz enabled --
 158
 159 ; CHECK-LABEL: test_rsqrt32_refined_ftz
     ; 1.0 / sqrt(a) in f32 with #0, #1 and #2: the .ftz form,
     ; rsqrt.approx.ftz.f32, is expected (only that instruction is matched).
 160 define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
 161 ; CHECK: rsqrt.approx.ftz.f32
 162   %val = tail call float @llvm.sqrt.f32(float %a)
 163   %ret = fdiv float 1.0, %val
 164   ret float %ret
 165 }
 166
 167 ; CHECK-LABEL: test_sqrt32_refined_ftz
     ; f32 sqrt with no fast-math flags on the call, with #0, #1 and #2:
     ; sqrt.rn.ftz.f32 is still expected.
 168 define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
 169 ; CHECK: sqrt.rn.ftz.f32
 170   %ret = tail call float @llvm.sqrt.f32(float %a)
 171   ret float %ret
 172 }
 173
 174 ; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
     ; f32 sqrt carrying ninf+afn, with #0, #1 and #2: the expansion is expected
     ; to start from rsqrt.approx.ftz.f32 (only that instruction is matched).
 175 define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
 176 ; CHECK: rsqrt.approx.ftz.f32
 177   %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
 178   ret float %ret
 179 }
 180
 181 ; CHECK-LABEL: test_rsqrt64_refined_ftz
     ; 1.0 / sqrt(a) in f64 with #0, #1 and #2 (only the rsqrt.approx.f64 is
     ; matched).
 182 define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
 183 ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
 184 ; CHECK: rsqrt.approx.f64
 185   %val = tail call double @llvm.sqrt.f64(double %a)
 186   %ret = fdiv double 1.0, %val
 187   ret double %ret
 188 }
 189
 190 ; CHECK-LABEL: test_sqrt64_refined_ftz
     ; f64 sqrt with no fast-math flags on the call, with #0, #1 and #2:
     ; sqrt.rn.f64 is still expected, with no .ftz suffix.
 191 define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
 192 ; CHECK: sqrt.rn.f64
 193   %ret = tail call double @llvm.sqrt.f64(double %a)
 194   ret double %ret
 195 }
 196
 197 ; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
     ; f64 sqrt carrying ninf+afn, with #0, #1 and #2: the expansion is expected
     ; to start from the non-ftz rsqrt.approx.f64 (only that instruction is
     ; matched).
 198 define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
 199 ; CHECK: rsqrt.approx.f64
 200   %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
 201   ret double %ret
 202 }
 203
 204 attributes #0 = { "unsafe-fp-math" = "true" } ; carried by every test function in this file
 205 attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" } ; added by the *_ftz tests; their f32 cases expect .ftz instructions
 206 attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" } ; added by the *_refined tests; presumably ":1" requests one refinement step per estimate (confirm against LangRef)