test/CodeGen/NVPTX/sqrt-approx.ll

   1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
   2 ; RUN:   | FileCheck %s
   3 ; Verifies which PTX sqrt/rsqrt instructions llc selects for llvm.sqrt and 1.0/llvm.sqrt.
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
   5 ; Intrinsics called by the test functions below.
   6 declare float @llvm.sqrt.f32(float)
   7 declare double @llvm.sqrt.f64(double)
   8
   9 ; -- reciprocal sqrt --
  10
  11 ; CHECK-LABEL: test_rsqrt32
  12 define float @test_rsqrt32(float %a) #0 { ; #0 = unsafe-fp-math
  13 ; CHECK: rsqrt.approx.f32
  14   %val = tail call float @llvm.sqrt.f32(float %a)
  15   %ret = fdiv float 1.0, %val ; 1.0 / sqrt(a): expected to become a single rsqrt
  16   ret float %ret
  17 }
  18
  19 ; CHECK-LABEL: test_rsqrt_ftz
  20 define float @test_rsqrt_ftz(float %a) #0 #1 { ; #0 = unsafe-fp-math, #1 = nvptx-f32ftz
  21 ; CHECK: rsqrt.approx.ftz.f32
  22   %val = tail call float @llvm.sqrt.f32(float %a)
  23   %ret = fdiv float 1.0, %val ; 1.0 / sqrt(a): expected to become a single ftz rsqrt
  24   ret float %ret
  25 }
  26
  27 ; CHECK-LABEL: test_rsqrt64
  28 define double @test_rsqrt64(double %a) #0 { ; #0 = unsafe-fp-math
  29 ; CHECK: rsqrt.approx.f64
  30   %val = tail call double @llvm.sqrt.f64(double %a)
  31   %ret = fdiv double 1.0, %val ; 1.0 / sqrt(a): expected to become a single rsqrt
  32   ret double %ret
  33 }
  34
  35 ; CHECK-LABEL: test_rsqrt64_ftz
  36 define double @test_rsqrt64_ftz(double %a) #0 #1 { ; #0 = unsafe-fp-math, #1 = nvptx-f32ftz
  37 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
  38 ; CHECK: rsqrt.approx.f64
  39   %val = tail call double @llvm.sqrt.f64(double %a)
  40   %ret = fdiv double 1.0, %val ; 1.0 / sqrt(a)
  41   ret double %ret
  42 }
  43
  44 ; -- sqrt --
  45
  46 ; CHECK-LABEL: test_sqrt32
  47 define float @test_sqrt32(float %a) #0 { ; #0 = unsafe-fp-math
  48 ; CHECK: sqrt.approx.f32
  49   %ret = tail call float @llvm.sqrt.f32(float %a)
  50   ret float %ret
  51 }
  52
  53 ; CHECK-LABEL: test_sqrt_ftz
  54 define float @test_sqrt_ftz(float %a) #0 #1 { ; #0 = unsafe-fp-math, #1 = nvptx-f32ftz
  55 ; CHECK: sqrt.approx.ftz.f32
  56   %ret = tail call float @llvm.sqrt.f32(float %a)
  57   ret float %ret
  58 }
  59
  60 ; CHECK-LABEL: test_sqrt64
  61 define double @test_sqrt64(double %a) #0 { ; #0 = unsafe-fp-math
  62 ; There's no sqrt.approx.f64 instruction; we emit
  63 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
  64 ; so we just use the ftz version.
  65 ; CHECK: rsqrt.approx.f64
  66 ; CHECK: rcp.approx.ftz.f64
  67   %ret = tail call double @llvm.sqrt.f64(double %a)
  68   ret double %ret
  69 }
  70
  71 ; CHECK-LABEL: test_sqrt64_ftz
  72 define double @test_sqrt64_ftz(double %a) #0 #1 { ; #0 = unsafe-fp-math, #1 = nvptx-f32ftz
  73 ; There's no sqrt.approx.ftz.f64 instruction; we emit the same rsqrt + rcp pair as the non-ftz test.
  74 ; CHECK: rsqrt.approx.f64
  75 ; CHECK: rcp.approx.ftz.f64
  76   %ret = tail call double @llvm.sqrt.f64(double %a)
  77   ret double %ret
  78 }
  79
  80 ; -- refined sqrt and rsqrt --
  81 ;
  82 ; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
  83 ; by some math.
  84
  85 ; CHECK-LABEL: test_rsqrt32_refined
  86 define float @test_rsqrt32_refined(float %a) #0 #2 { ; #0 = unsafe-fp-math, #2 = reciprocal-estimates
  87 ; CHECK: rsqrt.approx.f32
  88   %val = tail call float @llvm.sqrt.f32(float %a)
  89   %ret = fdiv float 1.0, %val ; 1.0 / sqrt(a); only the initial estimate instruction is matched above
  90   ret float %ret
  91 }
  92
  93 ; CHECK-LABEL: test_sqrt32_refined
  94 define float @test_sqrt32_refined(float %a) #0 #2 { ; #0 = unsafe-fp-math, #2 = reciprocal-estimates
  95 ; CHECK: rsqrt.approx.f32
  96   %ret = tail call float @llvm.sqrt.f32(float %a) ; plain sqrt, yet an rsqrt estimate is expected above
  97   ret float %ret
  98 }
  99
 100 ; CHECK-LABEL: test_rsqrt64_refined
 101 define double @test_rsqrt64_refined(double %a) #0 #2 { ; #0 = unsafe-fp-math, #2 = reciprocal-estimates
 102 ; CHECK: rsqrt.approx.f64
 103   %val = tail call double @llvm.sqrt.f64(double %a)
 104   %ret = fdiv double 1.0, %val ; 1.0 / sqrt(a); only the initial estimate instruction is matched above
 105   ret double %ret
 106 }
 107
 108 ; CHECK-LABEL: test_sqrt64_refined
 109 define double @test_sqrt64_refined(double %a) #0 #2 { ; #0 = unsafe-fp-math, #2 = reciprocal-estimates
 110 ; CHECK: rsqrt.approx.f64
 111   %ret = tail call double @llvm.sqrt.f64(double %a) ; plain sqrt, yet an rsqrt estimate is expected above
 112   ret double %ret
 113 }
 114
 115 ; -- refined sqrt and rsqrt with ftz enabled --
 116
 117 ; CHECK-LABEL: test_rsqrt32_refined_ftz
 118 define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 { ; unsafe-fp-math + nvptx-f32ftz + reciprocal-estimates
 119 ; CHECK: rsqrt.approx.ftz.f32
 120   %val = tail call float @llvm.sqrt.f32(float %a)
 121   %ret = fdiv float 1.0, %val ; 1.0 / sqrt(a); only the initial ftz estimate is matched above
 122   ret float %ret
 123 }
 124
 125 ; CHECK-LABEL: test_sqrt32_refined_ftz
 126 define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 { ; unsafe-fp-math + nvptx-f32ftz + reciprocal-estimates
 127 ; CHECK: rsqrt.approx.ftz.f32
 128   %ret = tail call float @llvm.sqrt.f32(float %a) ; plain sqrt, yet an ftz rsqrt estimate is expected above
 129   ret float %ret
 130 }
 131
 132 ; CHECK-LABEL: test_rsqrt64_refined_ftz
 133 define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { ; unsafe-fp-math + nvptx-f32ftz + reciprocal-estimates
 134 ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
 135 ; CHECK: rsqrt.approx.f64
 136   %val = tail call double @llvm.sqrt.f64(double %a)
 137   %ret = fdiv double 1.0, %val ; 1.0 / sqrt(a)
 138   ret double %ret
 139 }
 140
 141 ; CHECK-LABEL: test_sqrt64_refined_ftz
 142 define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 { ; unsafe-fp-math + nvptx-f32ftz + reciprocal-estimates
 143 ; CHECK: rsqrt.approx.f64
 144   %ret = tail call double @llvm.sqrt.f64(double %a) ; plain sqrt, yet a non-ftz rsqrt estimate is expected above
 145   ret double %ret
 146 }
 147
 148 attributes #0 = { "unsafe-fp-math" = "true" } ; applied to every test function in this file
 149 attributes #1 = { "nvptx-f32ftz" = "true" } ; applied to the *_ftz tests
 150 attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" } ; applied to the *_refined tests