llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll

   1 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
   2
     ; The invocation above compiles this file with llc for the amdgcn triple and
     ; pipes the output to FileCheck with two check prefixes enabled.
     ;
     ; Intrinsic under test: the AMDGPU reciprocal, declared for f32 and f64.
   3 declare float @llvm.amdgcn.rcp.f32(float) #0
   4 declare double @llvm.amdgcn.rcp.f64(double) #0
   5
     ; Square-root producers whose results are fed into rcp by the tests in this
     ; file: the target-specific intrinsic and the generic one, for both widths.
   6 declare double @llvm.amdgcn.sqrt.f64(double) #0
   7 declare float @llvm.amdgcn.sqrt.f32(float) #0
   8 declare double @llvm.sqrt.f64(double) #0
   9 declare float @llvm.sqrt.f32(float) #0
  10
  11 ; FUNC-LABEL: {{^}}rcp_undef_f32:
  12 ; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
  13 ; SI-NOT: [[NAN]]
  14 ; SI: buffer_store_dword [[NAN]]
     ; rcp.f32 applied to an undef operand. The expected output moves the
     ; constant 0x7fc00000 (an f32 quiet-NaN bit pattern) into a VGPR and stores
     ; that same register, with no other mention of it between the two
     ; instructions.
  15 define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
  16   %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  17   store float %rcp, ptr addrspace(1) %out, align 4
  18   ret void
  19 }
  20
  21 ; FUNC-LABEL: {{^}}rcp_2_f32:
  22 ; SI-NOT: v_rcp_f32
  23 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
     ; rcp.f32 applied to the constant 2.0. The expected output contains a move
     ; of 0.5 and no v_rcp_f32 before it, i.e. the call is evaluated at compile
     ; time.
  24 define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
  25   %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  26   store float %rcp, ptr addrspace(1) %out, align 4
  27   ret void
  28 }
  29
  30 ; FUNC-LABEL: {{^}}rcp_10_f32:
  31 ; SI-NOT: v_rcp_f32
  32 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
     ; rcp.f32 applied to the constant 10.0. The expected output contains a move
     ; of the literal 0x3dcccccd (the f32 encoding nearest to 0.1) and no
     ; v_rcp_f32 before it, i.e. the call is evaluated at compile time.
  33 define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
  34   %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  35   store float %rcp, ptr addrspace(1) %out, align 4
  36   ret void
  37 }
  38
  39 ; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
  40 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
  41 ; SI-NOT: [[RESULT]]
  42 ; SI: buffer_store_dword [[RESULT]]
     ; 1.0 / %src written as an fdiv carrying !fpmath !0, compiled with attribute
     ; group #1 (unsafe-fp-math=false, f32 denormal mode preserve-sign). The
     ; expected output is a single v_rcp_f32_e32 reading an SGPR, whose result
     ; register is stored without being mentioned again in between.
  43 define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) #1 {
  44   %rcp = fdiv float 1.0, %src, !fpmath !0
  45   store float %rcp, ptr addrspace(1) %out, align 4
  46   ret void
  47 }
  48
  49 ; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
  50 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
  51 ; SI-NOT: [[RESULT]]
  52 ; SI: buffer_store_dword [[RESULT]]
     ; Same fdiv-with-!fpmath pattern as the preserve-sign variant, but compiled
     ; with attribute group #4 (f32 denormal mode ieee). The expected output is
     ; still a single v_rcp_f32_e32 reading an SGPR, stored directly.
     ; NOTE(review): the function name says "safe" while group #4 sets
     ; unsafe-fp-math=true -- confirm the name/attribute pairing is intended.
  53 define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
  54   %rcp = fdiv float 1.0, %src, !fpmath !0
  55   store float %rcp, ptr addrspace(1) %out, align 4
  56   ret void
  57 }
  58
  59 ; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
  60 ; SI: v_div_scale_f32
     ; 1.0 / %src as a plain fdiv (no !fpmath), compiled with attribute group #3
     ; (f32 denormal mode ieee). The only expectation is that a v_div_scale_f32
     ; appears, presumably as part of a full division expansion rather than a
     ; bare v_rcp_f32.
     ; NOTE(review): the function name says "unsafe" while group #3 sets
     ; unsafe-fp-math=false -- confirm the name/attribute pairing is intended.
  61 define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #3 {
  62   %rcp = fdiv float 1.0, %src
  63   store float %rcp, ptr addrspace(1) %out, align 4
  64   ret void
  65 }
  66
  67 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
  68 ; SI: v_mul_f32
  69 ; SI: v_rsq_f32
  70 ; SI: v_mul_f32
  71 ; SI: v_fma_f32
  72 ; SI: v_fma_f32
  73 ; SI: v_fma_f32
  74 ; SI: v_fma_f32
  75 ; SI: v_fma_f32
  76 ; SI: v_rcp_f32
     ; rcp.f32 of the generic llvm.sqrt.f32, both calls flagged contract,
     ; compiled with attribute group #1. The expected output is, in order:
     ; v_mul_f32, v_rsq_f32, v_mul_f32, five v_fma_f32, and finally a separate
     ; v_rcp_f32.
  77 define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #1 {
  78   %sqrt = call contract float @llvm.sqrt.f32(float %src)
  79   %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  80   store float %rcp, ptr addrspace(1) %out, align 4
  81   ret void
  82 }
  83
  84 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32:
  85 ; SI: v_sqrt_f32_e32
  86 ; SI: v_rcp_f32_e32
     ; rcp.f32 of the target-specific llvm.amdgcn.sqrt.f32, both calls flagged
     ; contract, compiled with attribute group #1. The expected output keeps the
     ; two operations separate: a v_sqrt_f32_e32 followed by a v_rcp_f32_e32.
  87 define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %out, float %src) #1 {
  88   %sqrt = call contract float @llvm.amdgcn.sqrt.f32(float %src)
  89   %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  90   store float %rcp, ptr addrspace(1) %out, align 4
  91   ret void
  92 }
  93
  94 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract:
  95 ; SI: v_sqrt_f32_e32
  96 ; SI: v_rcp_f32_e32
     ; rcp.f32 of llvm.amdgcn.sqrt.f32 where only the rcp call is flagged
     ; contract, compiled with attribute group #1. The expected output is a
     ; v_sqrt_f32_e32 followed by a v_rcp_f32_e32.
  97 define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrspace(1) %out, float %src) #1 {
       ; No fast-math flag on the sqrt call; this is what the test name refers to.
  98   %sqrt = call float @llvm.amdgcn.sqrt.f32(float %src)
  99   %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
 100   store float %rcp, ptr addrspace(1) %out, align 4
 101   ret void
 102 }
 103
 104 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
 105 ; SI: v_sqrt_f32_e32
 106 ; SI: v_rcp_f32_e32
     ; rcp.f32 of the generic llvm.sqrt.f32 with no fast-math flags on either
     ; call, compiled with attribute group #2 (unsafe-fp-math=true). The expected
     ; output is a v_sqrt_f32_e32 followed by a v_rcp_f32_e32.
 107 define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
 108   %sqrt = call float @llvm.sqrt.f32(float %src)
 109   %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
 110   store float %rcp, ptr addrspace(1) %out, align 4
 111   ret void
 112 }
 113
 114 ; FUNC-LABEL: {{^}}rcp_f64:
 115 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
 116 ; SI-NOT: [[RESULT]]
 117 ; SI: buffer_store_dwordx2 [[RESULT]]
     ; rcp.f64 applied directly to the kernel argument, compiled with attribute
     ; group #1. The expected output is a v_rcp_f64_e32 reading an SGPR pair,
     ; whose VGPR-pair result is stored with buffer_store_dwordx2 and not
     ; mentioned in between.
 118 define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
 119   %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
 120   store double %rcp, ptr addrspace(1) %out, align 8
 121   ret void
 122 }
 123
 124 ; FUNC-LABEL: {{^}}unsafe_rcp_f64:
 125 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
 126 ; SI-NOT: [[RESULT]]
 127 ; SI: buffer_store_dwordx2 [[RESULT]]
     ; Same IR as the rcp_f64 kernel but compiled with attribute group #2
     ; (unsafe-fp-math=true). The expectations are identical: a v_rcp_f64_e32
     ; reading an SGPR pair, with its result stored by buffer_store_dwordx2.
 128 define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2 {
 129   %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
 130   store double %rcp, ptr addrspace(1) %out, align 8
 131   ret void
 132 }
 133
 134 ; FUNC-LABEL: {{^}}rcp_pat_f64:
 135 ; SI: v_div_scale_f64
     ; 1.0 / %src as a plain f64 fdiv, compiled with attribute group #1
     ; (unsafe-fp-math=false). The only expectation is that a v_div_scale_f64
     ; appears, presumably as part of a full division expansion.
 136 define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
 137   %rcp = fdiv double 1.0, %src
 138   store double %rcp, ptr addrspace(1) %out, align 8
 139   ret void
 140 }
 141
 142 ; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
 143 ; SI: v_rcp_f64
 144 ; SI: v_fma_f64
 145 ; SI: v_fma_f64
 146 ; SI: v_fma_f64
 147 ; SI: v_fma_f64
 148 ; SI: v_fma_f64
 149 ; SI: v_fma_f64
     ; 1.0 / %src as a plain f64 fdiv, compiled with attribute group #2
     ; (unsafe-fp-math=true). The expected output is a v_rcp_f64 followed by six
     ; v_fma_f64 instructions.
 150 define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
 151   %rcp = fdiv double 1.0, %src
 152   store double %rcp, ptr addrspace(1) %out, align 8
 153   ret void
 154 }
 155
 156 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
 157 ; SI-NOT: v_rsq_f64_e32
 158 ; SI: v_rsq_f64
 159 ; SI: v_mul_f64
 160 ; SI: v_mul_f64
 161 ; SI: v_fma_f64
 162 ; SI: v_fma_f64
 163 ; SI: v_fma_f64
 164 ; SI: v_fma_f64
 165 ; SI: v_fma_f64
 166 ; SI: v_fma_f64
 167 ; SI: v_rcp_f64
     ; rcp.f64 of the generic llvm.sqrt.f64, compiled with attribute group #1.
     ; The expected output is, in order: v_rsq_f64, two v_mul_f64, six
     ; v_fma_f64, and finally a separate v_rcp_f64.
     ; NOTE(review): the negative check names v_rsq_f64_e32 while the next
     ; positive check matches its prefix v_rsq_f64, so the negative check only
     ; covers text before the first v_rsq_f64 -- confirm this is the intent.
 168 define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
 169   %sqrt = call double @llvm.sqrt.f64(double %src)
 170   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
 171   store double %rcp, ptr addrspace(1) %out, align 8
 172   ret void
 173 }
 174
 175 ; FUNC-LABEL: {{^}}safe_amdgcn_sqrt_rsq_rcp_pat_f64:
 176 ; SI-NOT: v_rsq_f64_e32
 177 ; SI: v_sqrt_f64
 178 ; SI: v_rcp_f64
     ; rcp.f64 of the target-specific llvm.amdgcn.sqrt.f64, compiled with
     ; attribute group #1. The expected output is a v_sqrt_f64 followed by a
     ; v_rcp_f64, with no v_rsq_f64_e32 ahead of the sqrt.
 179 define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
 180   %sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
 181   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
 182   store double %rcp, ptr addrspace(1) %out, align 8
 183   ret void
 184 }
 185
 186 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
 187 ; SI: v_rsq_f64
 188 ; SI: v_mul_f64
 189 ; SI: v_mul_f64
 190 ; SI: v_fma_f64
 191 ; SI: v_fma_f64
 192 ; SI: v_fma_f64
 193 ; SI: v_fma_f64
 194 ; SI: v_fma_f64
 195 ; SI: v_fma_f64
 196 ; SI: v_rcp_f64
 197 ; SI: buffer_store_dwordx2
     ; rcp.f64 of the generic llvm.sqrt.f64, compiled with attribute group #2
     ; (unsafe-fp-math=true). The expected output is, in order: v_rsq_f64, two
     ; v_mul_f64, six v_fma_f64, a separate v_rcp_f64, and the final
     ; buffer_store_dwordx2.
 198 define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
 199   %sqrt = call double @llvm.sqrt.f64(double %src)
 200   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
 201   store double %rcp, ptr addrspace(1) %out, align 8
 202   ret void
 203 }
 204
 205 ; FUNC-LABEL: {{^}}unsafe_amdgcn_sqrt_rsq_rcp_pat_f64:
 206 ; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
 207 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
 208 ; SI: buffer_store_dwordx2 [[RESULT]]
     ; rcp.f64 of the target-specific llvm.amdgcn.sqrt.f64, compiled with
     ; attribute group #2 (unsafe-fp-math=true). The expected output is a
     ; v_sqrt_f64_e32 reading an SGPR pair, a v_rcp_f64_e32 consuming exactly
     ; that sqrt result, and a buffer_store_dwordx2 of the rcp result.
 209 define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
 210   %sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
 211   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
 212   store double %rcp, ptr addrspace(1) %out, align 8
 213   ret void
 214 }
 215
 216 attributes #0 = { nounwind readnone }
     ; Group #0 above is attached to the intrinsic declarations. Groups #1-#4
     ; below are attached to the kernels and cover the four combinations of
     ; "unsafe-fp-math" (false/true) and "denormal-fp-math-f32"
     ; (preserve-sign/ieee):
     ;   #1  unsafe-fp-math=false, preserve-sign
     ;   #2  unsafe-fp-math=true,  preserve-sign
     ;   #3  unsafe-fp-math=false, ieee
     ;   #4  unsafe-fp-math=true,  ieee
 217 attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 218 attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 219 attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
 220 attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }
 221
     ; Referenced as the !fpmath operand on the f32 fdiv in the two kernels that
     ; expect a bare v_rcp_f32_e32. Per the LLVM language reference, the value
     ; is the maximum acceptable error of the result in ULPs (2.5 here).
 222 !0 = !{float 2.500000e+00}