1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
2 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Declarations of the f32 and f64 square-root intrinsics called by the
; sqrt/rsqrt test functions in this file.
4 declare float @llvm.sqrt.f32(float)
5 declare double @llvm.sqrt.f64(double)
; Baseline case: sqrt(%a) / %b in f32, with no attribute group on the function
; and no fast-math flags on either instruction.
; NOTE(review): only the label directive is visible for this function; listing
; numbers 8-9 are absent, and the ret and closing brace are not shown in this
; view - confirm against the full test.
7 ; CHECK-LABEL: sqrt_div(
10 define float @sqrt_div(float %a, float %b) {
11 %t1 = tail call float @llvm.sqrt.f32(float %a)
12 %t2 = fdiv float %t1, %b
; Same sqrt(%a) / %b pattern, but the function carries attribute group #0
; ("unsafe-fp-math"="true"). The visible directive expects the divide to be
; emitted as div.approx.f32.
; NOTE(review): listing number 17 is absent, so no expectation for the sqrt is
; visible here; the ret and closing brace are also not shown.
16 ; CHECK-LABEL: sqrt_div_fast(
18 ; CHECK: div.approx.f32
19 define float @sqrt_div_fast(float %a, float %b) #0 {
20 %t1 = tail call float @llvm.sqrt.f32(float %a)
21 %t2 = fdiv float %t1, %b
; Attribute group #0 ("unsafe-fp-math"="true") plus ninf and afn flags on the
; sqrt call itself. With those call-site flags the directives expect both an
; approximate sqrt (sqrt.approx.f32) and an approximate divide (div.approx.f32).
; NOTE(review): the ret and closing brace are not visible in this view.
25 ; CHECK-LABEL: sqrt_div_fast_ninf(
26 ; CHECK: sqrt.approx.f32
27 ; CHECK: div.approx.f32
28 define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
29 %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
30 %t2 = fdiv float %t1, %b
; Attribute group #1 only ("denormal-fp-math-f32"="preserve-sign"), without
; unsafe-fp-math. The directives expect round-to-nearest instructions carrying
; the .ftz modifier: sqrt.rn.ftz.f32 followed by div.rn.ftz.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
34 ; CHECK-LABEL: sqrt_div_ftz(
35 ; CHECK: sqrt.rn.ftz.f32
36 ; CHECK: div.rn.ftz.f32
37 define float @sqrt_div_ftz(float %a, float %b) #1 {
38 %t1 = tail call float @llvm.sqrt.f32(float %a)
39 %t2 = fdiv float %t1, %b
; Both attribute groups: #0 ("unsafe-fp-math"="true") and #1
; ("denormal-fp-math-f32"="preserve-sign"). The sqrt call has no fast-math
; flags of its own; the directives expect it to stay sqrt.rn.ftz.f32 while the
; divide becomes div.approx.ftz.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
43 ; CHECK-LABEL: sqrt_div_fast_ftz(
44 ; CHECK: sqrt.rn.ftz.f32
45 ; CHECK: div.approx.ftz.f32
46 define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
47 %t1 = tail call float @llvm.sqrt.f32(float %a)
48 %t2 = fdiv float %t1, %b
; Attribute groups #0 and #1 together with ninf and afn flags on the sqrt call.
; The directives expect approximate, flush-to-zero forms of both operations:
; sqrt.approx.ftz.f32 followed by div.approx.ftz.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
52 ; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
53 ; CHECK: sqrt.approx.ftz.f32
54 ; CHECK: div.approx.ftz.f32
55 define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
56 %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
57 %t2 = fdiv float %t1, %b
61 ; There are no fast-math or ftz versions of sqrt and div for f64. We use
62 ; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
; f64 variant of the sqrt-then-divide pattern with attribute groups #0 and #1
; and no fast-math flags on the sqrt call.
; NOTE(review): only the label directive is visible for this function; listing
; numbers 65-66 are absent, and the ret and closing brace are not shown.
64 ; CHECK-LABEL: sqrt_div_fast_ftz_f64(
67 define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
68 %t1 = tail call double @llvm.sqrt.f64(double %a)
69 %t2 = fdiv double %t1, %b
; f64 sqrt-then-divide with attribute groups #0 and #1 and ninf/afn flags on
; the sqrt call. The visible directives expect rsqrt.approx.f64 followed by
; rcp.approx.ftz.f64.
; NOTE(review): listing number 76 is absent, so any expectation for the divide
; is not visible here; the ret and closing brace are also not shown.
73 ; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
74 ; CHECK: rsqrt.approx.f64
75 ; CHECK: rcp.approx.ftz.f64
77 define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
78 %t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
79 %t2 = fdiv double %t1, %b
; 1.0 / sqrt(%a) in f32 with no attribute group and no fast-math flags. The two
; negative directives require that no rsqrt.approx instruction is emitted.
; NOTE(review): listing numbers 83 and 85 are absent, so the label directive
; and whatever sat between the two negative checks are not visible here; the
; ret and closing brace are also not shown - confirm against the full test.
84 ; CHECK-NOT: rsqrt.approx
86 ; CHECK-NOT: rsqrt.approx
87 define float @rsqrt(float %a) {
88 %b = tail call float @llvm.sqrt.f32(float %a)
89 %ret = fdiv float 1.0, %b
; 1.0 / sqrt(%a) in f32 with attribute group #0 ("unsafe-fp-math"="true"). The
; visible directive expects rsqrt.approx.f32 in the output.
; NOTE(review): listing numbers 94-95 and 97-98 are absent, so surrounding
; directives may be missing from this view; the ret and closing brace are also
; not shown.
93 ; CHECK-LABEL: rsqrt_fast(
96 ; CHECK: rsqrt.approx.f32
99 define float @rsqrt_fast(float %a) #0 {
100 %b = tail call float @llvm.sqrt.f32(float %a)
101 %ret = fdiv float 1.0, %b
; 1.0 / sqrt(%a) in f32 with attribute groups #0 and #1. The visible directive
; expects the flush-to-zero form, rsqrt.approx.ftz.f32.
; NOTE(review): listing numbers 106-107 and 109-110 are absent, so surrounding
; directives may be missing from this view; the ret and closing brace are also
; not shown.
105 ; CHECK-LABEL: rsqrt_fast_ftz(
108 ; CHECK: rsqrt.approx.ftz.f32
111 define float @rsqrt_fast_ftz(float %a) #0 #1 {
112 %b = tail call float @llvm.sqrt.f32(float %a)
113 %ret = fdiv float 1.0, %b
; Plain f32 addition of %a and %b with no attribute group and no fast-math
; flags.
; NOTE(review): no label or instruction directives are visible for this
; function (listing numbers before 119 are absent), and the ret and closing
; brace are not shown - confirm against the full test.
119 define float @fadd(float %a, float %b) {
120 %t1 = fadd float %a, %b
; f32 addition with attribute group #1
; ("denormal-fp-math-f32"="preserve-sign"). The directive expects the add to
; carry the .ftz modifier: add.rn.ftz.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
124 ; CHECK-LABEL: fadd_ftz
125 ; CHECK: add.rn.ftz.f32
126 define float @fadd_ftz(float %a, float %b) #1 {
127 %t1 = fadd float %a, %b
; Declarations of the f32 sine and cosine intrinsics called by fsin_approx and
; fcos_approx.
131 declare float @llvm.sin.f32(float)
132 declare float @llvm.cos.f32(float)
; Calls the f32 sine intrinsic in a function with attribute group #0
; ("unsafe-fp-math"="true"); the directive expects sin.approx.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
134 ; CHECK-LABEL: fsin_approx
135 ; CHECK: sin.approx.f32
136 define float @fsin_approx(float %a) #0 {
137 %r = tail call float @llvm.sin.f32(float %a)
; Calls the f32 cosine intrinsic in a function with attribute group #0
; ("unsafe-fp-math"="true"); the directive expects cos.approx.f32.
; NOTE(review): the ret and closing brace are not visible in this view.
141 ; CHECK-LABEL: fcos_approx
142 ; CHECK: cos.approx.f32
143 define float @fcos_approx(float %a) #0 {
144 %r = tail call float @llvm.cos.f32(float %a)
; Two fdiv instructions flagged arcp share the same %divisor. Their quotients
; are multiplied together (%z), and %pred selects between that product and the
; second quotient (%y). The function has no attribute group.
; NOTE(review): listing numbers 150-154 are absent, so the instruction
; directives for this function are not visible here; the ret and closing brace
; are also not shown.
148 ; CHECK-LABEL: repeated_div_recip_allowed
149 define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
155 %x = fdiv arcp float %a, %divisor
156 %y = fdiv arcp float %b, %divisor
157 %z = fmul float %x, %y
158 %w = select i1 %pred, float %z, float %y
; Variant of repeated_div_recip_allowed in which the two arcp quotients feed
; only a select on %pred; there is no multiply of %x by %y. The function has no
; attribute group.
; NOTE(review): listing numbers 164-165 are absent, so the instruction
; directives for this function are not visible here; the ret and closing brace
; are also not shown.
162 ; CHECK-LABEL: repeated_div_recip_allowed_sel
163 define float @repeated_div_recip_allowed_sel(i1 %pred, float %a, float %b, float %divisor) {
166 %x = fdiv arcp float %a, %divisor
167 %y = fdiv arcp float %b, %divisor
168 %w = select i1 %pred, float %x, float %y
; Two arcp divides by the same %divisor under attribute group #1
; ("denormal-fp-math-f32"="preserve-sign"), with both quotients used by a
; multiply. The directives expect one rcp.rn.ftz.f32 followed by three
; mul.rn.ftz.f32 instructions.
; NOTE(review): listing number 178 is absent; the ret and closing brace are not
; visible in this view.
172 ; CHECK-LABEL: repeated_div_recip_allowed_ftz
173 define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
174 ; CHECK: rcp.rn.ftz.f32
175 ; CHECK: mul.rn.ftz.f32
176 ; CHECK: mul.rn.ftz.f32
177 ; CHECK: mul.rn.ftz.f32
179 %x = fdiv arcp float %a, %divisor
180 %y = fdiv arcp float %b, %divisor
181 %z = fmul float %x, %y
182 %w = select i1 %pred, float %z, float %y
; Select-only variant under attribute group #1: the two arcp quotients are
; chosen between by %pred and never multiplied. The visible directive expects
; a real divide, div.rn.ftz.f32.
; NOTE(review): listing number 188 is absent, so a further directive may be
; missing from this view; the ret and closing brace are also not shown.
186 ; CHECK-LABEL: repeated_div_recip_allowed_ftz_sel
187 define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 {
189 ; CHECK: div.rn.ftz.f32
190 %x = fdiv arcp float %a, %divisor
191 %y = fdiv arcp float %b, %divisor
192 %w = select i1 %pred, float %x, float %y
; Two divides by the same %divisor with no per-instruction flags, in a function
; with attribute group #0 ("unsafe-fp-math"="true"); both quotients feed a
; multiply. The visible directive expects rcp.approx.f32.
; NOTE(review): listing numbers 199-202 are absent, so further directives may
; be missing from this view; the ret and closing brace are also not shown.
196 ; CHECK-LABEL: repeated_div_fast
197 define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
198 ; CHECK: rcp.approx.f32
203 %x = fdiv float %a, %divisor
204 %y = fdiv float %b, %divisor
205 %z = fmul float %x, %y
206 %w = select i1 %pred, float %z, float %y
; Select-only variant under attribute group #0: the two quotients are chosen
; between by %pred and never multiplied. The visible directive expects
; div.approx.f32.
; NOTE(review): listing number 212 is absent, so a further directive may be
; missing from this view; the ret and closing brace are also not shown.
210 ; CHECK-LABEL: repeated_div_fast_sel
211 define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 {
213 ; CHECK: div.approx.f32
214 %x = fdiv float %a, %divisor
215 %y = fdiv float %b, %divisor
216 %w = select i1 %pred, float %x, float %y
; Two divides by the same %divisor, with both quotients feeding a multiply, in
; a function carrying attribute groups #0 and #1. The visible directive expects
; the flush-to-zero reciprocal, rcp.approx.ftz.f32.
; NOTE(review): listing numbers 223-226 are absent, so further directives may
; be missing from this view; the ret and closing brace are also not shown.
220 ; CHECK-LABEL: repeated_div_fast_ftz
221 define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
222 ; CHECK: rcp.approx.ftz.f32
227 %x = fdiv float %a, %divisor
228 %y = fdiv float %b, %divisor
229 %z = fmul float %x, %y
230 %w = select i1 %pred, float %z, float %y
; Select-only variant under attribute groups #0 and #1: the two quotients are
; chosen between by %pred and never multiplied. The visible directive expects
; div.approx.ftz.f32.
; NOTE(review): listing number 236 is absent, so a further directive may be
; missing from this view; the ret and closing brace are also not shown.
234 ; CHECK-LABEL: repeated_div_fast_ftz_sel
235 define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
237 ; CHECK: div.approx.ftz.f32
238 %x = fdiv float %a, %divisor
239 %y = fdiv float %b, %divisor
240 %w = select i1 %pred, float %x, float %y
; f32 frem of %a by %b in a function with attribute group #0
; ("unsafe-fp-math"="true"). The negative directive requires that no
; testp.infinite instruction appears in the output.
; NOTE(review): no label directive is visible for this function (listing
; number 244 is absent), and the ret and closing brace are not shown - confirm
; against the full test.
245 define float @frem(float %a, float %b) #0 {
246 ; CHECK-NOT: testp.infinite
247 %rem = frem float %a, %b
; f32 frem of %a by %b with attribute groups #0 and #1. As in the non-ftz
; case, the negative directive requires that no testp.infinite instruction is
; emitted.
; NOTE(review): the ret and closing brace are not visible in this view.
251 ; CHECK-LABEL: frem_ftz
252 define float @frem_ftz(float %a, float %b) #0 #1 {
253 ; CHECK-NOT: testp.infinite
254 %rem = frem float %a, %b
; f64 frem of %a by %b with attribute group #0 ("unsafe-fp-math"="true"). The
; negative directive requires that no testp.infinite instruction is emitted.
; NOTE(review): the ret and closing brace are not visible in this view.
258 ; CHECK-LABEL: frem_f64
259 define double @frem_f64(double %a, double %b) #0 {
260 ; CHECK-NOT: testp.infinite
261 %rem = frem double %a, %b
; Attribute groups referenced by the test functions in this file.
;   #0 enables "unsafe-fp-math"; functions carrying it are the ones whose
;      directives expect .approx instruction forms.
;   #1 sets the f32 denormal mode to "preserve-sign"; f32 functions carrying it
;      are the ones whose directives expect the .ftz modifier.
265 attributes #0 = { "unsafe-fp-math" = "true" }
266 attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" }