1 ; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
2 ; RUN: llc < %s -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
3 ; RUN: llc < %s -mcpu=sm_80 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
4 ; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
5 ; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
6 ; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}
8 target triple = "nvptx64-nvidia-cuda"
10 ; Checks that LLVM intrinsics for math functions are correctly lowered to PTX.
12 declare float @llvm.ceil.f32(float) #0
13 declare double @llvm.ceil.f64(double) #0
14 declare float @llvm.floor.f32(float) #0
15 declare double @llvm.floor.f64(double) #0
16 declare float @llvm.round.f32(float) #0
17 declare double @llvm.round.f64(double) #0
18 declare float @llvm.nearbyint.f32(float) #0
19 declare double @llvm.nearbyint.f64(double) #0
20 declare float @llvm.rint.f32(float) #0
21 declare double @llvm.rint.f64(double) #0
22 declare float @llvm.roundeven.f32(float) #0
23 declare double @llvm.roundeven.f64(double) #0
24 declare float @llvm.trunc.f32(float) #0
25 declare double @llvm.trunc.f64(double) #0
26 declare float @llvm.fabs.f32(float) #0
27 declare double @llvm.fabs.f64(double) #0
28 declare half @llvm.minnum.f16(half, half) #0
29 declare float @llvm.minnum.f32(float, float) #0
30 declare double @llvm.minnum.f64(double, double) #0
31 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
32 declare half @llvm.maxnum.f16(half, half) #0
33 declare float @llvm.maxnum.f32(float, float) #0
34 declare double @llvm.maxnum.f64(double, double) #0
35 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #0
36 declare float @llvm.fma.f32(float, float, float) #0
37 declare double @llvm.fma.f64(double, double, double) #0
; Lowering of llvm.ceil.f32 for a function without attribute #1: the
; generated PTX is expected to contain a cvt.rpi.f32.f32 instruction.
41 ; CHECK-LABEL: ceil_float
42 define float @ceil_float(float %a) {
43 ; CHECK: cvt.rpi.f32.f32
44 %b = call float @llvm.ceil.f32(float %a)
; Same call as ceil_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
48 ; CHECK-LABEL: ceil_float_ftz
49 define float @ceil_float_ftz(float %a) #1 {
50 ; CHECK: cvt.rpi.ftz.f32.f32
51 %b = call float @llvm.ceil.f32(float %a)
; Lowering of llvm.ceil.f64: the generated PTX is expected to contain a
; cvt.rpi.f64.f64 instruction.
55 ; CHECK-LABEL: ceil_double
56 define double @ceil_double(double %a) {
57 ; CHECK: cvt.rpi.f64.f64
58 %b = call double @llvm.ceil.f64(double %a)
; Lowering of llvm.floor.f32 for a function without attribute #1: the
; generated PTX is expected to contain a cvt.rmi.f32.f32 instruction.
64 ; CHECK-LABEL: floor_float
65 define float @floor_float(float %a) {
66 ; CHECK: cvt.rmi.f32.f32
67 %b = call float @llvm.floor.f32(float %a)
; Same call as floor_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
71 ; CHECK-LABEL: floor_float_ftz
72 define float @floor_float_ftz(float %a) #1 {
73 ; CHECK: cvt.rmi.ftz.f32.f32
74 %b = call float @llvm.floor.f32(float %a)
; Lowering of llvm.floor.f64: the generated PTX is expected to contain a
; cvt.rmi.f64.f64 instruction.
78 ; CHECK-LABEL: floor_double
79 define double @floor_double(double %a) {
80 ; CHECK: cvt.rmi.f64.f64
81 %b = call double @llvm.floor.f64(double %a)
; Lowering of llvm.round.f32 for a function without attribute #1. The
; patterns below look for an expanded sequence rather than one instruction:
;   -2147483648 is 0x80000000 (only bit 31 set, i.e. the sign-bit mask);
;   1056964608  is 0x3F000000 (the single-precision bit pattern of 0.5).
; The register captured as R1 by the "and" must be the one fed to the "or".
87 ; CHECK-LABEL: round_float
88 define float @round_float(float %a) {
89 ; check the use of sign mask and 0.5 to implement round
90 ; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
91 ; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
92 %b = call float @llvm.round.f32(float %a)
; Same call as round_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"). The visible patterns are the
; same as for round_float:
;   -2147483648 is 0x80000000 (only bit 31 set, i.e. the sign-bit mask);
;   1056964608  is 0x3F000000 (the single-precision bit pattern of 0.5).
; The register captured as R1 by the "and" must be the one fed to the "or".
96 ; CHECK-LABEL: round_float_ftz
97 define float @round_float_ftz(float %a) #1 {
98 ; check the use of sign mask and 0.5 to implement round
99 ; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
100 ; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
101 %b = call float @llvm.round.f32(float %a)
; Lowering of llvm.round.f64. The patterns look for a comparison against
; 0.5 (0d3FE0000000000000 is the double-precision bit pattern of 0.5)
; followed by an add of 0.5; the register captured as R in the compare must
; be the same register used as the add operand.
105 ; CHECK-LABEL: round_double
106 define double @round_double(double %a) {
107 ; check the use of 0.5 to implement round
108 ; CHECK: setp.lt.f64 {{.*}}, [[R:%fd[0-9]+]], 0d3FE0000000000000;
109 ; CHECK: add.rn.f64 {{.*}}, [[R]], 0d3FE0000000000000;
110 %b = call double @llvm.round.f64(double %a)
114 ; ---- nearbyint ----
; Lowering of llvm.nearbyint.f32 for a function without attribute #1: the
; generated PTX is expected to contain a cvt.rni.f32.f32 instruction.
116 ; CHECK-LABEL: nearbyint_float
117 define float @nearbyint_float(float %a) {
118 ; CHECK: cvt.rni.f32.f32
119 %b = call float @llvm.nearbyint.f32(float %a)
; Same call as nearbyint_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
123 ; CHECK-LABEL: nearbyint_float_ftz
124 define float @nearbyint_float_ftz(float %a) #1 {
125 ; CHECK: cvt.rni.ftz.f32.f32
126 %b = call float @llvm.nearbyint.f32(float %a)
; Lowering of llvm.nearbyint.f64: the generated PTX is expected to contain
; a cvt.rni.f64.f64 instruction.
130 ; CHECK-LABEL: nearbyint_double
131 define double @nearbyint_double(double %a) {
132 ; CHECK: cvt.rni.f64.f64
133 %b = call double @llvm.nearbyint.f64(double %a)
; Lowering of llvm.rint.f32 for a function without attribute #1: the
; expected instruction is cvt.rni.f32.f32, the same one the nearbyint and
; roundeven tests in this file look for.
139 ; CHECK-LABEL: rint_float
140 define float @rint_float(float %a) {
141 ; CHECK: cvt.rni.f32.f32
142 %b = call float @llvm.rint.f32(float %a)
; Same call as rint_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
146 ; CHECK-LABEL: rint_float_ftz
147 define float @rint_float_ftz(float %a) #1 {
148 ; CHECK: cvt.rni.ftz.f32.f32
149 %b = call float @llvm.rint.f32(float %a)
; Lowering of llvm.rint.f64: the generated PTX is expected to contain a
; cvt.rni.f64.f64 instruction.
153 ; CHECK-LABEL: rint_double
154 define double @rint_double(double %a) {
155 ; CHECK: cvt.rni.f64.f64
156 %b = call double @llvm.rint.f64(double %a)
160 ; ---- roundeven ----
; Lowering of llvm.roundeven.f32 for a function without attribute #1: the
; generated PTX is expected to contain a cvt.rni.f32.f32 instruction.
162 ; CHECK-LABEL: roundeven_float
163 define float @roundeven_float(float %a) {
164 ; CHECK: cvt.rni.f32.f32
165 %b = call float @llvm.roundeven.f32(float %a)
; Same call as roundeven_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
169 ; CHECK-LABEL: roundeven_float_ftz
170 define float @roundeven_float_ftz(float %a) #1 {
171 ; CHECK: cvt.rni.ftz.f32.f32
172 %b = call float @llvm.roundeven.f32(float %a)
; Lowering of llvm.roundeven.f64: the generated PTX is expected to contain
; a cvt.rni.f64.f64 instruction.
176 ; CHECK-LABEL: roundeven_double
177 define double @roundeven_double(double %a) {
178 ; CHECK: cvt.rni.f64.f64
179 %b = call double @llvm.roundeven.f64(double %a)
; Lowering of llvm.trunc.f32 for a function without attribute #1: the
; generated PTX is expected to contain a cvt.rzi.f32.f32 instruction.
185 ; CHECK-LABEL: trunc_float
186 define float @trunc_float(float %a) {
187 ; CHECK: cvt.rzi.f32.f32
188 %b = call float @llvm.trunc.f32(float %a)
; Same call as trunc_float, but the function carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the expected conversion
; gains the .ftz modifier.
192 ; CHECK-LABEL: trunc_float_ftz
193 define float @trunc_float_ftz(float %a) #1 {
194 ; CHECK: cvt.rzi.ftz.f32.f32
195 %b = call float @llvm.trunc.f32(float %a)
; Lowering of llvm.trunc.f64: the generated PTX is expected to contain a
; cvt.rzi.f64.f64 instruction.
199 ; CHECK-LABEL: trunc_double
200 define double @trunc_double(double %a) {
201 ; CHECK: cvt.rzi.f64.f64
202 %b = call double @llvm.trunc.f64(double %a)
; Exercises llvm.fabs.f32 on a float argument in a function without
; attribute #1.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
208 ; CHECK-LABEL: abs_float
209 define float @abs_float(float %a) {
211 %b = call float @llvm.fabs.f32(float %a)
; Exercises llvm.fabs.f32 in a function that carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign").
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
215 ; CHECK-LABEL: abs_float_ftz
216 define float @abs_float_ftz(float %a) #1 {
218 %b = call float @llvm.fabs.f32(float %a)
; Exercises llvm.fabs.f64 on a double argument.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
222 ; CHECK-LABEL: abs_double
223 define double @abs_double(double %a) {
225 %b = call double @llvm.fabs.f64(double %a)
; Exercises llvm.minnum.f16 on two half arguments. In the two llc runs that
; use the no-f16 prefix set (default target, and sm_80 with
; --nvptx-no-f16-math) the output is expected to contain min.f32.
; NOTE(review): the expectation for the sm_80 run with f16 math enabled is
; not visible in this excerpt - confirm against the full file.
231 ; CHECK-LABEL: min_half
232 define half @min_half(half %a, half %b) {
233 ; CHECK-NOF16: min.f32
235 %x = call half @llvm.minnum.f16(half %a, half %b)
; Exercises llvm.minnum.f32 on two float arguments in a function without
; attribute #1.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
239 ; CHECK-LABEL: min_float
240 define float @min_float(float %a, float %b) {
242 %x = call float @llvm.minnum.f32(float %a, float %b)
; Exercises llvm.minnum.f32 with the constant 0.0 as the SECOND operand
; (min_imm2 places the constant first).
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
246 ; CHECK-LABEL: min_imm1
247 define float @min_imm1(float %a) {
249 %x = call float @llvm.minnum.f32(float %a, float 0.0)
; Exercises llvm.minnum.f32 with the constant 0.0 as the FIRST operand
; (min_imm1 places the constant second).
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
253 ; CHECK-LABEL: min_imm2
254 define float @min_imm2(float %a) {
256 %x = call float @llvm.minnum.f32(float 0.0, float %a)
; Exercises llvm.minnum.f32 in a function that carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign").
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
260 ; CHECK-LABEL: min_float_ftz
261 define float @min_float_ftz(float %a, float %b) #1 {
263 %x = call float @llvm.minnum.f32(float %a, float %b)
; Exercises llvm.minnum.f64 on two double arguments.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
267 ; CHECK-LABEL: min_double
268 define double @min_double(double %a, double %b) {
270 %x = call double @llvm.minnum.f64(double %a, double %b)
; Exercises llvm.minnum.v2f16 on two <2 x half> arguments. Expected output
; depends on the run:
;   - runs using the no-f16 prefix set: two min.f32 instructions, in order;
;   - the sm_80 run with f16 math enabled: one min.f16x2 instruction.
274 ; CHECK-LABEL: min_v2half
275 define <2 x half> @min_v2half(<2 x half> %a, <2 x half> %b) {
276 ; CHECK-NOF16: min.f32
277 ; CHECK-NOF16: min.f32
278 ; CHECK-F16: min.f16x2
279 %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
; Exercises llvm.maxnum.f16 on two half arguments. In the two llc runs that
; use the no-f16 prefix set (default target, and sm_80 with
; --nvptx-no-f16-math) the output is expected to contain max.f32.
; NOTE(review): the expectation for the sm_80 run with f16 math enabled is
; not visible in this excerpt - confirm against the full file.
285 ; CHECK-LABEL: max_half
286 define half @max_half(half %a, half %b) {
287 ; CHECK-NOF16: max.f32
289 %x = call half @llvm.maxnum.f16(half %a, half %b)
; Exercises llvm.maxnum.f32 with the constant 0.0 as the SECOND operand
; (max_imm2 places the constant first).
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
293 ; CHECK-LABEL: max_imm1
294 define float @max_imm1(float %a) {
296 %x = call float @llvm.maxnum.f32(float %a, float 0.0)
; Exercises llvm.maxnum.f32 with the constant 0.0 as the FIRST operand
; (max_imm1 places the constant second).
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
300 ; CHECK-LABEL: max_imm2
301 define float @max_imm2(float %a) {
303 %x = call float @llvm.maxnum.f32(float 0.0, float %a)
; Exercises llvm.maxnum.f32 on two float arguments in a function without
; attribute #1.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
307 ; CHECK-LABEL: max_float
308 define float @max_float(float %a, float %b) {
310 %x = call float @llvm.maxnum.f32(float %a, float %b)
; Exercises llvm.maxnum.f32 in a function that carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign").
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
314 ; CHECK-LABEL: max_float_ftz
315 define float @max_float_ftz(float %a, float %b) #1 {
317 %x = call float @llvm.maxnum.f32(float %a, float %b)
; Exercises llvm.maxnum.f64 on two double arguments.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
321 ; CHECK-LABEL: max_double
322 define double @max_double(double %a, double %b) {
324 %x = call double @llvm.maxnum.f64(double %a, double %b)
; Exercises llvm.maxnum.v2f16 on two <2 x half> arguments. Expected output
; depends on the run:
;   - runs using the no-f16 prefix set: two max.f32 instructions, in order;
;   - the sm_80 run with f16 math enabled: one max.f16x2 instruction.
328 ; CHECK-LABEL: max_v2half
329 define <2 x half> @max_v2half(<2 x half> %a, <2 x half> %b) {
330 ; CHECK-NOF16: max.f32
331 ; CHECK-NOF16: max.f32
332 ; CHECK-F16: max.f16x2
333 %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
; Exercises llvm.fma.f32 on three float arguments in a function without
; attribute #1. Unlike most tests in this file, the label pattern for the
; fma tests includes the leading '@'.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
339 ; CHECK-LABEL: @fma_float
340 define float @fma_float(float %a, float %b, float %c) {
342 %x = call float @llvm.fma.f32(float %a, float %b, float %c)
; Exercises llvm.fma.f32 in a function that carries attribute #1
; ("denormal-fp-math-f32" = "preserve-sign"); the generated PTX is expected
; to contain fma.rn.ftz.f32.
346 ; CHECK-LABEL: @fma_float_ftz
347 define float @fma_float_ftz(float %a, float %b, float %c) #1 {
348 ; CHECK: fma.rn.ftz.f32
349 %x = call float @llvm.fma.f32(float %a, float %b, float %c)
; Exercises llvm.fma.f64 on three double arguments.
; NOTE(review): no instruction pattern is visible for this function in this
; excerpt - confirm the expected-output line is present in the full file.
353 ; CHECK-LABEL: @fma_double
354 define double @fma_double(double %a, double %b, double %c) {
356 %x = call double @llvm.fma.f64(double %a, double %b, double %c)
; Attribute groups used in this file:
;   #0 - attached to every intrinsic declaration at the top of the file.
;   #1 - attached to the *_ftz test functions; sets the f32 denormal mode
;        to "preserve-sign".
360 attributes #0 = { nounwind readnone }
361 attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" }