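; Check that nvvm intrinsics get simplified to target-generic intrinsics where
; possible.
;
; We run this test twice: once with ftz on, and again with ftz off. Each run
; works on its own copy of this file, to which a different definition of
; attribute group #0 is appended.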
; ftz run: copy this file, then append an attribute group #0 that selects
; "preserve-sign" (flush-to-zero) handling of f32 denormals. The copy must be
; created with '>' first; appending alone would leave the IR out of the input
; and would accumulate stale attribute lines across reruns.
; RUN: cat %s > %t.ftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz
; RUN: opt < %t.ftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ

; non-ftz run: same procedure, but attribute group #0 selects "ieee" denormals.
; RUN: cat %s > %t.noftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "ieee" }' >> %t.noftz
; RUN: opt < %t.noftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ
; We handle nvvm intrinsics with ftz variants as follows:
;  - If the function is in ftz mode (attribute group #0 sets
;    "denormal-fp-math-f32" to "preserve-sign"), the ftz variant is transformed
;    into the regular llvm intrinsic, and the non-ftz variant is left alone.
;  - If the function is not in ftz mode, it's the reverse: only the non-ftz
;    variant is transformed, and the ftz variant is left alone.
; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.

; ceil: the f64 intrinsic is rewritten to llvm.ceil.f64 in both runs. Each f32
; intrinsic is rewritten to llvm.ceil.f32 only in the run whose denormal mode
; matches its ftz-ness; in the other run the call must be left untouched.
; CHECK-LABEL: @ceil_double
define double @ceil_double(double %a) #0 {
; CHECK: call double @llvm.ceil.f64
  %ret = call double @llvm.nvvm.ceil.d(double %a)
  ret double %ret
}

; CHECK-LABEL: @ceil_float
define float @ceil_float(float %a) #0 {
; NOFTZ: call float @llvm.ceil.f32
; FTZ: call float @llvm.nvvm.ceil.f
  %ret = call float @llvm.nvvm.ceil.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @ceil_float_ftz
define float @ceil_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
; FTZ: call float @llvm.ceil.f32
  %ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
  ret float %ret
}
; fabs: f64 always becomes llvm.fabs.f64; the f32 variants become llvm.fabs.f32
; only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @fabs_double
define double @fabs_double(double %a) #0 {
; CHECK: call double @llvm.fabs.f64
  %ret = call double @llvm.nvvm.fabs.d(double %a)
  ret double %ret
}

; CHECK-LABEL: @fabs_float
define float @fabs_float(float %a) #0 {
; NOFTZ: call float @llvm.fabs.f32
; FTZ: call float @llvm.nvvm.fabs.f
  %ret = call float @llvm.nvvm.fabs.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @fabs_float_ftz
define float @fabs_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
; FTZ: call float @llvm.fabs.f32
  %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
  ret float %ret
}
; floor: f64 always becomes llvm.floor.f64; the f32 variants become
; llvm.floor.f32 only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @floor_double
define double @floor_double(double %a) #0 {
; CHECK: call double @llvm.floor.f64
  %ret = call double @llvm.nvvm.floor.d(double %a)
  ret double %ret
}

; CHECK-LABEL: @floor_float
define float @floor_float(float %a) #0 {
; NOFTZ: call float @llvm.floor.f32
; FTZ: call float @llvm.nvvm.floor.f
  %ret = call float @llvm.nvvm.floor.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @floor_float_ftz
define float @floor_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.floor.ftz.f
; FTZ: call float @llvm.floor.f32
  %ret = call float @llvm.nvvm.floor.ftz.f(float %a)
  ret float %ret
}
; fma.rn: f64 always becomes llvm.fma.f64; the f32 variants become llvm.fma.f32
; only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @fma_double
define double @fma_double(double %a, double %b, double %c) #0 {
; CHECK: call double @llvm.fma.f64
  %ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
  ret double %ret
}

; CHECK-LABEL: @fma_float
define float @fma_float(float %a, float %b, float %c) #0 {
; NOFTZ: call float @llvm.fma.f32
; FTZ: call float @llvm.nvvm.fma.rn.f
  %ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
  ret float %ret
}

; CHECK-LABEL: @fma_float_ftz
define float @fma_float_ftz(float %a, float %b, float %c) #0 {
; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
; FTZ: call float @llvm.fma.f32
  %ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
  ret float %ret
}
; fmax: f64 always becomes llvm.maxnum.f64; the f32 variants become
; llvm.maxnum.f32 only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @fmax_double
define double @fmax_double(double %a, double %b) #0 {
; CHECK: call double @llvm.maxnum.f64
  %ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
  ret double %ret
}

; CHECK-LABEL: @fmax_float
define float @fmax_float(float %a, float %b) #0 {
; NOFTZ: call float @llvm.maxnum.f32
; FTZ: call float @llvm.nvvm.fmax.f
  %ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
  ret float %ret
}

; CHECK-LABEL: @fmax_float_ftz
define float @fmax_float_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
; FTZ: call float @llvm.maxnum.f32
  %ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
  ret float %ret
}
; fmin: f64 always becomes llvm.minnum.f64; the f32 variants become
; llvm.minnum.f32 only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @fmin_double
define double @fmin_double(double %a, double %b) #0 {
; CHECK: call double @llvm.minnum.f64
  %ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
  ret double %ret
}

; CHECK-LABEL: @fmin_float
define float @fmin_float(float %a, float %b) #0 {
; NOFTZ: call float @llvm.minnum.f32
; FTZ: call float @llvm.nvvm.fmin.f
  %ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
  ret float %ret
}

; CHECK-LABEL: @fmin_float_ftz
define float @fmin_float_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
; FTZ: call float @llvm.minnum.f32
  %ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
  ret float %ret
}
; round: f64 always becomes llvm.round.f64; the f32 variants become
; llvm.round.f32 only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @round_double
define double @round_double(double %a) #0 {
; CHECK: call double @llvm.round.f64
  %ret = call double @llvm.nvvm.round.d(double %a)
  ret double %ret
}

; CHECK-LABEL: @round_float
define float @round_float(float %a) #0 {
; NOFTZ: call float @llvm.round.f32
; FTZ: call float @llvm.nvvm.round.f
  %ret = call float @llvm.nvvm.round.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @round_float_ftz
define float @round_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.round.ftz.f
; FTZ: call float @llvm.round.f32
  %ret = call float @llvm.nvvm.round.ftz.f(float %a)
  ret float %ret
}
; trunc: f64 always becomes llvm.trunc.f64; the f32 variants become
; llvm.trunc.f32 only in the run whose denormal mode matches the variant.
; CHECK-LABEL: @trunc_double
define double @trunc_double(double %a) #0 {
; CHECK: call double @llvm.trunc.f64
  %ret = call double @llvm.nvvm.trunc.d(double %a)
  ret double %ret
}

; CHECK-LABEL: @trunc_float
define float @trunc_float(float %a) #0 {
; NOFTZ: call float @llvm.trunc.f32
; FTZ: call float @llvm.nvvm.trunc.f
  %ret = call float @llvm.nvvm.trunc.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @trunc_float_ftz
define float @trunc_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
; FTZ: call float @llvm.trunc.f32
  %ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
  ret float %ret
}
; Check NVVM intrinsics that correspond to LLVM cast operations.

; Floating point to signed integer: the .rz conversions become a plain fptosi
; in both runs.
; CHECK-LABEL: @test_d2i
define i32 @test_d2i(double %a) #0 {
; CHECK: fptosi double %a to i32
  %ret = call i32 @llvm.nvvm.d2i.rz(double %a)
  ret i32 %ret
}

; CHECK-LABEL: @test_f2i
define i32 @test_f2i(float %a) #0 {
; CHECK: fptosi float %a to i32
  %ret = call i32 @llvm.nvvm.f2i.rz(float %a)
  ret i32 %ret
}

; CHECK-LABEL: @test_d2ll
define i64 @test_d2ll(double %a) #0 {
; CHECK: fptosi double %a to i64
  %ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
  ret i64 %ret
}

; CHECK-LABEL: @test_f2ll
define i64 @test_f2ll(float %a) #0 {
; CHECK: fptosi float %a to i64
  %ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
  ret i64 %ret
}
; Floating point to unsigned integer: the .rz conversions become a plain fptoui
; in both runs.
; CHECK-LABEL: @test_d2ui
define i32 @test_d2ui(double %a) #0 {
; CHECK: fptoui double %a to i32
  %ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
  ret i32 %ret
}

; CHECK-LABEL: @test_f2ui
define i32 @test_f2ui(float %a) #0 {
; CHECK: fptoui float %a to i32
  %ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
  ret i32 %ret
}

; CHECK-LABEL: @test_d2ull
define i64 @test_d2ull(double %a) #0 {
; CHECK: fptoui double %a to i64
  %ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
  ret i64 %ret
}

; CHECK-LABEL: @test_f2ull
define i64 @test_f2ull(float %a) #0 {
; CHECK: fptoui float %a to i64
  %ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
  ret i64 %ret
}
; Signed integer to floating point: the .rz conversions become a plain sitofp
; in both runs.
; CHECK-LABEL: @test_i2d
define double @test_i2d(i32 %a) #0 {
; CHECK: sitofp i32 %a to double
  %ret = call double @llvm.nvvm.i2d.rz(i32 %a)
  ret double %ret
}

; CHECK-LABEL: @test_i2f
define float @test_i2f(i32 %a) #0 {
; CHECK: sitofp i32 %a to float
  %ret = call float @llvm.nvvm.i2f.rz(i32 %a)
  ret float %ret
}

; CHECK-LABEL: @test_ll2d
define double @test_ll2d(i64 %a) #0 {
; CHECK: sitofp i64 %a to double
  %ret = call double @llvm.nvvm.ll2d.rz(i64 %a)
  ret double %ret
}

; CHECK-LABEL: @test_ll2f
define float @test_ll2f(i64 %a) #0 {
; CHECK: sitofp i64 %a to float
  %ret = call float @llvm.nvvm.ll2f.rz(i64 %a)
  ret float %ret
}
; Unsigned integer to floating point: the .rz conversions become a plain uitofp
; in both runs.
; CHECK-LABEL: @test_ui2d
define double @test_ui2d(i32 %a) #0 {
; CHECK: uitofp i32 %a to double
  %ret = call double @llvm.nvvm.ui2d.rz(i32 %a)
  ret double %ret
}

; CHECK-LABEL: @test_ui2f
define float @test_ui2f(i32 %a) #0 {
; CHECK: uitofp i32 %a to float
  %ret = call float @llvm.nvvm.ui2f.rz(i32 %a)
  ret float %ret
}

; CHECK-LABEL: @test_ull2d
define double @test_ull2d(i64 %a) #0 {
; CHECK: uitofp i64 %a to double
  %ret = call double @llvm.nvvm.ull2d.rz(i64 %a)
  ret double %ret
}

; CHECK-LABEL: @test_ull2f
define float @test_ull2f(i64 %a) #0 {
; CHECK: uitofp i64 %a to float
  %ret = call float @llvm.nvvm.ull2f.rz(i64 %a)
  ret float %ret
}
; Check NVVM intrinsics that map to LLVM binary operations.

; add.rn: the f64 intrinsic becomes fadd in both runs. Each f32 variant becomes
; fadd only in the run whose denormal mode matches it, and must otherwise be
; left as a call. The "left alone" checks name the exact intrinsic so that the
; ftz and non-ftz spellings cannot be confused with one another.
; CHECK-LABEL: @test_add_rn_d
define double @test_add_rn_d(double %a, double %b) #0 {
; CHECK: fadd double %a, %b
  %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
  ret double %ret
}

; CHECK-LABEL: @test_add_rn_f
define float @test_add_rn_f(float %a, float %b) #0 {
; NOFTZ: fadd float %a, %b
; FTZ: call float @llvm.nvvm.add.rn.f
  %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
  ret float %ret
}

; CHECK-LABEL: @test_add_rn_f_ftz
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.add.rn.ftz.f
; FTZ: fadd float %a, %b
  %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
  ret float %ret
}
; mul.rn: the f64 intrinsic becomes fmul in both runs. Each f32 variant becomes
; fmul only in the run whose denormal mode matches it, and must otherwise be
; left as a call to exactly that intrinsic.
; CHECK-LABEL: @test_mul_rn_d
define double @test_mul_rn_d(double %a, double %b) #0 {
; CHECK: fmul double %a, %b
  %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
  ret double %ret
}

; CHECK-LABEL: @test_mul_rn_f
define float @test_mul_rn_f(float %a, float %b) #0 {
; NOFTZ: fmul float %a, %b
; FTZ: call float @llvm.nvvm.mul.rn.f
  %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
  ret float %ret
}

; CHECK-LABEL: @test_mul_rn_f_ftz
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.mul.rn.ftz.f
; FTZ: fmul float %a, %b
  %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
  ret float %ret
}
; div.rn: the f64 intrinsic becomes fdiv in both runs. Each f32 variant becomes
; fdiv only in the run whose denormal mode matches it, and must otherwise be
; left as a call to exactly that intrinsic.
; CHECK-LABEL: @test_div_rn_d
define double @test_div_rn_d(double %a, double %b) #0 {
; CHECK: fdiv double %a, %b
  %ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
  ret double %ret
}

; CHECK-LABEL: @test_div_rn_f
define float @test_div_rn_f(float %a, float %b) #0 {
; NOFTZ: fdiv float %a, %b
; FTZ: call float @llvm.nvvm.div.rn.f
  %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
  ret float %ret
}

; CHECK-LABEL: @test_div_rn_f_ftz
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.div.rn.ftz.f
; FTZ: fdiv float %a, %b
  %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
  ret float %ret
}
; Check NVVM intrinsics that require us to emit custom IR.

; rcp.rn: a reciprocal is emitted as "fdiv float 1.0, %a" when the variant's
; ftz-ness matches the run's denormal mode; otherwise the call is left alone.
; The pattern between "1.0" and "%a" is a wildcard so the exact spelling of the
; constant does not matter.
; CHECK-LABEL: @test_rcp_rn_f
define float @test_rcp_rn_f(float %a) #0 {
; NOFTZ: fdiv float 1.0{{.*}} %a
; FTZ: call float @llvm.nvvm.rcp.rn.f
  %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @test_rcp_rn_f_ftz
define float @test_rcp_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.rcp.rn.ftz.f
; FTZ: fdiv float 1.0{{.*}} %a
  %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
  ret float %ret
}
; sqrt.rn: f64 becomes llvm.sqrt.f64 in both runs; the f32 .rn variants follow
; the usual rule and are rewritten only when they match the run's denormal mode.
; CHECK-LABEL: @test_sqrt_rn_d
define double @test_sqrt_rn_d(double %a) #0 {
; CHECK: call double @llvm.sqrt.f64(double %a)
  %ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
  ret double %ret
}

; nvvm.sqrt.f is a special case: it goes to llvm.sqrt.f32 in both the ftz and
; the non-ftz run.
; CHECK-LABEL: @test_sqrt_f
define float @test_sqrt_f(float %a) #0 {
; CHECK: call float @llvm.sqrt.f32(float %a)
  %ret = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @test_sqrt_rn_f
define float @test_sqrt_rn_f(float %a) #0 {
; NOFTZ: call float @llvm.sqrt.f32(float %a)
; FTZ: call float @llvm.nvvm.sqrt.rn.f
  %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
  ret float %ret
}

; CHECK-LABEL: @test_sqrt_rn_f_ftz
define float @test_sqrt_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.sqrt.rn.ftz.f
; FTZ: call float @llvm.sqrt.f32(float %a)
  %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
  ret float %ret
}
; Declarations of the NVVM intrinsics used by this test, in alphabetical order.
; Attribute group #0, referenced by every function definition, is not defined
; here: each RUN sequence appends its own definition to a copy of this file.
;
; NOTE(review): several declarations (the d2f.*, f2h.*, f2*.rz.ftz, lohi.i2d,
; rcp.rm.d, rcp.rn.d and ui2f.rn intrinsics) are not called by any function
; visible in this test; they are kept as-is in case other parts rely on them.
declare double @llvm.nvvm.add.rn.d(double, double)
declare float @llvm.nvvm.add.rn.f(float, float)
declare float @llvm.nvvm.add.rn.ftz.f(float, float)
declare double @llvm.nvvm.ceil.d(double)
declare float @llvm.nvvm.ceil.f(float)
declare float @llvm.nvvm.ceil.ftz.f(float)
declare float @llvm.nvvm.d2f.rm(double)
declare float @llvm.nvvm.d2f.rm.ftz(double)
declare float @llvm.nvvm.d2f.rp(double)
declare float @llvm.nvvm.d2f.rp.ftz(double)
declare float @llvm.nvvm.d2f.rz(double)
declare float @llvm.nvvm.d2f.rz.ftz(double)
declare i32 @llvm.nvvm.d2i.rz(double)
declare i64 @llvm.nvvm.d2ll.rz(double)
declare i32 @llvm.nvvm.d2ui.rz(double)
declare i64 @llvm.nvvm.d2ull.rz(double)
declare double @llvm.nvvm.div.rn.d(double, double)
declare float @llvm.nvvm.div.rn.f(float, float)
declare float @llvm.nvvm.div.rn.ftz.f(float, float)
declare i16 @llvm.nvvm.f2h.rz(float)
declare i16 @llvm.nvvm.f2h.rz.ftz(float)
declare i32 @llvm.nvvm.f2i.rz(float)
declare i32 @llvm.nvvm.f2i.rz.ftz(float)
declare i64 @llvm.nvvm.f2ll.rz(float)
declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
declare i32 @llvm.nvvm.f2ui.rz(float)
declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
declare i64 @llvm.nvvm.f2ull.rz(float)
declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
declare double @llvm.nvvm.fabs.d(double)
declare float @llvm.nvvm.fabs.f(float)
declare float @llvm.nvvm.fabs.ftz.f(float)
declare double @llvm.nvvm.floor.d(double)
declare float @llvm.nvvm.floor.f(float)
declare float @llvm.nvvm.floor.ftz.f(float)
declare double @llvm.nvvm.fma.rn.d(double, double, double)
declare float @llvm.nvvm.fma.rn.f(float, float, float)
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
declare double @llvm.nvvm.fmax.d(double, double)
declare float @llvm.nvvm.fmax.f(float, float)
declare float @llvm.nvvm.fmax.ftz.f(float, float)
declare double @llvm.nvvm.fmin.d(double, double)
declare float @llvm.nvvm.fmin.f(float, float)
declare float @llvm.nvvm.fmin.ftz.f(float, float)
declare double @llvm.nvvm.i2d.rz(i32)
declare float @llvm.nvvm.i2f.rz(i32)
declare double @llvm.nvvm.ll2d.rz(i64)
declare float @llvm.nvvm.ll2f.rz(i64)
declare double @llvm.nvvm.lohi.i2d(i32, i32)
declare double @llvm.nvvm.mul.rn.d(double, double)
declare float @llvm.nvvm.mul.rn.f(float, float)
declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
declare double @llvm.nvvm.rcp.rm.d(double)
declare double @llvm.nvvm.rcp.rn.d(double)
declare float @llvm.nvvm.rcp.rn.f(float)
declare float @llvm.nvvm.rcp.rn.ftz.f(float)
declare double @llvm.nvvm.round.d(double)
declare float @llvm.nvvm.round.f(float)
declare float @llvm.nvvm.round.ftz.f(float)
declare float @llvm.nvvm.sqrt.f(float)
declare double @llvm.nvvm.sqrt.rn.d(double)
declare float @llvm.nvvm.sqrt.rn.f(float)
declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
declare double @llvm.nvvm.trunc.d(double)
declare float @llvm.nvvm.trunc.f(float)
declare float @llvm.nvvm.trunc.ftz.f(float)
declare double @llvm.nvvm.ui2d.rz(i32)
declare float @llvm.nvvm.ui2f.rn(i32)
declare float @llvm.nvvm.ui2f.rz(i32)
declare double @llvm.nvvm.ull2d.rz(i64)
declare float @llvm.nvvm.ull2f.rz(i64)