1 ; Check that nvvm intrinsics get simplified to target-generic intrinsics where
; possible.
4 ; We run this test twice: once with ftz on, and again with ftz off. Each run
; appends its own `attributes #0` definition to the input that opt reads.
; ftz run: the input has to be created from this file before the attribute
; line is appended; otherwise the append below would extend a missing or stale
; %t.ftz. This mirrors the copy step the no-ftz run already performs.
; RUN: cat %s > %t.ftz
8 ; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz
9 ; RUN: opt < %t.ftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ
; no-ftz run: same flow, but attribute group #0 selects "ieee" denormal
; handling and the NOFTZ check prefix is enabled instead of the FTZ one.
11 ; RUN: cat %s > %t.noftz
12 ; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "ieee" }' >> %t.noftz
13 ; RUN: opt < %t.noftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ
15 ; We handle nvvm intrinsics with ftz variants as follows:
16 ; - If the module is in ftz mode, the ftz variant is transformed into the
17 ; regular llvm intrinsic, and the non-ftz variant is left alone.
18 ; - If the module is not in ftz mode, it's the reverse: Only the non-ftz
19 ; variant is transformed, and the ftz variant is left alone.
21 ; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.
; ceil: the f64 form is expected to become llvm.ceil.f64 in both runs. For f32,
; the plain intrinsic is expected to be rewritten to llvm.ceil.f32 only in the
; no-ftz run and the .ftz intrinsic only in the ftz run; in the opposite run
; each is expected to remain an nvvm call.
23 ; CHECK-LABEL: @ceil_double
24 define double @ceil_double(double %a) #0 {
25 ; CHECK: call double @llvm.ceil.f64
26 %ret = call double @llvm.nvvm.ceil.d(double %a)
29 ; CHECK-LABEL: @ceil_float
30 define float @ceil_float(float %a) #0 {
31 ; NOFTZ: call float @llvm.ceil.f32
32 ; FTZ: call float @llvm.nvvm.ceil.f
33 %ret = call float @llvm.nvvm.ceil.f(float %a)
36 ; CHECK-LABEL: @ceil_float_ftz
37 define float @ceil_float_ftz(float %a) #0 {
38 ; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
39 ; FTZ: call float @llvm.ceil.f32
40 %ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
; fabs: only the f64 form is expected to become llvm.fabs.f64. Both f32 forms
; (plain and .ftz) are expected to remain nvvm calls in either run, so they use
; the common check prefix rather than per-mode prefixes.
44 ; CHECK-LABEL: @fabs_double
45 define double @fabs_double(double %a) #0 {
46 ; CHECK: call double @llvm.fabs.f64
47 %ret = call double @llvm.nvvm.fabs.d(double %a)
50 ; CHECK-LABEL: @fabs_float
51 define float @fabs_float(float %a) #0 {
52 ; CHECK: call float @llvm.nvvm.fabs.f
53 %ret = call float @llvm.nvvm.fabs.f(float %a)
56 ; CHECK-LABEL: @fabs_float_ftz
57 define float @fabs_float_ftz(float %a) #0 {
58 ; CHECK: call float @llvm.nvvm.fabs.ftz.f
59 %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
; floor: the f64 form is expected to become llvm.floor.f64 in both runs. For
; f32, the plain intrinsic is expected to be rewritten to llvm.floor.f32 only in
; the no-ftz run and the .ftz intrinsic only in the ftz run.
63 ; CHECK-LABEL: @floor_double
64 define double @floor_double(double %a) #0 {
65 ; CHECK: call double @llvm.floor.f64
66 %ret = call double @llvm.nvvm.floor.d(double %a)
69 ; CHECK-LABEL: @floor_float
70 define float @floor_float(float %a) #0 {
71 ; NOFTZ: call float @llvm.floor.f32
72 ; FTZ: call float @llvm.nvvm.floor.f
73 %ret = call float @llvm.nvvm.floor.f(float %a)
76 ; CHECK-LABEL: @floor_float_ftz
77 define float @floor_float_ftz(float %a) #0 {
78 ; NOFTZ: call float @llvm.nvvm.floor.ftz.f
79 ; FTZ: call float @llvm.floor.f32
80 %ret = call float @llvm.nvvm.floor.ftz.f(float %a)
; fma.rn: the f64 form is expected to become llvm.fma.f64 in both runs. For
; f32, the plain intrinsic is expected to be rewritten to llvm.fma.f32 only in
; the no-ftz run and the .ftz intrinsic only in the ftz run.
84 ; CHECK-LABEL: @fma_double
85 define double @fma_double(double %a, double %b, double %c) #0 {
86 ; CHECK: call double @llvm.fma.f64
87 %ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
90 ; CHECK-LABEL: @fma_float
91 define float @fma_float(float %a, float %b, float %c) #0 {
92 ; NOFTZ: call float @llvm.fma.f32
93 ; FTZ: call float @llvm.nvvm.fma.rn.f
94 %ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
97 ; CHECK-LABEL: @fma_float_ftz
98 define float @fma_float_ftz(float %a, float %b, float %c) #0 {
99 ; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
100 ; FTZ: call float @llvm.fma.f32
101 %ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
; fmax: the expected generic replacement is llvm.maxnum. The f64 form is
; expected to be rewritten in both runs; for f32, the plain intrinsic only in
; the no-ftz run and the .ftz intrinsic only in the ftz run.
105 ; CHECK-LABEL: @fmax_double
106 define double @fmax_double(double %a, double %b) #0 {
107 ; CHECK: call double @llvm.maxnum.f64
108 %ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
111 ; CHECK-LABEL: @fmax_float
112 define float @fmax_float(float %a, float %b) #0 {
113 ; NOFTZ: call float @llvm.maxnum.f32
114 ; FTZ: call float @llvm.nvvm.fmax.f
115 %ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
118 ; CHECK-LABEL: @fmax_float_ftz
119 define float @fmax_float_ftz(float %a, float %b) #0 {
120 ; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
121 ; FTZ: call float @llvm.maxnum.f32
122 %ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
; fmin: the expected generic replacement is llvm.minnum. The f64 form is
; expected to be rewritten in both runs; for f32, the plain intrinsic only in
; the no-ftz run and the .ftz intrinsic only in the ftz run.
126 ; CHECK-LABEL: @fmin_double
127 define double @fmin_double(double %a, double %b) #0 {
128 ; CHECK: call double @llvm.minnum.f64
129 %ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
132 ; CHECK-LABEL: @fmin_float
133 define float @fmin_float(float %a, float %b) #0 {
134 ; NOFTZ: call float @llvm.minnum.f32
135 ; FTZ: call float @llvm.nvvm.fmin.f
136 %ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
139 ; CHECK-LABEL: @fmin_float_ftz
140 define float @fmin_float_ftz(float %a, float %b) #0 {
141 ; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
142 ; FTZ: call float @llvm.minnum.f32
143 %ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
; round: all three forms (f64, f32, f32 .ftz) are expected to remain calls to
; the nvvm intrinsic in both runs; none is expected to be replaced by a generic
; llvm intrinsic.
147 ; CHECK-LABEL: @round_double
148 define double @round_double(double %a) #0 {
149 ; CHECK: call double @llvm.nvvm.round.d
150 %ret = call double @llvm.nvvm.round.d(double %a)
153 ; CHECK-LABEL: @round_float
154 define float @round_float(float %a) #0 {
155 ; CHECK: call float @llvm.nvvm.round.f
156 %ret = call float @llvm.nvvm.round.f(float %a)
159 ; CHECK-LABEL: @round_float_ftz
160 define float @round_float_ftz(float %a) #0 {
161 ; CHECK: call float @llvm.nvvm.round.ftz.f
162 %ret = call float @llvm.nvvm.round.ftz.f(float %a)
; trunc: the f64 form is expected to become llvm.trunc.f64 in both runs. For
; f32, the plain intrinsic is expected to be rewritten to llvm.trunc.f32 only in
; the no-ftz run and the .ftz intrinsic only in the ftz run.
166 ; CHECK-LABEL: @trunc_double
167 define double @trunc_double(double %a) #0 {
168 ; CHECK: call double @llvm.trunc.f64
169 %ret = call double @llvm.nvvm.trunc.d(double %a)
172 ; CHECK-LABEL: @trunc_float
173 define float @trunc_float(float %a) #0 {
174 ; NOFTZ: call float @llvm.trunc.f32
175 ; FTZ: call float @llvm.nvvm.trunc.f
176 %ret = call float @llvm.nvvm.trunc.f(float %a)
179 ; CHECK-LABEL: @trunc_float_ftz
180 define float @trunc_float_ftz(float %a) #0 {
181 ; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
182 ; FTZ: call float @llvm.trunc.f32
183 %ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
187 ; Check NVVM intrinsics that correspond to LLVM cast operations.
; Floating-point to integer conversions (.rz intrinsics). The signed variants
; (d2i, f2i, d2ll, f2ll) are expected to become fptosi and the unsigned variants
; (d2ui, f2ui, d2ull, f2ull) fptoui, with the same expectation in both runs.
; Only the non-ftz f32 intrinsics are exercised here.
189 ; CHECK-LABEL: @test_d2i
190 define i32 @test_d2i(double %a) #0 {
191 ; CHECK: fptosi double %a to i32
192 %ret = call i32 @llvm.nvvm.d2i.rz(double %a)
195 ; CHECK-LABEL: @test_f2i
196 define i32 @test_f2i(float %a) #0 {
197 ; CHECK: fptosi float %a to i32
198 %ret = call i32 @llvm.nvvm.f2i.rz(float %a)
201 ; CHECK-LABEL: @test_d2ll
202 define i64 @test_d2ll(double %a) #0 {
203 ; CHECK: fptosi double %a to i64
204 %ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
207 ; CHECK-LABEL: @test_f2ll
208 define i64 @test_f2ll(float %a) #0 {
209 ; CHECK: fptosi float %a to i64
210 %ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
213 ; CHECK-LABEL: @test_d2ui
214 define i32 @test_d2ui(double %a) #0 {
215 ; CHECK: fptoui double %a to i32
216 %ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
219 ; CHECK-LABEL: @test_f2ui
220 define i32 @test_f2ui(float %a) #0 {
221 ; CHECK: fptoui float %a to i32
222 %ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
225 ; CHECK-LABEL: @test_d2ull
226 define i64 @test_d2ull(double %a) #0 {
227 ; CHECK: fptoui double %a to i64
228 %ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
231 ; CHECK-LABEL: @test_f2ull
232 define i64 @test_f2ull(float %a) #0 {
233 ; CHECK: fptoui float %a to i64
234 %ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
; Integer to floating-point conversions (.rn intrinsics). The signed variants
; (i2d, i2f, ll2d, ll2f) are expected to become sitofp and the unsigned variants
; (ui2d, ui2f, ull2d, ull2f) uitofp, with the same expectation in both runs.
238 ; CHECK-LABEL: @test_i2d
239 define double @test_i2d(i32 %a) #0 {
240 ; CHECK: sitofp i32 %a to double
241 %ret = call double @llvm.nvvm.i2d.rn(i32 %a)
244 ; CHECK-LABEL: @test_i2f
245 define float @test_i2f(i32 %a) #0 {
246 ; CHECK: sitofp i32 %a to float
247 %ret = call float @llvm.nvvm.i2f.rn(i32 %a)
250 ; CHECK-LABEL: @test_ll2d
251 define double @test_ll2d(i64 %a) #0 {
252 ; CHECK: sitofp i64 %a to double
253 %ret = call double @llvm.nvvm.ll2d.rn(i64 %a)
256 ; CHECK-LABEL: @test_ll2f
257 define float @test_ll2f(i64 %a) #0 {
258 ; CHECK: sitofp i64 %a to float
259 %ret = call float @llvm.nvvm.ll2f.rn(i64 %a)
262 ; CHECK-LABEL: @test_ui2d
263 define double @test_ui2d(i32 %a) #0 {
264 ; CHECK: uitofp i32 %a to double
265 %ret = call double @llvm.nvvm.ui2d.rn(i32 %a)
268 ; CHECK-LABEL: @test_ui2f
269 define float @test_ui2f(i32 %a) #0 {
270 ; CHECK: uitofp i32 %a to float
271 %ret = call float @llvm.nvvm.ui2f.rn(i32 %a)
274 ; CHECK-LABEL: @test_ull2d
275 define double @test_ull2d(i64 %a) #0 {
276 ; CHECK: uitofp i64 %a to double
277 %ret = call double @llvm.nvvm.ull2d.rn(i64 %a)
280 ; CHECK-LABEL: @test_ull2f
281 define float @test_ull2f(i64 %a) #0 {
282 ; CHECK: uitofp i64 %a to float
283 %ret = call float @llvm.nvvm.ull2f.rn(i64 %a)
287 ; Check NVVM intrinsics that map to LLVM binary operations.
; add.rn: all three forms are expected to remain calls to the nvvm intrinsic
; in both runs. The .ftz expectation additionally pins the call operands.
289 ; CHECK-LABEL: @test_add_rn_d
290 define double @test_add_rn_d(double %a, double %b) #0 {
291 ; CHECK: call double @llvm.nvvm.add.rn.d
292 %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
295 ; CHECK-LABEL: @test_add_rn_f
296 define float @test_add_rn_f(float %a, float %b) #0 {
297 ; CHECK: call float @llvm.nvvm.add.rn.f
298 %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
301 ; CHECK-LABEL: @test_add_rn_f_ftz
302 define float @test_add_rn_f_ftz(float %a, float %b) #0 {
303 ; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
304 %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
; mul.rn: all three forms are expected to remain calls to the nvvm intrinsic
; in both runs. The .ftz expectation additionally pins the call operands.
308 ; CHECK-LABEL: @test_mul_rn_d
309 define double @test_mul_rn_d(double %a, double %b) #0 {
310 ; CHECK: call double @llvm.nvvm.mul.rn.d
311 %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
314 ; CHECK-LABEL: @test_mul_rn_f
315 define float @test_mul_rn_f(float %a, float %b) #0 {
316 ; CHECK: call float @llvm.nvvm.mul.rn.f
317 %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
320 ; CHECK-LABEL: @test_mul_rn_f_ftz
321 define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
322 ; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
323 %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
; div.rn: both f32 forms are expected to remain calls to the nvvm intrinsic in
; both runs.
; NOTE(review): no check directive is visible for test_div_rn_d between its
; label and its call, so as shown the f64 case asserts nothing beyond the
; label -- confirm the intended expectation and restore it.
327 ; CHECK-LABEL: @test_div_rn_d
328 define double @test_div_rn_d(double %a, double %b) #0 {
330 %ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
333 ; CHECK-LABEL: @test_div_rn_f
334 define float @test_div_rn_f(float %a, float %b) #0 {
335 ; CHECK: call float @llvm.nvvm.div.rn.f
336 %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
339 ; CHECK-LABEL: @test_div_rn_f_ftz
340 define float @test_div_rn_f_ftz(float %a, float %b) #0 {
341 ; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
342 %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
346 ; Check NVVM intrinsics that require us to emit custom IR.
; rcp.rn (f32 only): both the plain and the .ftz intrinsic are expected to
; remain nvvm calls in both runs.
348 ; CHECK-LABEL: @test_rcp_rn_f
349 define float @test_rcp_rn_f(float %a) #0 {
350 ; CHECK: call float @llvm.nvvm.rcp.rn.f
351 %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
354 ; CHECK-LABEL: @test_rcp_rn_f_ftz
355 define float @test_rcp_rn_f_ftz(float %a) #0 {
356 ; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
357 %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
; sqrt: sqrt.rn.d is expected to become llvm.sqrt.f64 and the suffix-less
; sqrt.f is expected to become llvm.sqrt.f32, each in both runs. The f32
; sqrt.rn.f and sqrt.rn.ftz.f intrinsics are expected to remain nvvm calls.
361 ; CHECK-LABEL: @test_sqrt_rn_d
362 define double @test_sqrt_rn_d(double %a) #0 {
363 ; CHECK: call double @llvm.sqrt.f64(double %a)
364 %ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
367 ; nvvm.sqrt.f is a special case: It goes to a llvm.sqrt.f
368 ; CHECK-LABEL: @test_sqrt_f
369 define float @test_sqrt_f(float %a) #0 {
370 ; CHECK: call float @llvm.sqrt.f32(float %a)
371 %ret = call float @llvm.nvvm.sqrt.f(float %a)
374 ; CHECK-LABEL: @test_sqrt_rn_f
375 define float @test_sqrt_rn_f(float %a) #0 {
376 ; CHECK: call float @llvm.nvvm.sqrt.rn.f
377 %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
380 ; CHECK-LABEL: @test_sqrt_rn_f_ftz
381 define float @test_sqrt_rn_f_ftz(float %a) #0 {
382 ; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
383 %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
; fshl.clamp on i32. These functions carry no #0 attribute group, so their
; expectations use only the common check prefix.
; - constant shift 3: expected to become llvm.fshl.i32 with the same amount.
; - variable shift %c: expected to remain the nvvm intrinsic call.
; NOTE(review): no check directive is visible for test_fshl_clamp_2 (constant
; shift 300, larger than the 32-bit width) -- confirm the intended expectation.
387 ; CHECK-LABEL: @test_fshl_clamp_1
388 define i32 @test_fshl_clamp_1(i32 %a, i32 %b) {
389 ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
390 %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
394 ; CHECK-LABEL: @test_fshl_clamp_2
395 define i32 @test_fshl_clamp_2(i32 %a, i32 %b) {
397 %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
401 ; CHECK-LABEL: @test_fshl_clamp_3
402 define i32 @test_fshl_clamp_3(i32 %a, i32 %b, i32 %c) {
403 ; CHECK: call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
404 %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
; fshr.clamp on i32. These functions carry no #0 attribute group, so their
; expectations use only the common check prefix.
; - constant shift 3: expected to become llvm.fshl.i32 with amount 29, i.e. the
;   right funnel shift is expressed as a left funnel shift by 32 - 3.
; - variable shift %c: expected to remain the nvvm intrinsic call.
; NOTE(review): no check directive is visible for test_fshr_clamp_2 (constant
; shift 300, larger than the 32-bit width) -- confirm the intended expectation.
408 ; CHECK-LABEL: @test_fshr_clamp_1
409 define i32 @test_fshr_clamp_1(i32 %a, i32 %b) {
410 ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
411 %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
415 ; CHECK-LABEL: @test_fshr_clamp_2
416 define i32 @test_fshr_clamp_2(i32 %a, i32 %b) {
418 %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 300)
422 ; CHECK-LABEL: @test_fshr_clamp_3
423 define i32 @test_fshr_clamp_3(i32 %a, i32 %b, i32 %c) {
424 ; CHECK: call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
425 %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
; Declarations of the NVVM intrinsics used by the tests in this file.
; NOTE(review): several intrinsics declared here are not called by any test
; function visible in this view (the d2f.* family, f2h.rz*, the .ftz variants
; of f2i/f2ll/f2ui/f2ull, lohi.i2d, rcp.rm.d and rcp.rn.d) -- confirm whether
; they are still needed or are leftovers from removed tests.
429 declare double @llvm.nvvm.add.rn.d(double, double)
430 declare float @llvm.nvvm.add.rn.f(float, float)
431 declare float @llvm.nvvm.add.rn.ftz.f(float, float)
432 declare double @llvm.nvvm.ceil.d(double)
433 declare float @llvm.nvvm.ceil.f(float)
434 declare float @llvm.nvvm.ceil.ftz.f(float)
435 declare float @llvm.nvvm.d2f.rm(double)
436 declare float @llvm.nvvm.d2f.rm.ftz(double)
437 declare float @llvm.nvvm.d2f.rp(double)
438 declare float @llvm.nvvm.d2f.rp.ftz(double)
439 declare float @llvm.nvvm.d2f.rz(double)
440 declare float @llvm.nvvm.d2f.rz.ftz(double)
441 declare i32 @llvm.nvvm.d2i.rz(double)
442 declare i64 @llvm.nvvm.d2ll.rz(double)
443 declare i32 @llvm.nvvm.d2ui.rz(double)
444 declare i64 @llvm.nvvm.d2ull.rz(double)
445 declare double @llvm.nvvm.div.rn.d(double, double)
446 declare float @llvm.nvvm.div.rn.f(float, float)
447 declare float @llvm.nvvm.div.rn.ftz.f(float, float)
448 declare i16 @llvm.nvvm.f2h.rz(float)
449 declare i16 @llvm.nvvm.f2h.rz.ftz(float)
450 declare i32 @llvm.nvvm.f2i.rz(float)
451 declare i32 @llvm.nvvm.f2i.rz.ftz(float)
452 declare i64 @llvm.nvvm.f2ll.rz(float)
453 declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
454 declare i32 @llvm.nvvm.f2ui.rz(float)
455 declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
456 declare i64 @llvm.nvvm.f2ull.rz(float)
457 declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
458 declare double @llvm.nvvm.fabs.d(double)
459 declare float @llvm.nvvm.fabs.f(float)
460 declare float @llvm.nvvm.fabs.ftz.f(float)
461 declare double @llvm.nvvm.floor.d(double)
462 declare float @llvm.nvvm.floor.f(float)
463 declare float @llvm.nvvm.floor.ftz.f(float)
464 declare double @llvm.nvvm.fma.rn.d(double, double, double)
465 declare float @llvm.nvvm.fma.rn.f(float, float, float)
466 declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
467 declare double @llvm.nvvm.fmax.d(double, double)
468 declare float @llvm.nvvm.fmax.f(float, float)
469 declare float @llvm.nvvm.fmax.ftz.f(float, float)
470 declare double @llvm.nvvm.fmin.d(double, double)
471 declare float @llvm.nvvm.fmin.f(float, float)
472 declare float @llvm.nvvm.fmin.ftz.f(float, float)
473 declare double @llvm.nvvm.i2d.rn(i32)
474 declare float @llvm.nvvm.i2f.rn(i32)
475 declare double @llvm.nvvm.ll2d.rn(i64)
476 declare float @llvm.nvvm.ll2f.rn(i64)
477 declare double @llvm.nvvm.lohi.i2d(i32, i32)
478 declare double @llvm.nvvm.mul.rn.d(double, double)
479 declare float @llvm.nvvm.mul.rn.f(float, float)
480 declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
481 declare double @llvm.nvvm.rcp.rm.d(double)
482 declare double @llvm.nvvm.rcp.rn.d(double)
483 declare float @llvm.nvvm.rcp.rn.f(float)
484 declare float @llvm.nvvm.rcp.rn.ftz.f(float)
485 declare double @llvm.nvvm.round.d(double)
486 declare float @llvm.nvvm.round.f(float)
487 declare float @llvm.nvvm.round.ftz.f(float)
488 declare float @llvm.nvvm.sqrt.f(float)
489 declare double @llvm.nvvm.sqrt.rn.d(double)
490 declare float @llvm.nvvm.sqrt.rn.f(float)
491 declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
492 declare double @llvm.nvvm.trunc.d(double)
493 declare float @llvm.nvvm.trunc.f(float)
494 declare float @llvm.nvvm.trunc.ftz.f(float)
495 declare double @llvm.nvvm.ui2d.rn(i32)
496 declare float @llvm.nvvm.ui2f.rn(i32)
497 declare double @llvm.nvvm.ull2d.rn(i64)
498 declare float @llvm.nvvm.ull2f.rn(i64)
499 declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
500 declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)