1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
2 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
; Declarations of the NVVM abs/neg intrinsics called by the test functions
; further down. The bf16 variants are declared with i16 operands and results,
; the bf16x2 variants with i32 operands and results.
4 declare i16 @llvm.nvvm.abs.bf16(i16)
5 declare i32 @llvm.nvvm.abs.bf16x2(i32)
6 declare i16 @llvm.nvvm.neg.bf16(i16)
7 declare i32 @llvm.nvvm.neg.bf16x2(i32)
; Declarations of the NVVM fmin intrinsics called by the test functions
; further down. Each takes two operands of its result type. Variants are
; declared for float, half, <2 x half>, and for bf16/bf16x2 (declared here
; with i16 and i32 operands respectively).
9 declare float @llvm.nvvm.fmin.nan.f(float, float)
10 declare float @llvm.nvvm.fmin.ftz.nan.f(float, float)
11 declare half @llvm.nvvm.fmin.f16(half, half)
12 declare half @llvm.nvvm.fmin.ftz.f16(half, half)
13 declare half @llvm.nvvm.fmin.nan.f16(half, half)
14 declare half @llvm.nvvm.fmin.ftz.nan.f16(half, half)
15 declare <2 x half> @llvm.nvvm.fmin.f16x2(<2 x half>, <2 x half>)
16 declare <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half>, <2 x half>)
17 declare <2 x half> @llvm.nvvm.fmin.nan.f16x2(<2 x half>, <2 x half>)
18 declare <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half>, <2 x half>)
19 declare i16 @llvm.nvvm.fmin.bf16(i16, i16)
20 declare i16 @llvm.nvvm.fmin.nan.bf16(i16, i16)
21 declare i32 @llvm.nvvm.fmin.bf16x2(i32, i32)
22 declare i32 @llvm.nvvm.fmin.nan.bf16x2(i32, i32)
; Declarations of the NVVM fmax intrinsics called by the test functions
; further down. They mirror the fmin declarations: two operands of the result
; type, for float, half, <2 x half>, and bf16/bf16x2 (declared here with i16
; and i32 operands respectively).
24 declare float @llvm.nvvm.fmax.nan.f(float, float)
25 declare float @llvm.nvvm.fmax.ftz.nan.f(float, float)
26 declare half @llvm.nvvm.fmax.f16(half, half)
27 declare half @llvm.nvvm.fmax.ftz.f16(half, half)
28 declare half @llvm.nvvm.fmax.nan.f16(half, half)
29 declare half @llvm.nvvm.fmax.ftz.nan.f16(half, half)
30 declare <2 x half> @llvm.nvvm.fmax.f16x2(<2 x half>, <2 x half>)
31 declare <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half>, <2 x half>)
32 declare <2 x half> @llvm.nvvm.fmax.nan.f16x2(<2 x half>, <2 x half>)
33 declare <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half>, <2 x half>)
34 declare i16 @llvm.nvvm.fmax.bf16(i16, i16)
35 declare i16 @llvm.nvvm.fmax.nan.bf16(i16, i16)
36 declare i32 @llvm.nvvm.fmax.bf16x2(i32, i32)
37 declare i32 @llvm.nvvm.fmax.nan.bf16x2(i32, i32)
; Declarations of the NVVM fma.rn intrinsics called by the test functions
; further down. Each takes three operands of its result type. Variants are
; declared for half, <2 x half>, and bf16/bf16x2 (declared here with i16 and
; i32 operands respectively).
39 declare half @llvm.nvvm.fma.rn.relu.f16(half, half, half)
40 declare half @llvm.nvvm.fma.rn.ftz.relu.f16(half, half, half)
41 declare <2 x half> @llvm.nvvm.fma.rn.relu.f16x2(<2 x half>, <2 x half>, <2 x half>)
42 declare <2 x half> @llvm.nvvm.fma.rn.ftz.relu.f16x2(<2 x half>, <2 x half>, <2 x half>)
43 declare i16 @llvm.nvvm.fma.rn.bf16(i16, i16, i16)
44 declare i16 @llvm.nvvm.fma.rn.relu.bf16(i16, i16, i16)
45 declare i32 @llvm.nvvm.fma.rn.bf16x2(i32, i32, i32)
46 declare i32 @llvm.nvvm.fma.rn.relu.bf16x2(i32, i32, i32)
; Test function abs_bf16: passes its i16 argument to @llvm.nvvm.abs.bf16.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
48 ; CHECK-LABEL: abs_bf16
49 define i16 @abs_bf16(i16 %0) {
52 %res = call i16 @llvm.nvvm.abs.bf16(i16 %0);
; Test function abs_bf16x2: passes its i32 argument to @llvm.nvvm.abs.bf16x2.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
56 ; CHECK-LABEL: abs_bf16x2
57 define i32 @abs_bf16x2(i32 %0) {
60 %res = call i32 @llvm.nvvm.abs.bf16x2(i32 %0);
; Test function neg_bf16: passes its i16 argument to @llvm.nvvm.neg.bf16.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
64 ; CHECK-LABEL: neg_bf16
65 define i16 @neg_bf16(i16 %0) {
68 %res = call i16 @llvm.nvvm.neg.bf16(i16 %0);
; Test function neg_bf16x2: passes its i32 argument to @llvm.nvvm.neg.bf16x2.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
72 ; CHECK-LABEL: neg_bf16x2
73 define i32 @neg_bf16x2(i32 %0) {
76 %res = call i32 @llvm.nvvm.neg.bf16x2(i32 %0);
; Test function fmin_nan_f: passes both float arguments to
; @llvm.nvvm.fmin.nan.f.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
80 ; CHECK-LABEL: fmin_nan_f
81 define float @fmin_nan_f(float %0, float %1) {
84 %res = call float @llvm.nvvm.fmin.nan.f(float %0, float %1);
; Test function fmin_ftz_nan_f: passes both float arguments to
; @llvm.nvvm.fmin.ftz.nan.f. The FileCheck directive inside the body expects
; the text "min.ftz.NaN.f32" in the llc output for this function.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
88 ; CHECK-LABEL: fmin_ftz_nan_f
89 define float @fmin_ftz_nan_f(float %0, float %1) {
91 ; CHECK: min.ftz.NaN.f32
92 %res = call float @llvm.nvvm.fmin.ftz.nan.f(float %0, float %1);
; Test function fmin_f16: passes both half arguments to @llvm.nvvm.fmin.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
96 ; CHECK-LABEL: fmin_f16
97 define half @fmin_f16(half %0, half %1) {
100 %res = call half @llvm.nvvm.fmin.f16(half %0, half %1)
; Test function fmin_ftz_f16: passes both half arguments to
; @llvm.nvvm.fmin.ftz.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
104 ; CHECK-LABEL: fmin_ftz_f16
105 define half @fmin_ftz_f16(half %0, half %1) {
108 %res = call half @llvm.nvvm.fmin.ftz.f16(half %0, half %1)
; Test function fmin_nan_f16: passes both half arguments to
; @llvm.nvvm.fmin.nan.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
112 ; CHECK-LABEL: fmin_nan_f16
113 define half @fmin_nan_f16(half %0, half %1) {
116 %res = call half @llvm.nvvm.fmin.nan.f16(half %0, half %1)
; Test function fmin_ftz_nan_f16: passes both half arguments to
; @llvm.nvvm.fmin.ftz.nan.f16. The FileCheck directive inside the body
; expects the text "min.ftz.NaN.f16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
120 ; CHECK-LABEL: fmin_ftz_nan_f16
121 define half @fmin_ftz_nan_f16(half %0, half %1) {
123 ; CHECK: min.ftz.NaN.f16
124 %res = call half @llvm.nvvm.fmin.ftz.nan.f16(half %0, half %1)
; Test function fmin_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmin.f16x2.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
128 ; CHECK-LABEL: fmin_f16x2
129 define <2 x half> @fmin_f16x2(<2 x half> %0, <2 x half> %1) {
132 %res = call <2 x half> @llvm.nvvm.fmin.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmin_ftz_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmin.ftz.f16x2. The FileCheck directive inside the body expects
; the text "min.ftz.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
136 ; CHECK-LABEL: fmin_ftz_f16x2
137 define <2 x half> @fmin_ftz_f16x2(<2 x half> %0, <2 x half> %1) {
139 ; CHECK: min.ftz.f16x2
140 %res = call <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmin_nan_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmin.nan.f16x2. The FileCheck directive inside the body expects
; the text "min.NaN.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
144 ; CHECK-LABEL: fmin_nan_f16x2
145 define <2 x half> @fmin_nan_f16x2(<2 x half> %0, <2 x half> %1) {
147 ; CHECK: min.NaN.f16x2
148 %res = call <2 x half> @llvm.nvvm.fmin.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmin_ftz_nan_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmin.ftz.nan.f16x2. The FileCheck directive inside the body
; expects the text "min.ftz.NaN.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
152 ; CHECK-LABEL: fmin_ftz_nan_f16x2
153 define <2 x half> @fmin_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) {
155 ; CHECK: min.ftz.NaN.f16x2
156 %res = call <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmin_bf16: passes both i16 arguments to @llvm.nvvm.fmin.bf16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
160 ; CHECK-LABEL: fmin_bf16
161 define i16 @fmin_bf16(i16 %0, i16 %1) {
164 %res = call i16 @llvm.nvvm.fmin.bf16(i16 %0, i16 %1)
; Test function fmin_nan_bf16: passes both i16 arguments to
; @llvm.nvvm.fmin.nan.bf16. The FileCheck directive inside the body expects
; the text "min.NaN.bf16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
168 ; CHECK-LABEL: fmin_nan_bf16
169 define i16 @fmin_nan_bf16(i16 %0, i16 %1) {
171 ; CHECK: min.NaN.bf16
172 %res = call i16 @llvm.nvvm.fmin.nan.bf16(i16 %0, i16 %1)
; Test function fmin_bf16x2: passes both i32 arguments to
; @llvm.nvvm.fmin.bf16x2.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
176 ; CHECK-LABEL: fmin_bf16x2
177 define i32 @fmin_bf16x2(i32 %0, i32 %1) {
180 %res = call i32 @llvm.nvvm.fmin.bf16x2(i32 %0, i32 %1)
; Test function fmin_nan_bf16x2: passes both i32 arguments to
; @llvm.nvvm.fmin.nan.bf16x2. The FileCheck directive inside the body expects
; the text "min.NaN.bf16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
184 ; CHECK-LABEL: fmin_nan_bf16x2
185 define i32 @fmin_nan_bf16x2(i32 %0, i32 %1) {
187 ; CHECK: min.NaN.bf16x2
188 %res = call i32 @llvm.nvvm.fmin.nan.bf16x2(i32 %0, i32 %1)
; Test function fmax_nan_f: passes both float arguments to
; @llvm.nvvm.fmax.nan.f.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
192 ; CHECK-LABEL: fmax_nan_f
193 define float @fmax_nan_f(float %0, float %1) {
196 %res = call float @llvm.nvvm.fmax.nan.f(float %0, float %1);
; Test function fmax_ftz_nan_f: passes both float arguments to
; @llvm.nvvm.fmax.ftz.nan.f. The FileCheck directive inside the body expects
; the text "max.ftz.NaN.f32" in the llc output for this function.
; The ';' after the call starts an empty IR comment; it is not a terminator.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
200 ; CHECK-LABEL: fmax_ftz_nan_f
201 define float @fmax_ftz_nan_f(float %0, float %1) {
203 ; CHECK: max.ftz.NaN.f32
204 %res = call float @llvm.nvvm.fmax.ftz.nan.f(float %0, float %1);
; Test function fmax_f16: passes both half arguments to @llvm.nvvm.fmax.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
208 ; CHECK-LABEL: fmax_f16
209 define half @fmax_f16(half %0, half %1) {
212 %res = call half @llvm.nvvm.fmax.f16(half %0, half %1)
; Test function fmax_ftz_f16: passes both half arguments to
; @llvm.nvvm.fmax.ftz.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
216 ; CHECK-LABEL: fmax_ftz_f16
217 define half @fmax_ftz_f16(half %0, half %1) {
220 %res = call half @llvm.nvvm.fmax.ftz.f16(half %0, half %1)
; Test function fmax_nan_f16: passes both half arguments to
; @llvm.nvvm.fmax.nan.f16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
224 ; CHECK-LABEL: fmax_nan_f16
225 define half @fmax_nan_f16(half %0, half %1) {
228 %res = call half @llvm.nvvm.fmax.nan.f16(half %0, half %1)
; Test function fmax_ftz_nan_f16: passes both half arguments to
; @llvm.nvvm.fmax.ftz.nan.f16. The FileCheck directive inside the body
; expects the text "max.ftz.NaN.f16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
232 ; CHECK-LABEL: fmax_ftz_nan_f16
233 define half @fmax_ftz_nan_f16(half %0, half %1) {
235 ; CHECK: max.ftz.NaN.f16
236 %res = call half @llvm.nvvm.fmax.ftz.nan.f16(half %0, half %1)
; Test function fmax_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmax.f16x2.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
240 ; CHECK-LABEL: fmax_f16x2
241 define <2 x half> @fmax_f16x2(<2 x half> %0, <2 x half> %1) {
244 %res = call <2 x half> @llvm.nvvm.fmax.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmax_ftz_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmax.ftz.f16x2. The FileCheck directive inside the body expects
; the text "max.ftz.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
248 ; CHECK-LABEL: fmax_ftz_f16x2
249 define <2 x half> @fmax_ftz_f16x2(<2 x half> %0, <2 x half> %1) {
251 ; CHECK: max.ftz.f16x2
252 %res = call <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmax_nan_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmax.nan.f16x2. The FileCheck directive inside the body expects
; the text "max.NaN.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
256 ; CHECK-LABEL: fmax_nan_f16x2
257 define <2 x half> @fmax_nan_f16x2(<2 x half> %0, <2 x half> %1) {
259 ; CHECK: max.NaN.f16x2
260 %res = call <2 x half> @llvm.nvvm.fmax.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmax_ftz_nan_f16x2: passes both <2 x half> arguments to
; @llvm.nvvm.fmax.ftz.nan.f16x2. The FileCheck directive inside the body
; expects the text "max.ftz.NaN.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
264 ; CHECK-LABEL: fmax_ftz_nan_f16x2
265 define <2 x half> @fmax_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) {
267 ; CHECK: max.ftz.NaN.f16x2
268 %res = call <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Test function fmax_bf16: passes both i16 arguments to @llvm.nvvm.fmax.bf16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
272 ; CHECK-LABEL: fmax_bf16
273 define i16 @fmax_bf16(i16 %0, i16 %1) {
276 %res = call i16 @llvm.nvvm.fmax.bf16(i16 %0, i16 %1)
; Test function fmax_nan_bf16: passes both i16 arguments to
; @llvm.nvvm.fmax.nan.bf16. The FileCheck directive inside the body expects
; the text "max.NaN.bf16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
280 ; CHECK-LABEL: fmax_nan_bf16
281 define i16 @fmax_nan_bf16(i16 %0, i16 %1) {
283 ; CHECK: max.NaN.bf16
284 %res = call i16 @llvm.nvvm.fmax.nan.bf16(i16 %0, i16 %1)
; Test function fmax_bf16x2: passes both i32 arguments to
; @llvm.nvvm.fmax.bf16x2.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
288 ; CHECK-LABEL: fmax_bf16x2
289 define i32 @fmax_bf16x2(i32 %0, i32 %1) {
292 %res = call i32 @llvm.nvvm.fmax.bf16x2(i32 %0, i32 %1)
; Test function fmax_nan_bf16x2: passes both i32 arguments to
; @llvm.nvvm.fmax.nan.bf16x2. The FileCheck directive inside the body expects
; the text "max.NaN.bf16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
296 ; CHECK-LABEL: fmax_nan_bf16x2
297 define i32 @fmax_nan_bf16x2(i32 %0, i32 %1) {
299 ; CHECK: max.NaN.bf16x2
300 %res = call i32 @llvm.nvvm.fmax.nan.bf16x2(i32 %0, i32 %1)
; Test function fma_rn_relu_f16: passes its three half arguments to
; @llvm.nvvm.fma.rn.relu.f16. The FileCheck directive inside the body expects
; the text "fma.rn.relu.f16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
304 ; CHECK-LABEL: fma_rn_relu_f16
305 define half @fma_rn_relu_f16(half %0, half %1, half %2) {
307 ; CHECK: fma.rn.relu.f16
308 %res = call half @llvm.nvvm.fma.rn.relu.f16(half %0, half %1, half %2)
; Test function fma_rn_ftz_relu_f16: passes its three half arguments to
; @llvm.nvvm.fma.rn.ftz.relu.f16. The FileCheck directive inside the body
; expects the text "fma.rn.ftz.relu.f16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
312 ; CHECK-LABEL: fma_rn_ftz_relu_f16
313 define half @fma_rn_ftz_relu_f16(half %0, half %1, half %2) {
315 ; CHECK: fma.rn.ftz.relu.f16
316 %res = call half @llvm.nvvm.fma.rn.ftz.relu.f16(half %0, half %1, half %2)
; Test function fma_rn_relu_f16x2: passes its three <2 x half> arguments to
; @llvm.nvvm.fma.rn.relu.f16x2. The FileCheck directive inside the body
; expects the text "fma.rn.relu.f16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
320 ; CHECK-LABEL: fma_rn_relu_f16x2
321 define <2 x half> @fma_rn_relu_f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2) {
323 ; CHECK: fma.rn.relu.f16x2
324 %res = call <2 x half> @llvm.nvvm.fma.rn.relu.f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2)
; Test function fma_rn_ftz_relu_f16x2: passes its three <2 x half> arguments
; to @llvm.nvvm.fma.rn.ftz.relu.f16x2. The FileCheck directive inside the body
; expects the text "fma.rn.ftz.relu.f16x2" in the llc output for this
; function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
328 ; CHECK-LABEL: fma_rn_ftz_relu_f16x2
329 define <2 x half> @fma_rn_ftz_relu_f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2) {
331 ; CHECK: fma.rn.ftz.relu.f16x2
332 %res = call <2 x half> @llvm.nvvm.fma.rn.ftz.relu.f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2)
; Test function fma_rn_bf16: passes its three i16 arguments to
; @llvm.nvvm.fma.rn.bf16.
; NOTE(review): only the FileCheck label directive is visible here; no
; instruction-level match directive, ret, or closing brace appears in this
; view - confirm against the full file.
336 ; CHECK-LABEL: fma_rn_bf16
337 define i16 @fma_rn_bf16(i16 %0, i16 %1, i16 %2) {
340 %res = call i16 @llvm.nvvm.fma.rn.bf16(i16 %0, i16 %1, i16 %2)
; Test function fma_rn_relu_bf16: passes its three i16 arguments to
; @llvm.nvvm.fma.rn.relu.bf16. The FileCheck directive inside the body expects
; the text "fma.rn.relu.bf16" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
344 ; CHECK-LABEL: fma_rn_relu_bf16
345 define i16 @fma_rn_relu_bf16(i16 %0, i16 %1, i16 %2) {
347 ; CHECK: fma.rn.relu.bf16
348 %res = call i16 @llvm.nvvm.fma.rn.relu.bf16(i16 %0, i16 %1, i16 %2)
; Test function fma_rn_bf16x2: passes its three i32 arguments to
; @llvm.nvvm.fma.rn.bf16x2. The FileCheck directive inside the body expects
; the text "fma.rn.bf16x2" in the llc output for this function.
; NOTE(review): no ret or closing brace is visible in this view - confirm.
352 ; CHECK-LABEL: fma_rn_bf16x2
353 define i32 @fma_rn_bf16x2(i32 %0, i32 %1, i32 %2) {
355 ; CHECK: fma.rn.bf16x2
356 %res = call i32 @llvm.nvvm.fma.rn.bf16x2(i32 %0, i32 %1, i32 %2)
; Test function fma_rn_relu_bf16x2: passes its three i32 arguments to
; @llvm.nvvm.fma.rn.relu.bf16x2. The FileCheck directive inside the body
; expects the text "fma.rn.relu.bf16x2" in the llc output for this function.
; NOTE(review): this is the last definition in view and no ret or closing
; brace is visible - confirm against the full file.
360 ; CHECK-LABEL: fma_rn_relu_bf16x2
361 define i32 @fma_rn_relu_bf16x2(i32 %0, i32 %1, i32 %2) {
363 ; CHECK: fma.rn.relu.bf16x2
364 %res = call i32 @llvm.nvvm.fma.rn.relu.bf16x2(i32 %0, i32 %1, i32 %2)