1 ; RUN: opt < %s -passes=instcombine -S -mtriple=nvptx-nvidia-cuda -march=nvptx64 \
2 ; RUN: -mcpu=sm_80 -mattr=+ptx70 | \
; Declarations of the NVVM fmin intrinsic variants called by the fmin_* test
; functions in this file (f16, f16x2 and f32; plain, ftz, nan and ftz.nan forms).
5 declare half @llvm.nvvm.fmin.f16(half, half)
6 declare half @llvm.nvvm.fmin.ftz.f16(half, half)
7 declare <2 x half> @llvm.nvvm.fmin.f16x2(<2 x half>, <2 x half>)
8 declare <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half>, <2 x half>)
9 declare float @llvm.nvvm.fmin.nan.f(float, float)
10 declare float @llvm.nvvm.fmin.ftz.nan.f(float, float)
11 declare half @llvm.nvvm.fmin.nan.f16(half, half)
12 declare half @llvm.nvvm.fmin.ftz.nan.f16(half, half)
13 declare <2 x half> @llvm.nvvm.fmin.nan.f16x2(<2 x half>, <2 x half>)
14 declare <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half>, <2 x half>)
; Declarations of the NVVM fmax intrinsic variants called by the fmax_* test
; functions in this file (f16, f16x2 and f32; plain, ftz, nan and ftz.nan forms).
16 declare half @llvm.nvvm.fmax.f16(half, half)
17 declare half @llvm.nvvm.fmax.ftz.f16(half, half)
18 declare <2 x half> @llvm.nvvm.fmax.f16x2(<2 x half>, <2 x half>)
19 declare <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half>, <2 x half>)
20 declare float @llvm.nvvm.fmax.nan.f(float, float)
21 declare float @llvm.nvvm.fmax.ftz.nan.f(float, float)
22 declare half @llvm.nvvm.fmax.nan.f16(half, half)
23 declare half @llvm.nvvm.fmax.ftz.nan.f16(half, half)
24 declare <2 x half> @llvm.nvvm.fmax.nan.f16x2(<2 x half>, <2 x half>)
25 declare <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half>, <2 x half>)
; Declarations of the NVVM round-to-nearest fma intrinsics called by the
; fma_rn_* test functions in this file (f16 and f16x2; plain and ftz forms).
27 ; f16 and f16x2 fma are available since ptx 4.2 and sm_53.
28 declare half @llvm.nvvm.fma.rn.f16(half, half, half)
29 declare half @llvm.nvvm.fma.rn.ftz.f16(half, half, half)
30 declare <2 x half> @llvm.nvvm.fma.rn.f16x2(<2 x half>, <2 x half>, <2 x half>)
31 declare <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2(<2 x half>, <2 x half>, <2 x half>)
; Non-ftz scalar f16 fmin in a function with no attribute group: the directives
; expect @llvm.minnum.f16 to appear with no @llvm.nvvm.fmin.f16 before it.
33 ; CHECK-LABEL: fmin_f16
34 define half @fmin_f16(half %0, half %1) {
35 ; CHECK-NOT: @llvm.nvvm.fmin.f16
36 ; CHECK: @llvm.minnum.f16
37 %res = call half @llvm.nvvm.fmin.f16(half %0, half %1)
; ftz scalar f16 fmin in a function tagged with attribute group #0: the
; directives expect @llvm.minnum.f16 with no @llvm.nvvm.fmin.ftz.f16 before it.
41 ; CHECK-LABEL: fmin_ftz_f16
42 define half @fmin_ftz_f16(half %0, half %1) #0 {
43 ; CHECK-NOT: @llvm.nvvm.fmin.ftz.f16
44 ; CHECK: @llvm.minnum.f16
45 %res = call half @llvm.nvvm.fmin.ftz.f16(half %0, half %1)
; ftz scalar f16 fmin in a function with no attribute group: the directives
; expect @llvm.nvvm.fmin.ftz.f16 to remain, with no @llvm.minnum.f16 before it.
49 ; CHECK-LABEL: fmin_ftz_f16_no_attr
50 define half @fmin_ftz_f16_no_attr(half %0, half %1) {
51 ; CHECK-NOT: @llvm.minnum.f16
52 ; CHECK: @llvm.nvvm.fmin.ftz.f16
53 %res = call half @llvm.nvvm.fmin.ftz.f16(half %0, half %1)
; Non-ftz <2 x half> fmin in a function with no attribute group: the directives
; expect @llvm.minnum.v2f16 to appear with no @llvm.nvvm.fmin.f16x2 before it.
57 ; CHECK-LABEL: fmin_f16x2
58 define <2 x half> @fmin_f16x2(<2 x half> %0, <2 x half> %1) {
59 ; CHECK-NOT: @llvm.nvvm.fmin.f16x2
60 ; CHECK: @llvm.minnum.v2f16
61 %res = call <2 x half> @llvm.nvvm.fmin.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> fmin in a function tagged with attribute group #0: the
; directives expect @llvm.minnum.v2f16 with no @llvm.nvvm.fmin.ftz.f16x2 before it.
65 ; CHECK-LABEL: fmin_ftz_f16x2
66 define <2 x half> @fmin_ftz_f16x2(<2 x half> %0, <2 x half> %1) #0 {
67 ; CHECK-NOT: @llvm.nvvm.fmin.ftz.f16x2
68 ; CHECK: @llvm.minnum.v2f16
69 %res = call <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> fmin in a function with no attribute group: the directives
; expect @llvm.nvvm.fmin.ftz.f16x2 to remain, with no @llvm.minnum.v2f16 before it.
73 ; CHECK-LABEL: fmin_ftz_f16x2_no_attr
74 define <2 x half> @fmin_ftz_f16x2_no_attr(<2 x half> %0, <2 x half> %1) {
75 ; CHECK-NOT: @llvm.minnum.v2f16
76 ; CHECK: @llvm.nvvm.fmin.ftz.f16x2
77 %res = call <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; Non-ftz f32 nan-variant fmin in a function with no attribute group: the
; directives expect @llvm.minimum.f32 with no @llvm.nvvm.fmin.nan.f before it.
81 ; CHECK-LABEL: fmin_nan_f
82 define float @fmin_nan_f(float %0, float %1) {
83 ; CHECK-NOT: @llvm.nvvm.fmin.nan.f
84 ; CHECK: @llvm.minimum.f32
85 %res = call float @llvm.nvvm.fmin.nan.f(float %0, float %1)
; ftz f32 nan-variant fmin in a function tagged with attribute group #1: the
; directives expect @llvm.minimum.f32 with no @llvm.nvvm.fmin.ftz.nan.f before it.
89 ; CHECK-LABEL: fmin_ftz_nan_f
90 define float @fmin_ftz_nan_f(float %0, float %1) #1 {
91 ; CHECK-NOT: @llvm.nvvm.fmin.ftz.nan.f
92 ; CHECK: @llvm.minimum.f32
93 %res = call float @llvm.nvvm.fmin.ftz.nan.f(float %0, float %1)
; ftz f32 nan-variant fmin in a function with no attribute group: the directives
; expect @llvm.nvvm.fmin.ftz.nan.f to remain and no @llvm.minimum.f32 after it.
97 ; CHECK-LABEL: fmin_ftz_nan_f_no_attr
98 define float @fmin_ftz_nan_f_no_attr(float %0, float %1) {
99 ; CHECK: @llvm.nvvm.fmin.ftz.nan.f
100 ; CHECK-NOT: @llvm.minimum.f32
101 %res = call float @llvm.nvvm.fmin.ftz.nan.f(float %0, float %1)
; Non-ftz scalar f16 nan-variant fmin in a function with no attribute group: the
; directives expect @llvm.minimum.f16 with no @llvm.nvvm.fmin.nan.f16 before it.
105 ; CHECK-LABEL: fmin_nan_f16
106 define half @fmin_nan_f16(half %0, half %1) {
107 ; CHECK-NOT: @llvm.nvvm.fmin.nan.f16
108 ; CHECK: @llvm.minimum.f16
109 %res = call half @llvm.nvvm.fmin.nan.f16(half %0, half %1)
; ftz scalar f16 nan-variant fmin in a function tagged with attribute group #0:
; the directives expect @llvm.minimum.f16 with no @llvm.nvvm.fmin.ftz.nan.f16
; before it.
113 ; CHECK-LABEL: fmin_ftz_nan_f16
114 define half @fmin_ftz_nan_f16(half %0, half %1) #0 {
115 ; CHECK-NOT: @llvm.nvvm.fmin.ftz.nan.f16
116 ; CHECK: @llvm.minimum.f16
117 %res = call half @llvm.nvvm.fmin.ftz.nan.f16(half %0, half %1)
; ftz scalar f16 nan-variant fmin in a function with no attribute group: the
; directives expect @llvm.nvvm.fmin.ftz.nan.f16 to remain and no
; @llvm.minimum.f16 after it.
121 ; CHECK-LABEL: fmin_ftz_nan_f16_no_attr
122 define half @fmin_ftz_nan_f16_no_attr(half %0, half %1) {
123 ; CHECK: @llvm.nvvm.fmin.ftz.nan.f16
124 ; CHECK-NOT: @llvm.minimum.f16
125 %res = call half @llvm.nvvm.fmin.ftz.nan.f16(half %0, half %1)
; Non-ftz <2 x half> nan-variant fmin in a function with no attribute group: the
; directives expect @llvm.minimum.v2f16 with no @llvm.nvvm.fmin.nan.f16x2
; before it.
129 ; CHECK-LABEL: fmin_nan_f16x2
130 define <2 x half> @fmin_nan_f16x2(<2 x half> %0, <2 x half> %1) {
131 ; CHECK-NOT: @llvm.nvvm.fmin.nan.f16x2
132 ; CHECK: @llvm.minimum.v2f16
133 %res = call <2 x half> @llvm.nvvm.fmin.nan.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> nan-variant fmin in a function tagged with attribute group #0:
; the directives expect @llvm.minimum.v2f16 with no
; @llvm.nvvm.fmin.ftz.nan.f16x2 before it.
137 ; CHECK-LABEL: fmin_ftz_nan_f16x2
138 define <2 x half> @fmin_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) #0 {
139 ; CHECK-NOT: @llvm.nvvm.fmin.ftz.nan.f16x2
140 ; CHECK: @llvm.minimum.v2f16
141 %res = call <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> nan-variant fmin in a function with no attribute group: the
; directives expect @llvm.nvvm.fmin.ftz.nan.f16x2 to remain, with no
; @llvm.minimum.v2f16 before it.
145 ; CHECK-LABEL: fmin_ftz_nan_f16x2_no_attr
146 define <2 x half> @fmin_ftz_nan_f16x2_no_attr(<2 x half> %0, <2 x half> %1) {
147 ; CHECK-NOT: @llvm.minimum.v2f16
148 ; CHECK: @llvm.nvvm.fmin.ftz.nan.f16x2
149 %res = call <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Non-ftz scalar f16 fmax in a function with no attribute group: the directives
; expect @llvm.maxnum.f16 to appear with no @llvm.nvvm.fmax.f16 before it.
153 ; CHECK-LABEL: fmax_f16
154 define half @fmax_f16(half %0, half %1) {
155 ; CHECK-NOT: @llvm.nvvm.fmax.f16
156 ; CHECK: @llvm.maxnum.f16
157 %res = call half @llvm.nvvm.fmax.f16(half %0, half %1)
; ftz scalar f16 fmax in a function tagged with attribute group #0: the
; directives expect @llvm.maxnum.f16 with no @llvm.nvvm.fmax.ftz.f16 before it.
161 ; CHECK-LABEL: fmax_ftz_f16
162 define half @fmax_ftz_f16(half %0, half %1) #0 {
163 ; CHECK-NOT: @llvm.nvvm.fmax.ftz.f16
164 ; CHECK: @llvm.maxnum.f16
165 %res = call half @llvm.nvvm.fmax.ftz.f16(half %0, half %1)
; ftz scalar f16 fmax in a function with no attribute group: the directives
; expect @llvm.nvvm.fmax.ftz.f16 to remain, with no @llvm.maxnum.f16 before it.
169 ; CHECK-LABEL: fmax_ftz_f16_no_attr
170 define half @fmax_ftz_f16_no_attr(half %0, half %1) {
171 ; CHECK-NOT: @llvm.maxnum.f16
172 ; CHECK: @llvm.nvvm.fmax.ftz.f16
173 %res = call half @llvm.nvvm.fmax.ftz.f16(half %0, half %1)
; Non-ftz <2 x half> fmax in a function with no attribute group: the directives
; expect @llvm.maxnum.v2f16 to appear with no @llvm.nvvm.fmax.f16x2 before it.
177 ; CHECK-LABEL: fmax_f16x2
178 define <2 x half> @fmax_f16x2(<2 x half> %0, <2 x half> %1) {
179 ; CHECK-NOT: @llvm.nvvm.fmax.f16x2
180 ; CHECK: @llvm.maxnum.v2f16
181 %res = call <2 x half> @llvm.nvvm.fmax.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> fmax in a function tagged with attribute group #0: the
; directives expect @llvm.maxnum.v2f16 with no @llvm.nvvm.fmax.ftz.f16x2 before it.
185 ; CHECK-LABEL: fmax_ftz_f16x2
186 define <2 x half> @fmax_ftz_f16x2(<2 x half> %0, <2 x half> %1) #0 {
187 ; CHECK-NOT: @llvm.nvvm.fmax.ftz.f16x2
188 ; CHECK: @llvm.maxnum.v2f16
189 %res = call <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> fmax in a function with no attribute group: the directives
; expect @llvm.nvvm.fmax.ftz.f16x2 to remain, with no @llvm.maxnum.v2f16 before it.
193 ; CHECK-LABEL: fmax_ftz_f16x2_no_attr
194 define <2 x half> @fmax_ftz_f16x2_no_attr(<2 x half> %0, <2 x half> %1) {
195 ; CHECK-NOT: @llvm.maxnum.v2f16
196 ; CHECK: @llvm.nvvm.fmax.ftz.f16x2
197 %res = call <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half> %0, <2 x half> %1)
; Non-ftz f32 nan-variant fmax in a function with no attribute group: the
; directives expect @llvm.maximum.f32 with no @llvm.nvvm.fmax.nan.f before it.
201 ; CHECK-LABEL: fmax_nan_f
202 define float @fmax_nan_f(float %0, float %1) {
203 ; CHECK-NOT: @llvm.nvvm.fmax.nan.f
204 ; CHECK: @llvm.maximum.f32
205 %res = call float @llvm.nvvm.fmax.nan.f(float %0, float %1)
; ftz f32 nan-variant fmax in a function tagged with attribute group #1: the
; directives expect @llvm.maximum.f32 with no @llvm.nvvm.fmax.ftz.nan.f before it.
209 ; CHECK-LABEL: fmax_ftz_nan_f
210 define float @fmax_ftz_nan_f(float %0, float %1) #1 {
211 ; CHECK-NOT: @llvm.nvvm.fmax.ftz.nan.f
212 ; CHECK: @llvm.maximum.f32
213 %res = call float @llvm.nvvm.fmax.ftz.nan.f(float %0, float %1)
; ftz f32 nan-variant fmax in a function with no attribute group: the directives
; expect @llvm.nvvm.fmax.ftz.nan.f to remain and no @llvm.maximum.f32 after it.
217 ; CHECK-LABEL: fmax_ftz_nan_f_no_attr
218 define float @fmax_ftz_nan_f_no_attr(float %0, float %1) {
219 ; CHECK: @llvm.nvvm.fmax.ftz.nan.f
220 ; CHECK-NOT: @llvm.maximum.f32
221 %res = call float @llvm.nvvm.fmax.ftz.nan.f(float %0, float %1)
; Non-ftz scalar f16 nan-variant fmax in a function with no attribute group: the
; directives expect @llvm.maximum.f16 with no @llvm.nvvm.fmax.nan.f16 before it.
225 ; CHECK-LABEL: fmax_nan_f16
226 define half @fmax_nan_f16(half %0, half %1) {
227 ; CHECK-NOT: @llvm.nvvm.fmax.nan.f16
228 ; CHECK: @llvm.maximum.f16
229 %res = call half @llvm.nvvm.fmax.nan.f16(half %0, half %1)
; ftz scalar f16 nan-variant fmax in a function tagged with attribute group #0:
; the directives expect @llvm.maximum.f16 with no @llvm.nvvm.fmax.ftz.nan.f16
; before it.
233 ; CHECK-LABEL: fmax_ftz_nan_f16
234 define half @fmax_ftz_nan_f16(half %0, half %1) #0 {
235 ; CHECK-NOT: @llvm.nvvm.fmax.ftz.nan.f16
236 ; CHECK: @llvm.maximum.f16
237 %res = call half @llvm.nvvm.fmax.ftz.nan.f16(half %0, half %1)
; ftz scalar f16 nan-variant fmax in a function with no attribute group: the
; directives expect @llvm.nvvm.fmax.ftz.nan.f16 to remain and no
; @llvm.maximum.f16 after it.
241 ; CHECK-LABEL: fmax_ftz_nan_f16_no_attr
242 define half @fmax_ftz_nan_f16_no_attr(half %0, half %1) {
243 ; CHECK: @llvm.nvvm.fmax.ftz.nan.f16
244 ; CHECK-NOT: @llvm.maximum.f16
245 %res = call half @llvm.nvvm.fmax.ftz.nan.f16(half %0, half %1)
; Non-ftz <2 x half> nan-variant fmax in a function with no attribute group: the
; directives expect @llvm.maximum.v2f16 with no @llvm.nvvm.fmax.nan.f16x2
; before it.
249 ; CHECK-LABEL: fmax_nan_f16x2
250 define <2 x half> @fmax_nan_f16x2(<2 x half> %0, <2 x half> %1) {
251 ; CHECK-NOT: @llvm.nvvm.fmax.nan.f16x2
252 ; CHECK: @llvm.maximum.v2f16
253 %res = call <2 x half> @llvm.nvvm.fmax.nan.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> nan-variant fmax in a function tagged with attribute group #0:
; the directives expect @llvm.maximum.v2f16 with no
; @llvm.nvvm.fmax.ftz.nan.f16x2 before it.
257 ; CHECK-LABEL: fmax_ftz_nan_f16x2
258 define <2 x half> @fmax_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) #0 {
259 ; CHECK-NOT: @llvm.nvvm.fmax.ftz.nan.f16x2
260 ; CHECK: @llvm.maximum.v2f16
261 %res = call <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; ftz <2 x half> nan-variant fmax in a function with no attribute group: the
; directives expect @llvm.nvvm.fmax.ftz.nan.f16x2 to remain, with no
; @llvm.maximum.v2f16 before it.
265 ; CHECK-LABEL: fmax_ftz_nan_f16x2_no_attr
266 define <2 x half> @fmax_ftz_nan_f16x2_no_attr(<2 x half> %0, <2 x half> %1) {
267 ; CHECK-NOT: @llvm.maximum.v2f16
268 ; CHECK: @llvm.nvvm.fmax.ftz.nan.f16x2
269 %res = call <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half> %0, <2 x half> %1)
; Non-ftz scalar f16 fma.rn in a function with no attribute group: the
; directives expect @llvm.fma.f16 with no @llvm.nvvm.fma.rn.f16 before it.
273 ; CHECK-LABEL: fma_rn_f16
274 define half @fma_rn_f16(half %0, half %1, half %2) {
275 ; CHECK-NOT: @llvm.nvvm.fma.rn.f16
276 ; CHECK: @llvm.fma.f16
277 %res = call half @llvm.nvvm.fma.rn.f16(half %0, half %1, half %2)
; ftz scalar f16 fma.rn in a function with no attribute group: the directives
; expect @llvm.nvvm.fma.rn.ftz.f16 to remain, with no @llvm.fma.f16 before it.
281 ; CHECK-LABEL: fma_rn_ftz_f16_no_attr
282 define half @fma_rn_ftz_f16_no_attr(half %0, half %1, half %2) {
283 ; CHECK-NOT: @llvm.fma.f16
284 ; CHECK: @llvm.nvvm.fma.rn.ftz.f16
285 %res = call half @llvm.nvvm.fma.rn.ftz.f16(half %0, half %1, half %2)
; ftz scalar f16 fma.rn in a function tagged with attribute group #0: the
; directives expect @llvm.fma.f16 with no @llvm.nvvm.fma.rn.ftz.f16 before it.
289 ; CHECK-LABEL: fma_rn_ftz_f16
290 define half @fma_rn_ftz_f16(half %0, half %1, half %2) #0 {
291 ; CHECK-NOT: @llvm.nvvm.fma.rn.ftz.f16
292 ; CHECK: @llvm.fma.f16
293 %res = call half @llvm.nvvm.fma.rn.ftz.f16(half %0, half %1, half %2)
; Non-ftz <2 x half> fma.rn in a function with no attribute group: the
; directives expect @llvm.fma.v2f16 with no @llvm.nvvm.fma.rn.f16x2 before it.
297 ; CHECK-LABEL: fma_rn_f16x2
298 define <2 x half> @fma_rn_f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2) {
299 ; CHECK-NOT: @llvm.nvvm.fma.rn.f16x2
300 ; CHECK: @llvm.fma.v2f16
301 %res = call <2 x half> @llvm.nvvm.fma.rn.f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2)
; ftz <2 x half> fma.rn in a function tagged with attribute group #0: the
; directives expect @llvm.fma.v2f16 with no @llvm.nvvm.fma.rn.ftz.f16x2 before it.
305 ; CHECK-LABEL: fma_rn_ftz_f16x2
306 define <2 x half> @fma_rn_ftz_f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2) #0 {
307 ; CHECK-NOT: @llvm.nvvm.fma.rn.ftz.f16x2
308 ; CHECK: @llvm.fma.v2f16
309 %res = call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2)
; ftz <2 x half> fma.rn in a function with no attribute group: the directives
; expect @llvm.nvvm.fma.rn.ftz.f16x2 to remain, with no @llvm.fma.v2f16 before it.
313 ; CHECK-LABEL: fma_rn_ftz_f16x2_no_attr
314 define <2 x half> @fma_rn_ftz_f16x2_no_attr(<2 x half> %0, <2 x half> %1, <2 x half> %2) {
315 ; CHECK-NOT: @llvm.fma.v2f16
316 ; CHECK: @llvm.nvvm.fma.rn.ftz.f16x2
317 %res = call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2(<2 x half> %0, <2 x half> %1, <2 x half> %2)
; Attribute groups referenced by the ftz test functions in this file:
;   #0 is attached to the half and <2 x half> ftz tests,
;   #1 is attached to the float ftz tests.
; Both request the "preserve-sign" denormal mode; #1 uses the f32-specific key.
321 attributes #0 = { "denormal-fp-math"="preserve-sign" }
322 attributes #1 = { "denormal-fp-math-f32"="preserve-sign" }