1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11 %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11 %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12 %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12 %s
; Signed i32 case: smin(smax(a, b), c) on VGPR arguments.
; Both gfx11 and gfx12 are expected to emit a single fused v_maxmin_i32.
7 define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
8 ; GFX11-LABEL: test_minmax_i32:
10 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
12 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14 ; GFX12-LABEL: test_minmax_i32:
16 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
17 ; GFX12-NEXT: s_wait_expcnt 0x0
18 ; GFX12-NEXT: s_wait_samplecnt 0x0
19 ; GFX12-NEXT: s_wait_bvhcnt 0x0
20 ; GFX12-NEXT: s_wait_kmcnt 0x0
21 ; GFX12-NEXT: v_maxmin_i32 v0, v0, v1, v2
22 ; GFX12-NEXT: s_setpc_b64 s[30:31]
23 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
24 %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
; Scalar variant of the signed smin(smax(a, b), c) test: all inputs are
; `inreg` arguments of an amdgpu_ps shader and the result is stored to %out.
; Both selectors are expected to keep separate s_max_i32 / s_min_i32
; instructions (no fused form is checked for) and then store the result.
; TODO(review): no s_endpgm check is visible at the end of the SelectionDAG
; sequence in this listing, although the GlobalISel sequence below and the
; SelectionDAG sequence of s_test_minmax_u32 both end with one - confirm
; against freshly regenerated assertions.
28 define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
29 ; SDAG-LABEL: s_test_minmax_i32:
31 ; SDAG-NEXT: s_max_i32 s0, s0, s1
32 ; SDAG-NEXT: s_mov_b32 s5, s4
33 ; SDAG-NEXT: s_min_i32 s0, s0, s2
34 ; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
35 ; SDAG-NEXT: s_mov_b32 s4, s3
36 ; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
39 ; GISEL-LABEL: s_test_minmax_i32:
41 ; GISEL-NEXT: s_max_i32 s0, s0, s1
42 ; GISEL-NEXT: s_mov_b32 s6, s3
43 ; GISEL-NEXT: s_min_i32 s0, s0, s2
44 ; GISEL-NEXT: s_mov_b32 s7, s4
45 ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
46 ; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
47 ; GISEL-NEXT: s_endpgm
48 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
49 %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
50 store i32 %sminmax, ptr addrspace(1) %out
; Same as test_minmax_i32 but with the outer smin operands swapped:
; smin(c, smax(a, b)). The expected output is the same single v_maxmin_i32,
; i.e. the fold must not depend on operand order of the outer operation.
54 define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
55 ; GFX11-LABEL: test_minmax_commuted_i32:
57 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
59 ; GFX11-NEXT: s_setpc_b64 s[30:31]
61 ; GFX12-LABEL: test_minmax_commuted_i32:
63 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
64 ; GFX12-NEXT: s_wait_expcnt 0x0
65 ; GFX12-NEXT: s_wait_samplecnt 0x0
66 ; GFX12-NEXT: s_wait_bvhcnt 0x0
67 ; GFX12-NEXT: s_wait_kmcnt 0x0
68 ; GFX12-NEXT: v_maxmin_i32 v0, v0, v1, v2
69 ; GFX12-NEXT: s_setpc_b64 s[30:31]
70 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
71 %sminmax = call i32 @llvm.smin.i32(i32 %c, i32 %smax)
; Signed i32 case: smax(smin(a, b), c) on VGPR arguments.
; Both gfx11 and gfx12 are expected to emit a single fused v_minmax_i32.
75 define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
76 ; GFX11-LABEL: test_maxmin_i32:
78 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
80 ; GFX11-NEXT: s_setpc_b64 s[30:31]
82 ; GFX12-LABEL: test_maxmin_i32:
84 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
85 ; GFX12-NEXT: s_wait_expcnt 0x0
86 ; GFX12-NEXT: s_wait_samplecnt 0x0
87 ; GFX12-NEXT: s_wait_bvhcnt 0x0
88 ; GFX12-NEXT: s_wait_kmcnt 0x0
89 ; GFX12-NEXT: v_minmax_i32 v0, v0, v1, v2
90 ; GFX12-NEXT: s_setpc_b64 s[30:31]
91 %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
92 %smaxmin = call i32 @llvm.smax.i32(i32 %smin, i32 %c)
; Same as test_maxmin_i32 but with the outer smax operands swapped:
; smax(c, smin(a, b)). The expected output is the same single v_minmax_i32.
96 define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
97 ; GFX11-LABEL: test_maxmin_commuted_i32:
99 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
101 ; GFX11-NEXT: s_setpc_b64 s[30:31]
103 ; GFX12-LABEL: test_maxmin_commuted_i32:
105 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
106 ; GFX12-NEXT: s_wait_expcnt 0x0
107 ; GFX12-NEXT: s_wait_samplecnt 0x0
108 ; GFX12-NEXT: s_wait_bvhcnt 0x0
109 ; GFX12-NEXT: s_wait_kmcnt 0x0
110 ; GFX12-NEXT: v_minmax_i32 v0, v0, v1, v2
111 ; GFX12-NEXT: s_setpc_b64 s[30:31]
112 %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
113 %smaxmin = call i32 @llvm.smax.i32(i32 %c, i32 %smin)
; Signed median-of-three pattern:
;   smax(smin(x, y), smin(smax(x, y), z))
; The expected output is v_med3_i32 (not a minmax/maxmin instruction),
; followed by a store of the result through %arg.
117 define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
118 ; GFX11-LABEL: test_smed3_i32:
120 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; GFX11-NEXT: v_med3_i32 v2, v2, v3, v4
122 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
123 ; GFX11-NEXT: s_setpc_b64 s[30:31]
125 ; GFX12-LABEL: test_smed3_i32:
127 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
128 ; GFX12-NEXT: s_wait_expcnt 0x0
129 ; GFX12-NEXT: s_wait_samplecnt 0x0
130 ; GFX12-NEXT: s_wait_bvhcnt 0x0
131 ; GFX12-NEXT: s_wait_kmcnt 0x0
132 ; GFX12-NEXT: v_med3_i32 v2, v2, v3, v4
133 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off
134 ; GFX12-NEXT: s_setpc_b64 s[30:31]
135 %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
136 %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
137 %tmp2 = call i32 @llvm.smin.i32(i32 %tmp1, i32 %z)
138 %tmp3 = call i32 @llvm.smax.i32(i32 %tmp0, i32 %tmp2)
139 store i32 %tmp3, ptr addrspace(1) %arg
; Unsigned i32 case: umin(umax(a, b), c) on VGPR arguments.
; Both gfx11 and gfx12 are expected to emit a single fused v_maxmin_u32.
143 define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
144 ; GFX11-LABEL: test_minmax_u32:
146 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
148 ; GFX11-NEXT: s_setpc_b64 s[30:31]
150 ; GFX12-LABEL: test_minmax_u32:
152 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
153 ; GFX12-NEXT: s_wait_expcnt 0x0
154 ; GFX12-NEXT: s_wait_samplecnt 0x0
155 ; GFX12-NEXT: s_wait_bvhcnt 0x0
156 ; GFX12-NEXT: s_wait_kmcnt 0x0
157 ; GFX12-NEXT: v_maxmin_u32 v0, v0, v1, v2
158 ; GFX12-NEXT: s_setpc_b64 s[30:31]
159 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
160 %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
; Scalar variant of the unsigned umin(umax(a, b), c) test: all inputs are
; `inreg` arguments of an amdgpu_ps shader and the result is stored to %out.
; Both selectors are expected to keep separate s_max_u32 / s_min_u32
; instructions, then store the result and end the program.
; The IR value names use the u-prefix to match the unsigned intrinsics they
; hold, consistent with the other unsigned tests in this file.
164 define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
165 ; SDAG-LABEL: s_test_minmax_u32:
167 ; SDAG-NEXT: s_max_u32 s0, s0, s1
168 ; SDAG-NEXT: s_mov_b32 s5, s4
169 ; SDAG-NEXT: s_min_u32 s0, s0, s2
170 ; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
171 ; SDAG-NEXT: s_mov_b32 s4, s3
172 ; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
173 ; SDAG-NEXT: s_endpgm
175 ; GISEL-LABEL: s_test_minmax_u32:
177 ; GISEL-NEXT: s_max_u32 s0, s0, s1
178 ; GISEL-NEXT: s_mov_b32 s6, s3
179 ; GISEL-NEXT: s_min_u32 s0, s0, s2
180 ; GISEL-NEXT: s_mov_b32 s7, s4
181 ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
182 ; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
183 ; GISEL-NEXT: s_endpgm
184 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
185 %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
186 store i32 %uminmax, ptr addrspace(1) %out
; Same as test_minmax_u32 but with the outer umin operands swapped:
; umin(c, umax(a, b)). The expected output is the same single v_maxmin_u32.
190 define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
191 ; GFX11-LABEL: test_minmax_commuted_u32:
193 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
195 ; GFX11-NEXT: s_setpc_b64 s[30:31]
197 ; GFX12-LABEL: test_minmax_commuted_u32:
199 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
200 ; GFX12-NEXT: s_wait_expcnt 0x0
201 ; GFX12-NEXT: s_wait_samplecnt 0x0
202 ; GFX12-NEXT: s_wait_bvhcnt 0x0
203 ; GFX12-NEXT: s_wait_kmcnt 0x0
204 ; GFX12-NEXT: v_maxmin_u32 v0, v0, v1, v2
205 ; GFX12-NEXT: s_setpc_b64 s[30:31]
206 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
207 %uminmax = call i32 @llvm.umin.i32(i32 %c, i32 %umax)
; Unsigned i32 case: umax(umin(a, b), c) on VGPR arguments.
; Both gfx11 and gfx12 are expected to emit a single fused v_minmax_u32.
211 define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
212 ; GFX11-LABEL: test_maxmin_u32:
214 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
216 ; GFX11-NEXT: s_setpc_b64 s[30:31]
218 ; GFX12-LABEL: test_maxmin_u32:
220 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
221 ; GFX12-NEXT: s_wait_expcnt 0x0
222 ; GFX12-NEXT: s_wait_samplecnt 0x0
223 ; GFX12-NEXT: s_wait_bvhcnt 0x0
224 ; GFX12-NEXT: s_wait_kmcnt 0x0
225 ; GFX12-NEXT: v_minmax_u32 v0, v0, v1, v2
226 ; GFX12-NEXT: s_setpc_b64 s[30:31]
227 %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
228 %umaxmin = call i32 @llvm.umax.i32(i32 %umin, i32 %c)
; Same as test_maxmin_u32 but with the outer umax operands swapped:
; umax(c, umin(a, b)). The expected output is the same single v_minmax_u32.
232 define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
233 ; GFX11-LABEL: test_maxmin_commuted_u32:
235 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
237 ; GFX11-NEXT: s_setpc_b64 s[30:31]
239 ; GFX12-LABEL: test_maxmin_commuted_u32:
241 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
242 ; GFX12-NEXT: s_wait_expcnt 0x0
243 ; GFX12-NEXT: s_wait_samplecnt 0x0
244 ; GFX12-NEXT: s_wait_bvhcnt 0x0
245 ; GFX12-NEXT: s_wait_kmcnt 0x0
246 ; GFX12-NEXT: v_minmax_u32 v0, v0, v1, v2
247 ; GFX12-NEXT: s_setpc_b64 s[30:31]
248 %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
249 %umaxmin = call i32 @llvm.umax.i32(i32 %c, i32 %umin)
; Unsigned median-of-three pattern:
;   umax(umin(x, y), umin(umax(x, y), z))
; The expected output is v_med3_u32 (not a minmax/maxmin instruction),
; followed by a store of the result through %arg.
253 define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
254 ; GFX11-LABEL: test_umed3_i32:
256 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; GFX11-NEXT: v_med3_u32 v2, v2, v3, v4
258 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
259 ; GFX11-NEXT: s_setpc_b64 s[30:31]
261 ; GFX12-LABEL: test_umed3_i32:
263 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
264 ; GFX12-NEXT: s_wait_expcnt 0x0
265 ; GFX12-NEXT: s_wait_samplecnt 0x0
266 ; GFX12-NEXT: s_wait_bvhcnt 0x0
267 ; GFX12-NEXT: s_wait_kmcnt 0x0
268 ; GFX12-NEXT: v_med3_u32 v2, v2, v3, v4
269 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off
270 ; GFX12-NEXT: s_setpc_b64 s[30:31]
271 %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
272 %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
273 %tmp2 = call i32 @llvm.umin.i32(i32 %tmp1, i32 %z)
274 %tmp3 = call i32 @llvm.umax.i32(i32 %tmp0, i32 %tmp2)
275 store i32 %tmp3, ptr addrspace(1) %arg
; f32 case with the default calling convention: minnum(maxnum(a, b), c).
; The expected output first applies a self-max to each of the three inputs
; (v_max_f32 / v_max_num_f32 of a register with itself) and then emits the
; fused v_maxmin_f32 (gfx11) or v_maxmin_num_f32 (gfx12).
; Presumably the self-max operations are input canonicalization required
; because IEEE mode is enabled for this calling convention, as the function
; name suggests - TODO(review) confirm.
; The two selectors differ only in the operand order of the dual self-max.
279 define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
280 ; SDAG-GFX11-LABEL: test_minmax_f32_ieee_true:
281 ; SDAG-GFX11: ; %bb.0:
282 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; SDAG-GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
284 ; SDAG-GFX11-NEXT: v_max_f32_e32 v2, v2, v2
285 ; SDAG-GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
286 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
288 ; GISEL-GFX11-LABEL: test_minmax_f32_ieee_true:
289 ; GISEL-GFX11: ; %bb.0:
290 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291 ; GISEL-GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
292 ; GISEL-GFX11-NEXT: v_max_f32_e32 v2, v2, v2
293 ; GISEL-GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
294 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
296 ; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true:
297 ; SDAG-GFX12: ; %bb.0:
298 ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
299 ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
300 ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
301 ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
302 ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
303 ; SDAG-GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
304 ; SDAG-GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2
305 ; SDAG-GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
306 ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
308 ; GISEL-GFX12-LABEL: test_minmax_f32_ieee_true:
309 ; GISEL-GFX12: ; %bb.0:
310 ; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
311 ; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
312 ; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
313 ; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
314 ; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
315 ; GISEL-GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
316 ; GISEL-GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2
317 ; GISEL-GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
318 ; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
319 %max = call float @llvm.maxnum.f32(float %a, float %b)
320 %minmax = call float @llvm.minnum.f32(float %max, float %c)
; Scalar-input f32 variant in an amdgpu_ps shader: minnum(maxnum(a, b), c)
; with all inputs passed `inreg`, result stored to %out.
; Expected output per configuration:
;  - gfx11 (both selectors) and gfx12 SelectionDAG: %c is copied to a VGPR
;    and a fused v_maxmin_f32 / v_maxmin_num_f32 reads s0 and s1 directly.
;  - gfx12 GlobalISel: no fused instruction; separate s_max_num_f32 and
;    s_min_num_f32 are used and the scalar result is copied to a VGPR.
; No self-max of the inputs is expected in any configuration.
324 define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
325 ; SDAG-GFX11-LABEL: s_test_minmax_f32_ieee_false:
326 ; SDAG-GFX11: ; %bb.0:
327 ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
328 ; SDAG-GFX11-NEXT: s_mov_b32 s5, s4
329 ; SDAG-GFX11-NEXT: s_mov_b32 s4, s3
330 ; SDAG-GFX11-NEXT: v_maxmin_f32 v0, s0, s1, v0
331 ; SDAG-GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
332 ; SDAG-GFX11-NEXT: s_endpgm
334 ; GISEL-GFX11-LABEL: s_test_minmax_f32_ieee_false:
335 ; GISEL-GFX11: ; %bb.0:
336 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
337 ; GISEL-GFX11-NEXT: s_mov_b32 s6, s3
338 ; GISEL-GFX11-NEXT: s_mov_b32 s7, s4
339 ; GISEL-GFX11-NEXT: v_maxmin_f32 v0, s0, s1, v0
340 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
341 ; GISEL-GFX11-NEXT: s_endpgm
343 ; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false:
344 ; SDAG-GFX12: ; %bb.0:
345 ; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
346 ; SDAG-GFX12-NEXT: s_mov_b32 s5, s4
347 ; SDAG-GFX12-NEXT: s_mov_b32 s4, s3
348 ; SDAG-GFX12-NEXT: v_maxmin_num_f32 v0, s0, s1, v0
349 ; SDAG-GFX12-NEXT: global_store_b32 v1, v0, s[4:5]
350 ; SDAG-GFX12-NEXT: s_endpgm
352 ; GISEL-GFX12-LABEL: s_test_minmax_f32_ieee_false:
353 ; GISEL-GFX12: ; %bb.0:
354 ; GISEL-GFX12-NEXT: s_max_num_f32 s0, s0, s1
355 ; GISEL-GFX12-NEXT: s_mov_b32 s6, s3
356 ; GISEL-GFX12-NEXT: s_mov_b32 s7, s4
357 ; GISEL-GFX12-NEXT: v_mov_b32_e32 v1, 0
358 ; GISEL-GFX12-NEXT: s_min_num_f32 s0, s0, s2
359 ; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
360 ; GISEL-GFX12-NEXT: global_store_b32 v1, v0, s[6:7]
361 ; GISEL-GFX12-NEXT: s_endpgm
362 %smax = call float @llvm.maxnum.f32(float %a, float %b)
363 %sminmax = call float @llvm.minnum.f32(float %smax, float %c)
364 store float %sminmax, ptr addrspace(1) %out
; f32 case in an amdgpu_ps shader with the outer minnum operands swapped:
; minnum(c, maxnum(a, b)). The expected output is only the fused
; v_maxmin_f32 (gfx11) / v_maxmin_num_f32 (gfx12), with no self-max of the
; inputs beforehand.
368 define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
369 ; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
371 ; GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
372 ; GFX11-NEXT: ; return to shader part epilog
374 ; GFX12-LABEL: test_minmax_commuted_f32_ieee_false:
376 ; GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
377 ; GFX12-NEXT: ; return to shader part epilog
378 %max = call float @llvm.maxnum.f32(float %a, float %b)
379 %minmax = call float @llvm.minnum.f32(float %c, float %max)
; f32 case with the default calling convention: maxnum(minnum(a, b), c).
; The expected output first applies a self-max to each of the three inputs
; and then emits the fused v_minmax_f32 (gfx11) or v_minmax_num_f32 (gfx12).
; Presumably the self-max operations are input canonicalization required
; because IEEE mode is enabled for this calling convention, as the function
; name suggests - TODO(review) confirm.
383 define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
384 ; SDAG-GFX11-LABEL: test_maxmin_f32_ieee_true:
385 ; SDAG-GFX11: ; %bb.0:
386 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; SDAG-GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
388 ; SDAG-GFX11-NEXT: v_max_f32_e32 v2, v2, v2
389 ; SDAG-GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
390 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
392 ; GISEL-GFX11-LABEL: test_maxmin_f32_ieee_true:
393 ; GISEL-GFX11: ; %bb.0:
394 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GISEL-GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
396 ; GISEL-GFX11-NEXT: v_max_f32_e32 v2, v2, v2
397 ; GISEL-GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
398 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
400 ; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true:
401 ; SDAG-GFX12: ; %bb.0:
402 ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
403 ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
404 ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
405 ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
406 ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
407 ; SDAG-GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
408 ; SDAG-GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2
409 ; SDAG-GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2
410 ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
412 ; GISEL-GFX12-LABEL: test_maxmin_f32_ieee_true:
413 ; GISEL-GFX12: ; %bb.0:
414 ; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
415 ; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
416 ; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
417 ; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
418 ; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
419 ; GISEL-GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
420 ; GISEL-GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2
421 ; GISEL-GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2
422 ; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
423 %min = call float @llvm.minnum.f32(float %a, float %b)
424 %maxmin = call float @llvm.maxnum.f32(float %min, float %c)
; f32 case in an amdgpu_ps shader with the outer maxnum operands swapped:
; maxnum(c, minnum(a, b)). The expected output is only the fused
; v_minmax_f32 (gfx11) / v_minmax_num_f32 (gfx12), with no self-max of the
; inputs beforehand.
428 define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
429 ; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
431 ; GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
432 ; GFX11-NEXT: ; return to shader part epilog
434 ; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false:
436 ; GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2
437 ; GFX12-NEXT: ; return to shader part epilog
438 %min = call float @llvm.minnum.f32(float %a, float %b)
439 %maxmin = call float @llvm.maxnum.f32(float %c, float %min)
; f32 median-of-three pattern built from minnum/maxnum:
;   maxnum(minnum(x, y), minnum(maxnum(x, y), z))
; The function carries attribute group #0, which sets
; "no-nans-fp-math"="true". The expected output is v_med3_f32 (gfx11) or
; v_med3_num_f32 (gfx12) with no self-max of the inputs, followed by a store
; of the result through %arg.
443 define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
444 ; GFX11-LABEL: test_med3_f32:
446 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX11-NEXT: v_med3_f32 v2, v2, v3, v4
448 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
449 ; GFX11-NEXT: s_setpc_b64 s[30:31]
451 ; GFX12-LABEL: test_med3_f32:
453 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
454 ; GFX12-NEXT: s_wait_expcnt 0x0
455 ; GFX12-NEXT: s_wait_samplecnt 0x0
456 ; GFX12-NEXT: s_wait_bvhcnt 0x0
457 ; GFX12-NEXT: s_wait_kmcnt 0x0
458 ; GFX12-NEXT: v_med3_num_f32 v2, v2, v3, v4
459 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off
460 ; GFX12-NEXT: s_setpc_b64 s[30:31]
461 %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
462 %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
463 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
464 %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
465 store float %tmp3, ptr addrspace(1) %arg
; f16 case in an amdgpu_ps shader: minnum(maxnum(a, b), c).
; The expected output is only the fused v_maxmin_f16 (gfx11) /
; v_maxmin_num_f16 (gfx12), with no self-max of the inputs beforehand.
469 define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
470 ; GFX11-LABEL: test_minmax_f16_ieee_false:
472 ; GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
473 ; GFX11-NEXT: ; return to shader part epilog
475 ; GFX12-LABEL: test_minmax_f16_ieee_false:
477 ; GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
478 ; GFX12-NEXT: ; return to shader part epilog
479 %max = call half @llvm.maxnum.f16(half %a, half %b)
480 %minmax = call half @llvm.minnum.f16(half %max, half %c)
; Scalar-input f16 variant in an amdgpu_ps shader: minnum(maxnum(a, b), c)
; with all inputs passed `inreg`, result stored to %out as 16 bits.
; Expected output per configuration:
;  - gfx11 (both selectors) and gfx12 SelectionDAG: %c is copied to a VGPR
;    and a fused v_maxmin_f16 / v_maxmin_num_f16 reads s0 and s1 directly.
;  - gfx12 GlobalISel: no fused instruction; separate s_max_num_f16 and
;    s_min_num_f16 are used and the scalar result is copied to a VGPR.
484 define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
485 ; SDAG-GFX11-LABEL: s_test_minmax_f16_ieee_false:
486 ; SDAG-GFX11: ; %bb.0:
487 ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
488 ; SDAG-GFX11-NEXT: s_mov_b32 s5, s4
489 ; SDAG-GFX11-NEXT: s_mov_b32 s4, s3
490 ; SDAG-GFX11-NEXT: v_maxmin_f16 v0, s0, s1, v0
491 ; SDAG-GFX11-NEXT: global_store_b16 v1, v0, s[4:5]
492 ; SDAG-GFX11-NEXT: s_endpgm
494 ; GISEL-GFX11-LABEL: s_test_minmax_f16_ieee_false:
495 ; GISEL-GFX11: ; %bb.0:
496 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
497 ; GISEL-GFX11-NEXT: s_mov_b32 s6, s3
498 ; GISEL-GFX11-NEXT: s_mov_b32 s7, s4
499 ; GISEL-GFX11-NEXT: v_maxmin_f16 v0, s0, s1, v0
500 ; GISEL-GFX11-NEXT: global_store_b16 v1, v0, s[6:7]
501 ; GISEL-GFX11-NEXT: s_endpgm
503 ; SDAG-GFX12-LABEL: s_test_minmax_f16_ieee_false:
504 ; SDAG-GFX12: ; %bb.0:
505 ; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
506 ; SDAG-GFX12-NEXT: s_mov_b32 s5, s4
507 ; SDAG-GFX12-NEXT: s_mov_b32 s4, s3
508 ; SDAG-GFX12-NEXT: v_maxmin_num_f16 v0, s0, s1, v0
509 ; SDAG-GFX12-NEXT: global_store_b16 v1, v0, s[4:5]
510 ; SDAG-GFX12-NEXT: s_endpgm
512 ; GISEL-GFX12-LABEL: s_test_minmax_f16_ieee_false:
513 ; GISEL-GFX12: ; %bb.0:
514 ; GISEL-GFX12-NEXT: s_max_num_f16 s0, s0, s1
515 ; GISEL-GFX12-NEXT: s_mov_b32 s6, s3
516 ; GISEL-GFX12-NEXT: s_mov_b32 s7, s4
517 ; GISEL-GFX12-NEXT: v_mov_b32_e32 v1, 0
518 ; GISEL-GFX12-NEXT: s_min_num_f16 s0, s0, s2
519 ; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
520 ; GISEL-GFX12-NEXT: global_store_b16 v1, v0, s[6:7]
521 ; GISEL-GFX12-NEXT: s_endpgm
522 %smax = call half @llvm.maxnum.f16(half %a, half %b)
523 %sminmax = call half @llvm.minnum.f16(half %smax, half %c)
524 store half %sminmax, ptr addrspace(1) %out
; f16 case with the default calling convention and the outer minnum operands
; swapped: minnum(c, maxnum(a, b)).
; The expected output applies a self-max to each of the three inputs
; (v_max_f16 / v_max_num_f16 of a register with itself) and then emits the
; fused v_maxmin_f16 (gfx11) or v_maxmin_num_f16 (gfx12).
; Presumably the self-max operations are input canonicalization required
; because IEEE mode is enabled for this calling convention, as the function
; name suggests - TODO(review) confirm.
; The two selectors differ only in the order of the first two self-max ops.
528 define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
529 ; SDAG-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
530 ; SDAG-GFX11: ; %bb.0:
531 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532 ; SDAG-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
533 ; SDAG-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
534 ; SDAG-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
535 ; SDAG-GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
536 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
538 ; GISEL-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
539 ; GISEL-GFX11: ; %bb.0:
540 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541 ; GISEL-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
542 ; GISEL-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
543 ; GISEL-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
544 ; GISEL-GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
545 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
547 ; SDAG-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
548 ; SDAG-GFX12: ; %bb.0:
549 ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
550 ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
551 ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
552 ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
553 ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
554 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
555 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
556 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
557 ; SDAG-GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
558 ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
560 ; GISEL-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
561 ; GISEL-GFX12: ; %bb.0:
562 ; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
563 ; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
564 ; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
565 ; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
566 ; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
567 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
568 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
569 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
570 ; GISEL-GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
571 ; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
572 %max = call half @llvm.maxnum.f16(half %a, half %b)
573 %minmax = call half @llvm.minnum.f16(half %c, half %max)
; f16 case in an amdgpu_ps shader: maxnum(minnum(a, b), c).
; The expected output is only the fused v_minmax_f16 (gfx11) /
; v_minmax_num_f16 (gfx12), with no self-max of the inputs beforehand.
577 define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
578 ; GFX11-LABEL: test_maxmin_f16_ieee_false:
580 ; GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
581 ; GFX11-NEXT: ; return to shader part epilog
583 ; GFX12-LABEL: test_maxmin_f16_ieee_false:
585 ; GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
586 ; GFX12-NEXT: ; return to shader part epilog
587 %min = call half @llvm.minnum.f16(half %a, half %b)
588 %maxmin = call half @llvm.maxnum.f16(half %min, half %c)
; f16 case with the default calling convention and the outer maxnum operands
; swapped: maxnum(c, minnum(a, b)).
; The expected output applies a self-max to each of the three inputs and
; then emits the fused v_minmax_f16 (gfx11) or v_minmax_num_f16 (gfx12).
; Presumably the self-max operations are input canonicalization required
; because IEEE mode is enabled for this calling convention, as the function
; name suggests - TODO(review) confirm.
592 define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
593 ; SDAG-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
594 ; SDAG-GFX11: ; %bb.0:
595 ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596 ; SDAG-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
597 ; SDAG-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
598 ; SDAG-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
599 ; SDAG-GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
600 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
602 ; GISEL-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
603 ; GISEL-GFX11: ; %bb.0:
604 ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; GISEL-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
606 ; GISEL-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
607 ; GISEL-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
608 ; GISEL-GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
609 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
611 ; SDAG-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
612 ; SDAG-GFX12: ; %bb.0:
613 ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
614 ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
615 ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
616 ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
617 ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
618 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
619 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
620 ; SDAG-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
621 ; SDAG-GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
622 ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
624 ; GISEL-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
625 ; GISEL-GFX12: ; %bb.0:
626 ; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
627 ; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
628 ; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
629 ; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
630 ; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
631 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
632 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
633 ; GISEL-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
634 ; GISEL-GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
635 ; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
636 %min = call half @llvm.minnum.f16(half %a, half %b)
637 %maxmin = call half @llvm.maxnum.f16(half %c, half %min)
; f16 median-of-three pattern built from minnum/maxnum:
;   maxnum(minnum(x, y), minnum(maxnum(x, y), z))
; The function carries attribute group #0, which sets
; "no-nans-fp-math"="true". The expected output is v_med3_f16 (gfx11) or
; v_med3_num_f16 (gfx12) with no self-max of the inputs, followed by a
; 16-bit store of the result through %arg.
641 define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
642 ; GFX11-LABEL: test_med3_f16:
644 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645 ; GFX11-NEXT: v_med3_f16 v2, v2, v3, v4
646 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
647 ; GFX11-NEXT: s_setpc_b64 s[30:31]
649 ; GFX12-LABEL: test_med3_f16:
651 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
652 ; GFX12-NEXT: s_wait_expcnt 0x0
653 ; GFX12-NEXT: s_wait_samplecnt 0x0
654 ; GFX12-NEXT: s_wait_bvhcnt 0x0
655 ; GFX12-NEXT: s_wait_kmcnt 0x0
656 ; GFX12-NEXT: v_med3_num_f16 v2, v2, v3, v4
657 ; GFX12-NEXT: global_store_b16 v[0:1], v2, off
658 ; GFX12-NEXT: s_setpc_b64 s[30:31]
659 %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
660 %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
661 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
662 %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
663 store half %tmp3, ptr addrspace(1) %arg
; Declarations of the min/max intrinsics called by the tests in this file:
; signed and unsigned i32, then f16 and f32 minnum/maxnum.
667 declare i32 @llvm.smin.i32(i32, i32)
668 declare i32 @llvm.smax.i32(i32, i32)
669 declare i32 @llvm.umin.i32(i32, i32)
670 declare i32 @llvm.umax.i32(i32, i32)
671 declare half @llvm.minnum.f16(half, half)
672 declare half @llvm.maxnum.f16(half, half)
673 declare float @llvm.minnum.f32(float, float)
674 declare float @llvm.maxnum.f32(float, float)
; Attribute group #0 is attached to test_med3_f32 and test_med3_f16 only.
675 attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }