1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL %s
; smin(smax(%a, %b), %c) on VGPR arguments. Both RUN lines share the GFX11
; prefix here, so SelectionDAG and GlobalISel are both expected to emit a
; single fused v_maxmin_i32.
5 define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
6 ; GFX11-LABEL: test_minmax_i32:
8 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
10 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
12 %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
; Scalar (inreg) variant of smin(smax(%a, %b), %c) in an amdgpu_ps function.
; The checks expect separate s_max_i32 / s_min_i32 instructions rather than a
; fused op; the result is then moved into a VGPR for the global store.
; SDAG and GISEL have separate check blocks: they differ in which SGPR pair
; holds %out and in the operand order of the v_dual_mov_b32.
16 define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
17 ; SDAG-LABEL: s_test_minmax_i32:
19 ; SDAG-NEXT: s_max_i32 s0, s0, s1
20 ; SDAG-NEXT: s_mov_b32 s5, s4
21 ; SDAG-NEXT: s_min_i32 s0, s0, s2
22 ; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
23 ; SDAG-NEXT: s_mov_b32 s4, s3
24 ; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
26 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
29 ; GISEL-LABEL: s_test_minmax_i32:
31 ; GISEL-NEXT: s_max_i32 s0, s0, s1
32 ; GISEL-NEXT: s_mov_b32 s6, s3
33 ; GISEL-NEXT: s_min_i32 s0, s0, s2
34 ; GISEL-NEXT: s_mov_b32 s7, s4
35 ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
36 ; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
38 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
39 ; GISEL-NEXT: s_endpgm
40 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
41 %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
42 store i32 %sminmax, ptr addrspace(1) %out
; Same as test_minmax_i32 but with the outer smin operands swapped:
; smin(%c, smax(%a, %b)). The expected output is the identical v_maxmin_i32.
46 define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
47 ; GFX11-LABEL: test_minmax_commuted_i32:
49 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
51 ; GFX11-NEXT: s_setpc_b64 s[30:31]
52 %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
53 %sminmax = call i32 @llvm.smin.i32(i32 %c, i32 %smax)
; smax(smin(%a, %b), %c) on VGPR arguments: expected to emit a single fused
; v_minmax_i32 under both selectors (shared GFX11 prefix).
57 define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
58 ; GFX11-LABEL: test_maxmin_i32:
60 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
62 ; GFX11-NEXT: s_setpc_b64 s[30:31]
63 %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
64 %smaxmin = call i32 @llvm.smax.i32(i32 %smin, i32 %c)
; Same as test_maxmin_i32 but with the outer smax operands swapped:
; smax(%c, smin(%a, %b)). The expected output is the identical v_minmax_i32.
68 define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
69 ; GFX11-LABEL: test_maxmin_commuted_i32:
71 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
73 ; GFX11-NEXT: s_setpc_b64 s[30:31]
74 %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
75 %smaxmin = call i32 @llvm.smax.i32(i32 %c, i32 %smin)
; Signed median-of-three pattern:
;   smax(smin(%x, %y), smin(smax(%x, %y), %z))
; The checks expect this to stay a single v_med3_i32 (not be split into
; min/max or minmax/maxmin pieces), followed by the store to %arg.
79 define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
80 ; GFX11-LABEL: test_smed3_i32:
82 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX11-NEXT: v_med3_i32 v2, v2, v3, v4
84 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
85 ; GFX11-NEXT: s_setpc_b64 s[30:31]
86 %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
87 %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
88 %tmp2 = call i32 @llvm.smin.i32(i32 %tmp1, i32 %z)
89 %tmp3 = call i32 @llvm.smax.i32(i32 %tmp0, i32 %tmp2)
90 store i32 %tmp3, ptr addrspace(1) %arg
; Unsigned counterpart of test_minmax_i32: umin(umax(%a, %b), %c) is expected
; to emit a single fused v_maxmin_u32 under both selectors.
94 define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
95 ; GFX11-LABEL: test_minmax_u32:
97 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
99 ; GFX11-NEXT: s_setpc_b64 s[30:31]
100 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
101 %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
; Scalar (inreg) variant of umin(umax(%a, %b), %c) in an amdgpu_ps function.
; The checks expect separate s_max_u32 / s_min_u32 instructions rather than a
; fused op; the result is then moved into a VGPR for the global store.
; SDAG and GISEL have separate check blocks: they differ in which SGPR pair
; holds %out, in the v_dual_mov_b32 operand order, and in the s_nop 0 that
; only the GISEL block lists before s_sendmsg.
; IR value names use the u-prefix to match the unsigned intrinsics they hold
; (value names do not appear in the generated assertions).
105 define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
106 ; SDAG-LABEL: s_test_minmax_u32:
108 ; SDAG-NEXT: s_max_u32 s0, s0, s1
109 ; SDAG-NEXT: s_mov_b32 s5, s4
110 ; SDAG-NEXT: s_min_u32 s0, s0, s2
111 ; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
112 ; SDAG-NEXT: s_mov_b32 s4, s3
113 ; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
115 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
116 ; SDAG-NEXT: s_endpgm
118 ; GISEL-LABEL: s_test_minmax_u32:
120 ; GISEL-NEXT: s_max_u32 s0, s0, s1
121 ; GISEL-NEXT: s_mov_b32 s6, s3
122 ; GISEL-NEXT: s_min_u32 s0, s0, s2
123 ; GISEL-NEXT: s_mov_b32 s7, s4
124 ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
125 ; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
126 ; GISEL-NEXT: s_nop 0
127 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
128 ; GISEL-NEXT: s_endpgm
129 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
130 %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
131 store i32 %uminmax, ptr addrspace(1) %out
; Same as test_minmax_u32 but with the outer umin operands swapped:
; umin(%c, umax(%a, %b)). The expected output is the identical v_maxmin_u32.
135 define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
136 ; GFX11-LABEL: test_minmax_commuted_u32:
138 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
140 ; GFX11-NEXT: s_setpc_b64 s[30:31]
141 %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
142 %uminmax = call i32 @llvm.umin.i32(i32 %c, i32 %umax)
; umax(umin(%a, %b), %c) on VGPR arguments: expected to emit a single fused
; v_minmax_u32 under both selectors (shared GFX11 prefix).
146 define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
147 ; GFX11-LABEL: test_maxmin_u32:
149 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
151 ; GFX11-NEXT: s_setpc_b64 s[30:31]
152 %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
153 %umaxmin = call i32 @llvm.umax.i32(i32 %umin, i32 %c)
; Same as test_maxmin_u32 but with the outer umax operands swapped:
; umax(%c, umin(%a, %b)). The expected output is the identical v_minmax_u32.
157 define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
158 ; GFX11-LABEL: test_maxmin_commuted_u32:
160 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
162 ; GFX11-NEXT: s_setpc_b64 s[30:31]
163 %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
164 %umaxmin = call i32 @llvm.umax.i32(i32 %c, i32 %umin)
; Unsigned median-of-three pattern:
;   umax(umin(%x, %y), umin(umax(%x, %y), %z))
; The checks expect this to stay a single v_med3_u32, followed by the store
; to %arg.
168 define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
169 ; GFX11-LABEL: test_umed3_i32:
171 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX11-NEXT: v_med3_u32 v2, v2, v3, v4
173 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
174 ; GFX11-NEXT: s_setpc_b64 s[30:31]
175 %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
176 %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
177 %tmp2 = call i32 @llvm.umin.i32(i32 %tmp1, i32 %z)
178 %tmp3 = call i32 @llvm.umax.i32(i32 %tmp0, i32 %tmp2)
179 store i32 %tmp3, ptr addrspace(1) %arg
; minnum(maxnum(%a, %b), %c) on float in a default-calling-convention function.
; The checks expect each input to go through a self-max (v_max_f32 x, x)
; before the fused v_maxmin_f32.
; NOTE(review): the self-max instructions are presumably the input
; canonicalization needed when IEEE mode is on, as the "_ieee_true" suffix
; suggests - confirm against the AMDGPU backend documentation.
; SDAG and GISEL differ only in the operand order inside the v_dual_max_f32.
183 define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
184 ; SDAG-LABEL: test_minmax_f32_ieee_true:
186 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
188 ; SDAG-NEXT: v_max_f32_e32 v2, v2, v2
189 ; SDAG-NEXT: v_maxmin_f32 v0, v0, v1, v2
190 ; SDAG-NEXT: s_setpc_b64 s[30:31]
192 ; GISEL-LABEL: test_minmax_f32_ieee_true:
194 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195 ; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
196 ; GISEL-NEXT: v_max_f32_e32 v2, v2, v2
197 ; GISEL-NEXT: v_maxmin_f32 v0, v0, v1, v2
198 ; GISEL-NEXT: s_setpc_b64 s[30:31]
199 %max = call float @llvm.maxnum.f32(float %a, float %b)
200 %minmax = call float @llvm.minnum.f32(float %max, float %c)
; minnum(maxnum(%a, %b), %c) on float with all inputs inreg in an amdgpu_ps
; function. The checks expect %c to be copied from s2 into v0 and then a
; single v_maxmin_f32 reading s0 and s1 directly, with no self-max
; instructions ahead of it.
; SDAG and GISEL have separate check blocks: they differ in which SGPR pair
; holds %out and in the s_nop 0 that only the GISEL block lists.
; IR value names follow the other f32 tests (%max / %minmax) since the values
; come from floating-point maxnum/minnum, not signed integer ops.
204 define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
205 ; SDAG-LABEL: s_test_minmax_f32_ieee_false:
207 ; SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
208 ; SDAG-NEXT: s_mov_b32 s5, s4
209 ; SDAG-NEXT: s_mov_b32 s4, s3
210 ; SDAG-NEXT: v_maxmin_f32 v0, s0, s1, v0
211 ; SDAG-NEXT: global_store_b32 v1, v0, s[4:5]
213 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
214 ; SDAG-NEXT: s_endpgm
216 ; GISEL-LABEL: s_test_minmax_f32_ieee_false:
218 ; GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
219 ; GISEL-NEXT: s_mov_b32 s6, s3
220 ; GISEL-NEXT: s_mov_b32 s7, s4
221 ; GISEL-NEXT: v_maxmin_f32 v0, s0, s1, v0
222 ; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
223 ; GISEL-NEXT: s_nop 0
224 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
225 ; GISEL-NEXT: s_endpgm
226 %max = call float @llvm.maxnum.f32(float %a, float %b)
227 %minmax = call float @llvm.minnum.f32(float %max, float %c)
228 store float %minmax, ptr addrspace(1) %out
; minnum(%c, maxnum(%a, %b)) on float in an amdgpu_ps function (outer minnum
; operands swapped). The checks expect a lone v_maxmin_f32 with no self-max
; instructions ahead of it, under both selectors (shared GFX11 prefix).
232 define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
233 ; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
235 ; GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
236 ; GFX11-NEXT: ; return to shader part epilog
237 %max = call float @llvm.maxnum.f32(float %a, float %b)
238 %minmax = call float @llvm.minnum.f32(float %c, float %max)
; maxnum(minnum(%a, %b), %c) on float in a default-calling-convention function.
; The checks expect each input to go through a self-max (v_max_f32 x, x)
; before the fused v_minmax_f32.
; NOTE(review): the self-max instructions are presumably the input
; canonicalization needed when IEEE mode is on, as the "_ieee_true" suffix
; suggests - confirm against the AMDGPU backend documentation.
; SDAG and GISEL differ only in the operand order inside the v_dual_max_f32.
242 define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
243 ; SDAG-LABEL: test_maxmin_f32_ieee_true:
245 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
247 ; SDAG-NEXT: v_max_f32_e32 v2, v2, v2
248 ; SDAG-NEXT: v_minmax_f32 v0, v0, v1, v2
249 ; SDAG-NEXT: s_setpc_b64 s[30:31]
251 ; GISEL-LABEL: test_maxmin_f32_ieee_true:
253 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
255 ; GISEL-NEXT: v_max_f32_e32 v2, v2, v2
256 ; GISEL-NEXT: v_minmax_f32 v0, v0, v1, v2
257 ; GISEL-NEXT: s_setpc_b64 s[30:31]
258 %min = call float @llvm.minnum.f32(float %a, float %b)
259 %maxmin = call float @llvm.maxnum.f32(float %min, float %c)
; maxnum(%c, minnum(%a, %b)) on float in an amdgpu_ps function (outer maxnum
; operands swapped). The checks expect a lone v_minmax_f32 with no self-max
; instructions ahead of it, under both selectors (shared GFX11 prefix).
263 define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
264 ; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
266 ; GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
267 ; GFX11-NEXT: ; return to shader part epilog
268 %min = call float @llvm.minnum.f32(float %a, float %b)
269 %maxmin = call float @llvm.maxnum.f32(float %c, float %min)
; Float median-of-three pattern:
;   maxnum(minnum(%x, %y), minnum(maxnum(%x, %y), %z))
; The function carries attribute group #0, which sets "no-nans-fp-math"="true".
; The checks expect a single v_med3_f32 with no self-max instructions on the
; inputs, followed by the store to %arg.
273 define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
274 ; GFX11-LABEL: test_med3_f32:
276 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277 ; GFX11-NEXT: v_med3_f32 v2, v2, v3, v4
278 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off
279 ; GFX11-NEXT: s_setpc_b64 s[30:31]
280 %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
281 %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
282 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
283 %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
284 store float %tmp3, ptr addrspace(1) %arg
; Half-precision minnum(maxnum(%a, %b), %c) in an amdgpu_ps function.
; The checks expect a lone v_maxmin_f16 with no self-max instructions ahead
; of it, under both selectors (shared GFX11 prefix).
288 define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
289 ; GFX11-LABEL: test_minmax_f16_ieee_false:
291 ; GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
292 ; GFX11-NEXT: ; return to shader part epilog
293 %max = call half @llvm.maxnum.f16(half %a, half %b)
294 %minmax = call half @llvm.minnum.f16(half %max, half %c)
; Half-precision minnum(maxnum(%a, %b), %c) with all inputs inreg in an
; amdgpu_ps function. The checks expect %c to be copied from s2 into v0 and
; then a single v_maxmin_f16 reading s0 and s1 directly, with the result
; written by a 16-bit global store.
; SDAG and GISEL have separate check blocks: they differ in which SGPR pair
; holds %out and in the s_nop 0 that only the GISEL block lists.
; IR value names follow the other f16 tests (%max / %minmax) since the values
; come from floating-point maxnum/minnum, not signed integer ops.
298 define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
299 ; SDAG-LABEL: s_test_minmax_f16_ieee_false:
301 ; SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
302 ; SDAG-NEXT: s_mov_b32 s5, s4
303 ; SDAG-NEXT: s_mov_b32 s4, s3
304 ; SDAG-NEXT: v_maxmin_f16 v0, s0, s1, v0
305 ; SDAG-NEXT: global_store_b16 v1, v0, s[4:5]
307 ; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
308 ; SDAG-NEXT: s_endpgm
310 ; GISEL-LABEL: s_test_minmax_f16_ieee_false:
312 ; GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
313 ; GISEL-NEXT: s_mov_b32 s6, s3
314 ; GISEL-NEXT: s_mov_b32 s7, s4
315 ; GISEL-NEXT: v_maxmin_f16 v0, s0, s1, v0
316 ; GISEL-NEXT: global_store_b16 v1, v0, s[6:7]
317 ; GISEL-NEXT: s_nop 0
318 ; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
319 ; GISEL-NEXT: s_endpgm
320 %max = call half @llvm.maxnum.f16(half %a, half %b)
321 %minmax = call half @llvm.minnum.f16(half %max, half %c)
322 store half %minmax, ptr addrspace(1) %out
; Half-precision minnum(%c, maxnum(%a, %b)) in a default-calling-convention
; function (outer minnum operands swapped). The checks expect each input to
; go through a self-max (v_max_f16 x, x) before the fused v_maxmin_f16.
; NOTE(review): the self-max instructions are presumably the input
; canonicalization needed when IEEE mode is on, as the "_ieee_true" suffix
; suggests - confirm against the AMDGPU backend documentation.
; SDAG and GISEL differ only in the order of the first two v_max_f16.
326 define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
327 ; SDAG-LABEL: test_minmax_commuted_f16_ieee_true:
329 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; SDAG-NEXT: v_max_f16_e32 v1, v1, v1
331 ; SDAG-NEXT: v_max_f16_e32 v0, v0, v0
332 ; SDAG-NEXT: v_max_f16_e32 v2, v2, v2
333 ; SDAG-NEXT: v_maxmin_f16 v0, v0, v1, v2
334 ; SDAG-NEXT: s_setpc_b64 s[30:31]
336 ; GISEL-LABEL: test_minmax_commuted_f16_ieee_true:
338 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GISEL-NEXT: v_max_f16_e32 v0, v0, v0
340 ; GISEL-NEXT: v_max_f16_e32 v1, v1, v1
341 ; GISEL-NEXT: v_max_f16_e32 v2, v2, v2
342 ; GISEL-NEXT: v_maxmin_f16 v0, v0, v1, v2
343 ; GISEL-NEXT: s_setpc_b64 s[30:31]
344 %max = call half @llvm.maxnum.f16(half %a, half %b)
345 %minmax = call half @llvm.minnum.f16(half %c, half %max)
; Half-precision maxnum(minnum(%a, %b), %c) in an amdgpu_ps function.
; The checks expect a lone v_minmax_f16 with no self-max instructions ahead
; of it, under both selectors (shared GFX11 prefix).
349 define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
350 ; GFX11-LABEL: test_maxmin_f16_ieee_false:
352 ; GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
353 ; GFX11-NEXT: ; return to shader part epilog
354 %min = call half @llvm.minnum.f16(half %a, half %b)
355 %maxmin = call half @llvm.maxnum.f16(half %min, half %c)
; Half-precision maxnum(%c, minnum(%a, %b)) in a default-calling-convention
; function (outer maxnum operands swapped). The checks expect each input to
; go through a self-max (v_max_f16 x, x) before the fused v_minmax_f16.
; NOTE(review): the self-max instructions are presumably the input
; canonicalization needed when IEEE mode is on, as the "_ieee_true" suffix
; suggests - confirm against the AMDGPU backend documentation.
; SDAG and GISEL differ only in the order of the first two v_max_f16.
359 define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
360 ; SDAG-LABEL: test_maxmin_commuted_f16_ieee_true:
362 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; SDAG-NEXT: v_max_f16_e32 v1, v1, v1
364 ; SDAG-NEXT: v_max_f16_e32 v0, v0, v0
365 ; SDAG-NEXT: v_max_f16_e32 v2, v2, v2
366 ; SDAG-NEXT: v_minmax_f16 v0, v0, v1, v2
367 ; SDAG-NEXT: s_setpc_b64 s[30:31]
369 ; GISEL-LABEL: test_maxmin_commuted_f16_ieee_true:
371 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GISEL-NEXT: v_max_f16_e32 v0, v0, v0
373 ; GISEL-NEXT: v_max_f16_e32 v1, v1, v1
374 ; GISEL-NEXT: v_max_f16_e32 v2, v2, v2
375 ; GISEL-NEXT: v_minmax_f16 v0, v0, v1, v2
376 ; GISEL-NEXT: s_setpc_b64 s[30:31]
377 %min = call half @llvm.minnum.f16(half %a, half %b)
378 %maxmin = call half @llvm.maxnum.f16(half %c, half %min)
; Half-precision median-of-three pattern:
;   maxnum(minnum(%x, %y), minnum(maxnum(%x, %y), %z))
; The function carries attribute group #0, which sets "no-nans-fp-math"="true".
; The checks expect a single v_med3_f16 with no self-max instructions on the
; inputs, followed by a 16-bit store to %arg.
382 define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
383 ; GFX11-LABEL: test_med3_f16:
385 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX11-NEXT: v_med3_f16 v2, v2, v3, v4
387 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
388 ; GFX11-NEXT: s_setpc_b64 s[30:31]
389 %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
390 %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
391 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
392 %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
393 store half %tmp3, ptr addrspace(1) %arg
; Intrinsics exercised by the tests in this file: signed/unsigned i32 min/max
; and half/float minnum/maxnum.
397 declare i32 @llvm.smin.i32(i32, i32)
398 declare i32 @llvm.smax.i32(i32, i32)
399 declare i32 @llvm.umin.i32(i32, i32)
400 declare i32 @llvm.umax.i32(i32, i32)
401 declare half @llvm.minnum.f16(half, half)
402 declare half @llvm.maxnum.f16(half, half)
403 declare float @llvm.minnum.f32(float, float)
404 declare float @llvm.maxnum.f32(float, float)
; Attribute group #0 is referenced by test_med3_f32 and test_med3_f16.
405 attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }