1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
6 ; Test which min/max instructions, if any, fcmp+select patterns are lowered to for each combination of nnan/nsz fast-math flags on the select
9 ; TODO: Merge with fmin_legacy.ll/fmax_legacy.ll
; Scalar f32 min pattern: fcmp ule + select, with no fast-math flags on the select.
; Expected: the gfx700 run folds it into v_min_legacy_f32; the gfx900 and gfx1200
; runs keep the compare (v_cmp_ngt_f32) followed by v_cndmask_b32.
11 define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) {
12 ; GFX7-LABEL: v_test_fmin_legacy_ule_f32_safe:
14 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
16 ; GFX7-NEXT: s_setpc_b64 s[30:31]
18 ; GFX9-LABEL: v_test_fmin_legacy_ule_f32_safe:
20 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
22 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
23 ; GFX9-NEXT: s_setpc_b64 s[30:31]
25 ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe:
27 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
28 ; GFX12-NEXT: s_wait_expcnt 0x0
29 ; GFX12-NEXT: s_wait_samplecnt 0x0
30 ; GFX12-NEXT: s_wait_bvhcnt 0x0
31 ; GFX12-NEXT: s_wait_kmcnt 0x0
32 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
33 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
34 ; GFX12-NEXT: s_setpc_b64 s[30:31]
35 %cmp = fcmp ule float %a, %b
36 %val = select i1 %cmp, float %a, float %b
; Scalar f32 min pattern (fcmp ule + select) with only nnan on the select.
; Expected output matches the no-flag variant: v_min_legacy_f32 on the gfx700 run,
; v_cmp_ngt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
40 define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) {
41 ; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
43 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
45 ; GFX7-NEXT: s_setpc_b64 s[30:31]
47 ; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
49 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
51 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
52 ; GFX9-NEXT: s_setpc_b64 s[30:31]
54 ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
56 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
57 ; GFX12-NEXT: s_wait_expcnt 0x0
58 ; GFX12-NEXT: s_wait_samplecnt 0x0
59 ; GFX12-NEXT: s_wait_bvhcnt 0x0
60 ; GFX12-NEXT: s_wait_kmcnt 0x0
61 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
62 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
63 ; GFX12-NEXT: s_setpc_b64 s[30:31]
64 %cmp = fcmp ule float %a, %b
65 %val = select nnan i1 %cmp, float %a, float %b
; Scalar f32 min pattern (fcmp ule + select) with only nsz on the select.
; Expected output matches the no-flag variant: v_min_legacy_f32 on the gfx700 run,
; v_cmp_ngt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
69 define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) {
70 ; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
72 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
74 ; GFX7-NEXT: s_setpc_b64 s[30:31]
76 ; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
78 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
80 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
81 ; GFX9-NEXT: s_setpc_b64 s[30:31]
83 ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
85 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
86 ; GFX12-NEXT: s_wait_expcnt 0x0
87 ; GFX12-NEXT: s_wait_samplecnt 0x0
88 ; GFX12-NEXT: s_wait_bvhcnt 0x0
89 ; GFX12-NEXT: s_wait_kmcnt 0x0
90 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
91 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
92 ; GFX12-NEXT: s_setpc_b64 s[30:31]
93 %cmp = fcmp ule float %a, %b
94 %val = select nsz i1 %cmp, float %a, float %b
; Scalar f32 min pattern (fcmp ule + select) with both nnan and nsz on the select.
; Expected: every run emits a single plain min instruction -- v_min_f32 on the
; gfx700 and gfx900 runs, v_min_num_f32 on the gfx1200 run.
98 define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) {
99 ; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
101 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
103 ; GFX7-NEXT: s_setpc_b64 s[30:31]
105 ; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
107 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
109 ; GFX9-NEXT: s_setpc_b64 s[30:31]
111 ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
113 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
114 ; GFX12-NEXT: s_wait_expcnt 0x0
115 ; GFX12-NEXT: s_wait_samplecnt 0x0
116 ; GFX12-NEXT: s_wait_bvhcnt 0x0
117 ; GFX12-NEXT: s_wait_kmcnt 0x0
118 ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
119 ; GFX12-NEXT: s_setpc_b64 s[30:31]
120 %cmp = fcmp ule float %a, %b
121 %val = select nnan nsz i1 %cmp, float %a, float %b
; Scalar f32 max pattern: fcmp uge + select, with no fast-math flags on the select.
; Expected: the gfx700 run folds it into v_max_legacy_f32; the gfx900 and gfx1200
; runs keep the compare (v_cmp_nlt_f32) followed by v_cndmask_b32.
125 define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) {
126 ; GFX7-LABEL: v_test_fmax_legacy_uge_f32_safe:
128 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
130 ; GFX7-NEXT: s_setpc_b64 s[30:31]
132 ; GFX9-LABEL: v_test_fmax_legacy_uge_f32_safe:
134 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
136 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
137 ; GFX9-NEXT: s_setpc_b64 s[30:31]
139 ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe:
141 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
142 ; GFX12-NEXT: s_wait_expcnt 0x0
143 ; GFX12-NEXT: s_wait_samplecnt 0x0
144 ; GFX12-NEXT: s_wait_bvhcnt 0x0
145 ; GFX12-NEXT: s_wait_kmcnt 0x0
146 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
147 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
148 ; GFX12-NEXT: s_setpc_b64 s[30:31]
149 %cmp = fcmp uge float %a, %b
150 %val = select i1 %cmp, float %a, float %b
; Scalar f32 max pattern (fcmp uge + select) with only nnan on the select.
; Expected output matches the no-flag variant: v_max_legacy_f32 on the gfx700 run,
; v_cmp_nlt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
154 define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) {
155 ; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
157 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
159 ; GFX7-NEXT: s_setpc_b64 s[30:31]
161 ; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
163 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
165 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
168 ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
170 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
171 ; GFX12-NEXT: s_wait_expcnt 0x0
172 ; GFX12-NEXT: s_wait_samplecnt 0x0
173 ; GFX12-NEXT: s_wait_bvhcnt 0x0
174 ; GFX12-NEXT: s_wait_kmcnt 0x0
175 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
176 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
177 ; GFX12-NEXT: s_setpc_b64 s[30:31]
178 %cmp = fcmp uge float %a, %b
179 %val = select nnan i1 %cmp, float %a, float %b
; Scalar f32 max pattern (fcmp uge + select) with only nsz on the select.
; Expected output matches the no-flag variant: v_max_legacy_f32 on the gfx700 run,
; v_cmp_nlt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
183 define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) {
184 ; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
186 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
188 ; GFX7-NEXT: s_setpc_b64 s[30:31]
190 ; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
192 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
194 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
195 ; GFX9-NEXT: s_setpc_b64 s[30:31]
197 ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
199 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
200 ; GFX12-NEXT: s_wait_expcnt 0x0
201 ; GFX12-NEXT: s_wait_samplecnt 0x0
202 ; GFX12-NEXT: s_wait_bvhcnt 0x0
203 ; GFX12-NEXT: s_wait_kmcnt 0x0
204 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
205 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
206 ; GFX12-NEXT: s_setpc_b64 s[30:31]
207 %cmp = fcmp uge float %a, %b
208 %val = select nsz i1 %cmp, float %a, float %b
; Scalar f32 max pattern (fcmp uge + select) with both nnan and nsz on the select.
; Expected: every run emits a single plain max instruction -- v_max_f32 on the
; gfx700 and gfx900 runs, v_max_num_f32 on the gfx1200 run.
212 define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) {
213 ; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
215 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
217 ; GFX7-NEXT: s_setpc_b64 s[30:31]
219 ; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
221 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
223 ; GFX9-NEXT: s_setpc_b64 s[30:31]
225 ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
227 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
228 ; GFX12-NEXT: s_wait_expcnt 0x0
229 ; GFX12-NEXT: s_wait_samplecnt 0x0
230 ; GFX12-NEXT: s_wait_bvhcnt 0x0
231 ; GFX12-NEXT: s_wait_kmcnt 0x0
232 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
233 ; GFX12-NEXT: s_setpc_b64 s[30:31]
234 %cmp = fcmp uge float %a, %b
235 %val = select nnan nsz i1 %cmp, float %a, float %b
; <2 x float> min pattern: fcmp ule + select, with no fast-math flags on the select.
; Expected: handled one element at a time -- two v_min_legacy_f32 on the gfx700 run,
; two v_cmp_ngt_f32 + v_cndmask_b32 pairs on the gfx900 and gfx1200 runs.
239 define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float> %b) {
240 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
242 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
244 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
245 ; GFX7-NEXT: s_setpc_b64 s[30:31]
247 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
251 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
252 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
253 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
254 ; GFX9-NEXT: s_setpc_b64 s[30:31]
256 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
258 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
259 ; GFX12-NEXT: s_wait_expcnt 0x0
260 ; GFX12-NEXT: s_wait_samplecnt 0x0
261 ; GFX12-NEXT: s_wait_bvhcnt 0x0
262 ; GFX12-NEXT: s_wait_kmcnt 0x0
263 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
264 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
265 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
266 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
267 ; GFX12-NEXT: s_setpc_b64 s[30:31]
268 %cmp = fcmp ule <2 x float> %a, %b
269 %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> min pattern (fcmp ule + select) with only nnan on the select.
; Expected output matches the no-flag vector variant: per-element v_min_legacy_f32
; on the gfx700 run, per-element v_cmp_ngt_f32 + v_cndmask_b32 on gfx900 and gfx1200.
273 define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) {
274 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
276 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
278 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
279 ; GFX7-NEXT: s_setpc_b64 s[30:31]
281 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
283 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
285 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
286 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
287 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
288 ; GFX9-NEXT: s_setpc_b64 s[30:31]
290 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
292 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
293 ; GFX12-NEXT: s_wait_expcnt 0x0
294 ; GFX12-NEXT: s_wait_samplecnt 0x0
295 ; GFX12-NEXT: s_wait_bvhcnt 0x0
296 ; GFX12-NEXT: s_wait_kmcnt 0x0
297 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
298 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
299 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
300 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
301 ; GFX12-NEXT: s_setpc_b64 s[30:31]
302 %cmp = fcmp ule <2 x float> %a, %b
303 %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> min pattern (fcmp ule + select) with only nsz on the select.
; Expected output matches the no-flag vector variant: per-element v_min_legacy_f32
; on the gfx700 run, per-element v_cmp_ngt_f32 + v_cndmask_b32 on gfx900 and gfx1200.
307 define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) {
308 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
310 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
312 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
313 ; GFX7-NEXT: s_setpc_b64 s[30:31]
315 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
317 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
319 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
320 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
321 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
322 ; GFX9-NEXT: s_setpc_b64 s[30:31]
324 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
326 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
327 ; GFX12-NEXT: s_wait_expcnt 0x0
328 ; GFX12-NEXT: s_wait_samplecnt 0x0
329 ; GFX12-NEXT: s_wait_bvhcnt 0x0
330 ; GFX12-NEXT: s_wait_kmcnt 0x0
331 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
332 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
333 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
334 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
335 ; GFX12-NEXT: s_setpc_b64 s[30:31]
336 %cmp = fcmp ule <2 x float> %a, %b
337 %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> min pattern (fcmp ule + select) with both nnan and nsz on the select.
; Expected output is still per-element v_min_legacy_f32 on the gfx700 run and
; per-element v_cmp_ngt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
; NOTE(review): unlike the scalar f32 nnan+nsz test, no plain min instruction is
; expected here -- presumably a missed fold for vectors; confirm before relying on it.
341 define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) {
342 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
344 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
346 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
347 ; GFX7-NEXT: s_setpc_b64 s[30:31]
349 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
351 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
353 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
354 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
355 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
356 ; GFX9-NEXT: s_setpc_b64 s[30:31]
358 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
360 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
361 ; GFX12-NEXT: s_wait_expcnt 0x0
362 ; GFX12-NEXT: s_wait_samplecnt 0x0
363 ; GFX12-NEXT: s_wait_bvhcnt 0x0
364 ; GFX12-NEXT: s_wait_kmcnt 0x0
365 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
366 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
367 ; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
368 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
369 ; GFX12-NEXT: s_setpc_b64 s[30:31]
370 %cmp = fcmp ule <2 x float> %a, %b
371 %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> max pattern: fcmp uge + select, with no fast-math flags on the select.
; Expected: handled one element at a time -- two v_max_legacy_f32 on the gfx700 run,
; two v_cmp_nlt_f32 + v_cndmask_b32 pairs on the gfx900 and gfx1200 runs.
375 define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float> %b) {
376 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
378 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
380 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
381 ; GFX7-NEXT: s_setpc_b64 s[30:31]
383 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
385 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
387 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
388 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
389 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
390 ; GFX9-NEXT: s_setpc_b64 s[30:31]
392 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
394 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
395 ; GFX12-NEXT: s_wait_expcnt 0x0
396 ; GFX12-NEXT: s_wait_samplecnt 0x0
397 ; GFX12-NEXT: s_wait_bvhcnt 0x0
398 ; GFX12-NEXT: s_wait_kmcnt 0x0
399 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
400 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
401 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
402 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
403 ; GFX12-NEXT: s_setpc_b64 s[30:31]
404 %cmp = fcmp uge <2 x float> %a, %b
405 %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> max pattern (fcmp uge + select) with only nnan on the select.
; Expected output matches the no-flag vector variant: per-element v_max_legacy_f32
; on the gfx700 run, per-element v_cmp_nlt_f32 + v_cndmask_b32 on gfx900 and gfx1200.
409 define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) {
410 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
412 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
414 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
415 ; GFX7-NEXT: s_setpc_b64 s[30:31]
417 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
419 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
421 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
422 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
423 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
424 ; GFX9-NEXT: s_setpc_b64 s[30:31]
426 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
428 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
429 ; GFX12-NEXT: s_wait_expcnt 0x0
430 ; GFX12-NEXT: s_wait_samplecnt 0x0
431 ; GFX12-NEXT: s_wait_bvhcnt 0x0
432 ; GFX12-NEXT: s_wait_kmcnt 0x0
433 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
434 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
435 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
436 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
437 ; GFX12-NEXT: s_setpc_b64 s[30:31]
438 %cmp = fcmp uge <2 x float> %a, %b
439 %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> max pattern (fcmp uge + select) with only nsz on the select.
; Expected output matches the no-flag vector variant: per-element v_max_legacy_f32
; on the gfx700 run, per-element v_cmp_nlt_f32 + v_cndmask_b32 on gfx900 and gfx1200.
443 define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) {
444 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
446 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
448 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
449 ; GFX7-NEXT: s_setpc_b64 s[30:31]
451 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
453 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
455 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
456 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
457 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
458 ; GFX9-NEXT: s_setpc_b64 s[30:31]
460 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
462 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
463 ; GFX12-NEXT: s_wait_expcnt 0x0
464 ; GFX12-NEXT: s_wait_samplecnt 0x0
465 ; GFX12-NEXT: s_wait_bvhcnt 0x0
466 ; GFX12-NEXT: s_wait_kmcnt 0x0
467 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
468 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
469 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
470 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
471 ; GFX12-NEXT: s_setpc_b64 s[30:31]
472 %cmp = fcmp uge <2 x float> %a, %b
473 %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; <2 x float> max pattern (fcmp uge + select) with both nnan and nsz on the select.
; Expected output is still per-element v_max_legacy_f32 on the gfx700 run and
; per-element v_cmp_nlt_f32 + v_cndmask_b32 on the gfx900 and gfx1200 runs.
; NOTE(review): unlike the scalar f32 nnan+nsz test, no plain max instruction is
; expected here -- presumably a missed fold for vectors; confirm before relying on it.
477 define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) {
478 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
480 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
482 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
483 ; GFX7-NEXT: s_setpc_b64 s[30:31]
485 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
487 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
489 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
490 ; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
491 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
492 ; GFX9-NEXT: s_setpc_b64 s[30:31]
494 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
496 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
497 ; GFX12-NEXT: s_wait_expcnt 0x0
498 ; GFX12-NEXT: s_wait_samplecnt 0x0
499 ; GFX12-NEXT: s_wait_bvhcnt 0x0
500 ; GFX12-NEXT: s_wait_kmcnt 0x0
501 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
502 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
503 ; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
504 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
505 ; GFX12-NEXT: s_setpc_b64 s[30:31]
506 %cmp = fcmp uge <2 x float> %a, %b
507 %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
; Scalar f16 min pattern: fcmp ule + select, with no fast-math flags on the select.
; Expected: the gfx700 run passes both operands through v_cvt_f16_f32 and
; v_cvt_f32_f16 and then uses the f32 v_min_legacy_f32; the gfx900 and gfx1200
; runs compare in f16 (v_cmp_ngt_f16) and select with v_cndmask_b32.
511 define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) {
512 ; GFX7-LABEL: v_test_fmin_legacy_ule_f16_safe:
514 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
516 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
517 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
518 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
519 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
520 ; GFX7-NEXT: s_setpc_b64 s[30:31]
522 ; GFX9-LABEL: v_test_fmin_legacy_ule_f16_safe:
524 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
526 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
527 ; GFX9-NEXT: s_setpc_b64 s[30:31]
529 ; GFX12-LABEL: v_test_fmin_legacy_ule_f16_safe:
531 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
532 ; GFX12-NEXT: s_wait_expcnt 0x0
533 ; GFX12-NEXT: s_wait_samplecnt 0x0
534 ; GFX12-NEXT: s_wait_bvhcnt 0x0
535 ; GFX12-NEXT: s_wait_kmcnt 0x0
536 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
537 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
538 ; GFX12-NEXT: s_setpc_b64 s[30:31]
539 %cmp = fcmp ule half %a, %b
540 %val = select i1 %cmp, half %a, half %b
; Scalar f16 min pattern (fcmp ule + select) with only nnan on the select.
; Expected output matches the no-flag f16 variant: f16/f32 conversions plus
; v_min_legacy_f32 on the gfx700 run, v_cmp_ngt_f16 + v_cndmask_b32 on gfx900/gfx1200.
544 define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) {
545 ; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
547 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
549 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
550 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
551 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
552 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
553 ; GFX7-NEXT: s_setpc_b64 s[30:31]
555 ; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
557 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
559 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
560 ; GFX9-NEXT: s_setpc_b64 s[30:31]
562 ; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
564 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
565 ; GFX12-NEXT: s_wait_expcnt 0x0
566 ; GFX12-NEXT: s_wait_samplecnt 0x0
567 ; GFX12-NEXT: s_wait_bvhcnt 0x0
568 ; GFX12-NEXT: s_wait_kmcnt 0x0
569 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
570 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
571 ; GFX12-NEXT: s_setpc_b64 s[30:31]
572 %cmp = fcmp ule half %a, %b
573 %val = select nnan i1 %cmp, half %a, half %b
; Scalar f16 min pattern (fcmp ule + select) with only nsz on the select.
; Expected output matches the no-flag f16 variant: f16/f32 conversions plus
; v_min_legacy_f32 on the gfx700 run, v_cmp_ngt_f16 + v_cndmask_b32 on gfx900/gfx1200.
577 define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) {
578 ; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
580 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
582 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
583 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
584 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
585 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
586 ; GFX7-NEXT: s_setpc_b64 s[30:31]
588 ; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
590 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
592 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
593 ; GFX9-NEXT: s_setpc_b64 s[30:31]
595 ; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
597 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
598 ; GFX12-NEXT: s_wait_expcnt 0x0
599 ; GFX12-NEXT: s_wait_samplecnt 0x0
600 ; GFX12-NEXT: s_wait_bvhcnt 0x0
601 ; GFX12-NEXT: s_wait_kmcnt 0x0
602 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
603 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
604 ; GFX12-NEXT: s_setpc_b64 s[30:31]
605 %cmp = fcmp ule half %a, %b
606 %val = select nsz i1 %cmp, half %a, half %b
; Scalar f16 min pattern (fcmp ule + select) with both nnan and nsz on the select.
; Expected: a single plain min on every run -- v_min_f32 after the f16/f32
; conversions on the gfx700 run, v_min_f16 on gfx900, v_min_num_f16 on gfx1200.
610 define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
611 ; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
613 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
615 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
616 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
617 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
618 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
619 ; GFX7-NEXT: s_setpc_b64 s[30:31]
621 ; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
623 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624 ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
625 ; GFX9-NEXT: s_setpc_b64 s[30:31]
627 ; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
629 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
630 ; GFX12-NEXT: s_wait_expcnt 0x0
631 ; GFX12-NEXT: s_wait_samplecnt 0x0
632 ; GFX12-NEXT: s_wait_bvhcnt 0x0
633 ; GFX12-NEXT: s_wait_kmcnt 0x0
634 ; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1
635 ; GFX12-NEXT: s_setpc_b64 s[30:31]
636 %cmp = fcmp ule half %a, %b
637 %val = select nnan nsz i1 %cmp, half %a, half %b
; Scalar f16 max pattern: fcmp uge + select, with no fast-math flags on the select.
; Expected: the gfx700 run passes both operands through v_cvt_f16_f32 and
; v_cvt_f32_f16 and then uses the f32 v_max_legacy_f32; the gfx900 and gfx1200
; runs compare in f16 (v_cmp_nlt_f16) and select with v_cndmask_b32.
641 define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) {
642 ; GFX7-LABEL: v_test_fmax_legacy_uge_f16_safe:
644 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
646 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
647 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
648 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
649 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
650 ; GFX7-NEXT: s_setpc_b64 s[30:31]
652 ; GFX9-LABEL: v_test_fmax_legacy_uge_f16_safe:
654 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
656 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
657 ; GFX9-NEXT: s_setpc_b64 s[30:31]
659 ; GFX12-LABEL: v_test_fmax_legacy_uge_f16_safe:
661 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
662 ; GFX12-NEXT: s_wait_expcnt 0x0
663 ; GFX12-NEXT: s_wait_samplecnt 0x0
664 ; GFX12-NEXT: s_wait_bvhcnt 0x0
665 ; GFX12-NEXT: s_wait_kmcnt 0x0
666 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
667 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
668 ; GFX12-NEXT: s_setpc_b64 s[30:31]
669 %cmp = fcmp uge half %a, %b
670 %val = select i1 %cmp, half %a, half %b
; Scalar f16 max pattern (fcmp uge + select) with only nnan on the select.
; Expected output matches the no-flag f16 variant: f16/f32 conversions plus
; v_max_legacy_f32 on the gfx700 run, v_cmp_nlt_f16 + v_cndmask_b32 on gfx900/gfx1200.
674 define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) {
675 ; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
677 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
679 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
680 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
681 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
682 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
683 ; GFX7-NEXT: s_setpc_b64 s[30:31]
685 ; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
687 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
689 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
690 ; GFX9-NEXT: s_setpc_b64 s[30:31]
692 ; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
694 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
695 ; GFX12-NEXT: s_wait_expcnt 0x0
696 ; GFX12-NEXT: s_wait_samplecnt 0x0
697 ; GFX12-NEXT: s_wait_bvhcnt 0x0
698 ; GFX12-NEXT: s_wait_kmcnt 0x0
699 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
700 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
701 ; GFX12-NEXT: s_setpc_b64 s[30:31]
702 %cmp = fcmp uge half %a, %b
703 %val = select nnan i1 %cmp, half %a, half %b
; Scalar f16 max pattern (fcmp uge + select) with only nsz on the select.
; Expected output matches the no-flag f16 variant: f16/f32 conversions plus
; v_max_legacy_f32 on the gfx700 run, v_cmp_nlt_f16 + v_cndmask_b32 on gfx900/gfx1200.
707 define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) {
708 ; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
710 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
711 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
712 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
713 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
714 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
715 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
716 ; GFX7-NEXT: s_setpc_b64 s[30:31]
718 ; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
720 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
722 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
723 ; GFX9-NEXT: s_setpc_b64 s[30:31]
725 ; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
727 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
728 ; GFX12-NEXT: s_wait_expcnt 0x0
729 ; GFX12-NEXT: s_wait_samplecnt 0x0
730 ; GFX12-NEXT: s_wait_bvhcnt 0x0
731 ; GFX12-NEXT: s_wait_kmcnt 0x0
732 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
733 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
734 ; GFX12-NEXT: s_setpc_b64 s[30:31]
735 %cmp = fcmp uge half %a, %b
736 %val = select nsz i1 %cmp, half %a, half %b
; Scalar f16 max pattern (fcmp uge + select) with both nnan and nsz on the select.
; Expected: a single plain max on every run -- v_max_f32 after the f16/f32
; conversions on the gfx700 run, v_max_f16 on gfx900, v_max_num_f16 on gfx1200.
740 define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
741 ; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
743 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
745 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
746 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
747 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
748 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
749 ; GFX7-NEXT: s_setpc_b64 s[30:31]
751 ; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
753 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
755 ; GFX9-NEXT: s_setpc_b64 s[30:31]
757 ; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
759 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
760 ; GFX12-NEXT: s_wait_expcnt 0x0
761 ; GFX12-NEXT: s_wait_samplecnt 0x0
762 ; GFX12-NEXT: s_wait_bvhcnt 0x0
763 ; GFX12-NEXT: s_wait_kmcnt 0x0
764 ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1
765 ; GFX12-NEXT: s_setpc_b64 s[30:31]
766 %cmp = fcmp uge half %a, %b
767 %val = select nnan nsz i1 %cmp, half %a, half %b
; <2 x half> min pattern: fcmp ule + select, with no fast-math flags on the select.
; Expected on the gfx700 run: all four values go through v_cvt_f16_f32 and
; v_cvt_f32_f16, then one v_min_legacy_f32 per element.
; Expected on the gfx900 and gfx1200 runs: the upper halves are shifted down with
; v_lshrrev_b32 by 16, each half gets its own v_cmp_ngt_f16 + v_cndmask_b32, and the
; two results are recombined by v_perm_b32 with selector 0x5040100.
771 define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %b) {
772 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
774 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
776 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
777 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
778 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
779 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
780 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
781 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
782 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
783 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
784 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
785 ; GFX7-NEXT: s_setpc_b64 s[30:31]
787 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
789 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
791 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
792 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
793 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
794 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
795 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
796 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
797 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
798 ; GFX9-NEXT: s_setpc_b64 s[30:31]
800 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
802 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
803 ; GFX12-NEXT: s_wait_expcnt 0x0
804 ; GFX12-NEXT: s_wait_samplecnt 0x0
805 ; GFX12-NEXT: s_wait_bvhcnt 0x0
806 ; GFX12-NEXT: s_wait_kmcnt 0x0
807 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
808 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
809 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
810 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
811 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
812 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
813 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
814 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
815 ; GFX12-NEXT: s_setpc_b64 s[30:31]
816 %cmp = fcmp ule <2 x half> %a, %b
817 %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; <2 x half> min pattern (fcmp ule + select) with only nnan on the select.
; Expected output matches the no-flag <2 x half> variant: conversions plus
; per-element v_min_legacy_f32 on the gfx700 run; per-half v_cmp_ngt_f16 +
; v_cndmask_b32 recombined with v_perm_b32 on the gfx900 and gfx1200 runs.
821 define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) {
822 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
824 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
826 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
827 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
828 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
829 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
830 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
831 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
832 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
833 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
834 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
835 ; GFX7-NEXT: s_setpc_b64 s[30:31]
837 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
839 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
841 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
842 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
843 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
844 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
845 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
846 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
847 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
848 ; GFX9-NEXT: s_setpc_b64 s[30:31]
850 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
852 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
853 ; GFX12-NEXT: s_wait_expcnt 0x0
854 ; GFX12-NEXT: s_wait_samplecnt 0x0
855 ; GFX12-NEXT: s_wait_bvhcnt 0x0
856 ; GFX12-NEXT: s_wait_kmcnt 0x0
857 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
858 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
859 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
860 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
861 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
862 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
863 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
864 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
865 ; GFX12-NEXT: s_setpc_b64 s[30:31]
866 %cmp = fcmp ule <2 x half> %a, %b
867 %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; <2 x half> min pattern (fcmp ule + select) with only nsz on the select.
; Expected output matches the no-flag <2 x half> variant: conversions plus
; per-element v_min_legacy_f32 on the gfx700 run; per-half v_cmp_ngt_f16 +
; v_cndmask_b32 recombined with v_perm_b32 on the gfx900 and gfx1200 runs.
871 define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) {
872 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
874 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
875 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
876 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
877 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
878 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
879 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
880 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
881 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
882 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
883 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
884 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
885 ; GFX7-NEXT: s_setpc_b64 s[30:31]
887 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
889 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
891 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
892 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
893 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
894 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
895 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
896 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
897 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
898 ; GFX9-NEXT: s_setpc_b64 s[30:31]
900 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
902 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
903 ; GFX12-NEXT: s_wait_expcnt 0x0
904 ; GFX12-NEXT: s_wait_samplecnt 0x0
905 ; GFX12-NEXT: s_wait_bvhcnt 0x0
906 ; GFX12-NEXT: s_wait_kmcnt 0x0
907 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
908 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
909 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
910 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
911 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
912 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
913 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
914 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
915 ; GFX12-NEXT: s_setpc_b64 s[30:31]
916 %cmp = fcmp ule <2 x half> %a, %b
917 %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp ule + select on <2 x half>, with both nnan and nsz on the select.
; Expected output: gfx700 still emits the f16/f32 conversions followed by one
; v_min_legacy_f32 per element. gfx900 selects a single v_pk_min_f16 and
; gfx1200 a single v_pk_min_num_f16.
921 define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) {
922 ; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
924 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
926 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
927 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
928 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
929 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
930 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
931 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
932 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
933 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
934 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
935 ; GFX7-NEXT: s_setpc_b64 s[30:31]
937 ; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
939 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940 ; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
941 ; GFX9-NEXT: s_setpc_b64 s[30:31]
943 ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
945 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
946 ; GFX12-NEXT: s_wait_expcnt 0x0
947 ; GFX12-NEXT: s_wait_samplecnt 0x0
948 ; GFX12-NEXT: s_wait_bvhcnt 0x0
949 ; GFX12-NEXT: s_wait_kmcnt 0x0
950 ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
951 ; GFX12-NEXT: s_setpc_b64 s[30:31]
952 %cmp = fcmp ule <2 x half> %a, %b
953 %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp uge + select on <2 x half>, with no fast-math flags on the select.
; Expected output: gfx700 emits v_cvt_f16_f32 / v_cvt_f32_f16 conversions on
; each element and then one v_max_legacy_f32 per element. gfx900 and gfx1200
; keep a v_cmp_nlt_f16 + v_cndmask_b32 pair per half and repack the result
; with v_perm_b32; no packed max instruction is expected here.
957 define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %b) {
958 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
960 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
962 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
963 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
964 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
965 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
966 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
967 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
968 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
969 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
970 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
971 ; GFX7-NEXT: s_setpc_b64 s[30:31]
973 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
975 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
977 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
978 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2
979 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
980 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
981 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
982 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
983 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
984 ; GFX9-NEXT: s_setpc_b64 s[30:31]
986 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
988 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
989 ; GFX12-NEXT: s_wait_expcnt 0x0
990 ; GFX12-NEXT: s_wait_samplecnt 0x0
991 ; GFX12-NEXT: s_wait_bvhcnt 0x0
992 ; GFX12-NEXT: s_wait_kmcnt 0x0
993 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
994 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
995 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
996 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
997 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
998 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
999 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
1000 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1001 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1002 %cmp = fcmp uge <2 x half> %a, %b
1003 %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp uge + select on <2 x half>, with only the nnan flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by one
; v_max_legacy_f32 per element. gfx900 and gfx1200 keep a v_cmp_nlt_f16 +
; v_cndmask_b32 pair per half and repack with v_perm_b32; no packed max
; instruction is expected here.
1007 define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) {
1008 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
1010 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1012 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1013 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1014 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1015 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1016 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1017 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1018 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1019 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
1020 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
1021 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1023 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
1025 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1026 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1027 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1028 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2
1029 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1030 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
1031 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1032 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1033 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
1034 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1036 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
1038 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1039 ; GFX12-NEXT: s_wait_expcnt 0x0
1040 ; GFX12-NEXT: s_wait_samplecnt 0x0
1041 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1042 ; GFX12-NEXT: s_wait_kmcnt 0x0
1043 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1044 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1045 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1046 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
1047 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1048 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
1049 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
1050 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1051 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1052 %cmp = fcmp uge <2 x half> %a, %b
1053 %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp uge + select on <2 x half>, with only the nsz flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by one
; v_max_legacy_f32 per element. gfx900 and gfx1200 keep a v_cmp_nlt_f16 +
; v_cndmask_b32 pair per half and repack with v_perm_b32; no packed max
; instruction is expected here.
1057 define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) {
1058 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
1060 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1061 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1062 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1063 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1064 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1065 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1066 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1067 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1068 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1069 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
1070 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
1071 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1073 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
1075 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1077 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1078 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2
1079 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1080 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
1081 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1082 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1083 ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
1084 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1086 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
1088 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1089 ; GFX12-NEXT: s_wait_expcnt 0x0
1090 ; GFX12-NEXT: s_wait_samplecnt 0x0
1091 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1092 ; GFX12-NEXT: s_wait_kmcnt 0x0
1093 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1094 ; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1095 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1096 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
1097 ; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1098 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
1099 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
1100 ; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1101 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1102 %cmp = fcmp uge <2 x half> %a, %b
1103 %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp uge + select on <2 x half>, with both nnan and nsz on the select.
; Expected output: gfx700 still emits the f16/f32 conversions followed by one
; v_max_legacy_f32 per element. gfx900 selects a single v_pk_max_f16 and
; gfx1200 a single v_pk_max_num_f16.
1107 define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) {
1108 ; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
1110 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1111 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1112 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1113 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1114 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1115 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1116 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1117 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1118 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1119 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
1120 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
1121 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1123 ; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
1125 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
1127 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1129 ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
1131 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1132 ; GFX12-NEXT: s_wait_expcnt 0x0
1133 ; GFX12-NEXT: s_wait_samplecnt 0x0
1134 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1135 ; GFX12-NEXT: s_wait_kmcnt 0x0
1136 ; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1
1137 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1138 %cmp = fcmp uge <2 x half> %a, %b
1139 %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; fcmp ule + select on <4 x half>, with no fast-math flags on the select.
; Expected output: gfx700 emits v_cvt_f16_f32 / v_cvt_f32_f16 conversions on
; all eight inputs and then four v_min_legacy_f32. gfx900 and gfx1200 emit
; four v_cmp_ngt_f16 + v_cndmask_b32 pairs and repack the halves with two
; v_perm_b32; no packed min instruction is expected here.
1143 define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %b) {
1144 ; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
1146 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1147 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1148 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1149 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1150 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1151 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1152 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1153 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1154 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1155 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1156 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1157 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1158 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1159 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1160 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1161 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1162 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1163 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
1164 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
1165 ; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
1166 ; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
1167 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1169 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
1171 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1173 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1174 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1175 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1176 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
1177 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1178 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
1179 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1180 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
1181 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1182 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
1183 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1184 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1185 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1186 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1187 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1189 ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
1191 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1192 ; GFX12-NEXT: s_wait_expcnt 0x0
1193 ; GFX12-NEXT: s_wait_samplecnt 0x0
1194 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1195 ; GFX12-NEXT: s_wait_kmcnt 0x0
1196 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1197 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1198 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1199 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1200 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1201 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
1202 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1203 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
1204 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1205 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
1206 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1207 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
1208 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1209 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1210 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1211 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1212 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1213 %cmp = fcmp ule <4 x half> %a, %b
1214 %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp ule + select on <4 x half>, with only the nnan flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by four
; v_min_legacy_f32. gfx900 and gfx1200 emit four v_cmp_ngt_f16 +
; v_cndmask_b32 pairs and repack the halves with two v_perm_b32; no packed
; min instruction is expected here.
1218 define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) {
1219 ; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
1221 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1223 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1224 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1225 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1226 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1227 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1228 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1229 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1230 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1231 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1232 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1233 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1234 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1235 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1236 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1237 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1238 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
1239 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
1240 ; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
1241 ; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
1242 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1244 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
1246 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1248 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1249 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1250 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1251 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
1252 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1253 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
1254 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1255 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
1256 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1257 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
1258 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1259 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1260 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1261 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1262 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1264 ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
1266 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1267 ; GFX12-NEXT: s_wait_expcnt 0x0
1268 ; GFX12-NEXT: s_wait_samplecnt 0x0
1269 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1270 ; GFX12-NEXT: s_wait_kmcnt 0x0
1271 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1272 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1273 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1274 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1275 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1276 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
1277 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1278 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
1279 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1280 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
1281 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1282 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
1283 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1284 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1285 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1286 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1287 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1288 %cmp = fcmp ule <4 x half> %a, %b
1289 %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp ule + select on <4 x half>, with only the nsz flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by four
; v_min_legacy_f32. gfx900 and gfx1200 emit four v_cmp_ngt_f16 +
; v_cndmask_b32 pairs and repack the halves with two v_perm_b32; no packed
; min instruction is expected here.
1293 define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) {
1294 ; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
1296 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1298 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1299 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1300 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1301 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1302 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1303 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1304 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1305 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1306 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1307 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1308 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1309 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1310 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1311 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1312 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1313 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
1314 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
1315 ; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
1316 ; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
1317 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1319 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
1321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1323 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1324 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1325 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1326 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
1327 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1328 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
1329 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1330 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
1331 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1332 ; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
1333 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1334 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1335 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1336 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1337 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1339 ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
1341 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1342 ; GFX12-NEXT: s_wait_expcnt 0x0
1343 ; GFX12-NEXT: s_wait_samplecnt 0x0
1344 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1345 ; GFX12-NEXT: s_wait_kmcnt 0x0
1346 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1347 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1348 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1349 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1350 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1351 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
1352 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1353 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
1354 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1355 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
1356 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1357 ; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
1358 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1359 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1360 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1361 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1362 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1363 %cmp = fcmp ule <4 x half> %a, %b
1364 %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp ule + select on <4 x half>, with both nnan and nsz on the select.
; Expected output: gfx700 still emits the f16/f32 conversions followed by
; four v_min_legacy_f32. gfx900 selects two v_pk_min_f16 and gfx1200 two
; v_pk_min_num_f16, one per 32-bit register pair of the inputs.
1368 define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) {
1369 ; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
1371 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1373 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1374 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1375 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1376 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1377 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1378 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1379 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1380 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1381 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1382 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1383 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1384 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1385 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1386 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1387 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1388 ; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
1389 ; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
1390 ; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
1391 ; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
1392 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1394 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
1396 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1397 ; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
1398 ; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
1399 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
1403 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1404 ; GFX12-NEXT: s_wait_expcnt 0x0
1405 ; GFX12-NEXT: s_wait_samplecnt 0x0
1406 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1407 ; GFX12-NEXT: s_wait_kmcnt 0x0
1408 ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
1409 ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
1410 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1411 %cmp = fcmp ule <4 x half> %a, %b
1412 %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp uge + select on <4 x half>, with no fast-math flags on the select.
; Expected output: gfx700 emits v_cvt_f16_f32 / v_cvt_f32_f16 conversions on
; all eight inputs and then four v_max_legacy_f32. gfx900 and gfx1200 emit
; four v_cmp_nlt_f16 + v_cndmask_b32 pairs and repack the halves with two
; v_perm_b32; no packed max instruction is expected here.
1416 define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %b) {
1417 ; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
1419 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1421 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1422 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1423 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1424 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1425 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1426 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1427 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1428 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1429 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1430 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1431 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1432 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1433 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1434 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1435 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1436 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
1437 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
1438 ; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
1439 ; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
1440 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1442 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
1444 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1446 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1447 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1448 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1449 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6
1450 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1451 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4
1452 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1453 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3
1454 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1455 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2
1456 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1457 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1458 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1459 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1460 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1462 ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
1464 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1465 ; GFX12-NEXT: s_wait_expcnt 0x0
1466 ; GFX12-NEXT: s_wait_samplecnt 0x0
1467 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1468 ; GFX12-NEXT: s_wait_kmcnt 0x0
1469 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1470 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1471 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1472 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1473 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1474 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
1475 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1476 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
1477 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1478 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
1479 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1480 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
1481 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1482 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1483 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1484 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1485 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1486 %cmp = fcmp uge <4 x half> %a, %b
1487 %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp uge + select on <4 x half>, with only the nnan flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by four
; v_max_legacy_f32. gfx900 and gfx1200 emit four v_cmp_nlt_f16 +
; v_cndmask_b32 pairs and repack the halves with two v_perm_b32; no packed
; max instruction is expected here.
1491 define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) {
1492 ; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
1494 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1495 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1496 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1497 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1498 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1499 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1500 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1501 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1502 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1503 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1504 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1505 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1506 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1507 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1508 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1509 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1510 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1511 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
1512 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
1513 ; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
1514 ; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
1515 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1517 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
1519 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1521 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1522 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1523 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1524 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6
1525 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1526 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4
1527 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1528 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3
1529 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1530 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2
1531 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1532 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1533 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1534 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1535 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1537 ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
1539 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1540 ; GFX12-NEXT: s_wait_expcnt 0x0
1541 ; GFX12-NEXT: s_wait_samplecnt 0x0
1542 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1543 ; GFX12-NEXT: s_wait_kmcnt 0x0
1544 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1545 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1546 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1547 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1548 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1549 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
1550 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1551 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
1552 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1553 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
1554 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1555 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
1556 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1557 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1558 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1559 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1560 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1561 %cmp = fcmp uge <4 x half> %a, %b
1562 %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fcmp uge + select on <4 x half>, with only the nsz flag on the select.
; Expected output: gfx700 emits the f16/f32 conversions followed by four
; v_max_legacy_f32. gfx900 and gfx1200 emit four v_cmp_nlt_f16 +
; v_cndmask_b32 pairs and repack the halves with two v_perm_b32; no packed
; max instruction is expected here.
1566 define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) {
1567 ; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
1569 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1570 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1571 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1572 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1573 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1574 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1575 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1576 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1577 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1578 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1579 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1580 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1581 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1582 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1583 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1584 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1585 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1586 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
1587 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
1588 ; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
1589 ; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
1590 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1592 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
1594 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1596 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1597 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1598 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1599 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6
1600 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1601 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4
1602 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1603 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3
1604 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1605 ; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2
1606 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1607 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1608 ; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4
1609 ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
1610 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1612 ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
1614 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1615 ; GFX12-NEXT: s_wait_expcnt 0x0
1616 ; GFX12-NEXT: s_wait_samplecnt 0x0
1617 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1618 ; GFX12-NEXT: s_wait_kmcnt 0x0
1619 ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1620 ; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1621 ; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1622 ; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1623 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1624 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
1625 ; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1626 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
1627 ; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
1628 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
1629 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
1630 ; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
1631 ; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
1632 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1633 ; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
1634 ; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
1635 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1636 %cmp = fcmp uge <4 x half> %a, %b
1637 %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fmax pattern on <4 x half>: an unordered-or-greater-equal compare feeding a
; select that carries both the nnan and nsz fast-math flags.
; Expected codegen, per the checks below:
;   - GFX7 converts each element f32 -> f16 -> f32 and then uses one
;     v_max_legacy_f32 per element.
;   - GFX9 folds the compare+select into two packed v_pk_max_f16.
;   - GFX12 folds it into two packed v_pk_max_num_f16.
; NOTE(review): the GFX7 sequence round-trips every operand through f16 before
; the f32 max, presumably because that target has no native f16 max -- confirm.
1641 define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) {
1642 ; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
1644 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1645 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1646 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1647 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1648 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1649 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1650 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1651 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1652 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1653 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1654 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1655 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1656 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1657 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1658 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1659 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1660 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1661 ; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
1662 ; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
1663 ; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
1664 ; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
1665 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1667 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
1669 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
1671 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
1672 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1674 ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
1676 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1677 ; GFX12-NEXT: s_wait_expcnt 0x0
1678 ; GFX12-NEXT: s_wait_samplecnt 0x0
1679 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1680 ; GFX12-NEXT: s_wait_kmcnt 0x0
1681 ; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2
1682 ; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3
1683 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1684 %cmp = fcmp uge <4 x half> %a, %b
1685 %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; fmin pattern on f32 where the select carries only nsz, but both select
; operands are produced by `fadd nnan`, so the no-NaN property comes from the
; sources rather than from a flag on the select.
; Expected codegen, per the checks below: every target emits the two adds
; followed by a plain min (v_min_f32 on GFX7 and GFX9, v_min_num_f32 on GFX12);
; no compare + cndmask sequence and no v_min_legacy_f32 is expected.
; NOTE(review): the function name says "uge", but the predicate used below is
; `fcmp ule`, which is what matches the fmin expectation. The name looks like a
; copy of the fmax sibling; renaming would also require updating the LABEL
; checks, so it is left as-is here.
1689 define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) {
1690 ; GFX7-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
1692 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v0
1694 ; GFX7-NEXT: v_add_f32_e32 v1, v1, v1
1695 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
1696 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1698 ; GFX9-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
1700 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1701 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
1702 ; GFX9-NEXT: v_add_f32_e32 v1, v1, v1
1703 ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
1704 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1706 ; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
1708 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1709 ; GFX12-NEXT: s_wait_expcnt 0x0
1710 ; GFX12-NEXT: s_wait_samplecnt 0x0
1711 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1712 ; GFX12-NEXT: s_wait_kmcnt 0x0
1713 ; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
1714 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1715 ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
1716 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1717 %a = fadd nnan float %arg0, %arg0
1718 %b = fadd nnan float %arg1, %arg1
1719 %cmp = fcmp ule float %a, %b
1720 %val = select nsz i1 %cmp, float %a, float %b
; fmax pattern on f32 where the select carries only nsz, but both select
; operands are produced by `fadd nnan`, so the no-NaN property comes from the
; sources rather than from a flag on the select. The compare is `fcmp uge`.
; Expected codegen, per the checks below: every target emits the two adds
; followed by a plain max (v_max_f32 on GFX7 and GFX9, v_max_num_f32 on GFX12);
; no compare + cndmask sequence and no v_max_legacy_f32 is expected.
1724 define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) {
1725 ; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
1727 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1728 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v0
1729 ; GFX7-NEXT: v_add_f32_e32 v1, v1, v1
1730 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
1731 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1733 ; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
1735 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
1737 ; GFX9-NEXT: v_add_f32_e32 v1, v1, v1
1738 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
1739 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1741 ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
1743 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1744 ; GFX12-NEXT: s_wait_expcnt 0x0
1745 ; GFX12-NEXT: s_wait_samplecnt 0x0
1746 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1747 ; GFX12-NEXT: s_wait_kmcnt 0x0
1748 ; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
1749 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1750 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
1751 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1752 %a = fadd nnan float %arg0, %arg0
1753 %b = fadd nnan float %arg1, %arg1
1754 %cmp = fcmp uge float %a, %b
1755 %val = select nsz i1 %cmp, float %a, float %b