1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
5 ; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
8 ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
11 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SAFE %s
12 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s
; Scalar half case of the "legacy fmin" select pattern: the body picks %a when
; `fcmp ule half %a, %b` holds and %b otherwise.
; Per the autogenerated assertions below:
;   * gfx900 / fiji / gfx1100 without the no-NaNs flags expect a
;     v_cmp_ngt_f16 followed by v_cndmask_b32.
;   * The same targets with -enable-no-nans-fp-math expect a single v_min_f16.
;   * The runs with no -mcpu convert both operands with v_cvt_f16_f32 and
;     v_cvt_f32_f16, then expect v_min_legacy_f32 (or v_min_f32 with the
;     no-NaNs flags).
15 define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
16 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_f16:
18 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
20 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
21 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
23 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_f16:
25 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GFX9-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
27 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
29 ; VI-SAFE-LABEL: test_fmin_legacy_ule_f16:
31 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
33 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
34 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
36 ; VI-NNAN-LABEL: test_fmin_legacy_ule_f16:
38 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
40 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
42 ; SI-SAFE-LABEL: test_fmin_legacy_ule_f16:
44 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
46 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
47 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
48 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
49 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v1, v0
50 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
52 ; SI-NNAN-LABEL: test_fmin_legacy_ule_f16:
54 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
56 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
57 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
58 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
59 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v1
60 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
62 ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_f16:
63 ; GFX11-SAFE: ; %bb.0:
64 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
66 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
67 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
69 ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_f16:
70 ; GFX11-NNAN: ; %bb.0:
71 ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; GFX11-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
73 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
74 %cmp = fcmp ule half %a, %b
75 %val = select i1 %cmp, half %a, half %b
; <2 x half> case of the "legacy fmin" select pattern: element-wise
; `fcmp ule %a, %b` feeding a vector select of %a over %b.
; Per the autogenerated assertions below:
;   * gfx900 / gfx1100 without the no-NaNs flags shift the high halves out with
;     v_lshrrev_b32 by 16, expect one v_cmp_ngt_f16 + v_cndmask_b32 pair per
;     element, and repack with v_perm_b32 using the 0x5040100 selector.
;   * fiji without the no-NaNs flags expects the same compare/select pairs but
;     repacks with v_lshlrev_b32 plus an SDWA v_or_b32.
;   * With -enable-no-nans-fp-math, gfx900 / gfx1100 expect one v_pk_min_f16;
;     fiji expects an SDWA v_min_f16 for the high half, a v_min_f16 for the
;     low half, and a v_or_b32 to combine them.
;   * The runs with no -mcpu work on four VGPRs (v0-v3) converted through
;     f16 and back to f32, with one v_min_legacy_f32 / v_min_f32 per element.
79 define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
80 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
82 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
84 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
85 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
86 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
87 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
88 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
89 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
90 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v2, v0, s4
91 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
93 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
95 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
97 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
99 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
101 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
103 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
104 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
105 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
106 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
107 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
108 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
109 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
110 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
112 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
114 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
116 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
117 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
118 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
120 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
122 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
124 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
125 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
126 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
127 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
128 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
129 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
130 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
131 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v2, v0
132 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v3, v1
133 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
135 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
137 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
139 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
140 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
141 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
142 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
143 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
144 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
145 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
146 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v2
147 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v3
148 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
150 ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
151 ; GFX11-SAFE: ; %bb.0:
152 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
154 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
155 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
156 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
157 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
158 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
159 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
160 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
161 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
163 ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
164 ; GFX11-NNAN: ; %bb.0:
165 ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
167 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
168 %cmp = fcmp ule <2 x half> %a, %b
169 %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; <3 x half> (odd element count) case of the "legacy fmin" select pattern:
; element-wise `fcmp ule %a, %b` feeding a vector select of %a over %b.
; Per the autogenerated assertions below:
;   * gfx900 / fiji / gfx1100 without the no-NaNs flags expect three
;     v_cmp_ngt_f16 + v_cndmask_b32 pairs (high half of v0/v2, low half of
;     v0/v2, and v1/v3); only v0 is repacked afterwards (v_perm_b32 on
;     gfx900 / gfx1100, v_lshlrev_b32 + SDWA v_or_b32 on fiji).
;   * With -enable-no-nans-fp-math, gfx900 / gfx1100 expect two v_pk_min_f16;
;     fiji expects one SDWA v_min_f16, two v_min_f16 and one v_or_b32.
;   * The runs with no -mcpu work on six VGPRs (v0-v5) converted through
;     f16 and back to f32, with one v_min_legacy_f32 / v_min_f32 per element.
173 define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
174 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
175 ; GFX9-SAFE: ; %bb.0:
176 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
178 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
179 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
180 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
181 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
182 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
183 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
184 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
185 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
186 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v4, v0, s4
187 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
189 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
190 ; GFX9-NNAN: ; %bb.0:
191 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
193 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
194 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
196 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
198 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
200 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
201 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
202 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
203 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
204 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
205 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
206 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
207 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
208 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
209 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
211 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
213 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
215 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
216 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
217 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
218 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
220 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
222 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
224 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
225 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
226 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
227 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
228 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
229 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
230 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
231 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
232 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
233 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
234 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
235 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v3, v0
236 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v4, v1
237 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v5, v2
238 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
240 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
242 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
244 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
245 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
246 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
247 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
248 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
249 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
250 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
251 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
252 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
253 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
254 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
255 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v3
256 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v4
257 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v5
258 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
260 ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
261 ; GFX11-SAFE: ; %bb.0:
262 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
264 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
265 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
266 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
267 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
268 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
269 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc_lo
270 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
271 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
272 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
273 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
275 ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
276 ; GFX11-NNAN: ; %bb.0:
277 ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
279 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
280 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
281 %cmp = fcmp ule <3 x half> %a, %b
282 %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
; <4 x half> case of the "legacy fmin" select pattern: element-wise
; `fcmp ule %a, %b` feeding a vector select of %a over %b.
; Per the autogenerated assertions below:
;   * gfx900 / fiji / gfx1100 without the no-NaNs flags expect four
;     v_cmp_ngt_f16 + v_cndmask_b32 pairs, then repack v0 and v1
;     (v_perm_b32 on gfx900 / gfx1100, v_lshlrev_b32 + SDWA v_or_b32 on fiji).
;   * With -enable-no-nans-fp-math, gfx900 / gfx1100 expect two v_pk_min_f16;
;     fiji expects two SDWA v_min_f16, two v_min_f16 and two v_or_b32.
;   * The runs with no -mcpu work on eight VGPRs (v0-v7) converted through
;     f16 and back to f32, with one v_min_legacy_f32 / v_min_f32 per element.
286 define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
287 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
288 ; GFX9-SAFE: ; %bb.0:
289 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
291 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
292 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
293 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
294 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
295 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
296 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
297 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
298 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
299 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
300 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
301 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
302 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
303 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v4, v0, s4
304 ; GFX9-SAFE-NEXT: v_perm_b32 v1, v6, v1, s4
305 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
307 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
308 ; GFX9-NNAN: ; %bb.0:
309 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
311 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
312 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
314 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
316 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
318 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
319 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
320 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
321 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
322 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
323 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
324 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
325 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
326 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
327 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
328 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
329 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
330 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
331 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
332 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
333 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
335 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
337 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338 ; VI-NNAN-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
339 ; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
340 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
341 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
342 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
343 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
344 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
346 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
348 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
350 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
351 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
352 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
353 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
354 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
355 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
356 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
357 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
358 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
359 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
360 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
361 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
362 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
363 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
364 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
365 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0
366 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1
367 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2
368 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3
369 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
371 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
373 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
375 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
376 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
377 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
378 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
379 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
380 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
381 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
382 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
383 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
384 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
385 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
386 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
387 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
388 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
389 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
390 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4
391 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5
392 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6
393 ; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7
394 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
396 ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
397 ; GFX11-SAFE: ; %bb.0:
398 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
400 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1
401 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2
402 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v0
403 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
404 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
405 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
406 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
407 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
408 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
409 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
410 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
411 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
412 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
413 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
414 ; GFX11-SAFE-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
415 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
417 ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
418 ; GFX11-NNAN: ; %bb.0:
419 ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
421 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
422 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
423 %cmp = fcmp ule <4 x half> %a, %b
424 %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; <8 x half> case of the "legacy fmin" select pattern: element-wise
; `fcmp ule %a, %b` feeding a vector select of %a over %b.
; Per the autogenerated assertions below:
;   * gfx900 / fiji / gfx1100 without the no-NaNs flags expect eight
;     v_cmp_ngt_f16 + v_cndmask_b32 pairs, then repack v0-v3
;     (v_perm_b32 on gfx900 / gfx1100, v_lshlrev_b32 + SDWA v_or_b32 on fiji).
;   * With -enable-no-nans-fp-math, gfx900 / gfx1100 expect four v_pk_min_f16;
;     fiji expects four SDWA v_min_f16, four v_min_f16 and four v_or_b32.
;   * The runs with no -mcpu work on sixteen VGPRs (v0-v15) converted through
;     f16 and back to f32, with one v_min_legacy_f32 / v_min_f32 per element.
428 define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
429 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
430 ; GFX9-SAFE: ; %bb.0:
431 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
433 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
434 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
435 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
436 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
437 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
438 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
439 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
440 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
441 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
442 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
443 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
444 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
445 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
446 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
447 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
448 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
449 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
450 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
451 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
452 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
453 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
454 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
455 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
456 ; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
457 ; GFX9-SAFE-NEXT: v_perm_b32 v0, v8, v0, s4
458 ; GFX9-SAFE-NEXT: v_perm_b32 v1, v10, v1, s4
459 ; GFX9-SAFE-NEXT: v_perm_b32 v2, v12, v2, s4
460 ; GFX9-SAFE-NEXT: v_perm_b32 v3, v14, v3, s4
461 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
463 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
464 ; GFX9-NNAN: ; %bb.0:
465 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
467 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
468 ; GFX9-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
469 ; GFX9-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
470 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
472 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
474 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
476 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
477 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
478 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
479 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
480 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
481 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
482 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
483 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
484 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
485 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
486 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
487 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
488 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
489 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
490 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
491 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
492 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
493 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
494 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
495 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
496 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
497 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
498 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
499 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
500 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
501 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
502 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
503 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
504 ; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
505 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
506 ; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
507 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
509 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
511 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; VI-NNAN-NEXT: v_min_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
513 ; VI-NNAN-NEXT: v_min_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
514 ; VI-NNAN-NEXT: v_min_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
515 ; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
516 ; VI-NNAN-NEXT: v_min_f16_e32 v3, v3, v7
517 ; VI-NNAN-NEXT: v_min_f16_e32 v2, v2, v6
518 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
519 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
520 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
521 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
522 ; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
523 ; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
524 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
526 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
528 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
530 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
531 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
532 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
533 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
534 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
535 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
536 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
537 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
538 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
539 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
540 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
541 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
542 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
543 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
544 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
545 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
546 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
547 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
548 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
549 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
550 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
551 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
552 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
553 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
554 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
555 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
556 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
557 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
558 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
559 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
560 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
561 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0
562 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1
563 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2
564 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3
565 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4
566 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5
567 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6
568 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7
569 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
571 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
573 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
575 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
576 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
577 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
578 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
579 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
580 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
581 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
582 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
583 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
584 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
585 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
586 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
587 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
588 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
589 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
590 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
591 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
592 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
593 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
594 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
595 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
596 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
597 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
598 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
599 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
600 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
601 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
602 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
603 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
604 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
605 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
606 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8
607 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9
608 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10
609 ; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11
610 ; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12
611 ; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13
612 ; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14
613 ; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15
614 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
616 ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
617 ; GFX11-SAFE: ; %bb.0:
618 ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7
620 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3
621 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
622 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
623 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v5
624 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v1
625 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v11, v10
626 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
627 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
628 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc_lo
629 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v13, v12
630 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v11, v12, v13, vcc_lo
631 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v15, v14
632 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc_lo
633 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v9, v8
634 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo
635 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v2, v6
636 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
637 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v4
638 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
639 ; GFX11-SAFE-NEXT: v_perm_b32 v2, v11, v2, 0x5040100
640 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
641 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v5
642 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
643 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v7
644 ; GFX11-SAFE-NEXT: v_perm_b32 v1, v12, v1, 0x5040100
645 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
646 ; GFX11-SAFE-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
647 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2)
648 ; GFX11-SAFE-NEXT: v_perm_b32 v3, v10, v3, 0x5040100
649 ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
651 ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
652 ; GFX11-NNAN: ; %bb.0:
653 ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
655 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
656 ; GFX11-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
657 ; GFX11-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
658 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
659 %cmp = fcmp ule <8 x half> %a, %b
660 %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
; Attribute group #0 is attached to every test function visible in this file
; and marks them nounwind.
664 attributes #0 = { nounwind }