1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
5 ; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
8 ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
; Scalar half case of the "legacy fmin" pattern: %cmp = fcmp ule %a, %b, then
; select %cmp, %a, %b -- %a is picked when the compare is unordered or
; %a <= %b, otherwise %b.
; The *-SAFE prefixes (RUN lines without the no-NaNs / no-signed-zeros flags)
; check for a v_cmp_ngt_f16 + v_cndmask_b32 pair under GFX9/VI, and for
; v_min_legacy_f32 preceded by f32->f16->f32 conversions under SI.
; The *-NNAN prefixes check for a plain v_min_f16 (GFX9/VI) or v_min_f32 (SI).
12 define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
13 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_f16:
15 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
17 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
18 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
20 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_f16:
22 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX9-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
24 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
26 ; VI-SAFE-LABEL: test_fmin_legacy_ule_f16:
28 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
30 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
31 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
33 ; VI-NNAN-LABEL: test_fmin_legacy_ule_f16:
35 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
37 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
39 ; SI-SAFE-LABEL: test_fmin_legacy_ule_f16:
41 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
43 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
44 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
45 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
46 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v1, v0
47 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
49 ; SI-NNAN-LABEL: test_fmin_legacy_ule_f16:
51 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
53 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
54 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
55 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
56 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v1
57 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
58 %cmp = fcmp ule half %a, %b
59 %val = select i1 %cmp, half %a, half %b
; <2 x half> case: element-wise fcmp ule %a, %b feeding a vector select of
; %a / %b.
; GFX9-SAFE / VI-SAFE checks: the high halves are shifted down with
; v_lshrrev_b32 by 16, each lane goes through v_cmp_ngt_f16 + v_cndmask_b32,
; and the two results are repacked (v_and_b32 + v_lshl_or_b32 on GFX9,
; v_lshlrev_b32 + v_or_b32_sdwa on VI).
; GFX9-NNAN checks a single v_pk_min_f16; VI-NNAN checks v_min_f16_sdwa for
; the high half, v_min_f16_e32 for the low half, and a v_or_b32 to combine.
; SI-* checks show f32->f16->f32 conversions on every operand register followed
; by one v_min_legacy_f32 (SAFE) or v_min_f32 (NNAN) per element.
63 define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
64 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
66 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
68 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
69 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
70 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
71 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
72 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
73 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
74 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
75 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
77 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
79 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
81 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
83 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
85 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
87 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
88 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
89 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
90 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
91 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
92 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
93 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
94 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
96 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
98 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
100 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
101 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
102 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
104 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
106 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
108 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
109 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
110 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
111 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
112 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
113 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
114 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
115 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v2, v0
116 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v3, v1
117 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
119 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
121 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
123 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
124 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
125 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
126 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
127 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
128 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
129 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
130 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v2
131 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v3
132 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
133 %cmp = fcmp ule <2 x half> %a, %b
134 %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; <3 x half> case (odd element count): element-wise fcmp ule %a, %b feeding a
; vector select of %a / %b.
; GFX9-SAFE / VI-SAFE checks: three v_cmp_ngt_f16 + v_cndmask_b32 pairs (the
; high half of v0/v2, then v1/v3, then the low half of v0/v2); only v0 is
; repacked afterwards.
; GFX9-NNAN checks two v_pk_min_f16 (v1,v3 and v0,v2). VI-NNAN checks one
; v_min_f16_sdwa for the high half of v0/v2 plus two v_min_f16_e32 and a
; v_or_b32 into v0.
; SI-* checks show f32->f16->f32 conversions on v0..v5 followed by three
; v_min_legacy_f32 (SAFE) or v_min_f32 (NNAN).
138 define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
139 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
140 ; GFX9-SAFE: ; %bb.0:
141 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
143 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
144 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
145 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
146 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
147 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
148 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
149 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
150 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
151 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
152 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
154 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
155 ; GFX9-NNAN: ; %bb.0:
156 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
158 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
159 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
161 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
163 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
165 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
166 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
167 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
168 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
169 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
170 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
171 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
172 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
173 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
174 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
176 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
178 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
180 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
181 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
182 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
183 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
185 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
187 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
189 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
190 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
191 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
192 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
193 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
194 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
195 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
196 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
197 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
198 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
199 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
200 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v3, v0
201 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v4, v1
202 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v5, v2
203 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
205 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
207 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
209 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
210 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
211 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
212 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
213 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
214 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
215 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
216 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
217 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
218 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
219 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
220 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v3
221 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v4
222 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v5
223 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
224 %cmp = fcmp ule <3 x half> %a, %b
225 %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
; <4 x half> case: element-wise fcmp ule %a, %b feeding a vector select of
; %a / %b.
; GFX9-SAFE / VI-SAFE checks: four v_cmp_ngt_f16 + v_cndmask_b32 pairs (high
; halves extracted with v_lshrrev_b32 by 16), then both result registers are
; repacked (v_and_b32 with a 0xffff mask held in a register + v_lshl_or_b32
; on GFX9, v_lshlrev_b32 + v_or_b32_sdwa on VI).
; GFX9-NNAN checks two v_pk_min_f16. VI-NNAN checks a v_min_f16_sdwa /
; v_min_f16_e32 pair per register followed by v_or_b32.
; SI-* checks show f32->f16->f32 conversions on v0..v7 followed by four
; v_min_legacy_f32 (SAFE) or v_min_f32 (NNAN).
229 define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
230 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
231 ; GFX9-SAFE: ; %bb.0:
232 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
234 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
235 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
236 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
237 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
238 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
239 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
240 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
241 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
242 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
243 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
244 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
245 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0xffff
246 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v2, v0
247 ; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v2, v1
248 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
249 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v6, 16, v1
250 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
252 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
253 ; GFX9-NNAN: ; %bb.0:
254 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
256 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
257 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
259 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
261 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
263 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
264 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
265 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
266 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
267 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
268 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
269 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
270 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
271 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
272 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
273 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
274 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
275 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
276 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
277 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
278 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
280 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
282 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; VI-NNAN-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
284 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
285 ; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
286 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
287 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
288 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
289 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
291 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
293 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
295 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
296 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
297 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
298 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
299 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
300 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
301 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
302 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
303 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
304 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
305 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
306 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
307 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
308 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
309 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
310 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0
311 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1
312 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2
313 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3
314 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
316 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
318 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
320 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
321 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
322 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
323 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
324 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
325 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
326 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
327 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
328 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
329 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
330 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
331 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
332 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
333 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
334 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
335 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4
336 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5
337 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6
338 ; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7
339 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
340 %cmp = fcmp ule <4 x half> %a, %b
341 %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; <8 x half> case: element-wise fcmp ule %a, %b feeding a vector select of
; %a / %b.
; GFX9-SAFE / VI-SAFE checks: eight v_cmp_ngt_f16 + v_cndmask_b32 pairs (high
; halves extracted with v_lshrrev_b32 by 16), then v0..v3 are repacked
; (v_and_b32 with a 0xffff mask held in a register + v_lshl_or_b32 on GFX9,
; v_lshlrev_b32 + v_or_b32_sdwa on VI).
; GFX9-NNAN checks four v_pk_min_f16. VI-NNAN checks a v_min_f16_sdwa /
; v_min_f16_e32 pair per register followed by v_or_b32.
; SI-* checks show f32->f16->f32 conversions on v0..v15 followed by eight
; v_min_legacy_f32 (SAFE) or v_min_f32 (NNAN).
345 define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
346 ; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
347 ; GFX9-SAFE: ; %bb.0:
348 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
350 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
351 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
352 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
353 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
354 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
355 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
356 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
357 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
358 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
359 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
360 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
361 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
362 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
363 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
364 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
365 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
366 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
367 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
368 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
369 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
370 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
371 ; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
372 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
373 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v4, 0xffff
374 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v4, v0
375 ; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v4, v1
376 ; GFX9-SAFE-NEXT: v_and_b32_e32 v2, v4, v2
377 ; GFX9-SAFE-NEXT: v_and_b32_e32 v3, v4, v3
378 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v8, 16, v0
379 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v10, 16, v1
380 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v2, v12, 16, v2
381 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v3, v14, 16, v3
382 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
384 ; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
385 ; GFX9-NNAN: ; %bb.0:
386 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
388 ; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
389 ; GFX9-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
390 ; GFX9-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
391 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
393 ; VI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
395 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
397 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
398 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
399 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
400 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
401 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
402 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
403 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
404 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
405 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
406 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
407 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
408 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
409 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
410 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
411 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
412 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
413 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
414 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
415 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
416 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
417 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
418 ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
419 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
420 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
421 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
422 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
423 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
424 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
425 ; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
426 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
427 ; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
428 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
430 ; VI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
432 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; VI-NNAN-NEXT: v_min_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
434 ; VI-NNAN-NEXT: v_min_f16_e32 v3, v3, v7
435 ; VI-NNAN-NEXT: v_min_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
436 ; VI-NNAN-NEXT: v_min_f16_e32 v2, v2, v6
437 ; VI-NNAN-NEXT: v_min_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
438 ; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
439 ; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
440 ; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
441 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
442 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
443 ; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
444 ; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
445 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
447 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
449 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
451 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
452 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
453 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
454 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
455 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
456 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
457 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
458 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
459 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
460 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
461 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
462 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
463 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
464 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
465 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
466 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
467 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
468 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
469 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
470 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
471 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
472 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
473 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
474 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
475 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
476 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
477 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
478 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
479 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
480 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
481 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
482 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0
483 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1
484 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2
485 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3
486 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4
487 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5
488 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6
489 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7
490 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
492 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
494 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
496 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
497 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
498 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
499 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
500 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
501 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
502 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
503 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
504 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
505 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
506 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
507 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
508 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
509 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
510 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
511 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
512 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
513 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
514 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
515 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
516 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
517 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
518 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
519 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
520 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
521 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
522 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
523 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
524 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
525 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
526 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
527 ; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8
528 ; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9
529 ; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10
530 ; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11
531 ; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12
532 ; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13
533 ; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14
534 ; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15
535 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
536 %cmp = fcmp ule <8 x half> %a, %b
537 %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
; Attribute group #0, referenced by the test_fmin_legacy_ule_* definitions
; in this file; it only sets nounwind.
541 attributes #0 = { nounwind }