1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
5 ; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
8 ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
; Scalar half case: the body evaluates `fcmp ugt %a, %b` and selects %a when
; the compare holds, %b otherwise.
; The assertions below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by
; hand. Per the RUN lines, the *-NNAN configurations add
; -enable-no-nans-fp-math and -enable-no-signed-zeros-fp-math.
; Expected shape of the output, as recorded below: compare + v_cndmask for the
; gfx900 and fiji default runs, v_max_legacy_f32 (after f16<->f32 converts) for
; the default run without -mcpu, and a plain v_max in every *-NNAN run.
11 define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
12 ; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
14 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
16 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
17 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
19 ; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
21 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX9-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
23 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
25 ; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
27 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
29 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
30 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
32 ; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
34 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
36 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
38 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
40 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
42 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
43 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
44 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
45 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v1, v0
46 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
48 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
50 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
52 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
53 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
54 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
55 ; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v1
56 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
; ugt is an unordered predicate: it is also true when either operand is NaN.
57 %cmp = fcmp ugt half %a, %b
58 %val = select i1 %cmp, half %a, half %b
; <2 x half> case: lane-wise `fcmp ugt %a, %b` followed by a lane-wise select
; of %a (compare true) or %b (compare false).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; As recorded below, the gfx900 and fiji default runs split the two lanes,
; compare/select each one and re-pack the result, while the gfx900 *-NNAN run
; collapses to a single v_pk_max_f16. The runs without -mcpu operate on four
; f32 registers (v0-v3) after f16<->f32 converts.
62 define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
63 ; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
65 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
67 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
68 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v2
69 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
70 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
71 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
72 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
73 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
74 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
76 ; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
78 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v1
80 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
82 ; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
84 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
86 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
87 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v2
88 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
89 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
90 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
91 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
92 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
93 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
95 ; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
97 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; VI-NNAN-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
99 ; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
100 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
101 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
103 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
105 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
107 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
108 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
109 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
110 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
111 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
112 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
113 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
114 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v2, v0
115 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v3, v1
116 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
118 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
120 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
122 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
123 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
124 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
125 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
126 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
127 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
128 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
129 ; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v2
130 ; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v3
131 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
; ugt is an unordered predicate: a lane is also true when either operand is NaN.
132 %cmp = fcmp ugt <2 x half> %a, %b
133 %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
; <3 x half> case (odd lane count): lane-wise `fcmp ugt %a, %b` followed by a
; lane-wise select of %a (compare true) or %b (compare false).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; As recorded below, the gfx900 and fiji runs take %a in v0-v1 and %b in v2-v3;
; only v0 is re-packed from two selected halves, while v1 is produced by a
; single compare/select (or a single max in the *-NNAN runs). The runs without
; -mcpu use one f32 register per lane (v0-v2 against v3-v5).
137 define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
138 ; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
139 ; GFX9-SAFE: ; %bb.0:
140 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
142 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
143 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
144 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
145 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
146 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
147 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
148 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
149 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
150 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
151 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
153 ; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
154 ; GFX9-NNAN: ; %bb.0:
155 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
157 ; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
158 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
160 ; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
162 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
164 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
165 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
166 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
167 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
168 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
169 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
170 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
171 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
172 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
173 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
175 ; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
177 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; VI-NNAN-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
179 ; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
180 ; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
181 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
182 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
184 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
186 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
188 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
189 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
190 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
191 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
192 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
193 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
194 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
195 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
196 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
197 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
198 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
199 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v3, v0
200 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v4, v1
201 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v5, v2
202 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
204 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
206 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
208 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
209 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
210 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
211 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
212 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
213 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
214 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
215 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
216 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
217 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
218 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
219 ; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v3
220 ; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v4
221 ; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v5
222 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
; ugt is an unordered predicate: a lane is also true when either operand is NaN.
223 %cmp = fcmp ugt <3 x half> %a, %b
224 %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
; <4 x half> case: lane-wise `fcmp ugt %a, %b` followed by a lane-wise select
; of %a (compare true) or %b (compare false).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; As recorded below, the gfx900 and fiji runs take %a in v0-v1 and %b in v2-v3
; (two halves per register); the default runs compare/select every half and
; re-pack both result registers, and the gfx900 *-NNAN run needs only two
; v_pk_max_f16. The runs without -mcpu use one f32 register per lane
; (v0-v3 against v4-v7).
228 define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
229 ; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
230 ; GFX9-SAFE: ; %bb.0:
231 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
233 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
234 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v7, v6
235 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
236 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
237 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
238 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
239 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
240 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
241 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
242 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
243 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
244 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0xffff
245 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v2, v0
246 ; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v2, v1
247 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
248 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v6, 16, v1
249 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
251 ; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
252 ; GFX9-NNAN: ; %bb.0:
253 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
255 ; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
256 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
258 ; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
260 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
262 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
263 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v7, v6
264 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
265 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
266 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
267 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
268 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
269 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
270 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
271 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
272 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
273 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
274 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
275 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
276 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
277 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
279 ; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
281 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282 ; VI-NNAN-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
283 ; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
284 ; VI-NNAN-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
285 ; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
286 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
287 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
288 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
290 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
292 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
294 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
295 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
296 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
297 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
298 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
299 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
300 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
301 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
302 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
303 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
304 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
305 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
306 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
307 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
308 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
309 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v4, v0
310 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v5, v1
311 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v6, v2
312 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v7, v3
313 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
315 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
317 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
319 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
320 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
321 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
322 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
323 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
324 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
325 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
326 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
327 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
328 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
329 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
330 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
331 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
332 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
333 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
334 ; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v4
335 ; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v5
336 ; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v6
337 ; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v7
338 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
; ugt is an unordered predicate: a lane is also true when either operand is NaN.
339 %cmp = fcmp ugt <4 x half> %a, %b
340 %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
; <8 x half> case: lane-wise `fcmp ugt %a, %b` followed by a lane-wise select
; of %a (compare true) or %b (compare false).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; As recorded below, the gfx900 and fiji runs take %a in v0-v3 and %b in v4-v7
; (two halves per register); the default runs compare/select all eight halves
; and re-pack four result registers, and the gfx900 *-NNAN run needs only four
; v_pk_max_f16. The runs without -mcpu use one f32 register per lane
; (v0-v7 against v8-v15).
344 define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
345 ; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
346 ; GFX9-SAFE: ; %bb.0:
347 ; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
349 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
350 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v15, v14
351 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
352 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
353 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
354 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v13, v12
355 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
356 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
357 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
358 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v11, v10
359 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
360 ; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
361 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
362 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v9, v8
363 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
364 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v7
365 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
366 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v2, v6
367 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
368 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v5
369 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
370 ; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v4
371 ; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
372 ; GFX9-SAFE-NEXT: v_mov_b32_e32 v4, 0xffff
373 ; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v4, v0
374 ; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v4, v1
375 ; GFX9-SAFE-NEXT: v_and_b32_e32 v2, v4, v2
376 ; GFX9-SAFE-NEXT: v_and_b32_e32 v3, v4, v3
377 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v8, 16, v0
378 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v10, 16, v1
379 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v2, v12, 16, v2
380 ; GFX9-SAFE-NEXT: v_lshl_or_b32 v3, v14, 16, v3
381 ; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
383 ; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
384 ; GFX9-NNAN: ; %bb.0:
385 ; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v4
387 ; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v5
388 ; GFX9-NNAN-NEXT: v_pk_max_f16 v2, v2, v6
389 ; GFX9-NNAN-NEXT: v_pk_max_f16 v3, v3, v7
390 ; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
392 ; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
394 ; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
396 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
397 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v15, v14
398 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
399 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
400 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
401 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v13, v12
402 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
403 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
404 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
405 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v11, v10
406 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
407 ; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
408 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
409 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v9, v8
410 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
411 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v7
412 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
413 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v2, v6
414 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
415 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v5
416 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
417 ; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v4
418 ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
419 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
420 ; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
421 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
422 ; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
423 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
424 ; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
425 ; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
426 ; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
427 ; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
429 ; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
431 ; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432 ; VI-NNAN-NEXT: v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
433 ; VI-NNAN-NEXT: v_max_f16_e32 v3, v3, v7
434 ; VI-NNAN-NEXT: v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
435 ; VI-NNAN-NEXT: v_max_f16_e32 v2, v2, v6
436 ; VI-NNAN-NEXT: v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
437 ; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v5
438 ; VI-NNAN-NEXT: v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
439 ; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v4
440 ; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
441 ; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
442 ; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
443 ; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
444 ; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
446 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
448 ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
450 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
451 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
452 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
453 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
454 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
455 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
456 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
457 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
458 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
459 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
460 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
461 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
462 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
463 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
464 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
465 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
466 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
467 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
468 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
469 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
470 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
471 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
472 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
473 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
474 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
475 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
476 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
477 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
478 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
479 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
480 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
481 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v8, v0
482 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v9, v1
483 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v10, v2
484 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v11, v3
485 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v4, v12, v4
486 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v5, v13, v5
487 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v6, v14, v6
488 ; SI-SAFE-NEXT: v_max_legacy_f32_e32 v7, v15, v7
489 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
491 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
493 ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
495 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
496 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
497 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
498 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
499 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
500 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
501 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
502 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
503 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
504 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
505 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
506 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
507 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
508 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
509 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
510 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
511 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
512 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
513 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
514 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
515 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
516 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
517 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
518 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
519 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
520 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
521 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
522 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
523 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
524 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
525 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
526 ; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v8
527 ; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v9
528 ; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v10
529 ; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v11
530 ; SI-NNAN-NEXT: v_max_f32_e32 v4, v4, v12
531 ; SI-NNAN-NEXT: v_max_f32_e32 v5, v5, v13
532 ; SI-NNAN-NEXT: v_max_f32_e32 v6, v6, v14
533 ; SI-NNAN-NEXT: v_max_f32_e32 v7, v7, v15
534 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
; ugt is an unordered predicate: a lane is also true when either operand is NaN.
535 %cmp = fcmp ugt <8 x half> %a, %b
536 %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
; Attribute group #0 is referenced by every test function defined above and
; contains only `nounwind`; the no-NaNs / no-signed-zeros behaviour is selected
; by llc flags in the RUN lines, not by function attributes.
540 attributes #0 = { nounwind }