1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; Baseline case: scalar half llvm.maximum with no fast-math flags, operands
; arriving in v0/v1.
; Expected lowering, as pinned by the checks below:
;   - gfx7: operands are round-tripped through f16/f32 conversions, then
;     v_max_f32 plus an ordered compare selects between the max and the
;     constant 0x7fc00000 (a quiet NaN in f32 encoding).
;   - gfx8, gfx9, gfx940, gfx10, gfx11: v_max_f16 plus an ordered compare
;     selecting 0x7e00 (a quiet NaN in f16 encoding) when unordered.
;   - gfx12: a single v_maximum_f16.
10 define half @v_maximum_f16(half %src0, half %src1) {
11 ; GFX7-LABEL: v_maximum_f16:
13 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
15 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
16 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
17 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
18 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
19 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
20 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
21 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
22 ; GFX7-NEXT: s_setpc_b64 s[30:31]
24 ; GFX8-LABEL: v_maximum_f16:
26 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
28 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
29 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
30 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
31 ; GFX8-NEXT: s_setpc_b64 s[30:31]
33 ; GFX9-LABEL: v_maximum_f16:
35 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
37 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
38 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
39 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
40 ; GFX9-NEXT: s_setpc_b64 s[30:31]
42 ; GFX940-LABEL: v_maximum_f16:
44 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
46 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
47 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
48 ; GFX940-NEXT: s_nop 1
49 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
52 ; GFX10-LABEL: v_maximum_f16:
54 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
56 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
57 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
58 ; GFX10-NEXT: s_setpc_b64 s[30:31]
60 ; GFX11-LABEL: v_maximum_f16:
62 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
64 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
65 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
66 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
67 ; GFX11-NEXT: s_setpc_b64 s[30:31]
69 ; GFX12-LABEL: v_maximum_f16:
71 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
72 ; GFX12-NEXT: s_wait_expcnt 0x0
73 ; GFX12-NEXT: s_wait_samplecnt 0x0
74 ; GFX12-NEXT: s_wait_bvhcnt 0x0
75 ; GFX12-NEXT: s_wait_kmcnt 0x0
76 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
77 ; GFX12-NEXT: s_setpc_b64 s[30:31]
78 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call itself carries the nnan flag.
; Expected lowering, as pinned by the checks below: on gfx7 through gfx11 the
; ordered compare and NaN select seen in the unflagged variant are absent and
; only the max instruction remains (v_max_f32 on gfx7, v_max_f16 elsewhere);
; gfx12 still emits v_maximum_f16.
82 define half @v_maximum_f16__nnan(half %src0, half %src1) {
83 ; GFX7-LABEL: v_maximum_f16__nnan:
85 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
87 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
88 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
89 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
90 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
91 ; GFX7-NEXT: s_setpc_b64 s[30:31]
93 ; GFX8-LABEL: v_maximum_f16__nnan:
95 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
97 ; GFX8-NEXT: s_setpc_b64 s[30:31]
99 ; GFX9-LABEL: v_maximum_f16__nnan:
101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
105 ; GFX940-LABEL: v_maximum_f16__nnan:
107 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
109 ; GFX940-NEXT: s_setpc_b64 s[30:31]
111 ; GFX10-LABEL: v_maximum_f16__nnan:
113 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
115 ; GFX10-NEXT: s_setpc_b64 s[30:31]
117 ; GFX11-LABEL: v_maximum_f16__nnan:
119 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
121 ; GFX11-NEXT: s_setpc_b64 s[30:31]
123 ; GFX12-LABEL: v_maximum_f16__nnan:
125 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
126 ; GFX12-NEXT: s_wait_expcnt 0x0
127 ; GFX12-NEXT: s_wait_samplecnt 0x0
128 ; GFX12-NEXT: s_wait_bvhcnt 0x0
129 ; GFX12-NEXT: s_wait_kmcnt 0x0
130 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
131 ; GFX12-NEXT: s_setpc_b64 s[30:31]
132 %op = call nnan half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call carries only the nsz flag.
; Expected lowering, as pinned by the checks below: the instruction sequences
; have the same shape as the unflagged scalar case, i.e. the ordered compare
; and NaN select are still present on gfx7 through gfx11, and gfx12 emits
; v_maximum_f16.
136 define half @v_maximum_f16__nsz(half %src0, half %src1) {
137 ; GFX7-LABEL: v_maximum_f16__nsz:
139 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
141 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
142 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
143 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
144 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
145 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
146 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
147 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
148 ; GFX7-NEXT: s_setpc_b64 s[30:31]
150 ; GFX8-LABEL: v_maximum_f16__nsz:
152 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
154 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
155 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
156 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
157 ; GFX8-NEXT: s_setpc_b64 s[30:31]
159 ; GFX9-LABEL: v_maximum_f16__nsz:
161 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
163 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
164 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
165 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
168 ; GFX940-LABEL: v_maximum_f16__nsz:
170 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
172 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
173 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
174 ; GFX940-NEXT: s_nop 1
175 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
176 ; GFX940-NEXT: s_setpc_b64 s[30:31]
178 ; GFX10-LABEL: v_maximum_f16__nsz:
180 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
182 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
183 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
184 ; GFX10-NEXT: s_setpc_b64 s[30:31]
186 ; GFX11-LABEL: v_maximum_f16__nsz:
188 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
190 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
191 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
192 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
193 ; GFX11-NEXT: s_setpc_b64 s[30:31]
195 ; GFX12-LABEL: v_maximum_f16__nsz:
197 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
198 ; GFX12-NEXT: s_wait_expcnt 0x0
199 ; GFX12-NEXT: s_wait_samplecnt 0x0
200 ; GFX12-NEXT: s_wait_bvhcnt 0x0
201 ; GFX12-NEXT: s_wait_kmcnt 0x0
202 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
203 ; GFX12-NEXT: s_setpc_b64 s[30:31]
204 %op = call nsz half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call carries both nnan and nsz.
; Expected lowering, as pinned by the checks below: a lone max instruction on
; gfx7 through gfx11 (no ordered compare, no NaN select), and v_maximum_f16
; on gfx12.
208 define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
209 ; GFX7-LABEL: v_maximum_f16__nnan_nsz:
211 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
213 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
214 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
215 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
216 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
217 ; GFX7-NEXT: s_setpc_b64 s[30:31]
219 ; GFX8-LABEL: v_maximum_f16__nnan_nsz:
221 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
223 ; GFX8-NEXT: s_setpc_b64 s[30:31]
225 ; GFX9-LABEL: v_maximum_f16__nnan_nsz:
227 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
229 ; GFX9-NEXT: s_setpc_b64 s[30:31]
231 ; GFX940-LABEL: v_maximum_f16__nnan_nsz:
233 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
235 ; GFX940-NEXT: s_setpc_b64 s[30:31]
237 ; GFX10-LABEL: v_maximum_f16__nnan_nsz:
239 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
241 ; GFX10-NEXT: s_setpc_b64 s[30:31]
243 ; GFX11-LABEL: v_maximum_f16__nnan_nsz:
245 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
247 ; GFX11-NEXT: s_setpc_b64 s[30:31]
249 ; GFX12-LABEL: v_maximum_f16__nnan_nsz:
251 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
252 ; GFX12-NEXT: s_wait_expcnt 0x0
253 ; GFX12-NEXT: s_wait_samplecnt 0x0
254 ; GFX12-NEXT: s_wait_bvhcnt 0x0
255 ; GFX12-NEXT: s_wait_kmcnt 0x0
256 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
257 ; GFX12-NEXT: s_setpc_b64 s[30:31]
258 %op = call nnan nsz half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum whose first operand is produced by an
; `fadd nnan %arg0, 1.0`; the maximum call itself has no fast-math flags and
; the second operand is an unconstrained argument.
; Expected lowering, as pinned by the checks below: the add is emitted first,
; and gfx7 through gfx11 still perform the ordered compare on both operands
; followed by the NaN select; gfx12 emits the add followed by v_maximum_f16.
262 define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
263 ; GFX7-LABEL: v_maximum_f16__nnan_src0:
265 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
267 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
268 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
269 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
270 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
271 ; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0
272 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
273 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
274 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
275 ; GFX7-NEXT: s_setpc_b64 s[30:31]
277 ; GFX8-LABEL: v_maximum_f16__nnan_src0:
279 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
281 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
282 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
283 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
284 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
285 ; GFX8-NEXT: s_setpc_b64 s[30:31]
287 ; GFX9-LABEL: v_maximum_f16__nnan_src0:
289 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
291 ; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
292 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
293 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
294 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
295 ; GFX9-NEXT: s_setpc_b64 s[30:31]
297 ; GFX940-LABEL: v_maximum_f16__nnan_src0:
299 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300 ; GFX940-NEXT: v_add_f16_e32 v0, 1.0, v0
301 ; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
302 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
303 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
304 ; GFX940-NEXT: s_nop 1
305 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
306 ; GFX940-NEXT: s_setpc_b64 s[30:31]
308 ; GFX10-LABEL: v_maximum_f16__nnan_src0:
310 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311 ; GFX10-NEXT: v_add_f16_e32 v0, 1.0, v0
312 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
313 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
314 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
315 ; GFX10-NEXT: s_setpc_b64 s[30:31]
317 ; GFX11-LABEL: v_maximum_f16__nnan_src0:
319 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320 ; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
321 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
322 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
323 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
324 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
325 ; GFX11-NEXT: s_setpc_b64 s[30:31]
327 ; GFX12-LABEL: v_maximum_f16__nnan_src0:
329 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
330 ; GFX12-NEXT: s_wait_expcnt 0x0
331 ; GFX12-NEXT: s_wait_samplecnt 0x0
332 ; GFX12-NEXT: s_wait_bvhcnt 0x0
333 ; GFX12-NEXT: s_wait_kmcnt 0x0
334 ; GFX12-NEXT: v_add_f16_e32 v0, 1.0, v0
335 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
336 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
337 ; GFX12-NEXT: s_setpc_b64 s[30:31]
338 %src0 = fadd nnan half %arg0, 1.0
339 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; Mirror of the nnan-on-first-operand case: here the second operand is
; produced by an `fadd nnan %arg1, 1.0`, the first operand is an
; unconstrained argument, and the maximum call has no fast-math flags.
; Expected lowering, as pinned by the checks below: the add is emitted first,
; and gfx7 through gfx11 still perform the ordered compare on both operands
; followed by the NaN select; gfx12 emits the add followed by v_maximum_f16.
343 define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
344 ; GFX7-LABEL: v_maximum_f16__nnan_src1:
346 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
348 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
349 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
350 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
351 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
352 ; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1
353 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
354 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
355 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
356 ; GFX7-NEXT: s_setpc_b64 s[30:31]
358 ; GFX8-LABEL: v_maximum_f16__nnan_src1:
360 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361 ; GFX8-NEXT: v_add_f16_e32 v1, 1.0, v1
362 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
363 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
364 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
365 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
366 ; GFX8-NEXT: s_setpc_b64 s[30:31]
368 ; GFX9-LABEL: v_maximum_f16__nnan_src1:
370 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
372 ; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
373 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
374 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
375 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
376 ; GFX9-NEXT: s_setpc_b64 s[30:31]
378 ; GFX940-LABEL: v_maximum_f16__nnan_src1:
380 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; GFX940-NEXT: v_add_f16_e32 v1, 1.0, v1
382 ; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
383 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
384 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
385 ; GFX940-NEXT: s_nop 1
386 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
387 ; GFX940-NEXT: s_setpc_b64 s[30:31]
389 ; GFX10-LABEL: v_maximum_f16__nnan_src1:
391 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1
393 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
394 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
395 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
396 ; GFX10-NEXT: s_setpc_b64 s[30:31]
398 ; GFX11-LABEL: v_maximum_f16__nnan_src1:
400 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
402 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
403 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
404 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
405 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
406 ; GFX11-NEXT: s_setpc_b64 s[30:31]
408 ; GFX12-LABEL: v_maximum_f16__nnan_src1:
410 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
411 ; GFX12-NEXT: s_wait_expcnt 0x0
412 ; GFX12-NEXT: s_wait_samplecnt 0x0
413 ; GFX12-NEXT: s_wait_bvhcnt 0x0
414 ; GFX12-NEXT: s_wait_kmcnt 0x0
415 ; GFX12-NEXT: v_add_f16_e32 v1, 1.0, v1
416 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
417 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
418 ; GFX12-NEXT: s_setpc_b64 s[30:31]
419 %src1 = fadd nnan half %arg1, 1.0
420 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum with both operands marked inreg (the checks show
; them arriving in SGPRs). The result is bitcast to i16, zero-extended to i32
; and consumed by a side-effecting inline asm so that it stays live.
; Expected lowering, as pinned by the checks below:
;   - gfx7 through gfx11: the max, ordered compare and NaN select are done
;     with vector instructions reading the SGPR inputs, then the result is
;     masked/converted for the zext.
;   - gfx12: s_maximum_f16 followed by s_and_b32 with 0xffff, all in SGPRs.
; NOTE(review): the inline asm operand uses the "s" constraint, yet the
; expected output for gfx7 through gfx11 is `; use v0` (a VGPR name) while
; only gfx12 prints `; use s0`. Confirm this is the intended expectation and
; not a missing SGPR copy of the result.
424 define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
425 ; GFX7-LABEL: s_maximum_f16:
427 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s7
429 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s6
430 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
431 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
432 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
433 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v0
434 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0
435 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
436 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
437 ; GFX7-NEXT: ;;#ASMSTART
438 ; GFX7-NEXT: ; use v0
439 ; GFX7-NEXT: ;;#ASMEND
440 ; GFX7-NEXT: s_setpc_b64 s[30:31]
442 ; GFX8-LABEL: s_maximum_f16:
444 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX8-NEXT: v_mov_b32_e32 v0, s7
446 ; GFX8-NEXT: v_max_f16_e32 v1, s6, v0
447 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
448 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s6, v0
449 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
450 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
451 ; GFX8-NEXT: ;;#ASMSTART
452 ; GFX8-NEXT: ; use v0
453 ; GFX8-NEXT: ;;#ASMEND
454 ; GFX8-NEXT: s_setpc_b64 s[30:31]
456 ; GFX9-LABEL: s_maximum_f16:
458 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GFX9-NEXT: v_mov_b32_e32 v0, s7
460 ; GFX9-NEXT: v_max_f16_e32 v1, s6, v0
461 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
462 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s6, v0
463 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
464 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
465 ; GFX9-NEXT: ;;#ASMSTART
466 ; GFX9-NEXT: ; use v0
467 ; GFX9-NEXT: ;;#ASMEND
468 ; GFX9-NEXT: s_setpc_b64 s[30:31]
470 ; GFX940-LABEL: s_maximum_f16:
472 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX940-NEXT: v_mov_b32_e32 v0, s1
474 ; GFX940-NEXT: v_max_f16_e32 v1, s0, v0
475 ; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
476 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
477 ; GFX940-NEXT: s_nop 1
478 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
479 ; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
480 ; GFX940-NEXT: ;;#ASMSTART
481 ; GFX940-NEXT: ; use v0
482 ; GFX940-NEXT: ;;#ASMEND
483 ; GFX940-NEXT: s_setpc_b64 s[30:31]
485 ; GFX10-LABEL: s_maximum_f16:
487 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX10-NEXT: v_max_f16_e64 v0, s6, s7
489 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s6, s7
490 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
491 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
492 ; GFX10-NEXT: ;;#ASMSTART
493 ; GFX10-NEXT: ; use v0
494 ; GFX10-NEXT: ;;#ASMEND
495 ; GFX10-NEXT: s_setpc_b64 s[30:31]
497 ; GFX11-LABEL: s_maximum_f16:
499 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
501 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
502 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
503 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
504 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
505 ; GFX11-NEXT: ;;#ASMSTART
506 ; GFX11-NEXT: ; use v0
507 ; GFX11-NEXT: ;;#ASMEND
508 ; GFX11-NEXT: s_setpc_b64 s[30:31]
510 ; GFX12-LABEL: s_maximum_f16:
512 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
513 ; GFX12-NEXT: s_wait_expcnt 0x0
514 ; GFX12-NEXT: s_wait_samplecnt 0x0
515 ; GFX12-NEXT: s_wait_bvhcnt 0x0
516 ; GFX12-NEXT: s_wait_kmcnt 0x0
517 ; GFX12-NEXT: s_maximum_f16 s0, s0, s1
518 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
519 ; GFX12-NEXT: s_and_b32 s0, 0xffff, s0
520 ; GFX12-NEXT: ;;#ASMSTART
521 ; GFX12-NEXT: ; use s0
522 ; GFX12-NEXT: ;;#ASMEND
523 ; GFX12-NEXT: s_setpc_b64 s[30:31]
524 %op = call half @llvm.maximum.f16(half %src0, half %src1)
525 %cast = bitcast half %op to i16
526 %zext = zext i16 %cast to i32
527 call void asm sideeffect "; use $0", "s"(i32 %zext)
; Baseline vector case: <2 x half> llvm.maximum with no fast-math flags.
; Expected lowering, as pinned by the checks below:
;   - gfx7: each element is handled as f32 (v_max_f32, ordered compare,
;     select of 0x7fc00000), one result per VGPR.
;   - gfx8: the high halves are extracted with shifts, each half gets its own
;     v_max_f16 / ordered compare / select of 0x7e00, and the halves are
;     recombined with an SDWA or.
;   - gfx9, gfx940, gfx10, gfx11: one v_pk_max_f16, then a per-half ordered
;     compare and select of 0x7e00, repacked with v_perm_b32 (selector
;     0x5040100).
;   - gfx12: a single v_pk_maximum_f16.
531 define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
532 ; GFX7-LABEL: v_maximum_v2f16:
534 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
536 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
537 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
538 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
539 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
540 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
541 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
542 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
543 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
544 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
545 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
546 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
547 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
548 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
549 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
550 ; GFX7-NEXT: s_setpc_b64 s[30:31]
552 ; GFX8-LABEL: v_maximum_v2f16:
554 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
556 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
557 ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
558 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
559 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
560 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
561 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
562 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
563 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
564 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
565 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
566 ; GFX8-NEXT: s_setpc_b64 s[30:31]
568 ; GFX9-LABEL: v_maximum_v2f16:
570 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
572 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
573 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
574 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
575 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
576 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
577 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
578 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
579 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
580 ; GFX9-NEXT: s_setpc_b64 s[30:31]
582 ; GFX940-LABEL: v_maximum_v2f16:
584 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
586 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
587 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
588 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
589 ; GFX940-NEXT: s_nop 0
590 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
591 ; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
592 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
593 ; GFX940-NEXT: s_nop 1
594 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
595 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
596 ; GFX940-NEXT: s_setpc_b64 s[30:31]
598 ; GFX10-LABEL: v_maximum_v2f16:
600 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601 ; GFX10-NEXT: v_pk_max_f16 v2, v0, v1
602 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
603 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
604 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
605 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
606 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
607 ; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
608 ; GFX10-NEXT: s_setpc_b64 s[30:31]
610 ; GFX11-LABEL: v_maximum_v2f16:
612 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613 ; GFX11-NEXT: v_pk_max_f16 v2, v0, v1
614 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
615 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
616 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
617 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
618 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
619 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
620 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
621 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
622 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
623 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
624 ; GFX11-NEXT: s_setpc_b64 s[30:31]
626 ; GFX12-LABEL: v_maximum_v2f16:
628 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
629 ; GFX12-NEXT: s_wait_expcnt 0x0
630 ; GFX12-NEXT: s_wait_samplecnt 0x0
631 ; GFX12-NEXT: s_wait_bvhcnt 0x0
632 ; GFX12-NEXT: s_wait_kmcnt 0x0
633 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
634 ; GFX12-NEXT: s_setpc_b64 s[30:31]
635 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries the nnan flag.
; Expected lowering, as pinned by the checks below: no ordered compares or
; NaN selects on any target. gfx7 uses two v_max_f32; gfx8 uses an SDWA
; v_max_f16 for the high half, a plain v_max_f16 for the low half and an or
; to combine them; gfx9 through gfx11 use a single v_pk_max_f16; gfx12 uses
; v_pk_maximum_f16.
639 define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
640 ; GFX7-LABEL: v_maximum_v2f16__nnan:
642 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
644 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
645 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
646 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
647 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
648 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
649 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
650 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
651 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
652 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
653 ; GFX7-NEXT: s_setpc_b64 s[30:31]
655 ; GFX8-LABEL: v_maximum_v2f16__nnan:
657 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
659 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
660 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
661 ; GFX8-NEXT: s_setpc_b64 s[30:31]
663 ; GFX9-LABEL: v_maximum_v2f16__nnan:
665 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
667 ; GFX9-NEXT: s_setpc_b64 s[30:31]
669 ; GFX940-LABEL: v_maximum_v2f16__nnan:
671 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
673 ; GFX940-NEXT: s_setpc_b64 s[30:31]
675 ; GFX10-LABEL: v_maximum_v2f16__nnan:
677 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
679 ; GFX10-NEXT: s_setpc_b64 s[30:31]
681 ; GFX11-LABEL: v_maximum_v2f16__nnan:
683 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
685 ; GFX11-NEXT: s_setpc_b64 s[30:31]
687 ; GFX12-LABEL: v_maximum_v2f16__nnan:
689 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
690 ; GFX12-NEXT: s_wait_expcnt 0x0
691 ; GFX12-NEXT: s_wait_samplecnt 0x0
692 ; GFX12-NEXT: s_wait_bvhcnt 0x0
693 ; GFX12-NEXT: s_wait_kmcnt 0x0
694 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
695 ; GFX12-NEXT: s_setpc_b64 s[30:31]
696 %op = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries only the nsz flag.
; Expected lowering, as pinned by the checks below: the sequences have the
; same shape as the unflagged <2 x half> case, i.e. per-element ordered
; compares and NaN selects are still present on gfx7 through gfx11, and gfx12
; emits v_pk_maximum_f16.
700 define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
701 ; GFX7-LABEL: v_maximum_v2f16__nsz:
703 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
705 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
706 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
707 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
708 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
709 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
710 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
711 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
712 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
713 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
714 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
715 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
716 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
717 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
718 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
719 ; GFX7-NEXT: s_setpc_b64 s[30:31]
721 ; GFX8-LABEL: v_maximum_v2f16__nsz:
723 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
725 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
726 ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
727 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
728 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
729 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
730 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
731 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
732 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
733 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
734 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
735 ; GFX8-NEXT: s_setpc_b64 s[30:31]
737 ; GFX9-LABEL: v_maximum_v2f16__nsz:
739 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
741 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
742 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
743 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
744 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
745 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
746 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
747 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
748 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
749 ; GFX9-NEXT: s_setpc_b64 s[30:31]
751 ; GFX940-LABEL: v_maximum_v2f16__nsz:
753 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
755 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
756 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
757 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
758 ; GFX940-NEXT: s_nop 0
759 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
760 ; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
761 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
762 ; GFX940-NEXT: s_nop 1
763 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
764 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
765 ; GFX940-NEXT: s_setpc_b64 s[30:31]
767 ; GFX10-LABEL: v_maximum_v2f16__nsz:
769 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
770 ; GFX10-NEXT: v_pk_max_f16 v2, v0, v1
771 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
772 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
773 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
774 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
775 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
776 ; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
777 ; GFX10-NEXT: s_setpc_b64 s[30:31]
779 ; GFX11-LABEL: v_maximum_v2f16__nsz:
781 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782 ; GFX11-NEXT: v_pk_max_f16 v2, v0, v1
783 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
784 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
785 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
786 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
787 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
788 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
789 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
790 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
791 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
792 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
793 ; GFX11-NEXT: s_setpc_b64 s[30:31]
795 ; GFX12-LABEL: v_maximum_v2f16__nsz:
797 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
798 ; GFX12-NEXT: s_wait_expcnt 0x0
799 ; GFX12-NEXT: s_wait_samplecnt 0x0
800 ; GFX12-NEXT: s_wait_bvhcnt 0x0
801 ; GFX12-NEXT: s_wait_kmcnt 0x0
802 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
803 ; GFX12-NEXT: s_setpc_b64 s[30:31]
804 %op = call nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries both nnan and nsz.
; Expected lowering, as pinned by the checks below: no ordered compares or
; NaN selects on any target. gfx7 uses two v_max_f32; gfx8 uses an SDWA
; v_max_f16, a plain v_max_f16 and an or; gfx9 through gfx11 use a single
; v_pk_max_f16; gfx12 uses v_pk_maximum_f16.
808 define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) {
809 ; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
811 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
813 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
814 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
815 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
816 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
817 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
818 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
819 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
820 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
821 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
822 ; GFX7-NEXT: s_setpc_b64 s[30:31]
824 ; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
826 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
827 ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
828 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
829 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
830 ; GFX8-NEXT: s_setpc_b64 s[30:31]
832 ; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
834 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
836 ; GFX9-NEXT: s_setpc_b64 s[30:31]
838 ; GFX940-LABEL: v_maximum_v2f16__nnan_nsz:
840 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
842 ; GFX940-NEXT: s_setpc_b64 s[30:31]
844 ; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
846 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
848 ; GFX10-NEXT: s_setpc_b64 s[30:31]
850 ; GFX11-LABEL: v_maximum_v2f16__nnan_nsz:
852 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
854 ; GFX11-NEXT: s_setpc_b64 s[30:31]
856 ; GFX12-LABEL: v_maximum_v2f16__nnan_nsz:
858 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
859 ; GFX12-NEXT: s_wait_expcnt 0x0
860 ; GFX12-NEXT: s_wait_samplecnt 0x0
861 ; GFX12-NEXT: s_wait_bvhcnt 0x0
862 ; GFX12-NEXT: s_wait_kmcnt 0x0
863 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
864 ; GFX12-NEXT: s_setpc_b64 s[30:31]
865 %op = call nnan nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; llvm.maximum on <2 x half> with both operands marked `inreg` and no
; fast-math flags on the call. The result is bitcast to i32 and fed to a
; side-effecting inline asm so that it stays live.
; Per the checks below, the gfx1200 run emits a single v_pk_maximum_f16, while
; the other runs emit a max followed by a v_cmp_o / v_cndmask pair per lane
; that selects between the max result and the constant 0x7e00 (0x7fc00000 in
; the f32 path of the gfx703 run) -- presumably the quiet-NaN bit pattern,
; chosen when the operands compare unordered.
; NOTE(review): the asm operand uses the "s" constraint, yet every check block
; shows the asm using v0 -- confirm this is the intended register class.
869 define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
870 ; GFX7-LABEL: s_maximum_v2f16:
872 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s17
874 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s7
875 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, s16
876 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, s6
877 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
878 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
879 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
880 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
881 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
882 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v0
883 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0
884 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
885 ; GFX7-NEXT: v_max_f32_e32 v1, v3, v2
886 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v2
887 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
888 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
889 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
890 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
891 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
892 ; GFX7-NEXT: ;;#ASMSTART
893 ; GFX7-NEXT: ; use v0
894 ; GFX7-NEXT: ;;#ASMEND
895 ; GFX7-NEXT: s_setpc_b64 s[30:31]
897 ; GFX8-LABEL: s_maximum_v2f16:
899 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900 ; GFX8-NEXT: s_lshr_b32 s4, s7, 16
901 ; GFX8-NEXT: s_lshr_b32 s5, s6, 16
902 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
903 ; GFX8-NEXT: v_max_f16_e32 v1, s5, v0
904 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
905 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s5, v0
906 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
907 ; GFX8-NEXT: v_mov_b32_e32 v1, s7
908 ; GFX8-NEXT: v_max_f16_e32 v3, s6, v1
909 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s6, v1
910 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
911 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
912 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
913 ; GFX8-NEXT: ;;#ASMSTART
914 ; GFX8-NEXT: ; use v0
915 ; GFX8-NEXT: ;;#ASMEND
916 ; GFX8-NEXT: s_setpc_b64 s[30:31]
918 ; GFX9-LABEL: s_maximum_v2f16:
920 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX9-NEXT: v_mov_b32_e32 v0, s7
922 ; GFX9-NEXT: v_mov_b32_e32 v1, s7
923 ; GFX9-NEXT: s_lshr_b32 s4, s7, 16
924 ; GFX9-NEXT: v_pk_max_f16 v1, s6, v1
925 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
926 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s6, v0
927 ; GFX9-NEXT: s_lshr_b32 s5, s6, 16
928 ; GFX9-NEXT: v_mov_b32_e32 v3, s4
929 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
930 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
931 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
932 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
933 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
934 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
935 ; GFX9-NEXT: ;;#ASMSTART
936 ; GFX9-NEXT: ; use v0
937 ; GFX9-NEXT: ;;#ASMEND
938 ; GFX9-NEXT: s_setpc_b64 s[30:31]
940 ; GFX940-LABEL: s_maximum_v2f16:
942 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943 ; GFX940-NEXT: v_mov_b32_e32 v0, s1
944 ; GFX940-NEXT: v_mov_b32_e32 v1, s1
945 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
946 ; GFX940-NEXT: v_pk_max_f16 v1, s0, v1
947 ; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
948 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
949 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
950 ; GFX940-NEXT: v_mov_b32_e32 v3, s1
951 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
952 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
953 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
954 ; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
955 ; GFX940-NEXT: s_nop 0
956 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
957 ; GFX940-NEXT: v_lshl_or_b32 v0, v1, 16, v0
958 ; GFX940-NEXT: ;;#ASMSTART
959 ; GFX940-NEXT: ; use v0
960 ; GFX940-NEXT: ;;#ASMEND
961 ; GFX940-NEXT: s_setpc_b64 s[30:31]
963 ; GFX10-LABEL: s_maximum_v2f16:
965 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX10-NEXT: v_pk_max_f16 v0, s6, s7
967 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s6, s7
968 ; GFX10-NEXT: s_lshr_b32 s4, s7, 16
969 ; GFX10-NEXT: s_lshr_b32 s5, s6, 16
970 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
971 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
972 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s5, s4
973 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
974 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
975 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
976 ; GFX10-NEXT: ;;#ASMSTART
977 ; GFX10-NEXT: ; use v0
978 ; GFX10-NEXT: ;;#ASMEND
979 ; GFX10-NEXT: s_setpc_b64 s[30:31]
981 ; GFX11-LABEL: s_maximum_v2f16:
983 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
985 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
986 ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
987 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
988 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
989 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
990 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
991 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
992 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
993 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
994 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
995 ; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
996 ; GFX11-NEXT: ;;#ASMSTART
997 ; GFX11-NEXT: ; use v0
998 ; GFX11-NEXT: ;;#ASMEND
999 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1001 ; GFX12-LABEL: s_maximum_v2f16:
1003 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1004 ; GFX12-NEXT: s_wait_expcnt 0x0
1005 ; GFX12-NEXT: s_wait_samplecnt 0x0
1006 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1007 ; GFX12-NEXT: s_wait_kmcnt 0x0
1008 ; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1
1009 ; GFX12-NEXT: ;;#ASMSTART
1010 ; GFX12-NEXT: ; use v0
1011 ; GFX12-NEXT: ;;#ASMEND
1012 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1013 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
1014 %cast = bitcast <2 x half> %op to i32
1015 call void asm sideeffect "; use $0", "s"(i32 %cast)
; llvm.maximum on <3 x half> with no fast-math flags on the call.
; Per the checks below, the gfx1200 run emits two v_pk_maximum_f16 (v0/v2 and
; v1/v3). Every other run emits a max plus a v_cmp_o / v_cndmask pair for each
; of the three elements, selecting between the max result and 0x7e00
; (0x7fc00000 in the f32 path of the gfx703 run) -- presumably the quiet-NaN
; pattern used when the operands compare unordered.
1019 define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
1020 ; GFX7-LABEL: v_maximum_v3f16:
1022 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1024 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1025 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1026 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1027 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1028 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1029 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1030 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1031 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1032 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1033 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1034 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1035 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1036 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1037 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1038 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1039 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1040 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1041 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1042 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1043 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1044 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1045 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1047 ; GFX8-LABEL: v_maximum_v3f16:
1049 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1050 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1051 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1052 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1053 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1054 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1055 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1056 ; GFX8-NEXT: v_max_f16_e32 v5, v1, v3
1057 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1058 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
1059 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1060 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1061 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1062 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1063 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1064 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1066 ; GFX9-LABEL: v_maximum_v3f16:
1068 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069 ; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
1070 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
1071 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1072 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1073 ; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
1074 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1075 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1076 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1077 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1078 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1079 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1080 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
1081 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX940-LABEL: v_maximum_v3f16:
1085 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
1087 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
1088 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1089 ; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
1090 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
1091 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1092 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1093 ; GFX940-NEXT: s_nop 1
1094 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1095 ; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1096 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1097 ; GFX940-NEXT: s_nop 1
1098 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1099 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
1100 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX10-LABEL: v_maximum_v3f16:
1104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105 ; GFX10-NEXT: v_pk_max_f16 v4, v0, v2
1106 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1107 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4
1108 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1109 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1110 ; GFX10-NEXT: v_pk_max_f16 v2, v1, v3
1111 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1112 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1113 ; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
1114 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1115 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1117 ; GFX11-LABEL: v_maximum_v3f16:
1119 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120 ; GFX11-NEXT: v_pk_max_f16 v4, v0, v2
1121 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1122 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1123 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1124 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1125 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
1126 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1127 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1128 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1129 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1130 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1131 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1132 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1133 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1134 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1135 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1137 ; GFX12-LABEL: v_maximum_v3f16:
1139 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1140 ; GFX12-NEXT: s_wait_expcnt 0x0
1141 ; GFX12-NEXT: s_wait_samplecnt 0x0
1142 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1143 ; GFX12-NEXT: s_wait_kmcnt 0x0
1144 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1145 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1146 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1147 %op = call <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; llvm.maximum on <3 x half> with the `nnan` fast-math flag on the call.
; Per the checks below, none of the runs emit a v_cmp_o / v_cndmask sequence:
; the gfx703 run uses three v_max_f32, the gfx803 run uses v_max_f16 (one of
; them SDWA for the high half), the gfx900 through gfx1100 runs use two
; v_pk_max_f16, and the gfx1200 run uses two v_pk_maximum_f16.
1151 define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
1152 ; GFX7-LABEL: v_maximum_v3f16__nnan:
1154 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1156 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1157 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1158 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1159 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1160 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1161 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1162 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1163 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1164 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1165 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1166 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1167 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1168 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1169 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1170 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1172 ; GFX8-LABEL: v_maximum_v3f16__nnan:
1174 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1175 ; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1176 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1177 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1178 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
1179 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1181 ; GFX9-LABEL: v_maximum_v3f16__nnan:
1183 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
1185 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
1186 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1188 ; GFX940-LABEL: v_maximum_v3f16__nnan:
1190 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
1192 ; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
1193 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1195 ; GFX10-LABEL: v_maximum_v3f16__nnan:
1197 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1199 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1200 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1202 ; GFX11-LABEL: v_maximum_v3f16__nnan:
1204 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1206 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1207 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1209 ; GFX12-LABEL: v_maximum_v3f16__nnan:
1211 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1212 ; GFX12-NEXT: s_wait_expcnt 0x0
1213 ; GFX12-NEXT: s_wait_samplecnt 0x0
1214 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1215 ; GFX12-NEXT: s_wait_kmcnt 0x0
1216 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1217 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1218 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1219 %op = call nnan <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; llvm.maximum on <3 x half> with only the `nsz` fast-math flag on the call.
; Per the checks below, the pre-gfx1200 runs still emit the max plus
; v_cmp_o / v_cndmask expansion with the 0x7e00 constant (0x7fc00000 on the
; gfx703 run), so `nsz` alone does not remove it. The gfx1200 run emits two
; v_pk_maximum_f16.
1223 define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
1224 ; GFX7-LABEL: v_maximum_v3f16__nsz:
1226 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1228 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1229 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1230 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1231 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1232 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1233 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1234 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1235 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1236 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1237 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1238 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1239 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1240 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1241 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1242 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1243 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1244 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1245 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1246 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1247 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1248 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1249 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1251 ; GFX8-LABEL: v_maximum_v3f16__nsz:
1253 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1254 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1255 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1256 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1257 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1258 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1259 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1260 ; GFX8-NEXT: v_max_f16_e32 v5, v1, v3
1261 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1262 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
1263 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1264 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1265 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1266 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1267 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1268 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX9-LABEL: v_maximum_v3f16__nsz:
1272 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
1274 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
1275 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1276 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1277 ; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
1278 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1279 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1280 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1281 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1282 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1283 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1284 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
1285 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1287 ; GFX940-LABEL: v_maximum_v3f16__nsz:
1289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290 ; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
1291 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
1292 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1293 ; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
1294 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
1295 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1296 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1297 ; GFX940-NEXT: s_nop 1
1298 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1299 ; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1300 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1301 ; GFX940-NEXT: s_nop 1
1302 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1303 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
1304 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1306 ; GFX10-LABEL: v_maximum_v3f16__nsz:
1308 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309 ; GFX10-NEXT: v_pk_max_f16 v4, v0, v2
1310 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1311 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4
1312 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1313 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1314 ; GFX10-NEXT: v_pk_max_f16 v2, v1, v3
1315 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1316 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1317 ; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
1318 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1319 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1321 ; GFX11-LABEL: v_maximum_v3f16__nsz:
1323 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324 ; GFX11-NEXT: v_pk_max_f16 v4, v0, v2
1325 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1326 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1327 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1328 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1329 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
1330 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1331 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1332 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1333 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1334 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1335 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1336 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1337 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1338 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1339 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1341 ; GFX12-LABEL: v_maximum_v3f16__nsz:
1343 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1344 ; GFX12-NEXT: s_wait_expcnt 0x0
1345 ; GFX12-NEXT: s_wait_samplecnt 0x0
1346 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1347 ; GFX12-NEXT: s_wait_kmcnt 0x0
1348 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1349 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1350 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1351 %op = call nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; llvm.maximum on <3 x half> with both `nnan` and `nsz` on the call.
; Per the checks below, no run emits a v_cmp_o / v_cndmask sequence: the
; pre-gfx1200 runs use plain v_max_f32 / v_max_f16 / v_pk_max_f16, and the
; gfx1200 run uses two v_pk_maximum_f16.
1355 define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) {
1356 ; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
1358 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1360 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1361 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1362 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1363 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1364 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1365 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1366 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1367 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1368 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1369 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1370 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1371 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1372 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1373 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1374 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1376 ; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
1378 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1380 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1381 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1382 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
1383 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1385 ; GFX9-LABEL: v_maximum_v3f16__nnan_nsz:
1387 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
1389 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
1390 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1392 ; GFX940-LABEL: v_maximum_v3f16__nnan_nsz:
1394 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
1396 ; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
1397 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1399 ; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
1401 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1403 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1404 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1406 ; GFX11-LABEL: v_maximum_v3f16__nnan_nsz:
1408 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1409 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1410 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1411 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1413 ; GFX12-LABEL: v_maximum_v3f16__nnan_nsz:
1415 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1416 ; GFX12-NEXT: s_wait_expcnt 0x0
1417 ; GFX12-NEXT: s_wait_samplecnt 0x0
1418 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1419 ; GFX12-NEXT: s_wait_kmcnt 0x0
1420 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1421 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1422 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1423 %op = call nnan nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; llvm.maximum on <4 x half> with no fast-math flags on the call.
; Per the checks below, the gfx1200 run emits two v_pk_maximum_f16 (v0/v2 and
; v1/v3). Every other run emits a max plus a v_cmp_o / v_cndmask pair for each
; of the four elements, selecting between the max result and 0x7e00
; (0x7fc00000 in the f32 path of the gfx703 run) -- presumably the quiet-NaN
; pattern used when the operands compare unordered -- and then repacks the
; halves (v_or_b32_sdwa, or v_perm_b32 with selector 0x5040100).
1427 define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
1428 ; GFX7-LABEL: v_maximum_v4f16:
1430 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1432 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1433 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1434 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1435 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1436 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1437 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1438 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1439 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1440 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1441 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1442 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1443 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1444 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1445 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1446 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1447 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1448 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1449 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1450 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1451 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1452 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1453 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1454 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1455 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1456 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1457 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1458 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1459 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1460 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1462 ; GFX8-LABEL: v_maximum_v4f16:
1464 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1466 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1467 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1468 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1469 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1470 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1471 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1472 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1473 ; GFX8-NEXT: v_max_f16_e32 v8, v6, v5
1474 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
1475 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc
1476 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v3
1477 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1478 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1479 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1480 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1481 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1482 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
1483 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1484 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1485 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1486 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1488 ; GFX9-LABEL: v_maximum_v4f16:
1490 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491 ; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
1492 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
1493 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1494 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1495 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1496 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1497 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1498 ; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
1499 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1500 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1501 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1502 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1503 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1504 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1505 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
1506 ; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
1507 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1509 ; GFX940-LABEL: v_maximum_v4f16:
1511 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512 ; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
1513 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
1514 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1515 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
1516 ; GFX940-NEXT: s_nop 0
1517 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1518 ; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1519 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1520 ; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
1521 ; GFX940-NEXT: s_nop 0
1522 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1523 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1524 ; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
1525 ; GFX940-NEXT: s_nop 0
1526 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1527 ; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1528 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1529 ; GFX940-NEXT: s_nop 1
1530 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1531 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
1532 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1534 ; GFX10-LABEL: v_maximum_v4f16:
1536 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; GFX10-NEXT: v_pk_max_f16 v4, v1, v3
1538 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1539 ; GFX10-NEXT: v_pk_max_f16 v5, v0, v2
1540 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1541 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1542 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
1543 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1544 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1545 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1546 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1547 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1548 ; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100
1549 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1550 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
1551 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX11-LABEL: v_maximum_v4f16:
1555 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1557 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1558 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1559 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
1560 ; GFX11-NEXT: v_pk_max_f16 v7, v0, v2
1561 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1562 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1563 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1564 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1565 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7
1566 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1567 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1568 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8
1569 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1570 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1571 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1572 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1573 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1574 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1575 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
1576 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1578 ; GFX12-LABEL: v_maximum_v4f16:
1580 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1581 ; GFX12-NEXT: s_wait_expcnt 0x0
1582 ; GFX12-NEXT: s_wait_samplecnt 0x0
1583 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1584 ; GFX12-NEXT: s_wait_kmcnt 0x0
1585 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1586 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1587 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1588 %op = call <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; llvm.maximum on <4 x half> with the `nnan` fast-math flag on the call.
; Per the checks below, none of the runs emit a v_cmp_o / v_cndmask sequence:
; the gfx703 run uses four v_max_f32, the gfx803 run uses four v_max_f16 (two
; of them SDWA for the high halves) merged with v_or_b32, the gfx900 through
; gfx1100 runs use two v_pk_max_f16, and the gfx1200 run uses two
; v_pk_maximum_f16.
1592 define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
1593 ; GFX7-LABEL: v_maximum_v4f16__nnan:
1595 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1597 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1598 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1599 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1600 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1601 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1602 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1603 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1604 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1605 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1606 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1607 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1608 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1609 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1610 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1611 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1612 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1613 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1614 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1615 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1616 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1618 ; GFX8-LABEL: v_maximum_v4f16__nnan:
1620 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621 ; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1622 ; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1623 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1624 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1625 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
1626 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1627 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX9-LABEL: v_maximum_v4f16__nnan:
1631 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
1633 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
1634 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1636 ; GFX940-LABEL: v_maximum_v4f16__nnan:
1638 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
1640 ; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
1641 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1643 ; GFX10-LABEL: v_maximum_v4f16__nnan:
1645 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1647 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1648 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1650 ; GFX11-LABEL: v_maximum_v4f16__nnan:
1652 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1654 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1655 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1657 ; GFX12-LABEL: v_maximum_v4f16__nnan:
1659 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1660 ; GFX12-NEXT: s_wait_expcnt 0x0
1661 ; GFX12-NEXT: s_wait_samplecnt 0x0
1662 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1663 ; GFX12-NEXT: s_wait_kmcnt 0x0
1664 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1665 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1666 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1667 %op = call nnan <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; <4 x half> llvm.maximum where the call carries only the nsz fast-math flag.
; Expected code per -mcpu (see the autogenerated checks below):
;   gfx703  - every input register goes through v_cvt_f16_f32 and back through
;             v_cvt_f32_f16, then each lane is v_max_f32 plus a
;             v_cmp_o_f32 / v_cndmask_b32 select of 0x7fc00000 when unordered.
;   gfx803  - high halves are extracted with v_lshrrev_b32; each half is
;             v_max_f16 plus a v_cmp_o_f16 / v_cndmask_b32 select of 0x7e00,
;             then repacked with v_lshlrev_b32 + v_or_b32_sdwa.
;   gfx900, gfx940, gfx1030, gfx1100 - one v_pk_max_f16 per register pair, a
;             per-half ordered compare selecting 0x7e00, and a v_perm_b32 with
;             selector 0x5040100 to repack the two halves.
;   gfx1200 - one v_pk_maximum_f16 per register pair, no compare/select.
; 0x7fc00000 and 0x7e00 are the IEEE-754 quiet-NaN encodings for f32 and f16.
1671 define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
1672 ; GFX7-LABEL: v_maximum_v4f16__nsz:
1674 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1675 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1676 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1677 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1678 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1679 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1680 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1681 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1682 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1683 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1684 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1685 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1686 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1687 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1688 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1689 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1690 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1691 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1692 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1693 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1694 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1695 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1696 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1697 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1698 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1699 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1700 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1701 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1702 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1703 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1704 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1706 ; GFX8-LABEL: v_maximum_v4f16__nsz:
1708 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1710 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1711 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1712 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1713 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1714 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1715 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1716 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1717 ; GFX8-NEXT: v_max_f16_e32 v8, v6, v5
1718 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
1719 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc
1720 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v3
1721 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1722 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1723 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1724 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1725 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1726 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
1727 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1728 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1729 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1730 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1732 ; GFX9-LABEL: v_maximum_v4f16__nsz:
1734 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735 ; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
1736 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
1737 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1738 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1739 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1740 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1741 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1742 ; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
1743 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1744 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1745 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1746 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1747 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1748 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
1749 ; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
1750 ; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
1751 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1753 ; GFX940-LABEL: v_maximum_v4f16__nsz:
1755 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1756 ; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
1757 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
1758 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1759 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
1760 ; GFX940-NEXT: s_nop 0
1761 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1762 ; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1763 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1764 ; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
1765 ; GFX940-NEXT: s_nop 0
1766 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1767 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1768 ; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
1769 ; GFX940-NEXT: s_nop 0
1770 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1771 ; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1772 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1773 ; GFX940-NEXT: s_nop 1
1774 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1775 ; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
1776 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1778 ; GFX10-LABEL: v_maximum_v4f16__nsz:
1780 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX10-NEXT: v_pk_max_f16 v4, v1, v3
1782 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1783 ; GFX10-NEXT: v_pk_max_f16 v5, v0, v2
1784 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1785 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1786 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
1787 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1788 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1789 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1790 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1791 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1792 ; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100
1793 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1794 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
1795 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1797 ; GFX11-LABEL: v_maximum_v4f16__nsz:
1799 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1800 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1801 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1802 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1803 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
1804 ; GFX11-NEXT: v_pk_max_f16 v7, v0, v2
1805 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1806 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1807 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1808 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1809 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7
1810 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1811 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1812 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8
1813 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1814 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1815 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1816 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1817 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1818 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1819 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
1820 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1822 ; GFX12-LABEL: v_maximum_v4f16__nsz:
1824 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1825 ; GFX12-NEXT: s_wait_expcnt 0x0
1826 ; GFX12-NEXT: s_wait_samplecnt 0x0
1827 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1828 ; GFX12-NEXT: s_wait_kmcnt 0x0
1829 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1830 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1831 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1832 %op = call nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; <4 x half> llvm.maximum where the call carries both nnan and nsz.
; The checks below contain no ordered-compare / NaN-select sequence on any
; target; only the max instructions themselves are expected:
;   gfx703  - inputs go through v_cvt_f16_f32 / v_cvt_f32_f16, then four
;             v_max_f32.
;   gfx803  - v_max_f16_sdwa on the high halves (written to WORD_1), v_max_f16
;             on the low halves, merged with v_or_b32.
;   gfx900, gfx940, gfx1030, gfx1100 - two v_pk_max_f16.
;   gfx1200 - two v_pk_maximum_f16.
1836 define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) {
1837 ; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
1839 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1841 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1842 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1843 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1844 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1845 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1846 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1847 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1848 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1849 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1850 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1851 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1852 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1853 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1854 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1855 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1856 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1857 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1858 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1859 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1860 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1862 ; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
1864 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865 ; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1866 ; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1867 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1868 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1869 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
1870 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1871 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1873 ; GFX9-LABEL: v_maximum_v4f16__nnan_nsz:
1875 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1876 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
1877 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
1878 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1880 ; GFX940-LABEL: v_maximum_v4f16__nnan_nsz:
1882 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883 ; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
1884 ; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
1885 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1887 ; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
1889 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1890 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1891 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1892 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1894 ; GFX11-LABEL: v_maximum_v4f16__nnan_nsz:
1896 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1897 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1898 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1899 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1901 ; GFX12-LABEL: v_maximum_v4f16__nnan_nsz:
1903 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1904 ; GFX12-NEXT: s_wait_expcnt 0x0
1905 ; GFX12-NEXT: s_wait_samplecnt 0x0
1906 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1907 ; GFX12-NEXT: s_wait_kmcnt 0x0
1908 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1909 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1910 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1911 %op = call nnan nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; <8 x half> llvm.maximum with no fast-math flags on the call.
; Expected code per -mcpu (see the autogenerated checks below):
;   gfx703  - all 16 input registers go through v_cvt_f16_f32 and
;             v_cvt_f32_f16; each of the 8 lanes is then v_max_f32 plus a
;             v_cmp_o_f32 / v_cndmask_b32 select of 0x7fc00000 when unordered.
;   gfx803  - 8 scalar v_max_f16 (4 on shifted-out high halves, 4 on low
;             halves), each with a v_cmp_o_f16 / v_cndmask_b32 select of
;             0x7e00, repacked with v_lshlrev_b32 + v_or_b32_sdwa.
;   gfx900, gfx940, gfx1030, gfx1100 - 4 v_pk_max_f16, a per-half ordered
;             compare selecting 0x7e00, and v_perm_b32 with selector 0x5040100
;             to repack each result register.
;   gfx1200 - 4 v_pk_maximum_f16, no compare/select.
; 0x7fc00000 and 0x7e00 are the IEEE-754 quiet-NaN encodings for f32 and f16.
1915 define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
1916 ; GFX7-LABEL: v_maximum_v8f16:
1918 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1919 ; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
1920 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1921 ; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
1922 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1923 ; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
1924 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1925 ; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
1926 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1927 ; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
1928 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1929 ; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
1930 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1931 ; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
1932 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1933 ; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
1934 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1935 ; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
1936 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1937 ; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
1938 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1939 ; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
1940 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1941 ; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
1942 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1943 ; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
1944 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1945 ; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
1946 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1947 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1948 ; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
1949 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1950 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1951 ; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
1952 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1953 ; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
1954 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1955 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1956 ; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
1957 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1958 ; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
1959 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1960 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1961 ; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
1962 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1963 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1964 ; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
1965 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1966 ; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1967 ; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
1968 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1969 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1970 ; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
1971 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1972 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1973 ; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
1974 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1975 ; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1976 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1978 ; GFX8-LABEL: v_maximum_v8f16:
1980 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v7
1982 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
1983 ; GFX8-NEXT: v_max_f16_e32 v10, v9, v8
1984 ; GFX8-NEXT: v_mov_b32_e32 v11, 0x7e00
1985 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v9, v8
1986 ; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v10, vcc
1987 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v6
1988 ; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2
1989 ; GFX8-NEXT: v_max_f16_e32 v12, v10, v9
1990 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v10, v9
1991 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc
1992 ; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5
1993 ; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v1
1994 ; GFX8-NEXT: v_max_f16_e32 v13, v12, v10
1995 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v12, v10
1996 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v11, v13, vcc
1997 ; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v4
1998 ; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v0
1999 ; GFX8-NEXT: v_max_f16_e32 v14, v13, v12
2000 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v13, v12
2001 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v14, vcc
2002 ; GFX8-NEXT: v_max_f16_e32 v13, v3, v7
2003 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
2004 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v13, vcc
2005 ; GFX8-NEXT: v_max_f16_e32 v7, v2, v6
2006 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
2007 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v7, vcc
2008 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v5
2009 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
2010 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v11, v6, vcc
2011 ; GFX8-NEXT: v_max_f16_e32 v5, v0, v4
2012 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
2013 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v11, v5, vcc
2014 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v12
2015 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2016 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v10
2017 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2018 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v9
2019 ; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2020 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v8
2021 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2022 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2024 ; GFX9-LABEL: v_maximum_v8f16:
2026 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2027 ; GFX9-NEXT: v_pk_max_f16 v8, v3, v7
2028 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x7e00
2029 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
2030 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
2031 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v8
2032 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
2033 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
2034 ; GFX9-NEXT: v_pk_max_f16 v7, v2, v6
2035 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
2036 ; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
2037 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2038 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
2039 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
2040 ; GFX9-NEXT: v_pk_max_f16 v6, v1, v5
2041 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
2042 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
2043 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
2044 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
2045 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
2046 ; GFX9-NEXT: v_pk_max_f16 v5, v0, v4
2047 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
2048 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
2049 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
2050 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
2051 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
2052 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2053 ; GFX9-NEXT: v_perm_b32 v0, v0, v6, s4
2054 ; GFX9-NEXT: v_perm_b32 v1, v1, v7, s4
2055 ; GFX9-NEXT: v_perm_b32 v2, v2, v8, s4
2056 ; GFX9-NEXT: v_perm_b32 v3, v3, v10, s4
2057 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2059 ; GFX940-LABEL: v_maximum_v8f16:
2061 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062 ; GFX940-NEXT: v_pk_max_f16 v8, v3, v7
2063 ; GFX940-NEXT: v_mov_b32_e32 v9, 0x7e00
2064 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
2065 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
2066 ; GFX940-NEXT: s_nop 0
2067 ; GFX940-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
2068 ; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v8
2069 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
2070 ; GFX940-NEXT: v_pk_max_f16 v7, v2, v6
2071 ; GFX940-NEXT: s_nop 0
2072 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
2073 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
2074 ; GFX940-NEXT: v_perm_b32 v3, v3, v10, s0
2075 ; GFX940-NEXT: s_nop 0
2076 ; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
2077 ; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2078 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
2079 ; GFX940-NEXT: v_pk_max_f16 v6, v1, v5
2080 ; GFX940-NEXT: s_nop 0
2081 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
2082 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
2083 ; GFX940-NEXT: v_perm_b32 v2, v2, v8, s0
2084 ; GFX940-NEXT: s_nop 0
2085 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
2086 ; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6
2087 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
2088 ; GFX940-NEXT: v_pk_max_f16 v5, v0, v4
2089 ; GFX940-NEXT: s_nop 0
2090 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
2091 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
2092 ; GFX940-NEXT: v_perm_b32 v1, v1, v7, s0
2093 ; GFX940-NEXT: s_nop 0
2094 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
2095 ; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v5
2096 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
2097 ; GFX940-NEXT: s_nop 1
2098 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
2099 ; GFX940-NEXT: v_perm_b32 v0, v0, v6, s0
2100 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2102 ; GFX10-LABEL: v_maximum_v8f16:
2104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105 ; GFX10-NEXT: v_pk_max_f16 v8, v3, v7
2106 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
2107 ; GFX10-NEXT: v_pk_max_f16 v9, v2, v6
2108 ; GFX10-NEXT: v_pk_max_f16 v12, v1, v5
2109 ; GFX10-NEXT: v_pk_max_f16 v13, v0, v4
2110 ; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo
2111 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6
2112 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v9
2113 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v8
2114 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
2115 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
2116 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
2117 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
2118 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v13
2119 ; GFX10-NEXT: v_perm_b32 v2, v2, v9, 0x5040100
2120 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo
2121 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4
2122 ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2123 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo
2124 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
2125 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
2126 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
2127 ; GFX10-NEXT: v_perm_b32 v0, v0, v13, 0x5040100
2128 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
2129 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
2130 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
2131 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
2132 ; GFX10-NEXT: v_perm_b32 v3, v3, v10, 0x5040100
2133 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2135 ; GFX11-LABEL: v_maximum_v8f16:
2137 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2138 ; GFX11-NEXT: v_pk_max_f16 v8, v3, v7
2139 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
2140 ; GFX11-NEXT: v_pk_max_f16 v10, v2, v6
2141 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v6
2142 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v2
2143 ; GFX11-NEXT: v_pk_max_f16 v14, v1, v5
2144 ; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
2145 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6
2146 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v10
2147 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2148 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2149 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v8
2150 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
2151 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v12, v11
2152 ; GFX11-NEXT: v_pk_max_f16 v11, v0, v4
2153 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v4
2154 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
2155 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
2156 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v0
2157 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v5
2158 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2159 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v11
2160 ; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
2161 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4
2162 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v14
2163 ; GFX11-NEXT: v_perm_b32 v2, v6, v2, 0x5040100
2164 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
2165 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v13, v12
2166 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
2167 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
2168 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2169 ; GFX11-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
2170 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
2171 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
2172 ; GFX11-NEXT: v_perm_b32 v1, v1, v10, 0x5040100
2173 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
2174 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2175 ; GFX11-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
2176 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2178 ; GFX12-LABEL: v_maximum_v8f16:
2180 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2181 ; GFX12-NEXT: s_wait_expcnt 0x0
2182 ; GFX12-NEXT: s_wait_samplecnt 0x0
2183 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2184 ; GFX12-NEXT: s_wait_kmcnt 0x0
2185 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4
2186 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
2187 ; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v6
2188 ; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v7
2189 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2190 %op = call <8 x half> @llvm.maximum.v8f16(<8 x half> %src0, <8 x half> %src1)
2194 define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
2195 ; GFX7-LABEL: v_maximum_v16f16:
2197 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2198 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
2199 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
2200 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
2201 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
2202 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2203 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
2204 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
2205 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
2206 ; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
2207 ; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v0, v16
2208 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
2209 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v22
2210 ; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
2211 ; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
2212 ; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
2213 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2214 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17
2215 ; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
2216 ; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
2217 ; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v6, v16
2218 ; GFX7-NEXT: v_max_f32_e32 v6, v6, v16
2219 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v23
2220 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
2221 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2222 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
2223 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2224 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
2225 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
2226 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
2227 ; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v7, v16
2228 ; GFX7-NEXT: v_max_f32_e32 v7, v7, v16
2229 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v24
2230 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
2231 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
2232 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v18
2233 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2234 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
2235 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
2236 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2237 ; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v16
2238 ; GFX7-NEXT: v_max_f32_e32 v8, v8, v16
2239 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v25
2240 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v17
2241 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v17
2242 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v19
2243 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2244 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
2245 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
2246 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2247 ; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v16
2248 ; GFX7-NEXT: v_max_f32_e32 v9, v9, v16
2249 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v26
2250 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v17
2251 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v17
2252 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20
2253 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2254 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
2255 ; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
2256 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2257 ; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v16
2258 ; GFX7-NEXT: v_max_f32_e32 v10, v10, v16
2259 ; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
2260 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v17
2261 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v17
2262 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v21
2263 ; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v28
2264 ; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
2265 ; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v29
2266 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2267 ; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
2268 ; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v30
2269 ; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
2270 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v17
2271 ; GFX7-NEXT: v_max_f32_e32 v5, v5, v17
2272 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v27
2273 ; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
2274 ; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
2275 ; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
2276 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2277 ; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
2278 ; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
2279 ; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
2280 ; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
2281 ; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
2282 ; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v17
2283 ; GFX7-NEXT: v_max_f32_e32 v11, v11, v17
2284 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
2285 ; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
2286 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
2287 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v20
2288 ; GFX7-NEXT: v_max_f32_e32 v12, v12, v20
2289 ; GFX7-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
2290 ; GFX7-NEXT: v_max_f32_e32 v20, v13, v19
2291 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v19
2292 ; GFX7-NEXT: v_cndmask_b32_e32 v13, v17, v20, vcc
2293 ; GFX7-NEXT: v_max_f32_e32 v19, v14, v18
2294 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v18
2295 ; GFX7-NEXT: v_cndmask_b32_e32 v14, v17, v19, vcc
2296 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[12:13]
2297 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
2298 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
2299 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
2300 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
2301 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[14:15]
2302 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[16:17]
2303 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
2304 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
2305 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
2306 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
2307 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2308 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
2309 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2310 ; GFX7-NEXT: v_max_f32_e32 v18, v15, v16
2311 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
2312 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
2313 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2315 ; GFX8-LABEL: v_maximum_v16f16:
2317 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2318 ; GFX8-NEXT: v_lshrrev_b32_e32 v16, 16, v15
2319 ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v7
2320 ; GFX8-NEXT: v_max_f16_e32 v18, v17, v16
2321 ; GFX8-NEXT: v_mov_b32_e32 v19, 0x7e00
2322 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v17, v16
2323 ; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v18, vcc
2324 ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14
2325 ; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v6
2326 ; GFX8-NEXT: v_max_f16_e32 v20, v18, v17
2327 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v18, v17
2328 ; GFX8-NEXT: v_cndmask_b32_e32 v17, v19, v20, vcc
2329 ; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v13
2330 ; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v5
2331 ; GFX8-NEXT: v_max_f16_e32 v21, v20, v18
2332 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v20, v18
2333 ; GFX8-NEXT: v_cndmask_b32_e32 v18, v19, v21, vcc
2334 ; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v12
2335 ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v4
2336 ; GFX8-NEXT: v_max_f16_e32 v22, v21, v20
2337 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v21, v20
2338 ; GFX8-NEXT: v_cndmask_b32_e32 v20, v19, v22, vcc
2339 ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v11
2340 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v3
2341 ; GFX8-NEXT: v_max_f16_e32 v23, v22, v21
2342 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v22, v21
2343 ; GFX8-NEXT: v_cndmask_b32_e32 v21, v19, v23, vcc
2344 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v10
2345 ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v2
2346 ; GFX8-NEXT: v_max_f16_e32 v24, v23, v22
2347 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v23, v22
2348 ; GFX8-NEXT: v_cndmask_b32_e32 v22, v19, v24, vcc
2349 ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v9
2350 ; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v1
2351 ; GFX8-NEXT: v_max_f16_e32 v25, v24, v23
2352 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v24, v23
2353 ; GFX8-NEXT: v_cndmask_b32_e32 v23, v19, v25, vcc
2354 ; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v8
2355 ; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v0
2356 ; GFX8-NEXT: v_max_f16_e32 v26, v25, v24
2357 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v25, v24
2358 ; GFX8-NEXT: v_cndmask_b32_e32 v24, v19, v26, vcc
2359 ; GFX8-NEXT: v_max_f16_e32 v25, v7, v15
2360 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
2361 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v25, vcc
2362 ; GFX8-NEXT: v_max_f16_e32 v15, v6, v14
2363 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
2364 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v19, v15, vcc
2365 ; GFX8-NEXT: v_max_f16_e32 v14, v5, v13
2366 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
2367 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v19, v14, vcc
2368 ; GFX8-NEXT: v_max_f16_e32 v13, v4, v12
2369 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
2370 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v19, v13, vcc
2371 ; GFX8-NEXT: v_max_f16_e32 v12, v3, v11
2372 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
2373 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v12, vcc
2374 ; GFX8-NEXT: v_max_f16_e32 v11, v2, v10
2375 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
2376 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v19, v11, vcc
2377 ; GFX8-NEXT: v_max_f16_e32 v10, v1, v9
2378 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
2379 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v19, v10, vcc
2380 ; GFX8-NEXT: v_max_f16_e32 v9, v0, v8
2381 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
2382 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v19, v9, vcc
2383 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v24
2384 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2385 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v23
2386 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2387 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v22
2388 ; GFX8-NEXT: v_or_b32_sdwa v2, v2, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2389 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v21
2390 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2391 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v20
2392 ; GFX8-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2393 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v18
2394 ; GFX8-NEXT: v_or_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2395 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v17
2396 ; GFX8-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2397 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v16
2398 ; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2399 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2401 ; GFX9-LABEL: v_maximum_v16f16:
2403 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404 ; GFX9-NEXT: v_pk_max_f16 v16, v7, v15
2405 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x7e00
2406 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
2407 ; GFX9-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
2408 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
2409 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2410 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
2411 ; GFX9-NEXT: v_pk_max_f16 v15, v6, v14
2412 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
2413 ; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
2414 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
2415 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2416 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
2417 ; GFX9-NEXT: v_pk_max_f16 v14, v5, v13
2418 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
2419 ; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
2420 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
2421 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2422 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
2423 ; GFX9-NEXT: v_pk_max_f16 v13, v4, v12
2424 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
2425 ; GFX9-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
2426 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
2427 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2428 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
2429 ; GFX9-NEXT: v_pk_max_f16 v12, v3, v11
2430 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
2431 ; GFX9-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
2432 ; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2433 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2434 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
2435 ; GFX9-NEXT: v_pk_max_f16 v11, v2, v10
2436 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
2437 ; GFX9-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
2438 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v11
2439 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2440 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
2441 ; GFX9-NEXT: v_pk_max_f16 v10, v1, v9
2442 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
2443 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
2444 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v10
2445 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2446 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
2447 ; GFX9-NEXT: v_pk_max_f16 v9, v0, v8
2448 ; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
2449 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
2450 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v9
2451 ; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2452 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
2453 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
2454 ; GFX9-NEXT: v_perm_b32 v0, v0, v10, s4
2455 ; GFX9-NEXT: v_perm_b32 v1, v1, v11, s4
2456 ; GFX9-NEXT: v_perm_b32 v2, v2, v12, s4
2457 ; GFX9-NEXT: v_perm_b32 v3, v3, v13, s4
2458 ; GFX9-NEXT: v_perm_b32 v4, v4, v14, s4
2459 ; GFX9-NEXT: v_perm_b32 v5, v5, v15, s4
2460 ; GFX9-NEXT: v_perm_b32 v6, v6, v16, s4
2461 ; GFX9-NEXT: v_perm_b32 v7, v7, v18, s4
2462 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2464 ; GFX940-LABEL: v_maximum_v16f16:
2466 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2467 ; GFX940-NEXT: v_pk_max_f16 v16, v7, v15
2468 ; GFX940-NEXT: v_mov_b32_e32 v17, 0x7e00
2469 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
2470 ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
2471 ; GFX940-NEXT: s_nop 0
2472 ; GFX940-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
2473 ; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v16
2474 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2475 ; GFX940-NEXT: v_pk_max_f16 v15, v6, v14
2476 ; GFX940-NEXT: s_nop 0
2477 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
2478 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
2479 ; GFX940-NEXT: v_perm_b32 v7, v7, v18, s0
2480 ; GFX940-NEXT: s_nop 0
2481 ; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
2482 ; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v15
2483 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2484 ; GFX940-NEXT: v_pk_max_f16 v14, v5, v13
2485 ; GFX940-NEXT: s_nop 0
2486 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
2487 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
2488 ; GFX940-NEXT: v_perm_b32 v6, v6, v16, s0
2489 ; GFX940-NEXT: s_nop 0
2490 ; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
2491 ; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v14
2492 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2493 ; GFX940-NEXT: v_pk_max_f16 v13, v4, v12
2494 ; GFX940-NEXT: s_nop 0
2495 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
2496 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
2497 ; GFX940-NEXT: v_perm_b32 v5, v5, v15, s0
2498 ; GFX940-NEXT: s_nop 0
2499 ; GFX940-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
2500 ; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v13
2501 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2502 ; GFX940-NEXT: v_pk_max_f16 v12, v3, v11
2503 ; GFX940-NEXT: s_nop 0
2504 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
2505 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
2506 ; GFX940-NEXT: v_perm_b32 v4, v4, v14, s0
2507 ; GFX940-NEXT: s_nop 0
2508 ; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
2509 ; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2510 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2511 ; GFX940-NEXT: v_pk_max_f16 v11, v2, v10
2512 ; GFX940-NEXT: s_nop 0
2513 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
2514 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
2515 ; GFX940-NEXT: v_perm_b32 v3, v3, v13, s0
2516 ; GFX940-NEXT: s_nop 0
2517 ; GFX940-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
2518 ; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v11
2519 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2520 ; GFX940-NEXT: v_pk_max_f16 v10, v1, v9
2521 ; GFX940-NEXT: s_nop 0
2522 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
2523 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
2524 ; GFX940-NEXT: v_perm_b32 v2, v2, v12, s0
2525 ; GFX940-NEXT: s_nop 0
2526 ; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
2527 ; GFX940-NEXT: v_lshrrev_b32_e32 v10, 16, v10
2528 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2529 ; GFX940-NEXT: v_pk_max_f16 v9, v0, v8
2530 ; GFX940-NEXT: s_nop 0
2531 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
2532 ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
2533 ; GFX940-NEXT: v_perm_b32 v1, v1, v11, s0
2534 ; GFX940-NEXT: s_nop 0
2535 ; GFX940-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
2536 ; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v9
2537 ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2538 ; GFX940-NEXT: s_nop 1
2539 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
2540 ; GFX940-NEXT: v_perm_b32 v0, v0, v10, s0
2541 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2543 ; GFX10-LABEL: v_maximum_v16f16:
2545 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; GFX10-NEXT: v_pk_max_f16 v16, v7, v15
2547 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15
2548 ; GFX10-NEXT: v_pk_max_f16 v18, v6, v14
2549 ; GFX10-NEXT: v_pk_max_f16 v19, v3, v11
2550 ; GFX10-NEXT: v_pk_max_f16 v20, v2, v10
2551 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v16
2552 ; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
2553 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2554 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v18
2555 ; GFX10-NEXT: v_pk_max_f16 v21, v0, v8
2556 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
2557 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14
2558 ; GFX10-NEXT: v_pk_max_f16 v17, v5, v13
2559 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 16, v21
2560 ; GFX10-NEXT: v_perm_b32 v7, v7, v16, 0x5040100
2561 ; GFX10-NEXT: v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
2562 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2563 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17
2564 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2565 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13
2566 ; GFX10-NEXT: v_perm_b32 v6, v6, v18, 0x5040100
2567 ; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
2568 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2569 ; GFX10-NEXT: v_pk_max_f16 v17, v4, v12
2570 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2571 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2572 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17
2573 ; GFX10-NEXT: v_perm_b32 v5, v5, v15, 0x5040100
2574 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
2575 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11
2576 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v19
2577 ; GFX10-NEXT: v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
2578 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2579 ; GFX10-NEXT: v_pk_max_f16 v11, v1, v9
2580 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2581 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2582 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v11
2583 ; GFX10-NEXT: v_perm_b32 v3, v3, v19, 0x5040100
2584 ; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
2585 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2586 ; GFX10-NEXT: v_lshrrev_b32_e32 v20, 16, v20
2587 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
2588 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2589 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
2590 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8
2591 ; GFX10-NEXT: v_perm_b32 v1, v1, v11, 0x5040100
2592 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
2593 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2594 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
2595 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2596 ; GFX10-NEXT: v_perm_b32 v0, v0, v9, 0x5040100
2597 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2598 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2599 ; GFX10-NEXT: v_perm_b32 v2, v2, v17, 0x5040100
2600 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
2601 ; GFX10-NEXT: v_perm_b32 v4, v4, v13, 0x5040100
2602 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2604 ; GFX11-LABEL: v_maximum_v16f16:
2606 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2607 ; GFX11-NEXT: v_pk_max_f16 v16, v7, v15
2608 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v15
2609 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v7
2610 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15
2611 ; GFX11-NEXT: v_pk_max_f16 v15, v6, v14
2612 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16
2613 ; GFX11-NEXT: v_pk_max_f16 v20, v4, v12
2614 ; GFX11-NEXT: v_pk_max_f16 v22, v2, v10
2615 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
2616 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2617 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v14
2618 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v6
2619 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v8
2620 ; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v0
2621 ; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
2622 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14
2623 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v15
2624 ; GFX11-NEXT: v_pk_max_f16 v14, v5, v13
2625 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2626 ; GFX11-NEXT: v_perm_b32 v7, v16, v7, 0x5040100
2627 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2628 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2629 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v13
2630 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v5
2631 ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
2632 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13
2633 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v14
2634 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
2635 ; GFX11-NEXT: v_perm_b32 v6, v15, v6, 0x5040100
2636 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2637 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2638 ; GFX11-NEXT: v_pk_max_f16 v17, v3, v11
2639 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v20
2640 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
2641 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2642 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v11
2643 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v17
2644 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2645 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
2646 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
2647 ; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v3
2648 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11
2649 ; GFX11-NEXT: v_perm_b32 v5, v13, v5, 0x5040100
2650 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2651 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2652 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v20, v19
2653 ; GFX11-NEXT: v_pk_max_f16 v19, v1, v9
2654 ; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v22
2655 ; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
2656 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2657 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v10
2658 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2659 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2660 ; GFX11-NEXT: v_perm_b32 v3, v11, v3, 0x5040100
2661 ; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
2662 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2663 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v9
2664 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2665 ; GFX11-NEXT: v_pk_max_f16 v22, v0, v8
2666 ; GFX11-NEXT: v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
2667 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v19
2668 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2669 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2670 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v22
2671 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2672 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
2673 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8
2674 ; GFX11-NEXT: v_perm_b32 v1, v1, v21, 0x5040100
2675 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
2676 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v24, v23
2677 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
2678 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2679 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2680 ; GFX11-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
2681 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2682 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2683 ; GFX11-NEXT: v_perm_b32 v2, v2, v17, 0x5040100
2684 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
2685 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2686 ; GFX11-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
2687 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2689 ; GFX12-LABEL: v_maximum_v16f16:
2691 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2692 ; GFX12-NEXT: s_wait_expcnt 0x0
2693 ; GFX12-NEXT: s_wait_samplecnt 0x0
2694 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2695 ; GFX12-NEXT: s_wait_kmcnt 0x0
2696 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v8
2697 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v9
2698 ; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v10
2699 ; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v11
2700 ; GFX12-NEXT: v_pk_maximum_f16 v4, v4, v12
2701 ; GFX12-NEXT: v_pk_maximum_f16 v5, v5, v13
2702 ; GFX12-NEXT: v_pk_maximum_f16 v6, v6, v14
2703 ; GFX12-NEXT: v_pk_maximum_f16 v7, v7, v15
2704 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2705 %op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1)
2708 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: