1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; Scalar half llvm.maximum with no fast-math flags; the checks below read both
; operands from VGPRs v0 and v1.
;  - GFX7 checks expect the operands to go through f16/f32 conversions, then
;    v_max_f32 paired with an ordered compare (v_cmp_o_f32) whose failure
;    selects the constant 0x7fc00000.
;  - GFX8 through GFX11 checks expect v_max_f16 plus v_cmp_o_f16, selecting
;    0x7e00 when the compare fails (GFX950 also expects an s_nop 1 before the
;    select).
;  - GFX12 checks expect a single v_maximum_f16.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
10 define half @v_maximum_f16(half %src0, half %src1) {
11 ; GFX7-LABEL: v_maximum_f16:
13 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
15 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
16 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
17 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
18 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
19 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
20 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
21 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
22 ; GFX7-NEXT: s_setpc_b64 s[30:31]
24 ; GFX8-LABEL: v_maximum_f16:
26 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
28 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
29 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
30 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
31 ; GFX8-NEXT: s_setpc_b64 s[30:31]
33 ; GFX900-LABEL: v_maximum_f16:
35 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
37 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
38 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
39 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
40 ; GFX900-NEXT: s_setpc_b64 s[30:31]
42 ; GFX950-LABEL: v_maximum_f16:
44 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
46 ; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
47 ; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
48 ; GFX950-NEXT: s_nop 1
49 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
50 ; GFX950-NEXT: s_setpc_b64 s[30:31]
52 ; GFX10-LABEL: v_maximum_f16:
54 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
56 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
57 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
58 ; GFX10-NEXT: s_setpc_b64 s[30:31]
60 ; GFX11-LABEL: v_maximum_f16:
62 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
64 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
65 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
66 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
67 ; GFX11-NEXT: s_setpc_b64 s[30:31]
69 ; GFX12-LABEL: v_maximum_f16:
71 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
72 ; GFX12-NEXT: s_wait_expcnt 0x0
73 ; GFX12-NEXT: s_wait_samplecnt 0x0
74 ; GFX12-NEXT: s_wait_bvhcnt 0x0
75 ; GFX12-NEXT: s_wait_kmcnt 0x0
76 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
77 ; GFX12-NEXT: s_setpc_b64 s[30:31]
78 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call carries the nnan flag.
; The GFX7 through GFX11 checks contain only the max instruction (v_max_f32
; after the f16/f32 conversions on GFX7, v_max_f16 elsewhere) with no ordered
; compare or select. The GFX12 checks expect v_maximum_f16.
; GFX900 and GFX950 share the common GFX9 prefix for this function.
82 define half @v_maximum_f16__nnan(half %src0, half %src1) {
83 ; GFX7-LABEL: v_maximum_f16__nnan:
85 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
87 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
88 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
89 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
90 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
91 ; GFX7-NEXT: s_setpc_b64 s[30:31]
93 ; GFX8-LABEL: v_maximum_f16__nnan:
95 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
97 ; GFX8-NEXT: s_setpc_b64 s[30:31]
99 ; GFX9-LABEL: v_maximum_f16__nnan:
101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
105 ; GFX10-LABEL: v_maximum_f16__nnan:
107 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
109 ; GFX10-NEXT: s_setpc_b64 s[30:31]
111 ; GFX11-LABEL: v_maximum_f16__nnan:
113 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
115 ; GFX11-NEXT: s_setpc_b64 s[30:31]
117 ; GFX12-LABEL: v_maximum_f16__nnan:
119 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
120 ; GFX12-NEXT: s_wait_expcnt 0x0
121 ; GFX12-NEXT: s_wait_samplecnt 0x0
122 ; GFX12-NEXT: s_wait_bvhcnt 0x0
123 ; GFX12-NEXT: s_wait_kmcnt 0x0
124 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
125 ; GFX12-NEXT: s_setpc_b64 s[30:31]
126 %op = call nnan half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call carries only the nsz flag.
; The expected sequences still contain the ordered compare and the select of
; 0x7fc00000 (GFX7) or 0x7e00 (GFX8 through GFX11); the GFX12 checks expect a
; single v_maximum_f16.
130 define half @v_maximum_f16__nsz(half %src0, half %src1) {
131 ; GFX7-LABEL: v_maximum_f16__nsz:
133 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
135 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
136 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
137 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
138 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
139 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
140 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
141 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
142 ; GFX7-NEXT: s_setpc_b64 s[30:31]
144 ; GFX8-LABEL: v_maximum_f16__nsz:
146 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
148 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
149 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
150 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
151 ; GFX8-NEXT: s_setpc_b64 s[30:31]
153 ; GFX900-LABEL: v_maximum_f16__nsz:
155 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
157 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
158 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
159 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
160 ; GFX900-NEXT: s_setpc_b64 s[30:31]
162 ; GFX950-LABEL: v_maximum_f16__nsz:
164 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
166 ; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
167 ; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
168 ; GFX950-NEXT: s_nop 1
169 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
170 ; GFX950-NEXT: s_setpc_b64 s[30:31]
172 ; GFX10-LABEL: v_maximum_f16__nsz:
174 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
176 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
177 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
178 ; GFX10-NEXT: s_setpc_b64 s[30:31]
180 ; GFX11-LABEL: v_maximum_f16__nsz:
182 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
184 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
185 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
186 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
187 ; GFX11-NEXT: s_setpc_b64 s[30:31]
189 ; GFX12-LABEL: v_maximum_f16__nsz:
191 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
192 ; GFX12-NEXT: s_wait_expcnt 0x0
193 ; GFX12-NEXT: s_wait_samplecnt 0x0
194 ; GFX12-NEXT: s_wait_bvhcnt 0x0
195 ; GFX12-NEXT: s_wait_kmcnt 0x0
196 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
197 ; GFX12-NEXT: s_setpc_b64 s[30:31]
198 %op = call nsz half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum where the call carries both nnan and nsz.
; The GFX7 through GFX11 checks contain only the max instruction (v_max_f32
; after the f16/f32 conversions on GFX7, v_max_f16 elsewhere) with no ordered
; compare or select. The GFX12 checks expect v_maximum_f16.
; GFX900 and GFX950 share the common GFX9 prefix for this function.
202 define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
203 ; GFX7-LABEL: v_maximum_f16__nnan_nsz:
205 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
207 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
208 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
209 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
210 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
211 ; GFX7-NEXT: s_setpc_b64 s[30:31]
213 ; GFX8-LABEL: v_maximum_f16__nnan_nsz:
215 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
217 ; GFX8-NEXT: s_setpc_b64 s[30:31]
219 ; GFX9-LABEL: v_maximum_f16__nnan_nsz:
221 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
223 ; GFX9-NEXT: s_setpc_b64 s[30:31]
225 ; GFX10-LABEL: v_maximum_f16__nnan_nsz:
227 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
229 ; GFX10-NEXT: s_setpc_b64 s[30:31]
231 ; GFX11-LABEL: v_maximum_f16__nnan_nsz:
233 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
235 ; GFX11-NEXT: s_setpc_b64 s[30:31]
237 ; GFX12-LABEL: v_maximum_f16__nnan_nsz:
239 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
240 ; GFX12-NEXT: s_wait_expcnt 0x0
241 ; GFX12-NEXT: s_wait_samplecnt 0x0
242 ; GFX12-NEXT: s_wait_bvhcnt 0x0
243 ; GFX12-NEXT: s_wait_kmcnt 0x0
244 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
245 ; GFX12-NEXT: s_setpc_b64 s[30:31]
246 %op = call nnan nsz half @llvm.maximum.f16(half %src0, half %src1)
; The first llvm.maximum.f16 operand is produced by `fadd nnan %arg0, 1.0`;
; the maximum call itself carries no fast-math flags.
; The GFX7 through GFX11 checks expect the add followed by the max, ordered
; compare and select sequence (0x7fc00000 on GFX7, 0x7e00 elsewhere), so the
; compare/select is still present here. The GFX12 checks expect v_add_f16
; followed by v_maximum_f16.
250 define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
251 ; GFX7-LABEL: v_maximum_f16__nnan_src0:
253 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
255 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
256 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
257 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
258 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
259 ; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0
260 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
261 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
262 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
263 ; GFX7-NEXT: s_setpc_b64 s[30:31]
265 ; GFX8-LABEL: v_maximum_f16__nnan_src0:
267 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268 ; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
269 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
270 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
271 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
272 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
273 ; GFX8-NEXT: s_setpc_b64 s[30:31]
275 ; GFX900-LABEL: v_maximum_f16__nnan_src0:
277 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
279 ; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
280 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
281 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
282 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
283 ; GFX900-NEXT: s_setpc_b64 s[30:31]
285 ; GFX950-LABEL: v_maximum_f16__nnan_src0:
287 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
289 ; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
290 ; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
291 ; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
292 ; GFX950-NEXT: s_nop 1
293 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
294 ; GFX950-NEXT: s_setpc_b64 s[30:31]
296 ; GFX10-LABEL: v_maximum_f16__nnan_src0:
298 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX10-NEXT: v_add_f16_e32 v0, 1.0, v0
300 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
301 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
302 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
303 ; GFX10-NEXT: s_setpc_b64 s[30:31]
305 ; GFX11-LABEL: v_maximum_f16__nnan_src0:
307 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
309 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
310 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
311 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
312 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
313 ; GFX11-NEXT: s_setpc_b64 s[30:31]
315 ; GFX12-LABEL: v_maximum_f16__nnan_src0:
317 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
318 ; GFX12-NEXT: s_wait_expcnt 0x0
319 ; GFX12-NEXT: s_wait_samplecnt 0x0
320 ; GFX12-NEXT: s_wait_bvhcnt 0x0
321 ; GFX12-NEXT: s_wait_kmcnt 0x0
322 ; GFX12-NEXT: v_add_f16_e32 v0, 1.0, v0
323 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
324 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
325 ; GFX12-NEXT: s_setpc_b64 s[30:31]
326 %src0 = fadd nnan half %arg0, 1.0
327 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; The second llvm.maximum.f16 operand is produced by `fadd nnan %arg1, 1.0`;
; the maximum call itself carries no fast-math flags.
; The GFX7 through GFX11 checks expect the add (applied to v1) followed by the
; max, ordered compare and select sequence (0x7fc00000 on GFX7, 0x7e00
; elsewhere), so the compare/select is still present here. The GFX12 checks
; expect v_add_f16 followed by v_maximum_f16.
331 define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
332 ; GFX7-LABEL: v_maximum_f16__nnan_src1:
334 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
336 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
337 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
338 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
339 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
340 ; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1
341 ; GFX7-NEXT: v_max_f32_e32 v3, v0, v1
342 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
343 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
344 ; GFX7-NEXT: s_setpc_b64 s[30:31]
346 ; GFX8-LABEL: v_maximum_f16__nnan_src1:
348 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; GFX8-NEXT: v_add_f16_e32 v1, 1.0, v1
350 ; GFX8-NEXT: v_max_f16_e32 v2, v0, v1
351 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
352 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
353 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
354 ; GFX8-NEXT: s_setpc_b64 s[30:31]
356 ; GFX900-LABEL: v_maximum_f16__nnan_src1:
358 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359 ; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
360 ; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
361 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
362 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
363 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
364 ; GFX900-NEXT: s_setpc_b64 s[30:31]
366 ; GFX950-LABEL: v_maximum_f16__nnan_src1:
368 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
370 ; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
371 ; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
372 ; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
373 ; GFX950-NEXT: s_nop 1
374 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
375 ; GFX950-NEXT: s_setpc_b64 s[30:31]
377 ; GFX10-LABEL: v_maximum_f16__nnan_src1:
379 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380 ; GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1
381 ; GFX10-NEXT: v_max_f16_e32 v2, v0, v1
382 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
383 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
384 ; GFX10-NEXT: s_setpc_b64 s[30:31]
386 ; GFX11-LABEL: v_maximum_f16__nnan_src1:
388 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
390 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
391 ; GFX11-NEXT: v_max_f16_e32 v2, v0, v1
392 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
393 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
394 ; GFX11-NEXT: s_setpc_b64 s[30:31]
396 ; GFX12-LABEL: v_maximum_f16__nnan_src1:
398 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
399 ; GFX12-NEXT: s_wait_expcnt 0x0
400 ; GFX12-NEXT: s_wait_samplecnt 0x0
401 ; GFX12-NEXT: s_wait_bvhcnt 0x0
402 ; GFX12-NEXT: s_wait_kmcnt 0x0
403 ; GFX12-NEXT: v_add_f16_e32 v1, 1.0, v1
404 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
405 ; GFX12-NEXT: v_maximum_f16 v0, v0, v1
406 ; GFX12-NEXT: s_setpc_b64 s[30:31]
407 %src1 = fadd nnan half %arg1, 1.0
408 %op = call half @llvm.maximum.f16(half %src0, half %src1)
; Scalar half llvm.maximum with both operands marked inreg. The checks read
; them from s16/s17 (GFX7, GFX8, GFX900, GFX10) or s0/s1 (GFX950, GFX11,
; GFX12). The result is bitcast to i16, zero-extended to i32 and consumed by
; an inline asm whose operand uses the "s" constraint.
;  - GFX7 through GFX11 checks compute the maximum with VALU instructions
;    (max + ordered compare + select), and the asm text shows v0.
;  - GFX12 checks expect s_maximum_f16 followed by s_and_b32 with 0xffff, and
;    the asm text shows s0.
; NOTE(review): the asm operand prints as a VGPR on GFX7 through GFX11 even
; though the constraint string is "s" - confirm this is the intended output.
412 define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
413 ; GFX7-LABEL: s_maximum_f16:
415 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s17
417 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
418 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
419 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
420 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
421 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v0
422 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0
423 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
424 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
425 ; GFX7-NEXT: ;;#ASMSTART
426 ; GFX7-NEXT: ; use v0
427 ; GFX7-NEXT: ;;#ASMEND
428 ; GFX7-NEXT: s_setpc_b64 s[30:31]
430 ; GFX8-LABEL: s_maximum_f16:
432 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX8-NEXT: v_mov_b32_e32 v0, s17
434 ; GFX8-NEXT: v_max_f16_e32 v1, s16, v0
435 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
436 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
437 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
438 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
439 ; GFX8-NEXT: ;;#ASMSTART
440 ; GFX8-NEXT: ; use v0
441 ; GFX8-NEXT: ;;#ASMEND
442 ; GFX8-NEXT: s_setpc_b64 s[30:31]
444 ; GFX900-LABEL: s_maximum_f16:
446 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX900-NEXT: v_mov_b32_e32 v0, s17
448 ; GFX900-NEXT: v_max_f16_e32 v1, s16, v0
449 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
450 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
451 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
452 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
453 ; GFX900-NEXT: ;;#ASMSTART
454 ; GFX900-NEXT: ; use v0
455 ; GFX900-NEXT: ;;#ASMEND
456 ; GFX900-NEXT: s_setpc_b64 s[30:31]
458 ; GFX950-LABEL: s_maximum_f16:
460 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461 ; GFX950-NEXT: v_mov_b32_e32 v0, s1
462 ; GFX950-NEXT: v_max_f16_e32 v1, s0, v0
463 ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
464 ; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
465 ; GFX950-NEXT: s_nop 1
466 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
467 ; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
468 ; GFX950-NEXT: ;;#ASMSTART
469 ; GFX950-NEXT: ; use v0
470 ; GFX950-NEXT: ;;#ASMEND
471 ; GFX950-NEXT: s_setpc_b64 s[30:31]
473 ; GFX10-LABEL: s_maximum_f16:
475 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476 ; GFX10-NEXT: v_max_f16_e64 v0, s16, s17
477 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s16, s17
478 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
479 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
480 ; GFX10-NEXT: ;;#ASMSTART
481 ; GFX10-NEXT: ; use v0
482 ; GFX10-NEXT: ;;#ASMEND
483 ; GFX10-NEXT: s_setpc_b64 s[30:31]
485 ; GFX11-LABEL: s_maximum_f16:
487 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
489 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
490 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
491 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
492 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
493 ; GFX11-NEXT: ;;#ASMSTART
494 ; GFX11-NEXT: ; use v0
495 ; GFX11-NEXT: ;;#ASMEND
496 ; GFX11-NEXT: s_setpc_b64 s[30:31]
498 ; GFX12-LABEL: s_maximum_f16:
500 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
501 ; GFX12-NEXT: s_wait_expcnt 0x0
502 ; GFX12-NEXT: s_wait_samplecnt 0x0
503 ; GFX12-NEXT: s_wait_bvhcnt 0x0
504 ; GFX12-NEXT: s_wait_kmcnt 0x0
505 ; GFX12-NEXT: s_maximum_f16 s0, s0, s1
506 ; GFX12-NEXT: s_wait_alu 0xfffe
507 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
508 ; GFX12-NEXT: s_and_b32 s0, 0xffff, s0
509 ; GFX12-NEXT: ;;#ASMSTART
510 ; GFX12-NEXT: ; use s0
511 ; GFX12-NEXT: ;;#ASMEND
512 ; GFX12-NEXT: s_wait_alu 0xfffe
513 ; GFX12-NEXT: s_setpc_b64 s[30:31]
514 %op = call half @llvm.maximum.f16(half %src0, half %src1)
515 %cast = bitcast half %op to i16
516 %zext = zext i16 %cast to i32
517 call void asm sideeffect "; use $0", "s"(i32 %zext)
; <2 x half> llvm.maximum with no fast-math flags.
;  - GFX7 checks treat the four elements as separate registers v0..v3 and
;    expect two v_max_f32 / v_cmp_o_f32 / select groups using 0x7fc00000.
;  - GFX8 checks extract the high halves with 16-bit right shifts, expect a
;    v_max_f16 / v_cmp_o_f16 / select group per half, and recombine with
;    v_or_b32_sdwa.
;  - GFX900, GFX10 and GFX11 checks expect one v_pk_max_f16, then a separate
;    ordered compare and select of 0x7e00 for each half, recombined with
;    v_perm_b32 using selector 0x5040100.
;  - GFX950 checks expect a single v_pk_maximum3_f16 with v1 passed as both
;    the second and third source.
;  - GFX12 checks expect a single v_pk_maximum_f16.
521 define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
522 ; GFX7-LABEL: v_maximum_v2f16:
524 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
526 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
527 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
528 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
529 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
530 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
531 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
532 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
533 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
534 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
535 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
536 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
537 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
538 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
539 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
540 ; GFX7-NEXT: s_setpc_b64 s[30:31]
542 ; GFX8-LABEL: v_maximum_v2f16:
544 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
546 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
547 ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
548 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
549 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
550 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
551 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
552 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
553 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
554 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
555 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
556 ; GFX8-NEXT: s_setpc_b64 s[30:31]
558 ; GFX900-LABEL: v_maximum_v2f16:
560 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561 ; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
562 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
563 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
564 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
565 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
566 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
567 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
568 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
569 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
570 ; GFX900-NEXT: s_setpc_b64 s[30:31]
572 ; GFX950-LABEL: v_maximum_v2f16:
574 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1
576 ; GFX950-NEXT: s_setpc_b64 s[30:31]
578 ; GFX10-LABEL: v_maximum_v2f16:
580 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; GFX10-NEXT: v_pk_max_f16 v2, v0, v1
582 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
583 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
584 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
585 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
586 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
587 ; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
588 ; GFX10-NEXT: s_setpc_b64 s[30:31]
590 ; GFX11-LABEL: v_maximum_v2f16:
592 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593 ; GFX11-NEXT: v_pk_max_f16 v2, v0, v1
594 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
595 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
596 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
597 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
598 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
599 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
600 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
601 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
602 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
603 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
604 ; GFX11-NEXT: s_setpc_b64 s[30:31]
606 ; GFX12-LABEL: v_maximum_v2f16:
608 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
609 ; GFX12-NEXT: s_wait_expcnt 0x0
610 ; GFX12-NEXT: s_wait_samplecnt 0x0
611 ; GFX12-NEXT: s_wait_bvhcnt 0x0
612 ; GFX12-NEXT: s_wait_kmcnt 0x0
613 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
614 ; GFX12-NEXT: s_setpc_b64 s[30:31]
615 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries the nnan flag.
; No ordered compare or select appears in any of the expected sequences.
;  - GFX7 checks expect two v_max_f32 after the f16/f32 conversions.
;  - GFX8 checks expect v_max_f16_sdwa for the high halves, v_max_f16 for the
;    low halves, and a v_or_b32 to merge them.
;  - GFX900, GFX10 and GFX11 checks expect a single v_pk_max_f16.
;  - GFX950 checks expect v_pk_maximum3_f16 and GFX12 checks expect
;    v_pk_maximum_f16.
619 define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
620 ; GFX7-LABEL: v_maximum_v2f16__nnan:
622 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
624 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
625 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
626 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
627 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
628 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
629 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
630 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
631 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
632 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
633 ; GFX7-NEXT: s_setpc_b64 s[30:31]
635 ; GFX8-LABEL: v_maximum_v2f16__nnan:
637 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638 ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
639 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
640 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
641 ; GFX8-NEXT: s_setpc_b64 s[30:31]
643 ; GFX900-LABEL: v_maximum_v2f16__nnan:
645 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
647 ; GFX900-NEXT: s_setpc_b64 s[30:31]
649 ; GFX950-LABEL: v_maximum_v2f16__nnan:
651 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1
653 ; GFX950-NEXT: s_setpc_b64 s[30:31]
655 ; GFX10-LABEL: v_maximum_v2f16__nnan:
657 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
659 ; GFX10-NEXT: s_setpc_b64 s[30:31]
661 ; GFX11-LABEL: v_maximum_v2f16__nnan:
663 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
665 ; GFX11-NEXT: s_setpc_b64 s[30:31]
667 ; GFX12-LABEL: v_maximum_v2f16__nnan:
669 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
670 ; GFX12-NEXT: s_wait_expcnt 0x0
671 ; GFX12-NEXT: s_wait_samplecnt 0x0
672 ; GFX12-NEXT: s_wait_bvhcnt 0x0
673 ; GFX12-NEXT: s_wait_kmcnt 0x0
674 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
675 ; GFX12-NEXT: s_setpc_b64 s[30:31]
676 %op = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries only the nsz flag.
; The expected sequences still contain the per-element ordered compare and
; select on GFX7, GFX8, GFX900, GFX10 and GFX11 (0x7fc00000 on GFX7, 0x7e00
; elsewhere). The GFX950 checks expect a single v_pk_maximum3_f16 and the
; GFX12 checks a single v_pk_maximum_f16.
680 define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
681 ; GFX7-LABEL: v_maximum_v2f16__nsz:
683 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
685 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
686 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
687 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
688 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
689 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
690 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
691 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
692 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
693 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
694 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
695 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
696 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
697 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
698 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
699 ; GFX7-NEXT: s_setpc_b64 s[30:31]
701 ; GFX8-LABEL: v_maximum_v2f16__nsz:
703 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
705 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
706 ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
707 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
708 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
709 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
710 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
711 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
712 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
713 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
714 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
715 ; GFX8-NEXT: s_setpc_b64 s[30:31]
717 ; GFX900-LABEL: v_maximum_v2f16__nsz:
719 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720 ; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
721 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
722 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
723 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
724 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
725 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
726 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
727 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
728 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
729 ; GFX900-NEXT: s_setpc_b64 s[30:31]
731 ; GFX950-LABEL: v_maximum_v2f16__nsz:
733 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
734 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1
735 ; GFX950-NEXT: s_setpc_b64 s[30:31]
737 ; GFX10-LABEL: v_maximum_v2f16__nsz:
739 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740 ; GFX10-NEXT: v_pk_max_f16 v2, v0, v1
741 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
742 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
743 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
744 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
745 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
746 ; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
747 ; GFX10-NEXT: s_setpc_b64 s[30:31]
749 ; GFX11-LABEL: v_maximum_v2f16__nsz:
751 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752 ; GFX11-NEXT: v_pk_max_f16 v2, v0, v1
753 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
754 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
755 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
756 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
757 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
758 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
759 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
760 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
761 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
762 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
763 ; GFX11-NEXT: s_setpc_b64 s[30:31]
765 ; GFX12-LABEL: v_maximum_v2f16__nsz:
767 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
768 ; GFX12-NEXT: s_wait_expcnt 0x0
769 ; GFX12-NEXT: s_wait_samplecnt 0x0
770 ; GFX12-NEXT: s_wait_bvhcnt 0x0
771 ; GFX12-NEXT: s_wait_kmcnt 0x0
772 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
773 ; GFX12-NEXT: s_setpc_b64 s[30:31]
774 %op = call nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; <2 x half> llvm.maximum where the call carries both nnan and nsz.
; No ordered compare or select appears in any of the expected sequences.
;  - GFX7 checks expect two v_max_f32 after the f16/f32 conversions.
;  - GFX8 checks expect v_max_f16_sdwa for the high halves, v_max_f16 for the
;    low halves, and a v_or_b32 to merge them.
;  - GFX900, GFX10 and GFX11 checks expect a single v_pk_max_f16.
;  - GFX950 checks expect v_pk_maximum3_f16 and GFX12 checks expect
;    v_pk_maximum_f16.
778 define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) {
779 ; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
781 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
783 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
784 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
785 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
786 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
787 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
788 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
789 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
790 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
791 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
792 ; GFX7-NEXT: s_setpc_b64 s[30:31]
794 ; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
796 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797 ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
798 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
799 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
800 ; GFX8-NEXT: s_setpc_b64 s[30:31]
802 ; GFX900-LABEL: v_maximum_v2f16__nnan_nsz:
804 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
806 ; GFX900-NEXT: s_setpc_b64 s[30:31]
808 ; GFX950-LABEL: v_maximum_v2f16__nnan_nsz:
810 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1
812 ; GFX950-NEXT: s_setpc_b64 s[30:31]
814 ; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
816 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
818 ; GFX10-NEXT: s_setpc_b64 s[30:31]
820 ; GFX11-LABEL: v_maximum_v2f16__nnan_nsz:
822 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
824 ; GFX11-NEXT: s_setpc_b64 s[30:31]
826 ; GFX12-LABEL: v_maximum_v2f16__nnan_nsz:
828 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
829 ; GFX12-NEXT: s_wait_expcnt 0x0
830 ; GFX12-NEXT: s_wait_samplecnt 0x0
831 ; GFX12-NEXT: s_wait_bvhcnt 0x0
832 ; GFX12-NEXT: s_wait_kmcnt 0x0
833 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
834 ; GFX12-NEXT: s_setpc_b64 s[30:31]
835 %op = call nnan nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
; s_maximum_v2f16: llvm.maximum.v2f16 with both <2 x half> operands passed inreg.
; The result is bitcast to i32 and consumed by a side-effecting inline asm.
; Shape of the checked code per -mcpu (see the RUN lines):
;   gfx703  - inputs go through v_cvt_f16_f32 / v_cvt_f32_f16, then per lane
;             v_max_f32 + v_cmp_o_f32 + v_cndmask picking 0x7fc00000 (f32 quiet NaN)
;             when the compare is unordered; lanes are repacked with shift/or.
;   gfx803  - per lane v_max_f16 + v_cmp_o_f16 + v_cndmask picking 0x7e00 (f16 quiet NaN).
;   gfx900, gfx1030, gfx1100 - one v_pk_max_f16, then a per-lane ordered
;             compare/select of 0x7e00 and a repack with v_lshl_or_b32.
;   gfx950  - a single v_pk_maximum3_f16 with the second source repeated.
;   gfx1200 - a single v_pk_maximum_f16.
; NOTE(review): the asm operand uses an "s" constraint, yet every target's checked
; output prints "; use v0" - confirm this is the intended expectation.
839 define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
840 ; GFX7-LABEL: s_maximum_v2f16:
842 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s19
844 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
845 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, s18
846 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, s16
847 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
848 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
849 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
850 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
851 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
852 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v0
853 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0
854 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
855 ; GFX7-NEXT: v_max_f32_e32 v1, v3, v2
856 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v2
857 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
858 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
859 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
860 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
861 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
862 ; GFX7-NEXT: ;;#ASMSTART
863 ; GFX7-NEXT: ; use v0
864 ; GFX7-NEXT: ;;#ASMEND
865 ; GFX7-NEXT: s_setpc_b64 s[30:31]
867 ; GFX8-LABEL: s_maximum_v2f16:
869 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870 ; GFX8-NEXT: s_lshr_b32 s4, s17, 16
871 ; GFX8-NEXT: s_lshr_b32 s5, s16, 16
872 ; GFX8-NEXT: v_mov_b32_e32 v0, s4
873 ; GFX8-NEXT: v_max_f16_e32 v1, s5, v0
874 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
875 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s5, v0
876 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
877 ; GFX8-NEXT: v_mov_b32_e32 v1, s17
878 ; GFX8-NEXT: v_max_f16_e32 v3, s16, v1
879 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v1
880 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
881 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
882 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
883 ; GFX8-NEXT: ;;#ASMSTART
884 ; GFX8-NEXT: ; use v0
885 ; GFX8-NEXT: ;;#ASMEND
886 ; GFX8-NEXT: s_setpc_b64 s[30:31]
888 ; GFX900-LABEL: s_maximum_v2f16:
890 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891 ; GFX900-NEXT: v_mov_b32_e32 v0, s17
892 ; GFX900-NEXT: v_mov_b32_e32 v1, s17
893 ; GFX900-NEXT: s_lshr_b32 s4, s17, 16
894 ; GFX900-NEXT: v_pk_max_f16 v1, s16, v1
895 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
896 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
897 ; GFX900-NEXT: s_lshr_b32 s5, s16, 16
898 ; GFX900-NEXT: v_mov_b32_e32 v3, s4
899 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
900 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
901 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
902 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
903 ; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
904 ; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
905 ; GFX900-NEXT: ;;#ASMSTART
906 ; GFX900-NEXT: ; use v0
907 ; GFX900-NEXT: ;;#ASMEND
908 ; GFX900-NEXT: s_setpc_b64 s[30:31]
910 ; GFX950-LABEL: s_maximum_v2f16:
912 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GFX950-NEXT: v_mov_b32_e32 v0, s0
914 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, s1, s1
915 ; GFX950-NEXT: s_nop 0
916 ; GFX950-NEXT: ;;#ASMSTART
917 ; GFX950-NEXT: ; use v0
918 ; GFX950-NEXT: ;;#ASMEND
919 ; GFX950-NEXT: s_setpc_b64 s[30:31]
921 ; GFX10-LABEL: s_maximum_v2f16:
923 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924 ; GFX10-NEXT: v_pk_max_f16 v0, s16, s17
925 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s16, s17
926 ; GFX10-NEXT: s_lshr_b32 s4, s17, 16
927 ; GFX10-NEXT: s_lshr_b32 s5, s16, 16
928 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
929 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
930 ; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s5, s4
931 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
932 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
933 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
934 ; GFX10-NEXT: ;;#ASMSTART
935 ; GFX10-NEXT: ; use v0
936 ; GFX10-NEXT: ;;#ASMEND
937 ; GFX10-NEXT: s_setpc_b64 s[30:31]
939 ; GFX11-LABEL: s_maximum_v2f16:
941 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942 ; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
943 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
944 ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
945 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
946 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
947 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
948 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
949 ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
950 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
951 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
952 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
953 ; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
954 ; GFX11-NEXT: ;;#ASMSTART
955 ; GFX11-NEXT: ; use v0
956 ; GFX11-NEXT: ;;#ASMEND
957 ; GFX11-NEXT: s_setpc_b64 s[30:31]
959 ; GFX12-LABEL: s_maximum_v2f16:
961 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
962 ; GFX12-NEXT: s_wait_expcnt 0x0
963 ; GFX12-NEXT: s_wait_samplecnt 0x0
964 ; GFX12-NEXT: s_wait_bvhcnt 0x0
965 ; GFX12-NEXT: s_wait_kmcnt 0x0
966 ; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1
967 ; GFX12-NEXT: ;;#ASMSTART
968 ; GFX12-NEXT: ; use v0
969 ; GFX12-NEXT: ;;#ASMEND
970 ; GFX12-NEXT: s_setpc_b64 s[30:31]
971 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
972 %cast = bitcast <2 x half> %op to i32
973 call void asm sideeffect "; use $0", "s"(i32 %cast)
; v_maximum_v3f16: llvm.maximum.v3f16 on two <3 x half> arguments, no fast-math flags.
; Shape of the checked code per -mcpu (see the RUN lines):
;   gfx703  - all six inputs go through v_cvt_f16_f32 / v_cvt_f32_f16, then each of
;             the three lanes is v_max_f32 + v_cmp_o_f32 + v_cndmask picking
;             0x7fc00000 (f32 quiet NaN) when the compare is unordered.
;   gfx803  - per lane v_max_f16 + v_cmp_o_f16 + v_cndmask picking 0x7e00
;             (f16 quiet NaN); the two low lanes are repacked with an SDWA or.
;   gfx900, gfx1030, gfx1100 - two v_pk_max_f16, then a per-lane ordered
;             compare/select of 0x7e00 and a v_perm_b32 repack of the low pair.
;   gfx950  - two v_pk_maximum3_f16, each with its second source repeated.
;   gfx1200 - two v_pk_maximum_f16.
977 define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
978 ; GFX7-LABEL: v_maximum_v3f16:
980 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
982 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
983 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
984 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
985 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
986 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
987 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
988 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
989 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
990 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
991 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
992 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
993 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
994 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
995 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
996 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
997 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
998 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
999 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1000 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1001 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1002 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1003 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1005 ; GFX8-LABEL: v_maximum_v3f16:
1007 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1009 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1010 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1011 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1012 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1013 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1014 ; GFX8-NEXT: v_max_f16_e32 v5, v1, v3
1015 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1016 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
1017 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1018 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1019 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1020 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1021 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1022 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1024 ; GFX900-LABEL: v_maximum_v3f16:
1026 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
1028 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
1029 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1030 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1031 ; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
1032 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1033 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1034 ; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1035 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1036 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1037 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1038 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
1039 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1041 ; GFX950-LABEL: v_maximum_v3f16:
1043 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1045 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1046 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1048 ; GFX10-LABEL: v_maximum_v3f16:
1050 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051 ; GFX10-NEXT: v_pk_max_f16 v4, v0, v2
1052 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1053 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4
1054 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1055 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1056 ; GFX10-NEXT: v_pk_max_f16 v2, v1, v3
1057 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1058 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1059 ; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
1060 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1061 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1063 ; GFX11-LABEL: v_maximum_v3f16:
1065 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066 ; GFX11-NEXT: v_pk_max_f16 v4, v0, v2
1067 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1068 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1069 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1070 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1071 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
1072 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1073 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1074 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1075 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1076 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1077 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1078 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1079 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1080 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1081 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX12-LABEL: v_maximum_v3f16:
1085 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1086 ; GFX12-NEXT: s_wait_expcnt 0x0
1087 ; GFX12-NEXT: s_wait_samplecnt 0x0
1088 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1089 ; GFX12-NEXT: s_wait_kmcnt 0x0
1090 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1091 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1092 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1093 %op = call <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; v_maximum_v3f16__nnan: llvm.maximum.v3f16 with the nnan flag on the call.
; In the checks below, gfx703/gfx803/gfx900/gfx1030/gfx1100 contain only max
; instructions (v_max_f32, v_max_f16 or v_pk_max_f16) with no v_cmp_o / v_cndmask
; NaN select. gfx950 still uses v_pk_maximum3_f16 and gfx1200 v_pk_maximum_f16.
1097 define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
1098 ; GFX7-LABEL: v_maximum_v3f16__nnan:
1100 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1102 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1103 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1104 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1105 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1106 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1107 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1108 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1109 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1110 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1111 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1112 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1113 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1114 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1115 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1116 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1118 ; GFX8-LABEL: v_maximum_v3f16__nnan:
1120 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121 ; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1122 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1123 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1124 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
1125 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1127 ; GFX900-LABEL: v_maximum_v3f16__nnan:
1129 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
1131 ; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
1132 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1134 ; GFX950-LABEL: v_maximum_v3f16__nnan:
1136 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1138 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1139 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1141 ; GFX10-LABEL: v_maximum_v3f16__nnan:
1143 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1145 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1146 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1148 ; GFX11-LABEL: v_maximum_v3f16__nnan:
1150 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1152 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1153 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1155 ; GFX12-LABEL: v_maximum_v3f16__nnan:
1157 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1158 ; GFX12-NEXT: s_wait_expcnt 0x0
1159 ; GFX12-NEXT: s_wait_samplecnt 0x0
1160 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1161 ; GFX12-NEXT: s_wait_kmcnt 0x0
1162 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1163 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1164 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1165 %op = call nnan <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; v_maximum_v3f16__nsz: llvm.maximum.v3f16 with only the nsz flag on the call.
; The checked sequences still carry the ordered compare (v_cmp_o) and v_cndmask
; select of the NaN constant (0x7fc00000 on gfx703, 0x7e00 on gfx803/gfx900/
; gfx1030/gfx1100); gfx950 uses v_pk_maximum3_f16 and gfx1200 v_pk_maximum_f16.
1169 define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
1170 ; GFX7-LABEL: v_maximum_v3f16__nsz:
1172 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1173 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1174 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1175 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1176 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1177 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1178 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1179 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1180 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1181 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1182 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1183 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1184 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1185 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1186 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1187 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1188 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1189 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1190 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1191 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1192 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1193 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1194 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1195 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1197 ; GFX8-LABEL: v_maximum_v3f16__nsz:
1199 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1201 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1202 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1203 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1204 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1205 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1206 ; GFX8-NEXT: v_max_f16_e32 v5, v1, v3
1207 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1208 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
1209 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1210 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1211 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1212 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1213 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1214 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1216 ; GFX900-LABEL: v_maximum_v3f16__nsz:
1218 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219 ; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
1220 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
1221 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1222 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1223 ; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
1224 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1225 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1226 ; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1227 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1228 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1229 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1230 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
1231 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1233 ; GFX950-LABEL: v_maximum_v3f16__nsz:
1235 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1237 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1238 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1240 ; GFX10-LABEL: v_maximum_v3f16__nsz:
1242 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243 ; GFX10-NEXT: v_pk_max_f16 v4, v0, v2
1244 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1245 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4
1246 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1247 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1248 ; GFX10-NEXT: v_pk_max_f16 v2, v1, v3
1249 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1250 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1251 ; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
1252 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1253 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1255 ; GFX11-LABEL: v_maximum_v3f16__nsz:
1257 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258 ; GFX11-NEXT: v_pk_max_f16 v4, v0, v2
1259 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1260 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1261 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1262 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1263 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
1264 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1265 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1266 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1267 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1268 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1269 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1270 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1271 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1272 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1273 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1275 ; GFX12-LABEL: v_maximum_v3f16__nsz:
1277 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1278 ; GFX12-NEXT: s_wait_expcnt 0x0
1279 ; GFX12-NEXT: s_wait_samplecnt 0x0
1280 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1281 ; GFX12-NEXT: s_wait_kmcnt 0x0
1282 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1283 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1284 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1285 %op = call nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; v_maximum_v3f16__nnan_nsz: llvm.maximum.v3f16 with both nnan and nsz on the call.
; In the checks below, gfx703/gfx803/gfx900/gfx1030/gfx1100 contain only max
; instructions (v_max_f32, v_max_f16 or v_pk_max_f16) with no v_cmp_o / v_cndmask
; NaN select. gfx950 uses v_pk_maximum3_f16 and gfx1200 v_pk_maximum_f16.
1289 define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) {
1290 ; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
1292 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1294 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1295 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1296 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1297 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1298 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1299 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1300 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1301 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1302 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1303 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1304 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1305 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1306 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1307 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1308 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
1312 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1314 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1315 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1316 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
1317 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1319 ; GFX900-LABEL: v_maximum_v3f16__nnan_nsz:
1321 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
1323 ; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
1324 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1326 ; GFX950-LABEL: v_maximum_v3f16__nnan_nsz:
1328 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1330 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1331 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1333 ; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
1335 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1337 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1338 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX11-LABEL: v_maximum_v3f16__nnan_nsz:
1342 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1344 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1345 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1347 ; GFX12-LABEL: v_maximum_v3f16__nnan_nsz:
1349 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1350 ; GFX12-NEXT: s_wait_expcnt 0x0
1351 ; GFX12-NEXT: s_wait_samplecnt 0x0
1352 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1353 ; GFX12-NEXT: s_wait_kmcnt 0x0
1354 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1355 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1356 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1357 %op = call nnan nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
; v_maximum_v4f16: llvm.maximum.v4f16 on two <4 x half> arguments, no fast-math flags.
; Shape of the checked code per -mcpu (see the RUN lines):
;   gfx703  - all eight inputs go through v_cvt_f16_f32 / v_cvt_f32_f16, then each
;             of the four lanes is v_max_f32 + v_cmp_o_f32 + v_cndmask picking
;             0x7fc00000 (f32 quiet NaN) when the compare is unordered.
;   gfx803  - per lane v_max_f16 + v_cmp_o_f16 + v_cndmask picking 0x7e00
;             (f16 quiet NaN); each pair is repacked with an SDWA or.
;   gfx900, gfx1030, gfx1100 - two v_pk_max_f16, then a per-lane ordered
;             compare/select of 0x7e00 and a v_perm_b32 repack of each pair.
;   gfx950  - two v_pk_maximum3_f16, each with its second source repeated.
;   gfx1200 - two v_pk_maximum_f16.
1361 define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
1362 ; GFX7-LABEL: v_maximum_v4f16:
1364 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1366 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1367 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1368 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1369 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1370 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1371 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1372 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1373 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1374 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1375 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1376 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1377 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1378 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1379 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1380 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1381 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1382 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1383 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1384 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1385 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1386 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1387 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1388 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1389 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1390 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1391 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1392 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1393 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1394 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1396 ; GFX8-LABEL: v_maximum_v4f16:
1398 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1400 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1401 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1402 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1403 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1404 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1405 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1406 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1407 ; GFX8-NEXT: v_max_f16_e32 v8, v6, v5
1408 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
1409 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc
1410 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v3
1411 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1412 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1413 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1414 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1415 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1416 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
1417 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1418 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1419 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1420 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1422 ; GFX900-LABEL: v_maximum_v4f16:
1424 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
1426 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
1427 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1428 ; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1429 ; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1430 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1431 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1432 ; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
1433 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1434 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1435 ; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1436 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1437 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1438 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1439 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
1440 ; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
1441 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1443 ; GFX950-LABEL: v_maximum_v4f16:
1445 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1447 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1448 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1450 ; GFX10-LABEL: v_maximum_v4f16:
1452 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453 ; GFX10-NEXT: v_pk_max_f16 v4, v1, v3
1454 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1455 ; GFX10-NEXT: v_pk_max_f16 v5, v0, v2
1456 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1457 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1458 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
1459 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1460 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1461 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1462 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1463 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1464 ; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100
1465 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1466 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
1467 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1469 ; GFX11-LABEL: v_maximum_v4f16:
1471 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1473 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1474 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1475 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
1476 ; GFX11-NEXT: v_pk_max_f16 v7, v0, v2
1477 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1478 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1479 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1480 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1481 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7
1482 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1483 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1484 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8
1485 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1486 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1487 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1488 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1489 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1490 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1491 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
1492 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1494 ; GFX12-LABEL: v_maximum_v4f16:
1496 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1497 ; GFX12-NEXT: s_wait_expcnt 0x0
1498 ; GFX12-NEXT: s_wait_samplecnt 0x0
1499 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1500 ; GFX12-NEXT: s_wait_kmcnt 0x0
1501 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1502 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1503 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1504 %op = call <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; v_maximum_v4f16__nnan: llvm.maximum.v4f16 with the nnan flag on the call.
; In the checks below, gfx703/gfx803/gfx900/gfx1030/gfx1100 contain only max
; instructions (v_max_f32, v_max_f16 or v_pk_max_f16) with no v_cmp_o / v_cndmask
; NaN select. gfx950 still uses v_pk_maximum3_f16 and gfx1200 v_pk_maximum_f16.
1508 define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
1509 ; GFX7-LABEL: v_maximum_v4f16__nnan:
1511 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1513 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1514 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1515 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1516 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1517 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1518 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1519 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1520 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1521 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1522 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1523 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1524 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1525 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1526 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1527 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1528 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1529 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1530 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1531 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1532 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1534 ; GFX8-LABEL: v_maximum_v4f16__nnan:
1536 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1538 ; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1539 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1540 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1541 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
1542 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1543 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1545 ; GFX900-LABEL: v_maximum_v4f16__nnan:
1547 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
1549 ; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
1550 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX950-LABEL: v_maximum_v4f16__nnan:
1554 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1556 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1557 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1559 ; GFX10-LABEL: v_maximum_v4f16__nnan:
1561 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1562 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1563 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1564 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1566 ; GFX11-LABEL: v_maximum_v4f16__nnan:
1568 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1569 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1570 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1571 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1573 ; GFX12-LABEL: v_maximum_v4f16__nnan:
1575 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1576 ; GFX12-NEXT: s_wait_expcnt 0x0
1577 ; GFX12-NEXT: s_wait_samplecnt 0x0
1578 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1579 ; GFX12-NEXT: s_wait_kmcnt 0x0
1580 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1581 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1582 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1583 %op = call nnan <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
1587 define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
1588 ; GFX7-LABEL: v_maximum_v4f16__nsz:
1590 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1592 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1593 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1594 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1595 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1596 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1597 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1598 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1599 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1600 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1601 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1602 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1603 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1604 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1605 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1606 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1607 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1608 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1609 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1610 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1611 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1612 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1613 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1614 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1615 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1616 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1617 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1618 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1619 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1620 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1622 ; GFX8-LABEL: v_maximum_v4f16__nsz:
1624 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
1626 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1
1627 ; GFX8-NEXT: v_max_f16_e32 v6, v5, v4
1628 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
1629 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
1630 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
1631 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1632 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1633 ; GFX8-NEXT: v_max_f16_e32 v8, v6, v5
1634 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
1635 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc
1636 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v3
1637 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1638 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
1639 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v2
1640 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1641 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
1642 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
1643 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1644 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
1645 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1646 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1648 ; GFX900-LABEL: v_maximum_v4f16__nsz:
1650 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651 ; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
1652 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
1653 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
1654 ; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
1655 ; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1656 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1657 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
1658 ; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
1659 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
1660 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
1661 ; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
1662 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1663 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
1664 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1665 ; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
1666 ; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
1667 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1669 ; GFX950-LABEL: v_maximum_v4f16__nsz:
1671 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1672 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1673 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1674 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1676 ; GFX10-LABEL: v_maximum_v4f16__nsz:
1678 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679 ; GFX10-NEXT: v_pk_max_f16 v4, v1, v3
1680 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1681 ; GFX10-NEXT: v_pk_max_f16 v5, v0, v2
1682 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1683 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1684 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
1685 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1686 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1687 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1688 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1689 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1690 ; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100
1691 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1692 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
1693 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1695 ; GFX11-LABEL: v_maximum_v4f16__nsz:
1697 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698 ; GFX11-NEXT: v_pk_max_f16 v4, v1, v3
1699 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
1700 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
1701 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
1702 ; GFX11-NEXT: v_pk_max_f16 v7, v0, v2
1703 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1704 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1705 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1706 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
1707 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7
1708 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
1709 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1710 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8
1711 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1712 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1713 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
1714 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1715 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1716 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1717 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
1718 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1720 ; GFX12-LABEL: v_maximum_v4f16__nsz:
1722 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1723 ; GFX12-NEXT: s_wait_expcnt 0x0
1724 ; GFX12-NEXT: s_wait_samplecnt 0x0
1725 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1726 ; GFX12-NEXT: s_wait_kmcnt 0x0
1727 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1728 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1729 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1730 %op = call nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; llvm.maximum on <4 x half> with both the nnan and nsz fast-math flags on the
; call. Every check block below expects a bare max and no v_cmp_o/v_cndmask
; pair. GFX7 round-trips each element through v_cvt_f16_f32/v_cvt_f32_f16 and
; uses v_max_f32. GFX8 uses v_max_f16 (SDWA form for the high halves) and ORs
; the halves together. GFX900, GFX10 and GFX11 use v_pk_max_f16. GFX950 uses
; v_pk_maximum3_f16 with the second source repeated as the third operand, and
; GFX12 uses v_pk_maximum_f16.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1734 define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) {
1735 ; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
1737 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1739 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1740 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1741 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1742 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1743 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1744 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1745 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1746 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1747 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1748 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1749 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1750 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1751 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1752 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1753 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1754 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1755 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1756 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1757 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1758 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1760 ; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
1762 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1763 ; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1764 ; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1765 ; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
1766 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
1767 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
1768 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1769 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1771 ; GFX900-LABEL: v_maximum_v4f16__nnan_nsz:
1773 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774 ; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
1775 ; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
1776 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1778 ; GFX950-LABEL: v_maximum_v4f16__nnan_nsz:
1780 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2
1782 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3
1783 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1785 ; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
1787 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788 ; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
1789 ; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
1790 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1792 ; GFX11-LABEL: v_maximum_v4f16__nnan_nsz:
1794 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
1796 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
1797 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1799 ; GFX12-LABEL: v_maximum_v4f16__nnan_nsz:
1801 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1802 ; GFX12-NEXT: s_wait_expcnt 0x0
1803 ; GFX12-NEXT: s_wait_samplecnt 0x0
1804 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1805 ; GFX12-NEXT: s_wait_kmcnt 0x0
1806 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
1807 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
1808 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1809 %op = call nnan nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
; llvm.maximum on <8 x half> with no fast-math flags on the call. In the GFX7,
; GFX8, GFX900, GFX10 and GFX11 check blocks every max is accompanied by a
; v_cmp_o on the same operands and a v_cndmask that picks between the max
; result and a constant (0x7fc00000 on GFX7, 0x7e00 on the others; presumably
; the f32/f16 quiet-NaN encodings, TODO confirm). GFX7 works on f32 values
; after a v_cvt_f16_f32/v_cvt_f32_f16 round trip; GFX8 handles the halves
; separately and re-packs with v_or_b32_sdwa; GFX900/GFX10/GFX11 use
; v_pk_max_f16 and re-pack with v_perm_b32. The GFX950 and GFX12 blocks expect
; only one v_pk_maximum3_f16 / v_pk_maximum_f16 per packed register pair, with
; no compare or select.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1813 define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
1814 ; GFX7-LABEL: v_maximum_v8f16:
1816 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817 ; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
1818 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1819 ; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
1820 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1821 ; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
1822 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1823 ; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
1824 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1825 ; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
1826 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1827 ; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
1828 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1829 ; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
1830 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1831 ; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
1832 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1833 ; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
1834 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1835 ; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
1836 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1837 ; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
1838 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1839 ; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
1840 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1841 ; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
1842 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1843 ; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
1844 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1845 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1846 ; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
1847 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1848 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1849 ; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
1850 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1851 ; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
1852 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1853 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1854 ; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
1855 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1856 ; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
1857 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1858 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1859 ; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
1860 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1861 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1862 ; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
1863 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1864 ; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1865 ; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
1866 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1867 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1868 ; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
1869 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1870 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1871 ; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
1872 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1873 ; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1874 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1876 ; GFX8-LABEL: v_maximum_v8f16:
1878 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v7
1880 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
1881 ; GFX8-NEXT: v_max_f16_e32 v10, v9, v8
1882 ; GFX8-NEXT: v_mov_b32_e32 v11, 0x7e00
1883 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v9, v8
1884 ; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v10, vcc
1885 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v6
1886 ; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2
1887 ; GFX8-NEXT: v_max_f16_e32 v12, v10, v9
1888 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v10, v9
1889 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc
1890 ; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5
1891 ; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v1
1892 ; GFX8-NEXT: v_max_f16_e32 v13, v12, v10
1893 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v12, v10
1894 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v11, v13, vcc
1895 ; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v4
1896 ; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v0
1897 ; GFX8-NEXT: v_max_f16_e32 v14, v13, v12
1898 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v13, v12
1899 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v14, vcc
1900 ; GFX8-NEXT: v_max_f16_e32 v13, v3, v7
1901 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
1902 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v13, vcc
1903 ; GFX8-NEXT: v_max_f16_e32 v7, v2, v6
1904 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
1905 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v7, vcc
1906 ; GFX8-NEXT: v_max_f16_e32 v6, v1, v5
1907 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
1908 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v11, v6, vcc
1909 ; GFX8-NEXT: v_max_f16_e32 v5, v0, v4
1910 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
1911 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v11, v5, vcc
1912 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v12
1913 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1914 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v10
1915 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1916 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v9
1917 ; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1918 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v8
1919 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1920 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1922 ; GFX900-LABEL: v_maximum_v8f16:
1924 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925 ; GFX900-NEXT: v_pk_max_f16 v8, v3, v7
1926 ; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00
1927 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
1928 ; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
1929 ; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8
1930 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1931 ; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
1932 ; GFX900-NEXT: v_pk_max_f16 v7, v2, v6
1933 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
1934 ; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
1935 ; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7
1936 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1937 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
1938 ; GFX900-NEXT: v_pk_max_f16 v6, v1, v5
1939 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
1940 ; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
1941 ; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6
1942 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1943 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
1944 ; GFX900-NEXT: v_pk_max_f16 v5, v0, v4
1945 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
1946 ; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
1947 ; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5
1948 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1949 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
1950 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1951 ; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4
1952 ; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4
1953 ; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4
1954 ; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4
1955 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1957 ; GFX950-LABEL: v_maximum_v8f16:
1959 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4
1961 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5
1962 ; GFX950-NEXT: v_pk_maximum3_f16 v2, v2, v6, v6
1963 ; GFX950-NEXT: v_pk_maximum3_f16 v3, v3, v7, v7
1964 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1966 ; GFX10-LABEL: v_maximum_v8f16:
1968 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969 ; GFX10-NEXT: v_pk_max_f16 v8, v3, v7
1970 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
1971 ; GFX10-NEXT: v_pk_max_f16 v9, v2, v6
1972 ; GFX10-NEXT: v_pk_max_f16 v12, v1, v5
1973 ; GFX10-NEXT: v_pk_max_f16 v13, v0, v4
1974 ; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo
1975 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6
1976 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v9
1977 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v8
1978 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
1979 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1980 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
1981 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
1982 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v13
1983 ; GFX10-NEXT: v_perm_b32 v2, v2, v9, 0x5040100
1984 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo
1985 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4
1986 ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v12
1987 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo
1988 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1989 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
1990 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1991 ; GFX10-NEXT: v_perm_b32 v0, v0, v13, 0x5040100
1992 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
1993 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1994 ; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100
1995 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
1996 ; GFX10-NEXT: v_perm_b32 v3, v3, v10, 0x5040100
1997 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1999 ; GFX11-LABEL: v_maximum_v8f16:
2001 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002 ; GFX11-NEXT: v_pk_max_f16 v8, v3, v7
2003 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
2004 ; GFX11-NEXT: v_pk_max_f16 v10, v2, v6
2005 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v6
2006 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v2
2007 ; GFX11-NEXT: v_pk_max_f16 v14, v1, v5
2008 ; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
2009 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6
2010 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v10
2011 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2012 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3
2013 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v8
2014 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
2015 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v12, v11
2016 ; GFX11-NEXT: v_pk_max_f16 v11, v0, v4
2017 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v4
2018 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
2019 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
2020 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v0
2021 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v5
2022 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2023 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v11
2024 ; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
2025 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4
2026 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v14
2027 ; GFX11-NEXT: v_perm_b32 v2, v6, v2, 0x5040100
2028 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
2029 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v13, v12
2030 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
2031 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5
2032 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2033 ; GFX11-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
2034 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
2035 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7
2036 ; GFX11-NEXT: v_perm_b32 v1, v1, v10, 0x5040100
2037 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
2038 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2039 ; GFX11-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
2040 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2042 ; GFX12-LABEL: v_maximum_v8f16:
2044 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2045 ; GFX12-NEXT: s_wait_expcnt 0x0
2046 ; GFX12-NEXT: s_wait_samplecnt 0x0
2047 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2048 ; GFX12-NEXT: s_wait_kmcnt 0x0
2049 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4
2050 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
2051 ; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v6
2052 ; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v7
2053 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2054 %op = call <8 x half> @llvm.maximum.v8f16(<8 x half> %src0, <8 x half> %src1)
2058 define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
2059 ; GFX7-LABEL: v_maximum_v16f16:
2061 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
2063 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
2064 ; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
2065 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
2066 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2067 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
2068 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
2069 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
2070 ; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
2071 ; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v0, v16
2072 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
2073 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v22
2074 ; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
2075 ; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
2076 ; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
2077 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2078 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17
2079 ; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
2080 ; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
2081 ; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v6, v16
2082 ; GFX7-NEXT: v_max_f32_e32 v6, v6, v16
2083 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v23
2084 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
2085 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2086 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
2087 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2088 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
2089 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
2090 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
2091 ; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v7, v16
2092 ; GFX7-NEXT: v_max_f32_e32 v7, v7, v16
2093 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v24
2094 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
2095 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
2096 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v18
2097 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2098 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
2099 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
2100 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2101 ; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v16
2102 ; GFX7-NEXT: v_max_f32_e32 v8, v8, v16
2103 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v25
2104 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v17
2105 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v17
2106 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v19
2107 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2108 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
2109 ; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
2110 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2111 ; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v16
2112 ; GFX7-NEXT: v_max_f32_e32 v9, v9, v16
2113 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v26
2114 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v17
2115 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v17
2116 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20
2117 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2118 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
2119 ; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
2120 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2121 ; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v16
2122 ; GFX7-NEXT: v_max_f32_e32 v10, v10, v16
2123 ; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
2124 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v17
2125 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v17
2126 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v21
2127 ; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v28
2128 ; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
2129 ; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v29
2130 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2131 ; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
2132 ; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v30
2133 ; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
2134 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v17
2135 ; GFX7-NEXT: v_max_f32_e32 v5, v5, v17
2136 ; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v27
2137 ; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
2138 ; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
2139 ; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
2140 ; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
2141 ; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
2142 ; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
2143 ; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
2144 ; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
2145 ; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
2146 ; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v17
2147 ; GFX7-NEXT: v_max_f32_e32 v11, v11, v17
2148 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
2149 ; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
2150 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
2151 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v20
2152 ; GFX7-NEXT: v_max_f32_e32 v12, v12, v20
2153 ; GFX7-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
2154 ; GFX7-NEXT: v_max_f32_e32 v20, v13, v19
2155 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v19
2156 ; GFX7-NEXT: v_cndmask_b32_e32 v13, v17, v20, vcc
2157 ; GFX7-NEXT: v_max_f32_e32 v19, v14, v18
2158 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v18
2159 ; GFX7-NEXT: v_cndmask_b32_e32 v14, v17, v19, vcc
2160 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[12:13]
2161 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
2162 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
2163 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
2164 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
2165 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[14:15]
2166 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[16:17]
2167 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
2168 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
2169 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
2170 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
2171 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2172 ; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
2173 ; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
2174 ; GFX7-NEXT: v_max_f32_e32 v18, v15, v16
2175 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
2176 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
2177 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2179 ; GFX8-LABEL: v_maximum_v16f16:
2181 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182 ; GFX8-NEXT: v_lshrrev_b32_e32 v16, 16, v15
2183 ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v7
2184 ; GFX8-NEXT: v_max_f16_e32 v18, v17, v16
2185 ; GFX8-NEXT: v_mov_b32_e32 v19, 0x7e00
2186 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v17, v16
2187 ; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v18, vcc
2188 ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14
2189 ; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v6
2190 ; GFX8-NEXT: v_max_f16_e32 v20, v18, v17
2191 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v18, v17
2192 ; GFX8-NEXT: v_cndmask_b32_e32 v17, v19, v20, vcc
2193 ; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v13
2194 ; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v5
2195 ; GFX8-NEXT: v_max_f16_e32 v21, v20, v18
2196 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v20, v18
2197 ; GFX8-NEXT: v_cndmask_b32_e32 v18, v19, v21, vcc
2198 ; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v12
2199 ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v4
2200 ; GFX8-NEXT: v_max_f16_e32 v22, v21, v20
2201 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v21, v20
2202 ; GFX8-NEXT: v_cndmask_b32_e32 v20, v19, v22, vcc
2203 ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v11
2204 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v3
2205 ; GFX8-NEXT: v_max_f16_e32 v23, v22, v21
2206 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v22, v21
2207 ; GFX8-NEXT: v_cndmask_b32_e32 v21, v19, v23, vcc
2208 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v10
2209 ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v2
2210 ; GFX8-NEXT: v_max_f16_e32 v24, v23, v22
2211 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v23, v22
2212 ; GFX8-NEXT: v_cndmask_b32_e32 v22, v19, v24, vcc
2213 ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v9
2214 ; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v1
2215 ; GFX8-NEXT: v_max_f16_e32 v25, v24, v23
2216 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v24, v23
2217 ; GFX8-NEXT: v_cndmask_b32_e32 v23, v19, v25, vcc
2218 ; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v8
2219 ; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v0
2220 ; GFX8-NEXT: v_max_f16_e32 v26, v25, v24
2221 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v25, v24
2222 ; GFX8-NEXT: v_cndmask_b32_e32 v24, v19, v26, vcc
2223 ; GFX8-NEXT: v_max_f16_e32 v25, v7, v15
2224 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
2225 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v25, vcc
2226 ; GFX8-NEXT: v_max_f16_e32 v15, v6, v14
2227 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
2228 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v19, v15, vcc
2229 ; GFX8-NEXT: v_max_f16_e32 v14, v5, v13
2230 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
2231 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v19, v14, vcc
2232 ; GFX8-NEXT: v_max_f16_e32 v13, v4, v12
2233 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
2234 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v19, v13, vcc
2235 ; GFX8-NEXT: v_max_f16_e32 v12, v3, v11
2236 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
2237 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v12, vcc
2238 ; GFX8-NEXT: v_max_f16_e32 v11, v2, v10
2239 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
2240 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v19, v11, vcc
2241 ; GFX8-NEXT: v_max_f16_e32 v10, v1, v9
2242 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
2243 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v19, v10, vcc
2244 ; GFX8-NEXT: v_max_f16_e32 v9, v0, v8
2245 ; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
2246 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v19, v9, vcc
2247 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v24
2248 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2249 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v23
2250 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2251 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v22
2252 ; GFX8-NEXT: v_or_b32_sdwa v2, v2, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2253 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v21
2254 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2255 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v20
2256 ; GFX8-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2257 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v18
2258 ; GFX8-NEXT: v_or_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2259 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v17
2260 ; GFX8-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2261 ; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v16
2262 ; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2263 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2265 ; GFX900-LABEL: v_maximum_v16f16:
2267 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2268 ; GFX900-NEXT: v_pk_max_f16 v16, v7, v15
2269 ; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00
2270 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
2271 ; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
2272 ; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16
2273 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2274 ; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
2275 ; GFX900-NEXT: v_pk_max_f16 v15, v6, v14
2276 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
2277 ; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
2278 ; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15
2279 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2280 ; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
2281 ; GFX900-NEXT: v_pk_max_f16 v14, v5, v13
2282 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
2283 ; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
2284 ; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14
2285 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2286 ; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
2287 ; GFX900-NEXT: v_pk_max_f16 v13, v4, v12
2288 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
2289 ; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
2290 ; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13
2291 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2292 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
2293 ; GFX900-NEXT: v_pk_max_f16 v12, v3, v11
2294 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
2295 ; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
2296 ; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2297 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2298 ; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
2299 ; GFX900-NEXT: v_pk_max_f16 v11, v2, v10
2300 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
2301 ; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
2302 ; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11
2303 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2304 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
2305 ; GFX900-NEXT: v_pk_max_f16 v10, v1, v9
2306 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
2307 ; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
2308 ; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10
2309 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2310 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
2311 ; GFX900-NEXT: v_pk_max_f16 v9, v0, v8
2312 ; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
2313 ; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
2314 ; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9
2315 ; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2316 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
2317 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2318 ; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4
2319 ; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4
2320 ; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4
2321 ; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4
2322 ; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4
2323 ; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4
2324 ; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4
2325 ; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4
2326 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2328 ; GFX950-LABEL: v_maximum_v16f16:
2330 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331 ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v8, v8
2332 ; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v9, v9
2333 ; GFX950-NEXT: v_pk_maximum3_f16 v2, v2, v10, v10
2334 ; GFX950-NEXT: v_pk_maximum3_f16 v3, v3, v11, v11
2335 ; GFX950-NEXT: v_pk_maximum3_f16 v4, v4, v12, v12
2336 ; GFX950-NEXT: v_pk_maximum3_f16 v5, v5, v13, v13
2337 ; GFX950-NEXT: v_pk_maximum3_f16 v6, v6, v14, v14
2338 ; GFX950-NEXT: v_pk_maximum3_f16 v7, v7, v15, v15
2339 ; GFX950-NEXT: s_setpc_b64 s[30:31]
2341 ; GFX10-LABEL: v_maximum_v16f16:
2343 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2344 ; GFX10-NEXT: v_pk_max_f16 v16, v7, v15
2345 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15
2346 ; GFX10-NEXT: v_pk_max_f16 v18, v6, v14
2347 ; GFX10-NEXT: v_pk_max_f16 v19, v3, v11
2348 ; GFX10-NEXT: v_pk_max_f16 v20, v2, v10
2349 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v16
2350 ; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
2351 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2352 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v18
2353 ; GFX10-NEXT: v_pk_max_f16 v21, v0, v8
2354 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
2355 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14
2356 ; GFX10-NEXT: v_pk_max_f16 v17, v5, v13
2357 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 16, v21
2358 ; GFX10-NEXT: v_perm_b32 v7, v7, v16, 0x5040100
2359 ; GFX10-NEXT: v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
2360 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2361 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17
2362 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2363 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13
2364 ; GFX10-NEXT: v_perm_b32 v6, v6, v18, 0x5040100
2365 ; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
2366 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2367 ; GFX10-NEXT: v_pk_max_f16 v17, v4, v12
2368 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2369 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2370 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17
2371 ; GFX10-NEXT: v_perm_b32 v5, v5, v15, 0x5040100
2372 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
2373 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11
2374 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v19
2375 ; GFX10-NEXT: v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
2376 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2377 ; GFX10-NEXT: v_pk_max_f16 v11, v1, v9
2378 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2379 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2380 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v11
2381 ; GFX10-NEXT: v_perm_b32 v3, v3, v19, 0x5040100
2382 ; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
2383 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2384 ; GFX10-NEXT: v_lshrrev_b32_e32 v20, 16, v20
2385 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
2386 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2387 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
2388 ; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8
2389 ; GFX10-NEXT: v_perm_b32 v1, v1, v11, 0x5040100
2390 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
2391 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2392 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
2393 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2394 ; GFX10-NEXT: v_perm_b32 v0, v0, v9, 0x5040100
2395 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2396 ; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2397 ; GFX10-NEXT: v_perm_b32 v2, v2, v17, 0x5040100
2398 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
2399 ; GFX10-NEXT: v_perm_b32 v4, v4, v13, 0x5040100
2400 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2402 ; GFX11-LABEL: v_maximum_v16f16:
2404 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405 ; GFX11-NEXT: v_pk_max_f16 v16, v7, v15
2406 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v15
2407 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v7
2408 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15
2409 ; GFX11-NEXT: v_pk_max_f16 v15, v6, v14
2410 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16
2411 ; GFX11-NEXT: v_pk_max_f16 v20, v4, v12
2412 ; GFX11-NEXT: v_pk_max_f16 v22, v2, v10
2413 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
2414 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2415 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v14
2416 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v6
2417 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v8
2418 ; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v0
2419 ; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
2420 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14
2421 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v15
2422 ; GFX11-NEXT: v_pk_max_f16 v14, v5, v13
2423 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2424 ; GFX11-NEXT: v_perm_b32 v7, v16, v7, 0x5040100
2425 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2426 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2427 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v13
2428 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v5
2429 ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
2430 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13
2431 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v14
2432 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
2433 ; GFX11-NEXT: v_perm_b32 v6, v15, v6, 0x5040100
2434 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2435 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17
2436 ; GFX11-NEXT: v_pk_max_f16 v17, v3, v11
2437 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v20
2438 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
2439 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2440 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v11
2441 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v17
2442 ; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v12
2443 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4
2444 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
2445 ; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v3
2446 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11
2447 ; GFX11-NEXT: v_perm_b32 v5, v13, v5, 0x5040100
2448 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2449 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2450 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v20, v19
2451 ; GFX11-NEXT: v_pk_max_f16 v19, v1, v9
2452 ; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v22
2453 ; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
2454 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2455 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v10
2456 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2457 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2458 ; GFX11-NEXT: v_perm_b32 v3, v11, v3, 0x5040100
2459 ; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
2460 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2461 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v9
2462 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2463 ; GFX11-NEXT: v_pk_max_f16 v22, v0, v8
2464 ; GFX11-NEXT: v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
2465 ; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v19
2466 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2467 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9
2468 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v22
2469 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2470 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
2471 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8
2472 ; GFX11-NEXT: v_perm_b32 v1, v1, v21, 0x5040100
2473 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
2474 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v24, v23
2475 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
2476 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10
2477 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2478 ; GFX11-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
2479 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2480 ; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12
2481 ; GFX11-NEXT: v_perm_b32 v2, v2, v17, 0x5040100
2482 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
2483 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2484 ; GFX11-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
2485 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2487 ; GFX12-LABEL: v_maximum_v16f16:
2489 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2490 ; GFX12-NEXT: s_wait_expcnt 0x0
2491 ; GFX12-NEXT: s_wait_samplecnt 0x0
2492 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2493 ; GFX12-NEXT: s_wait_kmcnt 0x0
2494 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v8
2495 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v9
2496 ; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v10
2497 ; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v11
2498 ; GFX12-NEXT: v_pk_maximum_f16 v4, v4, v12
2499 ; GFX12-NEXT: v_pk_maximum_f16 v5, v5, v13
2500 ; GFX12-NEXT: v_pk_maximum_f16 v6, v6, v14
2501 ; GFX12-NEXT: v_pk_maximum_f16 v7, v7, v15
2502 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2503 %op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1)
2506 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: