1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; Scalar f32 llvm.maximum on two ordinary (non-inreg) float arguments, with no
; fast-math flags on the call.
; Expected output per the checks below: the gfx1200 run emits a single
; v_maximum_f32, while every other run emits v_max_f32 plus an ordered compare
; (v_cmp_o_f32) whose result selects between the max and the constant
; 0x7fc00000 (an f32 quiet-NaN bit pattern) via v_cndmask_b32.
10 define float @v_maximum_f32(float %src0, float %src1) {
11 ; GFX7-LABEL: v_maximum_f32:
13 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
15 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
16 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
17 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
18 ; GFX7-NEXT: s_setpc_b64 s[30:31]
20 ; GFX8-LABEL: v_maximum_f32:
22 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
24 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
25 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
26 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
27 ; GFX8-NEXT: s_setpc_b64 s[30:31]
29 ; GFX9-LABEL: v_maximum_f32:
31 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
33 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
34 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
35 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
36 ; GFX9-NEXT: s_setpc_b64 s[30:31]
38 ; GFX940-LABEL: v_maximum_f32:
40 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
42 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
43 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
44 ; GFX940-NEXT: s_nop 1
45 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
46 ; GFX940-NEXT: s_setpc_b64 s[30:31]
48 ; GFX10-LABEL: v_maximum_f32:
50 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
52 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
53 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
54 ; GFX10-NEXT: s_setpc_b64 s[30:31]
56 ; GFX11-LABEL: v_maximum_f32:
58 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
60 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
61 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
62 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
63 ; GFX11-NEXT: s_setpc_b64 s[30:31]
65 ; GFX12-LABEL: v_maximum_f32:
67 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
68 ; GFX12-NEXT: s_wait_expcnt 0x0
69 ; GFX12-NEXT: s_wait_samplecnt 0x0
70 ; GFX12-NEXT: s_wait_bvhcnt 0x0
71 ; GFX12-NEXT: s_wait_kmcnt 0x0
72 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
73 ; GFX12-NEXT: s_setpc_b64 s[30:31]
74 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; Scalar f32 llvm.maximum with the nnan fast-math flag on the call.
; Expected output per the checks below: all runs other than gfx1200 emit only
; v_max_f32 (no ordered compare or NaN select); the gfx1200 run still emits
; v_maximum_f32.
78 define float @v_maximum_f32__nnan(float %src0, float %src1) {
79 ; GFX7-LABEL: v_maximum_f32__nnan:
81 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
83 ; GFX7-NEXT: s_setpc_b64 s[30:31]
85 ; GFX8-LABEL: v_maximum_f32__nnan:
87 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
89 ; GFX8-NEXT: s_setpc_b64 s[30:31]
91 ; GFX9-LABEL: v_maximum_f32__nnan:
93 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
95 ; GFX9-NEXT: s_setpc_b64 s[30:31]
97 ; GFX940-LABEL: v_maximum_f32__nnan:
99 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
101 ; GFX940-NEXT: s_setpc_b64 s[30:31]
103 ; GFX10-LABEL: v_maximum_f32__nnan:
105 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
107 ; GFX10-NEXT: s_setpc_b64 s[30:31]
109 ; GFX11-LABEL: v_maximum_f32__nnan:
111 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
113 ; GFX11-NEXT: s_setpc_b64 s[30:31]
115 ; GFX12-LABEL: v_maximum_f32__nnan:
117 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
118 ; GFX12-NEXT: s_wait_expcnt 0x0
119 ; GFX12-NEXT: s_wait_samplecnt 0x0
120 ; GFX12-NEXT: s_wait_bvhcnt 0x0
121 ; GFX12-NEXT: s_wait_kmcnt 0x0
122 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
123 ; GFX12-NEXT: s_setpc_b64 s[30:31]
124 %op = call nnan float @llvm.maximum.f32(float %src0, float %src1)
; Scalar f32 llvm.maximum with only the nsz fast-math flag on the call.
; Expected output per the checks below: the same instruction sequences as the
; unflagged v_maximum_f32 case (v_max_f32 + v_cmp_o_f32 + v_cndmask_b32 with
; 0x7fc00000 on the older runs, a single v_maximum_f32 on the gfx1200 run).
128 define float @v_maximum_f32__nsz(float %src0, float %src1) {
129 ; GFX7-LABEL: v_maximum_f32__nsz:
131 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
133 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
134 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
135 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
136 ; GFX7-NEXT: s_setpc_b64 s[30:31]
138 ; GFX8-LABEL: v_maximum_f32__nsz:
140 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
142 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
143 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
144 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
145 ; GFX8-NEXT: s_setpc_b64 s[30:31]
147 ; GFX9-LABEL: v_maximum_f32__nsz:
149 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
151 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
152 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
153 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
154 ; GFX9-NEXT: s_setpc_b64 s[30:31]
156 ; GFX940-LABEL: v_maximum_f32__nsz:
158 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159 ; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
160 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
161 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
162 ; GFX940-NEXT: s_nop 1
163 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
164 ; GFX940-NEXT: s_setpc_b64 s[30:31]
166 ; GFX10-LABEL: v_maximum_f32__nsz:
168 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
170 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
171 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
172 ; GFX10-NEXT: s_setpc_b64 s[30:31]
174 ; GFX11-LABEL: v_maximum_f32__nsz:
176 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
178 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
179 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
180 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
181 ; GFX11-NEXT: s_setpc_b64 s[30:31]
183 ; GFX12-LABEL: v_maximum_f32__nsz:
185 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
186 ; GFX12-NEXT: s_wait_expcnt 0x0
187 ; GFX12-NEXT: s_wait_samplecnt 0x0
188 ; GFX12-NEXT: s_wait_bvhcnt 0x0
189 ; GFX12-NEXT: s_wait_kmcnt 0x0
190 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
191 ; GFX12-NEXT: s_setpc_b64 s[30:31]
192 %op = call nsz float @llvm.maximum.f32(float %src0, float %src1)
; Scalar f32 llvm.maximum with both nnan and nsz on the call.
; Expected output per the checks below matches the nnan-only case: a lone
; v_max_f32 on every run except gfx1200, which emits v_maximum_f32.
196 define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
197 ; GFX7-LABEL: v_maximum_f32__nnan_nsz:
199 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
201 ; GFX7-NEXT: s_setpc_b64 s[30:31]
203 ; GFX8-LABEL: v_maximum_f32__nnan_nsz:
205 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
207 ; GFX8-NEXT: s_setpc_b64 s[30:31]
209 ; GFX9-LABEL: v_maximum_f32__nnan_nsz:
211 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
213 ; GFX9-NEXT: s_setpc_b64 s[30:31]
215 ; GFX940-LABEL: v_maximum_f32__nnan_nsz:
217 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
219 ; GFX940-NEXT: s_setpc_b64 s[30:31]
221 ; GFX10-LABEL: v_maximum_f32__nnan_nsz:
223 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
225 ; GFX10-NEXT: s_setpc_b64 s[30:31]
227 ; GFX11-LABEL: v_maximum_f32__nnan_nsz:
229 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
231 ; GFX11-NEXT: s_setpc_b64 s[30:31]
233 ; GFX12-LABEL: v_maximum_f32__nnan_nsz:
235 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
236 ; GFX12-NEXT: s_wait_expcnt 0x0
237 ; GFX12-NEXT: s_wait_samplecnt 0x0
238 ; GFX12-NEXT: s_wait_bvhcnt 0x0
239 ; GFX12-NEXT: s_wait_kmcnt 0x0
240 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
241 ; GFX12-NEXT: s_setpc_b64 s[30:31]
242 %op = call nnan nsz float @llvm.maximum.f32(float %src0, float %src1)
; Scalar f32 llvm.maximum where only the first operand is produced by an
; `fadd nnan` (%arg0 + 1.0); the maximum call itself carries no flags and the
; second operand is an unconstrained argument.
; Expected output per the checks below: the older runs still emit the ordered
; compare and the 0x7fc00000 select after the add; the gfx1200 run emits
; v_add_f32 followed by v_maximum_f32.
246 define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
247 ; GFX7-LABEL: v_maximum_f32__nnan_src0:
249 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0
251 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
252 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
253 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
254 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
255 ; GFX7-NEXT: s_setpc_b64 s[30:31]
257 ; GFX8-LABEL: v_maximum_f32__nnan_src0:
259 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; GFX8-NEXT: v_add_f32_e32 v0, 1.0, v0
261 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
262 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
263 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
264 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
265 ; GFX8-NEXT: s_setpc_b64 s[30:31]
267 ; GFX9-LABEL: v_maximum_f32__nnan_src0:
269 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270 ; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
271 ; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
272 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
273 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
274 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
275 ; GFX9-NEXT: s_setpc_b64 s[30:31]
277 ; GFX940-LABEL: v_maximum_f32__nnan_src0:
279 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX940-NEXT: v_add_f32_e32 v0, 1.0, v0
281 ; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
282 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
283 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
284 ; GFX940-NEXT: s_nop 1
285 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
286 ; GFX940-NEXT: s_setpc_b64 s[30:31]
288 ; GFX10-LABEL: v_maximum_f32__nnan_src0:
290 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291 ; GFX10-NEXT: v_add_f32_e32 v0, 1.0, v0
292 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
293 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
294 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
295 ; GFX10-NEXT: s_setpc_b64 s[30:31]
297 ; GFX11-LABEL: v_maximum_f32__nnan_src0:
299 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300 ; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
301 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
302 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
303 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
304 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
305 ; GFX11-NEXT: s_setpc_b64 s[30:31]
307 ; GFX12-LABEL: v_maximum_f32__nnan_src0:
309 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
310 ; GFX12-NEXT: s_wait_expcnt 0x0
311 ; GFX12-NEXT: s_wait_samplecnt 0x0
312 ; GFX12-NEXT: s_wait_bvhcnt 0x0
313 ; GFX12-NEXT: s_wait_kmcnt 0x0
314 ; GFX12-NEXT: v_add_f32_e32 v0, 1.0, v0
315 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
316 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
317 ; GFX12-NEXT: s_setpc_b64 s[30:31]
318 %src0 = fadd nnan float %arg0, 1.0
319 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; Mirror of the nnan_src0 case: only the second operand is produced by an
; `fadd nnan` (%arg1 + 1.0); the maximum call itself carries no flags and the
; first operand is an unconstrained argument.
; Expected output per the checks below: the older runs still emit the ordered
; compare and the 0x7fc00000 select after the add; the gfx1200 run emits
; v_add_f32 followed by v_maximum_f32.
323 define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
324 ; GFX7-LABEL: v_maximum_f32__nnan_src1:
326 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327 ; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1
328 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
329 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
330 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
331 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
332 ; GFX7-NEXT: s_setpc_b64 s[30:31]
334 ; GFX8-LABEL: v_maximum_f32__nnan_src1:
336 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337 ; GFX8-NEXT: v_add_f32_e32 v1, 1.0, v1
338 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
339 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
340 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
341 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
342 ; GFX8-NEXT: s_setpc_b64 s[30:31]
344 ; GFX9-LABEL: v_maximum_f32__nnan_src1:
346 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
348 ; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
349 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
350 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
351 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
352 ; GFX9-NEXT: s_setpc_b64 s[30:31]
354 ; GFX940-LABEL: v_maximum_f32__nnan_src1:
356 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX940-NEXT: v_add_f32_e32 v1, 1.0, v1
358 ; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
359 ; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
360 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
361 ; GFX940-NEXT: s_nop 1
362 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
363 ; GFX940-NEXT: s_setpc_b64 s[30:31]
365 ; GFX10-LABEL: v_maximum_f32__nnan_src1:
367 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368 ; GFX10-NEXT: v_add_f32_e32 v1, 1.0, v1
369 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
370 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
371 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
372 ; GFX10-NEXT: s_setpc_b64 s[30:31]
374 ; GFX11-LABEL: v_maximum_f32__nnan_src1:
376 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
378 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
379 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
380 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
381 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
382 ; GFX11-NEXT: s_setpc_b64 s[30:31]
384 ; GFX12-LABEL: v_maximum_f32__nnan_src1:
386 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
387 ; GFX12-NEXT: s_wait_expcnt 0x0
388 ; GFX12-NEXT: s_wait_samplecnt 0x0
389 ; GFX12-NEXT: s_wait_bvhcnt 0x0
390 ; GFX12-NEXT: s_wait_kmcnt 0x0
391 ; GFX12-NEXT: v_add_f32_e32 v1, 1.0, v1
392 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
393 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
394 ; GFX12-NEXT: s_setpc_b64 s[30:31]
395 %src1 = fadd nnan float %arg1, 1.0
396 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; Scalar f32 llvm.maximum on `inreg` arguments; the result is passed to a
; side-effecting inline asm through an "s" constraint instead of being returned.
; Expected output per the checks below: the gfx1200 run emits s_maximum_f32 on
; SGPRs and the asm uses s0; the other runs compute the value with v_max_f32 /
; v_cmp_o_f32 / v_cndmask_b32 reading the SGPR inputs, and the asm line in
; those checks shows v0.
400 define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
401 ; GFX7-LABEL: s_maximum_f32:
403 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX7-NEXT: v_mov_b32_e32 v0, s7
405 ; GFX7-NEXT: v_max_f32_e32 v1, s6, v0
406 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
407 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
408 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
409 ; GFX7-NEXT: ;;#ASMSTART
410 ; GFX7-NEXT: ; use v0
411 ; GFX7-NEXT: ;;#ASMEND
412 ; GFX7-NEXT: s_setpc_b64 s[30:31]
414 ; GFX8-LABEL: s_maximum_f32:
416 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX8-NEXT: v_mov_b32_e32 v0, s7
418 ; GFX8-NEXT: v_max_f32_e32 v1, s6, v0
419 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
420 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
421 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
422 ; GFX8-NEXT: ;;#ASMSTART
423 ; GFX8-NEXT: ; use v0
424 ; GFX8-NEXT: ;;#ASMEND
425 ; GFX8-NEXT: s_setpc_b64 s[30:31]
427 ; GFX9-LABEL: s_maximum_f32:
429 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX9-NEXT: v_mov_b32_e32 v0, s7
431 ; GFX9-NEXT: v_max_f32_e32 v1, s6, v0
432 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
433 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
434 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
435 ; GFX9-NEXT: ;;#ASMSTART
436 ; GFX9-NEXT: ; use v0
437 ; GFX9-NEXT: ;;#ASMEND
438 ; GFX9-NEXT: s_setpc_b64 s[30:31]
440 ; GFX940-LABEL: s_maximum_f32:
442 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; GFX940-NEXT: v_mov_b32_e32 v0, s1
444 ; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
445 ; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
446 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
447 ; GFX940-NEXT: s_nop 1
448 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
449 ; GFX940-NEXT: ;;#ASMSTART
450 ; GFX940-NEXT: ; use v0
451 ; GFX940-NEXT: ;;#ASMEND
452 ; GFX940-NEXT: s_setpc_b64 s[30:31]
454 ; GFX10-LABEL: s_maximum_f32:
456 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457 ; GFX10-NEXT: v_max_f32_e64 v0, s6, s7
458 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s6, s7
459 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
460 ; GFX10-NEXT: ;;#ASMSTART
461 ; GFX10-NEXT: ; use v0
462 ; GFX10-NEXT: ;;#ASMEND
463 ; GFX10-NEXT: s_setpc_b64 s[30:31]
465 ; GFX11-LABEL: s_maximum_f32:
467 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
469 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
470 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
471 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
472 ; GFX11-NEXT: ;;#ASMSTART
473 ; GFX11-NEXT: ; use v0
474 ; GFX11-NEXT: ;;#ASMEND
475 ; GFX11-NEXT: s_setpc_b64 s[30:31]
477 ; GFX12-LABEL: s_maximum_f32:
479 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
480 ; GFX12-NEXT: s_wait_expcnt 0x0
481 ; GFX12-NEXT: s_wait_samplecnt 0x0
482 ; GFX12-NEXT: s_wait_bvhcnt 0x0
483 ; GFX12-NEXT: s_wait_kmcnt 0x0
484 ; GFX12-NEXT: s_maximum_f32 s0, s0, s1
485 ; GFX12-NEXT: ;;#ASMSTART
486 ; GFX12-NEXT: ; use s0
487 ; GFX12-NEXT: ;;#ASMEND
488 ; GFX12-NEXT: s_setpc_b64 s[30:31]
489 %op = call float @llvm.maximum.f32(float %src0, float %src1)
490 call void asm sideeffect "; use $0", "s"(float %op)
; <2 x float> llvm.maximum on ordinary vector arguments, with no fast-math
; flags on the call.
; Expected output per the checks below: each element is handled separately.
; The older runs emit v_max_f32 + v_cmp_o_f32 + v_cndmask_b32 (selecting
; 0x7fc00000) per element, with the gfx1100 run pairing the two maxes into one
; v_dual_max_f32; the gfx1200 run emits two v_maximum_f32 instructions.
494 define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
495 ; GFX7-LABEL: v_maximum_v2f32:
497 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
499 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
500 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
501 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
502 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
503 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
504 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
505 ; GFX7-NEXT: s_setpc_b64 s[30:31]
507 ; GFX8-LABEL: v_maximum_v2f32:
509 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510 ; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
511 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
512 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
513 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
514 ; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
515 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
516 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
517 ; GFX8-NEXT: s_setpc_b64 s[30:31]
519 ; GFX9-LABEL: v_maximum_v2f32:
521 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
522 ; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
523 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
524 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
525 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
526 ; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
527 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
528 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
529 ; GFX9-NEXT: s_setpc_b64 s[30:31]
531 ; GFX940-LABEL: v_maximum_v2f32:
533 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534 ; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
535 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
536 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
537 ; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
538 ; GFX940-NEXT: s_nop 0
539 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
540 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
541 ; GFX940-NEXT: s_nop 1
542 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
543 ; GFX940-NEXT: s_setpc_b64 s[30:31]
545 ; GFX10-LABEL: v_maximum_v2f32:
547 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
549 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
550 ; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
551 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
552 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
553 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
554 ; GFX10-NEXT: s_setpc_b64 s[30:31]
556 ; GFX11-LABEL: v_maximum_v2f32:
558 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559 ; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
560 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
561 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
562 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
563 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
564 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
565 ; GFX11-NEXT: s_setpc_b64 s[30:31]
567 ; GFX12-LABEL: v_maximum_v2f32:
569 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
570 ; GFX12-NEXT: s_wait_expcnt 0x0
571 ; GFX12-NEXT: s_wait_samplecnt 0x0
572 ; GFX12-NEXT: s_wait_bvhcnt 0x0
573 ; GFX12-NEXT: s_wait_kmcnt 0x0
574 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
575 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
576 ; GFX12-NEXT: s_setpc_b64 s[30:31]
577 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; <2 x float> llvm.maximum with the nnan fast-math flag on the call.
; Expected output per the checks below: no compare/select on any run. The
; older runs emit one v_max_f32 per element (a single v_dual_max_f32 on the
; gfx1100 run); the gfx1200 run emits two v_maximum_f32 instructions.
581 define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) {
582 ; GFX7-LABEL: v_maximum_v2f32__nnan:
584 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
586 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
587 ; GFX7-NEXT: s_setpc_b64 s[30:31]
589 ; GFX8-LABEL: v_maximum_v2f32__nnan:
591 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
593 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
594 ; GFX8-NEXT: s_setpc_b64 s[30:31]
596 ; GFX9-LABEL: v_maximum_v2f32__nnan:
598 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
600 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
601 ; GFX9-NEXT: s_setpc_b64 s[30:31]
603 ; GFX940-LABEL: v_maximum_v2f32__nnan:
605 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
606 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
607 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
608 ; GFX940-NEXT: s_setpc_b64 s[30:31]
610 ; GFX10-LABEL: v_maximum_v2f32__nnan:
612 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
614 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
615 ; GFX10-NEXT: s_setpc_b64 s[30:31]
617 ; GFX11-LABEL: v_maximum_v2f32__nnan:
619 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
621 ; GFX11-NEXT: s_setpc_b64 s[30:31]
623 ; GFX12-LABEL: v_maximum_v2f32__nnan:
625 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
626 ; GFX12-NEXT: s_wait_expcnt 0x0
627 ; GFX12-NEXT: s_wait_samplecnt 0x0
628 ; GFX12-NEXT: s_wait_bvhcnt 0x0
629 ; GFX12-NEXT: s_wait_kmcnt 0x0
630 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
631 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
632 ; GFX12-NEXT: s_setpc_b64 s[30:31]
633 %op = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; <2 x float> llvm.maximum with only the nsz fast-math flag on the call.
; Expected output per the checks below: the same per-element sequences as the
; unflagged v_maximum_v2f32 case (max + ordered compare + 0x7fc00000 select on
; the older runs, two v_maximum_f32 on the gfx1200 run).
637 define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
638 ; GFX7-LABEL: v_maximum_v2f32__nsz:
640 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
642 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
643 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
644 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
645 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
646 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
647 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
648 ; GFX7-NEXT: s_setpc_b64 s[30:31]
650 ; GFX8-LABEL: v_maximum_v2f32__nsz:
652 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653 ; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
654 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
655 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
656 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
657 ; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
658 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
659 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
660 ; GFX8-NEXT: s_setpc_b64 s[30:31]
662 ; GFX9-LABEL: v_maximum_v2f32__nsz:
664 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665 ; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
666 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
667 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
668 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
669 ; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
670 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
671 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
672 ; GFX9-NEXT: s_setpc_b64 s[30:31]
674 ; GFX940-LABEL: v_maximum_v2f32__nsz:
676 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677 ; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
678 ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
679 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
680 ; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
681 ; GFX940-NEXT: s_nop 0
682 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
683 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
684 ; GFX940-NEXT: s_nop 1
685 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
686 ; GFX940-NEXT: s_setpc_b64 s[30:31]
688 ; GFX10-LABEL: v_maximum_v2f32__nsz:
690 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
692 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
693 ; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
694 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
695 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
696 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
697 ; GFX10-NEXT: s_setpc_b64 s[30:31]
699 ; GFX11-LABEL: v_maximum_v2f32__nsz:
701 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702 ; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
703 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
704 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
705 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
706 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
707 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
708 ; GFX11-NEXT: s_setpc_b64 s[30:31]
710 ; GFX12-LABEL: v_maximum_v2f32__nsz:
712 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
713 ; GFX12-NEXT: s_wait_expcnt 0x0
714 ; GFX12-NEXT: s_wait_samplecnt 0x0
715 ; GFX12-NEXT: s_wait_bvhcnt 0x0
716 ; GFX12-NEXT: s_wait_kmcnt 0x0
717 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
718 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
719 ; GFX12-NEXT: s_setpc_b64 s[30:31]
720 %op = call nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; <2 x float> llvm.maximum with both nnan and nsz on the call.
; Expected output per the checks below matches the nnan-only vector case: one
; v_max_f32 per element on the older runs (a single v_dual_max_f32 on the
; gfx1100 run) and two v_maximum_f32 on the gfx1200 run.
724 define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %src1) {
725 ; GFX7-LABEL: v_maximum_v2f32__nnan_nsz:
727 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
729 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
730 ; GFX7-NEXT: s_setpc_b64 s[30:31]
732 ; GFX8-LABEL: v_maximum_v2f32__nnan_nsz:
734 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
736 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
737 ; GFX8-NEXT: s_setpc_b64 s[30:31]
739 ; GFX9-LABEL: v_maximum_v2f32__nnan_nsz:
741 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
743 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
744 ; GFX9-NEXT: s_setpc_b64 s[30:31]
746 ; GFX940-LABEL: v_maximum_v2f32__nnan_nsz:
748 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
750 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
751 ; GFX940-NEXT: s_setpc_b64 s[30:31]
753 ; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
755 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
757 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
758 ; GFX10-NEXT: s_setpc_b64 s[30:31]
760 ; GFX11-LABEL: v_maximum_v2f32__nnan_nsz:
762 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
764 ; GFX11-NEXT: s_setpc_b64 s[30:31]
766 ; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
768 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
769 ; GFX12-NEXT: s_wait_expcnt 0x0
770 ; GFX12-NEXT: s_wait_samplecnt 0x0
771 ; GFX12-NEXT: s_wait_bvhcnt 0x0
772 ; GFX12-NEXT: s_wait_kmcnt 0x0
773 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
774 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
775 ; GFX12-NEXT: s_setpc_b64 s[30:31]
776 %op = call nnan nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; <2 x float> llvm.maximum on `inreg` vector arguments; the result is passed to
; a side-effecting inline asm through an "s" constraint instead of being
; returned.
; Expected output per the checks below: the gfx1200 run emits two
; s_maximum_f32 instructions and the asm uses s[0:1]; the other runs compute
; each element with v_max_f32 / v_cmp_o_f32 / v_cndmask_b32 reading the SGPR
; inputs, and the asm line in those checks shows v[0:1].
780 define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
781 ; GFX7-LABEL: s_maximum_v2f32:
783 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784 ; GFX7-NEXT: v_mov_b32_e32 v0, s17
785 ; GFX7-NEXT: v_max_f32_e32 v1, s7, v0
786 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
787 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s7, v0
788 ; GFX7-NEXT: v_mov_b32_e32 v0, s16
789 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
790 ; GFX7-NEXT: v_max_f32_e32 v3, s6, v0
791 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
792 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
793 ; GFX7-NEXT: ;;#ASMSTART
794 ; GFX7-NEXT: ; use v[0:1]
795 ; GFX7-NEXT: ;;#ASMEND
796 ; GFX7-NEXT: s_setpc_b64 s[30:31]
798 ; GFX8-LABEL: s_maximum_v2f32:
800 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX8-NEXT: v_mov_b32_e32 v0, s17
802 ; GFX8-NEXT: v_max_f32_e32 v1, s7, v0
803 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
804 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s7, v0
805 ; GFX8-NEXT: v_mov_b32_e32 v0, s16
806 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
807 ; GFX8-NEXT: v_max_f32_e32 v3, s6, v0
808 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
809 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
810 ; GFX8-NEXT: ;;#ASMSTART
811 ; GFX8-NEXT: ; use v[0:1]
812 ; GFX8-NEXT: ;;#ASMEND
813 ; GFX8-NEXT: s_setpc_b64 s[30:31]
815 ; GFX9-LABEL: s_maximum_v2f32:
817 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818 ; GFX9-NEXT: v_mov_b32_e32 v0, s17
819 ; GFX9-NEXT: v_max_f32_e32 v1, s7, v0
820 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
821 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s7, v0
822 ; GFX9-NEXT: v_mov_b32_e32 v0, s16
823 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
824 ; GFX9-NEXT: v_max_f32_e32 v3, s6, v0
825 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s6, v0
826 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
827 ; GFX9-NEXT: ;;#ASMSTART
828 ; GFX9-NEXT: ; use v[0:1]
829 ; GFX9-NEXT: ;;#ASMEND
830 ; GFX9-NEXT: s_setpc_b64 s[30:31]
832 ; GFX940-LABEL: s_maximum_v2f32:
834 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX940-NEXT: v_mov_b32_e32 v0, s3
836 ; GFX940-NEXT: v_max_f32_e32 v1, s1, v0
837 ; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
838 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
839 ; GFX940-NEXT: v_mov_b32_e32 v0, s2
840 ; GFX940-NEXT: v_max_f32_e32 v3, s0, v0
841 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
842 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
843 ; GFX940-NEXT: s_nop 1
844 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
845 ; GFX940-NEXT: ;;#ASMSTART
846 ; GFX940-NEXT: ; use v[0:1]
847 ; GFX940-NEXT: ;;#ASMEND
848 ; GFX940-NEXT: s_setpc_b64 s[30:31]
850 ; GFX10-LABEL: s_maximum_v2f32:
852 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853 ; GFX10-NEXT: v_max_f32_e64 v0, s7, s17
854 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s7, s17
855 ; GFX10-NEXT: v_max_f32_e64 v2, s6, s16
856 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
857 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s6, s16
858 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
859 ; GFX10-NEXT: ;;#ASMSTART
860 ; GFX10-NEXT: ; use v[0:1]
861 ; GFX10-NEXT: ;;#ASMEND
862 ; GFX10-NEXT: s_setpc_b64 s[30:31]
864 ; GFX11-LABEL: s_maximum_v2f32:
866 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX11-NEXT: v_max_f32_e64 v0, s1, s3
868 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
869 ; GFX11-NEXT: v_max_f32_e64 v2, s0, s2
870 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
871 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
872 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
873 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
874 ; GFX11-NEXT: ;;#ASMSTART
875 ; GFX11-NEXT: ; use v[0:1]
876 ; GFX11-NEXT: ;;#ASMEND
877 ; GFX11-NEXT: s_setpc_b64 s[30:31]
879 ; GFX12-LABEL: s_maximum_v2f32:
881 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
882 ; GFX12-NEXT: s_wait_expcnt 0x0
883 ; GFX12-NEXT: s_wait_samplecnt 0x0
884 ; GFX12-NEXT: s_wait_bvhcnt 0x0
885 ; GFX12-NEXT: s_wait_kmcnt 0x0
886 ; GFX12-NEXT: s_maximum_f32 s1, s1, s3
887 ; GFX12-NEXT: s_maximum_f32 s0, s0, s2
888 ; GFX12-NEXT: ;;#ASMSTART
889 ; GFX12-NEXT: ; use s[0:1]
890 ; GFX12-NEXT: ;;#ASMEND
891 ; GFX12-NEXT: s_setpc_b64 s[30:31]
892 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
893 call void asm sideeffect "; use $0", "s"(<2 x float> %op)
; Test: llvm.maximum on <3 x float> in VGPRs with no fast-math flags.
; Per the autogenerated checks below, gfx7 through gfx11 (including gfx940)
; expand each lane into v_max_f32, an ordered compare (v_cmp_o_f32) and a
; v_cndmask that substitutes 0x7fc00000 (f32 quiet NaN bit pattern) when the
; compare fails; gfx12 emits one v_maximum_f32 per lane instead.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
897 define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
898 ; GFX7-LABEL: v_maximum_v3f32:
900 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
902 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
903 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
904 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
905 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
906 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
907 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
908 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
909 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
910 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
911 ; GFX7-NEXT: s_setpc_b64 s[30:31]
913 ; GFX8-LABEL: v_maximum_v3f32:
915 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916 ; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
917 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
918 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
919 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
920 ; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
921 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
922 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
923 ; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
924 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
925 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
926 ; GFX8-NEXT: s_setpc_b64 s[30:31]
928 ; GFX9-LABEL: v_maximum_v3f32:
930 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931 ; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
932 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
933 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
934 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
935 ; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
936 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
937 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
938 ; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
939 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
940 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
941 ; GFX9-NEXT: s_setpc_b64 s[30:31]
943 ; GFX940-LABEL: v_maximum_v3f32:
945 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
946 ; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
947 ; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
948 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
949 ; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
950 ; GFX940-NEXT: s_nop 0
951 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
952 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
953 ; GFX940-NEXT: s_nop 1
954 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
955 ; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
956 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
957 ; GFX940-NEXT: s_nop 1
958 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
959 ; GFX940-NEXT: s_setpc_b64 s[30:31]
961 ; GFX10-LABEL: v_maximum_v3f32:
963 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964 ; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
965 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
966 ; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
967 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
968 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
969 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
970 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
971 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
972 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
973 ; GFX10-NEXT: s_setpc_b64 s[30:31]
975 ; GFX11-LABEL: v_maximum_v3f32:
977 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978 ; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
979 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
980 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
981 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
982 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
983 ; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
984 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
985 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
986 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
987 ; GFX11-NEXT: s_setpc_b64 s[30:31]
989 ; GFX12-LABEL: v_maximum_v3f32:
991 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
992 ; GFX12-NEXT: s_wait_expcnt 0x0
993 ; GFX12-NEXT: s_wait_samplecnt 0x0
994 ; GFX12-NEXT: s_wait_bvhcnt 0x0
995 ; GFX12-NEXT: s_wait_kmcnt 0x0
996 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
997 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
998 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
999 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1000 %op = call <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Test: llvm.maximum on <3 x float> with the nnan fast-math flag on the call.
; Per the autogenerated checks below, gfx7 through gfx11 (including gfx940)
; emit only a v_max_f32 per lane (paired as v_dual_max_f32 on gfx11), with no
; compare/select sequence; gfx12 emits v_maximum_f32 per lane.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1004 define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) {
1005 ; GFX7-LABEL: v_maximum_v3f32__nnan:
1007 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1009 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1010 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1011 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1013 ; GFX8-LABEL: v_maximum_v3f32__nnan:
1015 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
1017 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
1018 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
1019 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1021 ; GFX9-LABEL: v_maximum_v3f32__nnan:
1023 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
1025 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
1026 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
1027 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1029 ; GFX940-LABEL: v_maximum_v3f32__nnan:
1031 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1032 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
1033 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
1034 ; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
1035 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1037 ; GFX10-LABEL: v_maximum_v3f32__nnan:
1039 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
1041 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
1042 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
1043 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1045 ; GFX11-LABEL: v_maximum_v3f32__nnan:
1047 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1049 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
1050 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1052 ; GFX12-LABEL: v_maximum_v3f32__nnan:
1054 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1055 ; GFX12-NEXT: s_wait_expcnt 0x0
1056 ; GFX12-NEXT: s_wait_samplecnt 0x0
1057 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1058 ; GFX12-NEXT: s_wait_kmcnt 0x0
1059 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1060 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1061 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1062 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1063 %op = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Test: llvm.maximum on <3 x float> with the nsz fast-math flag on the call.
; Per the autogenerated checks below, the expected code is the same shape as
; for the call without flags: gfx7 through gfx11 (including gfx940) use
; v_max_f32 + v_cmp_o_f32 + v_cndmask with 0x7fc00000 (f32 quiet NaN bit
; pattern) per lane, and gfx12 uses v_maximum_f32 per lane.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1067 define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
1068 ; GFX7-LABEL: v_maximum_v3f32__nsz:
1070 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1072 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1073 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1074 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1075 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1076 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1077 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1078 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1079 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1080 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1081 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX8-LABEL: v_maximum_v3f32__nsz:
1085 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
1087 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1088 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1089 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1090 ; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
1091 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1092 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1093 ; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
1094 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1095 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1096 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1098 ; GFX9-LABEL: v_maximum_v3f32__nsz:
1100 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101 ; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
1102 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1103 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1104 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1105 ; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
1106 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1107 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1108 ; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
1109 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1110 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1111 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1113 ; GFX940-LABEL: v_maximum_v3f32__nsz:
1115 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116 ; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
1117 ; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1118 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1119 ; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
1120 ; GFX940-NEXT: s_nop 0
1121 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1122 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1123 ; GFX940-NEXT: s_nop 1
1124 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1125 ; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
1126 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1127 ; GFX940-NEXT: s_nop 1
1128 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1129 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1131 ; GFX10-LABEL: v_maximum_v3f32__nsz:
1133 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134 ; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
1135 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
1136 ; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
1137 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
1138 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1139 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
1140 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
1141 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
1142 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1143 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1145 ; GFX11-LABEL: v_maximum_v3f32__nsz:
1147 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1148 ; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
1149 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
1150 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1151 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1152 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
1153 ; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
1154 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
1155 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1156 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1157 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1159 ; GFX12-LABEL: v_maximum_v3f32__nsz:
1161 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1162 ; GFX12-NEXT: s_wait_expcnt 0x0
1163 ; GFX12-NEXT: s_wait_samplecnt 0x0
1164 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1165 ; GFX12-NEXT: s_wait_kmcnt 0x0
1166 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1167 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1168 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1169 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1170 %op = call nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Test: llvm.maximum on <3 x float> with both nnan and nsz on the call.
; Per the autogenerated checks below, the expected code has the same shape as
; the nnan-only case: a bare v_max_f32 per lane on gfx7 through gfx11
; (including gfx940; paired as v_dual_max_f32 on gfx11) and v_maximum_f32 per
; lane on gfx12.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1174 define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %src1) {
1175 ; GFX7-LABEL: v_maximum_v3f32__nnan_nsz:
1177 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1179 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1180 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1181 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1183 ; GFX8-LABEL: v_maximum_v3f32__nnan_nsz:
1185 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
1187 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
1188 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
1189 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1191 ; GFX9-LABEL: v_maximum_v3f32__nnan_nsz:
1193 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
1195 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
1196 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
1197 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1199 ; GFX940-LABEL: v_maximum_v3f32__nnan_nsz:
1201 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
1203 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
1204 ; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
1205 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1207 ; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
1209 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1210 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
1211 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
1212 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
1213 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1215 ; GFX11-LABEL: v_maximum_v3f32__nnan_nsz:
1217 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1219 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
1220 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
1224 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1225 ; GFX12-NEXT: s_wait_expcnt 0x0
1226 ; GFX12-NEXT: s_wait_samplecnt 0x0
1227 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1228 ; GFX12-NEXT: s_wait_kmcnt 0x0
1229 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1230 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1231 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1232 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1233 %op = call nnan nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Test: llvm.maximum on <4 x float> in VGPRs with no fast-math flags.
; Per the autogenerated checks below, gfx7 through gfx11 (including gfx940)
; expand each of the four lanes into v_max_f32, an ordered compare
; (v_cmp_o_f32) and a v_cndmask that substitutes 0x7fc00000 (f32 quiet NaN bit
; pattern) when the compare fails; gfx12 emits one v_maximum_f32 per lane.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1237 define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
1238 ; GFX7-LABEL: v_maximum_v4f32:
1240 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1242 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1243 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1244 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1245 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1246 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1247 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1248 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1249 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1250 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1251 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1252 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1253 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1254 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1256 ; GFX8-LABEL: v_maximum_v4f32:
1258 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259 ; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
1260 ; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1261 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1262 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1263 ; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
1264 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1265 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1266 ; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
1267 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1268 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1269 ; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
1270 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1271 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1272 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1274 ; GFX9-LABEL: v_maximum_v4f32:
1276 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277 ; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
1278 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1279 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1280 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1281 ; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
1282 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1283 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1284 ; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
1285 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1286 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1287 ; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
1288 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1289 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1290 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1292 ; GFX940-LABEL: v_maximum_v4f32:
1294 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
1296 ; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1297 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1298 ; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
1299 ; GFX940-NEXT: s_nop 0
1300 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1301 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1302 ; GFX940-NEXT: s_nop 1
1303 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1304 ; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
1305 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1306 ; GFX940-NEXT: s_nop 1
1307 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1308 ; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
1309 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1310 ; GFX940-NEXT: s_nop 1
1311 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1312 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1314 ; GFX10-LABEL: v_maximum_v4f32:
1316 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1317 ; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
1318 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1319 ; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
1320 ; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
1321 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1322 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1323 ; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
1324 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1325 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1326 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1327 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1328 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1329 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1331 ; GFX11-LABEL: v_maximum_v4f32:
1333 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334 ; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1335 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1336 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1337 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1338 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1339 ; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
1340 ; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1341 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1342 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1343 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1344 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1345 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1346 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1348 ; GFX12-LABEL: v_maximum_v4f32:
1350 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1351 ; GFX12-NEXT: s_wait_expcnt 0x0
1352 ; GFX12-NEXT: s_wait_samplecnt 0x0
1353 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1354 ; GFX12-NEXT: s_wait_kmcnt 0x0
1355 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1356 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1357 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1358 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1359 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1360 %op = call <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Test: llvm.maximum on <4 x float> with the nnan fast-math flag on the call.
; Per the autogenerated checks below, gfx7 through gfx11 (including gfx940)
; emit only a v_max_f32 per lane (two v_dual_max_f32 pairs on gfx11), with no
; compare/select sequence; gfx12 emits v_maximum_f32 per lane.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1364 define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) {
1365 ; GFX7-LABEL: v_maximum_v4f32__nnan:
1367 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1368 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1369 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1370 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1371 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1372 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1374 ; GFX8-LABEL: v_maximum_v4f32__nnan:
1376 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
1378 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
1379 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
1380 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
1381 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1383 ; GFX9-LABEL: v_maximum_v4f32__nnan:
1385 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
1387 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
1388 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
1389 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
1390 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1392 ; GFX940-LABEL: v_maximum_v4f32__nnan:
1394 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
1396 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
1397 ; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
1398 ; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
1399 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX10-LABEL: v_maximum_v4f32__nnan:
1403 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
1405 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
1406 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
1407 ; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
1408 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1410 ; GFX11-LABEL: v_maximum_v4f32__nnan:
1412 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1414 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1415 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1417 ; GFX12-LABEL: v_maximum_v4f32__nnan:
1419 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1420 ; GFX12-NEXT: s_wait_expcnt 0x0
1421 ; GFX12-NEXT: s_wait_samplecnt 0x0
1422 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1423 ; GFX12-NEXT: s_wait_kmcnt 0x0
1424 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1425 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1426 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1427 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1428 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1429 %op = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Test: llvm.maximum on <4 x float> with the nsz fast-math flag on the call.
; Per the autogenerated checks below, the expected code is the same shape as
; for the call without flags: gfx7 through gfx11 (including gfx940) use
; v_max_f32 + v_cmp_o_f32 + v_cndmask with 0x7fc00000 (f32 quiet NaN bit
; pattern) per lane, and gfx12 uses v_maximum_f32 per lane.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1433 define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
1434 ; GFX7-LABEL: v_maximum_v4f32__nsz:
1436 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1438 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1439 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1440 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1441 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1442 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1443 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1444 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1445 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1446 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1447 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1448 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1449 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1450 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1452 ; GFX8-LABEL: v_maximum_v4f32__nsz:
1454 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455 ; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
1456 ; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1457 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1458 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1459 ; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
1460 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1461 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1462 ; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
1463 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1464 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1465 ; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
1466 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1467 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1468 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX9-LABEL: v_maximum_v4f32__nsz:
1472 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
1474 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1475 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1476 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1477 ; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
1478 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1479 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1480 ; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
1481 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1482 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1483 ; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
1484 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1485 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1486 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1488 ; GFX940-LABEL: v_maximum_v4f32__nsz:
1490 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491 ; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
1492 ; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1493 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1494 ; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
1495 ; GFX940-NEXT: s_nop 0
1496 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1497 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1498 ; GFX940-NEXT: s_nop 1
1499 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1500 ; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
1501 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1502 ; GFX940-NEXT: s_nop 1
1503 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1504 ; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
1505 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1506 ; GFX940-NEXT: s_nop 1
1507 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1508 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1510 ; GFX10-LABEL: v_maximum_v4f32__nsz:
1512 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1513 ; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
1514 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1515 ; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
1516 ; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
1517 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1518 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1519 ; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
1520 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1521 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1522 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1523 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1524 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1525 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1527 ; GFX11-LABEL: v_maximum_v4f32__nsz:
1529 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1531 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1532 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1533 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1534 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1535 ; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
1536 ; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1537 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1538 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1539 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1540 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1541 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1542 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1544 ; GFX12-LABEL: v_maximum_v4f32__nsz:
1546 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1547 ; GFX12-NEXT: s_wait_expcnt 0x0
1548 ; GFX12-NEXT: s_wait_samplecnt 0x0
1549 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1550 ; GFX12-NEXT: s_wait_kmcnt 0x0
1551 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1552 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1553 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1554 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1555 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1556 %op = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Test: llvm.maximum on <4 x float> with both nnan and nsz on the call.
; Per the autogenerated checks below, the expected code has the same shape as
; the nnan-only case: a bare v_max_f32 per lane on gfx7 through gfx11
; (including gfx940; two v_dual_max_f32 pairs on gfx11) and v_maximum_f32 per
; lane on gfx12.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py.
1560 define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %src1) {
1561 ; GFX7-LABEL: v_maximum_v4f32__nnan_nsz:
1563 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1564 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1565 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1566 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1567 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1568 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1570 ; GFX8-LABEL: v_maximum_v4f32__nnan_nsz:
1572 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
1574 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
1575 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
1576 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
1577 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1579 ; GFX9-LABEL: v_maximum_v4f32__nnan_nsz:
1581 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
1583 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
1584 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
1585 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
1586 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1588 ; GFX940-LABEL: v_maximum_v4f32__nnan_nsz:
1590 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
1592 ; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
1593 ; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
1594 ; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
1595 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1597 ; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
1599 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
1601 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
1602 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
1603 ; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
1604 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1606 ; GFX11-LABEL: v_maximum_v4f32__nnan_nsz:
1608 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1609 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1610 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1611 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1613 ; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
1615 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1616 ; GFX12-NEXT: s_wait_expcnt 0x0
1617 ; GFX12-NEXT: s_wait_samplecnt 0x0
1618 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1619 ; GFX12-NEXT: s_wait_kmcnt 0x0
1620 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1621 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1622 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1623 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1624 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1625 %op = call nnan nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1629 define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
1630 ; GFX7-LABEL: v_maximum_v8f32:
1632 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633 ; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
1634 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1635 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1636 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1637 ; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
1638 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1639 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1640 ; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
1641 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1642 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1643 ; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
1644 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1645 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1646 ; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
1647 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1648 ; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1649 ; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
1650 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1651 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1652 ; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
1653 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1654 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1655 ; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
1656 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1657 ; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1658 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1660 ; GFX8-LABEL: v_maximum_v8f32:
1662 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663 ; GFX8-NEXT: v_max_f32_e32 v16, v0, v8
1664 ; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1665 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1666 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1667 ; GFX8-NEXT: v_max_f32_e32 v8, v1, v9
1668 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1669 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1670 ; GFX8-NEXT: v_max_f32_e32 v8, v2, v10
1671 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1672 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1673 ; GFX8-NEXT: v_max_f32_e32 v8, v3, v11
1674 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1675 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1676 ; GFX8-NEXT: v_max_f32_e32 v8, v4, v12
1677 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1678 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1679 ; GFX8-NEXT: v_max_f32_e32 v8, v5, v13
1680 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1681 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1682 ; GFX8-NEXT: v_max_f32_e32 v8, v6, v14
1683 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1684 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1685 ; GFX8-NEXT: v_max_f32_e32 v8, v7, v15
1686 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1687 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1688 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1690 ; GFX9-LABEL: v_maximum_v8f32:
1692 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693 ; GFX9-NEXT: v_max_f32_e32 v16, v0, v8
1694 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1695 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1696 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1697 ; GFX9-NEXT: v_max_f32_e32 v8, v1, v9
1698 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1699 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1700 ; GFX9-NEXT: v_max_f32_e32 v8, v2, v10
1701 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1702 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1703 ; GFX9-NEXT: v_max_f32_e32 v8, v3, v11
1704 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1705 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1706 ; GFX9-NEXT: v_max_f32_e32 v8, v4, v12
1707 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1708 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1709 ; GFX9-NEXT: v_max_f32_e32 v8, v5, v13
1710 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1711 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1712 ; GFX9-NEXT: v_max_f32_e32 v8, v6, v14
1713 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1714 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1715 ; GFX9-NEXT: v_max_f32_e32 v8, v7, v15
1716 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1717 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1718 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1720 ; GFX940-LABEL: v_maximum_v8f32:
1722 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1723 ; GFX940-NEXT: v_max_f32_e32 v16, v0, v8
1724 ; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1725 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1726 ; GFX940-NEXT: v_max_f32_e32 v8, v1, v9
1727 ; GFX940-NEXT: s_nop 0
1728 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1729 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1730 ; GFX940-NEXT: s_nop 1
1731 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1732 ; GFX940-NEXT: v_max_f32_e32 v8, v2, v10
1733 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1734 ; GFX940-NEXT: s_nop 1
1735 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1736 ; GFX940-NEXT: v_max_f32_e32 v8, v3, v11
1737 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1738 ; GFX940-NEXT: s_nop 1
1739 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1740 ; GFX940-NEXT: v_max_f32_e32 v8, v4, v12
1741 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1742 ; GFX940-NEXT: s_nop 1
1743 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1744 ; GFX940-NEXT: v_max_f32_e32 v8, v5, v13
1745 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1746 ; GFX940-NEXT: s_nop 1
1747 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1748 ; GFX940-NEXT: v_max_f32_e32 v8, v6, v14
1749 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1750 ; GFX940-NEXT: s_nop 1
1751 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1752 ; GFX940-NEXT: v_max_f32_e32 v8, v7, v15
1753 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1754 ; GFX940-NEXT: s_nop 1
1755 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1756 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1758 ; GFX10-LABEL: v_maximum_v8f32:
1760 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1761 ; GFX10-NEXT: v_max_f32_e32 v16, v0, v8
1762 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
1763 ; GFX10-NEXT: v_max_f32_e32 v17, v1, v9
1764 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v10
1765 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1766 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
1767 ; GFX10-NEXT: v_max_f32_e32 v9, v3, v11
1768 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1769 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
1770 ; GFX10-NEXT: v_max_f32_e32 v10, v7, v15
1771 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1772 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
1773 ; GFX10-NEXT: v_max_f32_e32 v8, v4, v12
1774 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
1775 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
1776 ; GFX10-NEXT: v_max_f32_e32 v9, v5, v13
1777 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
1778 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
1779 ; GFX10-NEXT: v_max_f32_e32 v8, v6, v14
1780 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
1781 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
1782 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1783 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
1784 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1785 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1787 ; GFX11-LABEL: v_maximum_v8f32:
1789 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1790 ; GFX11-NEXT: v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
1791 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
1792 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1793 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1794 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
1795 ; GFX11-NEXT: v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10
1796 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1797 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
1798 ; GFX11-NEXT: v_max_f32_e32 v10, v7, v15
1799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1800 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1801 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
1802 ; GFX11-NEXT: v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
1803 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
1804 ; GFX11-NEXT: v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
1805 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
1806 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1807 ; GFX11-NEXT: v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
1808 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
1809 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1810 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
1811 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1812 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1814 ; GFX12-LABEL: v_maximum_v8f32:
1816 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1817 ; GFX12-NEXT: s_wait_expcnt 0x0
1818 ; GFX12-NEXT: s_wait_samplecnt 0x0
1819 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1820 ; GFX12-NEXT: s_wait_kmcnt 0x0
1821 ; GFX12-NEXT: v_maximum_f32 v0, v0, v8
1822 ; GFX12-NEXT: v_maximum_f32 v1, v1, v9
1823 ; GFX12-NEXT: v_maximum_f32 v2, v2, v10
1824 ; GFX12-NEXT: v_maximum_f32 v3, v3, v11
1825 ; GFX12-NEXT: v_maximum_f32 v4, v4, v12
1826 ; GFX12-NEXT: v_maximum_f32 v5, v5, v13
1827 ; GFX12-NEXT: v_maximum_f32 v6, v6, v14
1828 ; GFX12-NEXT: v_maximum_f32 v7, v7, v15
1829 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1830 %op = call <8 x float> @llvm.maximum.v8f32(<8 x float> %src0, <8 x float> %src1)
; Codegen test for @llvm.maximum.v16f32 on two <16 x float> arguments.
; What the autogenerated assertions below pin down, per target:
;   * gfx7 / gfx8 / gfx9: every lane is expanded to v_max_f32 + v_cmp_o_f32 +
;     v_cndmask_b32, selecting the constant 0x7fc00000 (an f32 quiet NaN bit
;     pattern) when the ordered compare fails. The compare results are spread
;     over many SGPR pairs including s[30:31], so s30/s31 are saved into lanes
;     of v31 (itself spilled to scratch at offset 4) and restored before the
;     final s_setpc_b64.
;   * gfx940 / gfx10 / gfx11: same max + ordered-compare + select expansion,
;     but reusing vcc / vcc_lo for each lane, so no spill of v31 is needed.
;   * gfx12: a single v_maximum_f32 per lane.
; On all targets the value paired with v15 is loaded from the stack at s32,
; and the lane-15 computation waits on that load.
; The check lines are regenerated by utils/update_llc_test_checks.py; do not
; edit them by hand.
1834 define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
1835 ; GFX7-LABEL: v_maximum_v16f32:
1837 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
1839 ; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1840 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1841 ; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
1842 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
1843 ; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
1844 ; GFX7-NEXT: v_writelane_b32 v31, s30, 0
1845 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1846 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
1847 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1848 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v18
1849 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1850 ; GFX7-NEXT: v_max_f32_e32 v18, v13, v29
1851 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
1852 ; GFX7-NEXT: v_writelane_b32 v31, s31, 1
1853 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1854 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v19
1855 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1856 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
1857 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1858 ; GFX7-NEXT: v_max_f32_e32 v5, v5, v21
1859 ; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1860 ; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
1861 ; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1862 ; GFX7-NEXT: v_max_f32_e32 v7, v7, v23
1863 ; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
1864 ; GFX7-NEXT: v_max_f32_e32 v8, v8, v24
1865 ; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
1866 ; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
1867 ; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
1868 ; GFX7-NEXT: v_max_f32_e32 v10, v10, v26
1869 ; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
1870 ; GFX7-NEXT: v_max_f32_e32 v11, v11, v27
1871 ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
1872 ; GFX7-NEXT: v_max_f32_e32 v12, v12, v28
1873 ; GFX7-NEXT: v_max_f32_e32 v19, v14, v30
1874 ; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
1875 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
1876 ; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
1877 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
1878 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
1879 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
1880 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
1881 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
1882 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
1883 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
1884 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
1885 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
1886 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
1887 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
1888 ; GFX7-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
1889 ; GFX7-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
1890 ; GFX7-NEXT: v_readlane_b32 s31, v31, 1
1891 ; GFX7-NEXT: v_readlane_b32 s30, v31, 0
1892 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1893 ; GFX7-NEXT: v_max_f32_e32 v18, v15, v16
1894 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
1895 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
1896 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
1897 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1898 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1899 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1900 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1902 ; GFX8-LABEL: v_maximum_v16f32:
1904 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
1906 ; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1907 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
1908 ; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
1909 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v16
1910 ; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
1911 ; GFX8-NEXT: v_writelane_b32 v31, s30, 0
1912 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1913 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v17
1914 ; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1915 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v18
1916 ; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1917 ; GFX8-NEXT: v_max_f32_e32 v18, v13, v29
1918 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
1919 ; GFX8-NEXT: v_writelane_b32 v31, s31, 1
1920 ; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1921 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v19
1922 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1923 ; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
1924 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1925 ; GFX8-NEXT: v_max_f32_e32 v5, v5, v21
1926 ; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1927 ; GFX8-NEXT: v_max_f32_e32 v6, v6, v22
1928 ; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1929 ; GFX8-NEXT: v_max_f32_e32 v7, v7, v23
1930 ; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
1931 ; GFX8-NEXT: v_max_f32_e32 v8, v8, v24
1932 ; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
1933 ; GFX8-NEXT: v_max_f32_e32 v9, v9, v25
1934 ; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
1935 ; GFX8-NEXT: v_max_f32_e32 v10, v10, v26
1936 ; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
1937 ; GFX8-NEXT: v_max_f32_e32 v11, v11, v27
1938 ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
1939 ; GFX8-NEXT: v_max_f32_e32 v12, v12, v28
1940 ; GFX8-NEXT: v_max_f32_e32 v19, v14, v30
1941 ; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
1942 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
1943 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
1944 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
1945 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
1946 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
1947 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
1948 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
1949 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
1950 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
1951 ; GFX8-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
1952 ; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
1953 ; GFX8-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
1954 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
1955 ; GFX8-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
1956 ; GFX8-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
1957 ; GFX8-NEXT: v_readlane_b32 s31, v31, 1
1958 ; GFX8-NEXT: v_readlane_b32 s30, v31, 0
1959 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1960 ; GFX8-NEXT: v_max_f32_e32 v18, v15, v16
1961 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
1962 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
1963 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
1964 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1965 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
1966 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1967 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1969 ; GFX9-LABEL: v_maximum_v16f32:
1971 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
1973 ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1974 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1975 ; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
1976 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v16
1977 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
1978 ; GFX9-NEXT: v_writelane_b32 v31, s30, 0
1979 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1980 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v17
1981 ; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1982 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v18
1983 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1984 ; GFX9-NEXT: v_max_f32_e32 v18, v13, v29
1985 ; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
1986 ; GFX9-NEXT: v_writelane_b32 v31, s31, 1
1987 ; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1988 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v19
1989 ; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1990 ; GFX9-NEXT: v_max_f32_e32 v4, v4, v20
1991 ; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1992 ; GFX9-NEXT: v_max_f32_e32 v5, v5, v21
1993 ; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1994 ; GFX9-NEXT: v_max_f32_e32 v6, v6, v22
1995 ; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1996 ; GFX9-NEXT: v_max_f32_e32 v7, v7, v23
1997 ; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
1998 ; GFX9-NEXT: v_max_f32_e32 v8, v8, v24
1999 ; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
2000 ; GFX9-NEXT: v_max_f32_e32 v9, v9, v25
2001 ; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
2002 ; GFX9-NEXT: v_max_f32_e32 v10, v10, v26
2003 ; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
2004 ; GFX9-NEXT: v_max_f32_e32 v11, v11, v27
2005 ; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
2006 ; GFX9-NEXT: v_max_f32_e32 v12, v12, v28
2007 ; GFX9-NEXT: v_max_f32_e32 v19, v14, v30
2008 ; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
2009 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
2010 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
2011 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
2012 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
2013 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
2014 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
2015 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
2016 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
2017 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
2018 ; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
2019 ; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
2020 ; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
2021 ; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
2022 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
2023 ; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
2024 ; GFX9-NEXT: v_readlane_b32 s31, v31, 1
2025 ; GFX9-NEXT: v_readlane_b32 s30, v31, 0
2026 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2027 ; GFX9-NEXT: v_max_f32_e32 v18, v15, v16
2028 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
2029 ; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
2030 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
2031 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
2032 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2033 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2034 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2036 ; GFX940-LABEL: v_maximum_v16f32:
2038 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039 ; GFX940-NEXT: scratch_load_dword v31, off, s32
2040 ; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
2041 ; GFX940-NEXT: v_max_f32_e32 v33, v0, v16
2042 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
2043 ; GFX940-NEXT: v_max_f32_e32 v34, v1, v17
2044 ; GFX940-NEXT: v_max_f32_e32 v35, v2, v18
2045 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
2046 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
2047 ; GFX940-NEXT: v_max_f32_e32 v36, v3, v19
2048 ; GFX940-NEXT: v_max_f32_e32 v37, v4, v20
2049 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
2050 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
2051 ; GFX940-NEXT: v_max_f32_e32 v38, v5, v21
2052 ; GFX940-NEXT: v_max_f32_e32 v39, v6, v22
2053 ; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
2054 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
2055 ; GFX940-NEXT: v_max_f32_e32 v48, v7, v23
2056 ; GFX940-NEXT: v_max_f32_e32 v49, v8, v24
2057 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
2058 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
2059 ; GFX940-NEXT: v_max_f32_e32 v50, v9, v25
2060 ; GFX940-NEXT: v_max_f32_e32 v51, v10, v26
2061 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
2062 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
2063 ; GFX940-NEXT: v_max_f32_e32 v52, v11, v27
2064 ; GFX940-NEXT: v_max_f32_e32 v53, v12, v28
2065 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
2066 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
2067 ; GFX940-NEXT: v_max_f32_e32 v54, v13, v29
2068 ; GFX940-NEXT: v_max_f32_e32 v55, v14, v30
2069 ; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
2070 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
2071 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2072 ; GFX940-NEXT: v_max_f32_e32 v16, v15, v31
2073 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
2074 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
2075 ; GFX940-NEXT: s_nop 1
2076 ; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
2077 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
2078 ; GFX940-NEXT: s_nop 1
2079 ; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
2080 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
2081 ; GFX940-NEXT: s_nop 1
2082 ; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
2083 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
2084 ; GFX940-NEXT: s_nop 1
2085 ; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
2086 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
2087 ; GFX940-NEXT: s_nop 1
2088 ; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
2089 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
2090 ; GFX940-NEXT: s_nop 1
2091 ; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
2092 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
2093 ; GFX940-NEXT: s_nop 1
2094 ; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
2095 ; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
2096 ; GFX940-NEXT: s_nop 1
2097 ; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
2098 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2100 ; GFX10-LABEL: v_maximum_v16f32:
2102 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2103 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
2104 ; GFX10-NEXT: v_max_f32_e32 v32, v0, v16
2105 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
2106 ; GFX10-NEXT: v_max_f32_e32 v33, v1, v17
2107 ; GFX10-NEXT: v_max_f32_e32 v34, v2, v18
2108 ; GFX10-NEXT: v_max_f32_e32 v35, v3, v19
2109 ; GFX10-NEXT: v_max_f32_e32 v36, v4, v20
2110 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
2111 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
2112 ; GFX10-NEXT: v_max_f32_e32 v37, v5, v21
2113 ; GFX10-NEXT: v_max_f32_e32 v38, v6, v22
2114 ; GFX10-NEXT: v_max_f32_e32 v39, v7, v23
2115 ; GFX10-NEXT: v_max_f32_e32 v48, v8, v24
2116 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
2117 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
2118 ; GFX10-NEXT: v_max_f32_e32 v49, v9, v25
2119 ; GFX10-NEXT: v_max_f32_e32 v50, v10, v26
2120 ; GFX10-NEXT: v_max_f32_e32 v51, v11, v27
2121 ; GFX10-NEXT: v_max_f32_e32 v52, v12, v28
2122 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
2123 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
2124 ; GFX10-NEXT: v_max_f32_e32 v53, v13, v29
2125 ; GFX10-NEXT: v_max_f32_e32 v54, v14, v30
2126 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
2127 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
2128 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
2129 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
2130 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
2131 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
2132 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
2133 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
2134 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
2135 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
2136 ; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
2137 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
2138 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
2139 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
2140 ; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
2141 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
2142 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
2143 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
2144 ; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
2145 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
2146 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
2147 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
2148 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2149 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2150 ; GFX10-NEXT: v_max_f32_e32 v16, v15, v31
2151 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
2152 ; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2153 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2155 ; GFX11-LABEL: v_maximum_v16f32:
2157 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2158 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
2159 ; GFX11-NEXT: v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
2160 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
2161 ; GFX11-NEXT: v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
2162 ; GFX11-NEXT: v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
2163 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2164 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
2165 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
2166 ; GFX11-NEXT: v_max_f32_e32 v54, v14, v30
2167 ; GFX11-NEXT: v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
2168 ; GFX11-NEXT: v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
2169 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
2170 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
2171 ; GFX11-NEXT: v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
2172 ; GFX11-NEXT: v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
2173 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
2174 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
2175 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
2176 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
2177 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
2178 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
2179 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
2180 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
2181 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
2182 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
2183 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
2184 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
2185 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
2186 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
2187 ; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
2188 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
2189 ; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
2190 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
2191 ; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
2192 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
2193 ; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
2194 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
2195 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
2196 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
2197 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2198 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2199 ; GFX11-NEXT: v_max_f32_e32 v16, v15, v31
2200 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
2201 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2202 ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2203 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2205 ; GFX12-LABEL: v_maximum_v16f32:
2207 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2208 ; GFX12-NEXT: s_wait_expcnt 0x0
2209 ; GFX12-NEXT: s_wait_samplecnt 0x0
2210 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2211 ; GFX12-NEXT: s_wait_kmcnt 0x0
2212 ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2213 ; GFX12-NEXT: v_maximum_f32 v0, v0, v16
2214 ; GFX12-NEXT: v_maximum_f32 v1, v1, v17
2215 ; GFX12-NEXT: v_maximum_f32 v2, v2, v18
2216 ; GFX12-NEXT: v_maximum_f32 v3, v3, v19
2217 ; GFX12-NEXT: v_maximum_f32 v4, v4, v20
2218 ; GFX12-NEXT: v_maximum_f32 v5, v5, v21
2219 ; GFX12-NEXT: v_maximum_f32 v6, v6, v22
2220 ; GFX12-NEXT: v_maximum_f32 v7, v7, v23
2221 ; GFX12-NEXT: v_maximum_f32 v8, v8, v24
2222 ; GFX12-NEXT: v_maximum_f32 v9, v9, v25
2223 ; GFX12-NEXT: v_maximum_f32 v10, v10, v26
2224 ; GFX12-NEXT: v_maximum_f32 v11, v11, v27
2225 ; GFX12-NEXT: v_maximum_f32 v12, v12, v28
2226 ; GFX12-NEXT: v_maximum_f32 v13, v13, v29
2227 ; GFX12-NEXT: v_maximum_f32 v14, v14, v30
2228 ; GFX12-NEXT: s_wait_loadcnt 0x0
2229 ; GFX12-NEXT: v_maximum_f32 v15, v15, v31
2230 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2231 %op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1)
2232 ret <16 x float> %op
2234 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: