1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
9 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; Baseline case: llvm.maximum.f32 on two float arguments, no fast-math flags.
; Per the checks below, gfx7/gfx8/gfx900/gfx10/gfx11 emit v_max_f32 together
; with an ordered compare (v_cmp_o_f32) and a v_cndmask that picks the constant
; 0x7fc00000 when the compare fails; gfx950 emits one v_maximum3_f32 with the
; second source repeated, and gfx12 emits one v_maximum_f32.
11 define float @v_maximum_f32(float %src0, float %src1) {
12 ; GFX7-LABEL: v_maximum_f32:
14 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
16 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
17 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
18 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
19 ; GFX7-NEXT: s_setpc_b64 s[30:31]
21 ; GFX8-LABEL: v_maximum_f32:
23 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
25 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
26 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
27 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
28 ; GFX8-NEXT: s_setpc_b64 s[30:31]
30 ; GFX900-LABEL: v_maximum_f32:
32 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
34 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
35 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
36 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
37 ; GFX900-NEXT: s_setpc_b64 s[30:31]
39 ; GFX950-LABEL: v_maximum_f32:
41 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
43 ; GFX950-NEXT: s_setpc_b64 s[30:31]
45 ; GFX10-LABEL: v_maximum_f32:
47 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
49 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
50 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
51 ; GFX10-NEXT: s_setpc_b64 s[30:31]
53 ; GFX11-LABEL: v_maximum_f32:
55 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
57 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
58 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
59 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
60 ; GFX11-NEXT: s_setpc_b64 s[30:31]
62 ; GFX12-LABEL: v_maximum_f32:
64 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
65 ; GFX12-NEXT: s_wait_expcnt 0x0
66 ; GFX12-NEXT: s_wait_samplecnt 0x0
67 ; GFX12-NEXT: s_wait_bvhcnt 0x0
68 ; GFX12-NEXT: s_wait_kmcnt 0x0
69 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
70 ; GFX12-NEXT: s_setpc_b64 s[30:31]
71 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; llvm.maximum.f32 with the nnan flag on the call.
; Per the checks below, gfx7/gfx8/gfx900/gfx10/gfx11 reduce to a bare
; v_max_f32 (no ordered compare, no select against 0x7fc00000); gfx950 still
; emits v_maximum3_f32 and gfx12 still emits v_maximum_f32.
75 define float @v_maximum_f32__nnan(float %src0, float %src1) {
76 ; GFX7-LABEL: v_maximum_f32__nnan:
78 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
80 ; GFX7-NEXT: s_setpc_b64 s[30:31]
82 ; GFX8-LABEL: v_maximum_f32__nnan:
84 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
86 ; GFX8-NEXT: s_setpc_b64 s[30:31]
88 ; GFX900-LABEL: v_maximum_f32__nnan:
90 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
92 ; GFX900-NEXT: s_setpc_b64 s[30:31]
94 ; GFX950-LABEL: v_maximum_f32__nnan:
96 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
98 ; GFX950-NEXT: s_setpc_b64 s[30:31]
100 ; GFX10-LABEL: v_maximum_f32__nnan:
102 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
104 ; GFX10-NEXT: s_setpc_b64 s[30:31]
106 ; GFX11-LABEL: v_maximum_f32__nnan:
108 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
110 ; GFX11-NEXT: s_setpc_b64 s[30:31]
112 ; GFX12-LABEL: v_maximum_f32__nnan:
114 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
115 ; GFX12-NEXT: s_wait_expcnt 0x0
116 ; GFX12-NEXT: s_wait_samplecnt 0x0
117 ; GFX12-NEXT: s_wait_bvhcnt 0x0
118 ; GFX12-NEXT: s_wait_kmcnt 0x0
119 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
120 ; GFX12-NEXT: s_setpc_b64 s[30:31]
121 %op = call nnan float @llvm.maximum.f32(float %src0, float %src1)
; llvm.maximum.f32 with only the nsz flag on the call.
; Per the checks below, nsz by itself does not simplify the expansion:
; gfx7/gfx8/gfx900/gfx10/gfx11 keep v_max_f32 plus the v_cmp_o_f32 /
; v_cndmask select of 0x7fc00000, gfx950 emits v_maximum3_f32 and gfx12 emits
; v_maximum_f32.
125 define float @v_maximum_f32__nsz(float %src0, float %src1) {
126 ; GFX7-LABEL: v_maximum_f32__nsz:
128 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
130 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
131 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
132 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
133 ; GFX7-NEXT: s_setpc_b64 s[30:31]
135 ; GFX8-LABEL: v_maximum_f32__nsz:
137 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
139 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
140 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
141 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
142 ; GFX8-NEXT: s_setpc_b64 s[30:31]
144 ; GFX900-LABEL: v_maximum_f32__nsz:
146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
148 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
149 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
150 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
151 ; GFX900-NEXT: s_setpc_b64 s[30:31]
153 ; GFX950-LABEL: v_maximum_f32__nsz:
155 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
157 ; GFX950-NEXT: s_setpc_b64 s[30:31]
159 ; GFX10-LABEL: v_maximum_f32__nsz:
161 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
163 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
164 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
165 ; GFX10-NEXT: s_setpc_b64 s[30:31]
167 ; GFX11-LABEL: v_maximum_f32__nsz:
169 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
171 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
172 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
173 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
174 ; GFX11-NEXT: s_setpc_b64 s[30:31]
176 ; GFX12-LABEL: v_maximum_f32__nsz:
178 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
179 ; GFX12-NEXT: s_wait_expcnt 0x0
180 ; GFX12-NEXT: s_wait_samplecnt 0x0
181 ; GFX12-NEXT: s_wait_bvhcnt 0x0
182 ; GFX12-NEXT: s_wait_kmcnt 0x0
183 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
184 ; GFX12-NEXT: s_setpc_b64 s[30:31]
185 %op = call nsz float @llvm.maximum.f32(float %src0, float %src1)
; llvm.maximum.f32 with both nnan and nsz on the call.
; Per the checks below, gfx7/gfx8/gfx900/gfx10/gfx11 emit a bare v_max_f32,
; gfx950 emits v_maximum3_f32 and gfx12 emits v_maximum_f32.
189 define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
190 ; GFX7-LABEL: v_maximum_f32__nnan_nsz:
192 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
194 ; GFX7-NEXT: s_setpc_b64 s[30:31]
196 ; GFX8-LABEL: v_maximum_f32__nnan_nsz:
198 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
200 ; GFX8-NEXT: s_setpc_b64 s[30:31]
202 ; GFX900-LABEL: v_maximum_f32__nnan_nsz:
204 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
206 ; GFX900-NEXT: s_setpc_b64 s[30:31]
208 ; GFX950-LABEL: v_maximum_f32__nnan_nsz:
210 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
212 ; GFX950-NEXT: s_setpc_b64 s[30:31]
214 ; GFX10-LABEL: v_maximum_f32__nnan_nsz:
216 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
218 ; GFX10-NEXT: s_setpc_b64 s[30:31]
220 ; GFX11-LABEL: v_maximum_f32__nnan_nsz:
222 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
224 ; GFX11-NEXT: s_setpc_b64 s[30:31]
226 ; GFX12-LABEL: v_maximum_f32__nnan_nsz:
228 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
229 ; GFX12-NEXT: s_wait_expcnt 0x0
230 ; GFX12-NEXT: s_wait_samplecnt 0x0
231 ; GFX12-NEXT: s_wait_bvhcnt 0x0
232 ; GFX12-NEXT: s_wait_kmcnt 0x0
233 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
234 ; GFX12-NEXT: s_setpc_b64 s[30:31]
235 %op = call nnan nsz float @llvm.maximum.f32(float %src0, float %src1)
; The first maximum operand is produced by an fadd carrying nnan
; (%arg0 + 1.0); the llvm.maximum.f32 call itself has no flags and %src1 is a
; plain argument. Per the checks below, the v_add_f32 is followed by the full
; expansion on gfx7/gfx8/gfx900/gfx10/gfx11 (v_max_f32, v_cmp_o_f32 and the
; v_cndmask select of 0x7fc00000); gfx950 and gfx12 follow the add with their
; single maximum instruction.
239 define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
240 ; GFX7-LABEL: v_maximum_f32__nnan_src0:
242 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243 ; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0
244 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
245 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
246 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
247 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
248 ; GFX7-NEXT: s_setpc_b64 s[30:31]
250 ; GFX8-LABEL: v_maximum_f32__nnan_src0:
252 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX8-NEXT: v_add_f32_e32 v0, 1.0, v0
254 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
255 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
256 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
257 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
258 ; GFX8-NEXT: s_setpc_b64 s[30:31]
260 ; GFX900-LABEL: v_maximum_f32__nnan_src0:
262 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0
264 ; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
265 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
266 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
267 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
268 ; GFX900-NEXT: s_setpc_b64 s[30:31]
270 ; GFX950-LABEL: v_maximum_f32__nnan_src0:
272 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
274 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
275 ; GFX950-NEXT: s_setpc_b64 s[30:31]
277 ; GFX10-LABEL: v_maximum_f32__nnan_src0:
279 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX10-NEXT: v_add_f32_e32 v0, 1.0, v0
281 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
282 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
283 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
284 ; GFX10-NEXT: s_setpc_b64 s[30:31]
286 ; GFX11-LABEL: v_maximum_f32__nnan_src0:
288 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
290 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
291 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
292 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
293 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
294 ; GFX11-NEXT: s_setpc_b64 s[30:31]
296 ; GFX12-LABEL: v_maximum_f32__nnan_src0:
298 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
299 ; GFX12-NEXT: s_wait_expcnt 0x0
300 ; GFX12-NEXT: s_wait_samplecnt 0x0
301 ; GFX12-NEXT: s_wait_bvhcnt 0x0
302 ; GFX12-NEXT: s_wait_kmcnt 0x0
303 ; GFX12-NEXT: v_add_f32_e32 v0, 1.0, v0
304 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
305 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
306 ; GFX12-NEXT: s_setpc_b64 s[30:31]
307 %src0 = fadd nnan float %arg0, 1.0
308 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; The second maximum operand is produced by an fadd carrying nnan
; (%arg1 + 1.0); the llvm.maximum.f32 call itself has no flags and %src0 is a
; plain argument. Per the checks below, the v_add_f32 on v1 is followed by the
; full expansion on gfx7/gfx8/gfx900/gfx10/gfx11 (v_max_f32, v_cmp_o_f32 and
; the v_cndmask select of 0x7fc00000); gfx950 and gfx12 follow the add with
; their single maximum instruction.
312 define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
313 ; GFX7-LABEL: v_maximum_f32__nnan_src1:
315 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316 ; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1
317 ; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
318 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
319 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
320 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
321 ; GFX7-NEXT: s_setpc_b64 s[30:31]
323 ; GFX8-LABEL: v_maximum_f32__nnan_src1:
325 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GFX8-NEXT: v_add_f32_e32 v1, 1.0, v1
327 ; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
328 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
329 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
330 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
331 ; GFX8-NEXT: s_setpc_b64 s[30:31]
333 ; GFX900-LABEL: v_maximum_f32__nnan_src1:
335 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1
337 ; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
338 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
339 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
340 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
341 ; GFX900-NEXT: s_setpc_b64 s[30:31]
343 ; GFX950-LABEL: v_maximum_f32__nnan_src1:
345 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
347 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
348 ; GFX950-NEXT: s_setpc_b64 s[30:31]
350 ; GFX10-LABEL: v_maximum_f32__nnan_src1:
352 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX10-NEXT: v_add_f32_e32 v1, 1.0, v1
354 ; GFX10-NEXT: v_max_f32_e32 v2, v0, v1
355 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
356 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
357 ; GFX10-NEXT: s_setpc_b64 s[30:31]
359 ; GFX11-LABEL: v_maximum_f32__nnan_src1:
361 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1
363 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
364 ; GFX11-NEXT: v_max_f32_e32 v2, v0, v1
365 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1
366 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
367 ; GFX11-NEXT: s_setpc_b64 s[30:31]
369 ; GFX12-LABEL: v_maximum_f32__nnan_src1:
371 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
372 ; GFX12-NEXT: s_wait_expcnt 0x0
373 ; GFX12-NEXT: s_wait_samplecnt 0x0
374 ; GFX12-NEXT: s_wait_bvhcnt 0x0
375 ; GFX12-NEXT: s_wait_kmcnt 0x0
376 ; GFX12-NEXT: v_add_f32_e32 v1, 1.0, v1
377 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
378 ; GFX12-NEXT: v_maximum_f32 v0, v0, v1
379 ; GFX12-NEXT: s_setpc_b64 s[30:31]
380 %src1 = fadd nnan float %arg1, 1.0
381 %op = call float @llvm.maximum.f32(float %src0, float %src1)
; llvm.maximum.f32 on two inreg float arguments; the result is consumed by a
; side-effecting inline asm through an "s" constraint instead of being
; returned. Per the checks below, gfx12 computes the result with
; s_maximum_f32 and the asm prints s0, while every other target computes it
; with vector instructions reading the scalar inputs and the asm prints v0.
; NOTE(review) - the asm constraint is "s", yet the expected asm operand on
; the targets other than gfx12 is a VGPR (v0); confirm this is the intended
; output and not a missing VGPR-to-SGPR transfer.
385 define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
386 ; GFX7-LABEL: s_maximum_f32:
388 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX7-NEXT: v_mov_b32_e32 v0, s17
390 ; GFX7-NEXT: v_max_f32_e32 v1, s16, v0
391 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
392 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
393 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
394 ; GFX7-NEXT: ;;#ASMSTART
395 ; GFX7-NEXT: ; use v0
396 ; GFX7-NEXT: ;;#ASMEND
397 ; GFX7-NEXT: s_setpc_b64 s[30:31]
399 ; GFX8-LABEL: s_maximum_f32:
401 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX8-NEXT: v_mov_b32_e32 v0, s17
403 ; GFX8-NEXT: v_max_f32_e32 v1, s16, v0
404 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
405 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
406 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
407 ; GFX8-NEXT: ;;#ASMSTART
408 ; GFX8-NEXT: ; use v0
409 ; GFX8-NEXT: ;;#ASMEND
410 ; GFX8-NEXT: s_setpc_b64 s[30:31]
412 ; GFX900-LABEL: s_maximum_f32:
414 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415 ; GFX900-NEXT: v_mov_b32_e32 v0, s17
416 ; GFX900-NEXT: v_max_f32_e32 v1, s16, v0
417 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
418 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
419 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
420 ; GFX900-NEXT: ;;#ASMSTART
421 ; GFX900-NEXT: ; use v0
422 ; GFX900-NEXT: ;;#ASMEND
423 ; GFX900-NEXT: s_setpc_b64 s[30:31]
425 ; GFX950-LABEL: s_maximum_f32:
427 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX950-NEXT: v_mov_b32_e32 v0, s0
429 ; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
430 ; GFX950-NEXT: ;;#ASMSTART
431 ; GFX950-NEXT: ; use v0
432 ; GFX950-NEXT: ;;#ASMEND
433 ; GFX950-NEXT: s_setpc_b64 s[30:31]
435 ; GFX10-LABEL: s_maximum_f32:
437 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438 ; GFX10-NEXT: v_max_f32_e64 v0, s16, s17
439 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s16, s17
440 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
441 ; GFX10-NEXT: ;;#ASMSTART
442 ; GFX10-NEXT: ; use v0
443 ; GFX10-NEXT: ;;#ASMEND
444 ; GFX10-NEXT: s_setpc_b64 s[30:31]
446 ; GFX11-LABEL: s_maximum_f32:
448 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449 ; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
450 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
451 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
452 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
453 ; GFX11-NEXT: ;;#ASMSTART
454 ; GFX11-NEXT: ; use v0
455 ; GFX11-NEXT: ;;#ASMEND
456 ; GFX11-NEXT: s_setpc_b64 s[30:31]
458 ; GFX12-LABEL: s_maximum_f32:
460 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
461 ; GFX12-NEXT: s_wait_expcnt 0x0
462 ; GFX12-NEXT: s_wait_samplecnt 0x0
463 ; GFX12-NEXT: s_wait_bvhcnt 0x0
464 ; GFX12-NEXT: s_wait_kmcnt 0x0
465 ; GFX12-NEXT: s_maximum_f32 s0, s0, s1
466 ; GFX12-NEXT: ;;#ASMSTART
467 ; GFX12-NEXT: ; use s0
468 ; GFX12-NEXT: ;;#ASMEND
469 ; GFX12-NEXT: s_wait_alu 0xfffe
470 ; GFX12-NEXT: s_setpc_b64 s[30:31]
471 %op = call float @llvm.maximum.f32(float %src0, float %src1)
472 call void asm sideeffect "; use $0", "s"(float %op)
; llvm.maximum.v2f32 on two <2 x float> arguments, no fast-math flags.
; Per the checks below, the operation is emitted once per element (v0 with
; v2, v1 with v3). gfx7/gfx8/gfx900/gfx10/gfx11 use v_max_f32 plus the
; v_cmp_o_f32 / v_cndmask select of 0x7fc00000 for each element, with gfx11
; pairing the two maxes in one v_dual_max_f32; gfx950 emits two
; v_maximum3_f32 and gfx12 two v_maximum_f32.
476 define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
477 ; GFX7-LABEL: v_maximum_v2f32:
479 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
481 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
482 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
483 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
484 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
485 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
486 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
487 ; GFX7-NEXT: s_setpc_b64 s[30:31]
489 ; GFX8-LABEL: v_maximum_v2f32:
491 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492 ; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
493 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
494 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
495 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
496 ; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
497 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
498 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
499 ; GFX8-NEXT: s_setpc_b64 s[30:31]
501 ; GFX900-LABEL: v_maximum_v2f32:
503 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504 ; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
505 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
506 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
507 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
508 ; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
509 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
510 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
511 ; GFX900-NEXT: s_setpc_b64 s[30:31]
513 ; GFX950-LABEL: v_maximum_v2f32:
515 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
517 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
518 ; GFX950-NEXT: s_setpc_b64 s[30:31]
520 ; GFX10-LABEL: v_maximum_v2f32:
522 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523 ; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
524 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
525 ; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
526 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
527 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
528 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
529 ; GFX10-NEXT: s_setpc_b64 s[30:31]
531 ; GFX11-LABEL: v_maximum_v2f32:
533 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534 ; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
535 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
536 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
537 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
538 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
539 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
540 ; GFX11-NEXT: s_setpc_b64 s[30:31]
542 ; GFX12-LABEL: v_maximum_v2f32:
544 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
545 ; GFX12-NEXT: s_wait_expcnt 0x0
546 ; GFX12-NEXT: s_wait_samplecnt 0x0
547 ; GFX12-NEXT: s_wait_bvhcnt 0x0
548 ; GFX12-NEXT: s_wait_kmcnt 0x0
549 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
550 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
551 ; GFX12-NEXT: s_setpc_b64 s[30:31]
552 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; llvm.maximum.v2f32 with the nnan flag on the call.
; Per the checks below, gfx7/gfx8/gfx900/gfx10 emit one bare v_max_f32 per
; element, gfx11 pairs both in a single v_dual_max_f32, gfx950 emits two
; v_maximum3_f32 and gfx12 two v_maximum_f32.
556 define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) {
557 ; GFX7-LABEL: v_maximum_v2f32__nnan:
559 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
561 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
562 ; GFX7-NEXT: s_setpc_b64 s[30:31]
564 ; GFX8-LABEL: v_maximum_v2f32__nnan:
566 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
568 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
569 ; GFX8-NEXT: s_setpc_b64 s[30:31]
571 ; GFX900-LABEL: v_maximum_v2f32__nnan:
573 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
575 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
576 ; GFX900-NEXT: s_setpc_b64 s[30:31]
578 ; GFX950-LABEL: v_maximum_v2f32__nnan:
580 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
582 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
583 ; GFX950-NEXT: s_setpc_b64 s[30:31]
585 ; GFX10-LABEL: v_maximum_v2f32__nnan:
587 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
589 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
590 ; GFX10-NEXT: s_setpc_b64 s[30:31]
592 ; GFX11-LABEL: v_maximum_v2f32__nnan:
594 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
596 ; GFX11-NEXT: s_setpc_b64 s[30:31]
598 ; GFX12-LABEL: v_maximum_v2f32__nnan:
600 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
601 ; GFX12-NEXT: s_wait_expcnt 0x0
602 ; GFX12-NEXT: s_wait_samplecnt 0x0
603 ; GFX12-NEXT: s_wait_bvhcnt 0x0
604 ; GFX12-NEXT: s_wait_kmcnt 0x0
605 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
606 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
607 ; GFX12-NEXT: s_setpc_b64 s[30:31]
608 %op = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; llvm.maximum.v2f32 with only the nsz flag on the call.
; Per the checks below, nsz by itself does not simplify the expansion:
; gfx7/gfx8/gfx900/gfx10/gfx11 keep the per-element v_max_f32 plus the
; v_cmp_o_f32 / v_cndmask select of 0x7fc00000, gfx950 emits two
; v_maximum3_f32 and gfx12 two v_maximum_f32.
612 define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
613 ; GFX7-LABEL: v_maximum_v2f32__nsz:
615 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616 ; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
617 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
618 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
619 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
620 ; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
621 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
622 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
623 ; GFX7-NEXT: s_setpc_b64 s[30:31]
625 ; GFX8-LABEL: v_maximum_v2f32__nsz:
627 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
629 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
630 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
631 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
632 ; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
633 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
634 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
635 ; GFX8-NEXT: s_setpc_b64 s[30:31]
637 ; GFX900-LABEL: v_maximum_v2f32__nsz:
639 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
641 ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
642 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
643 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
644 ; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
645 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
646 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
647 ; GFX900-NEXT: s_setpc_b64 s[30:31]
649 ; GFX950-LABEL: v_maximum_v2f32__nsz:
651 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
653 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
654 ; GFX950-NEXT: s_setpc_b64 s[30:31]
656 ; GFX10-LABEL: v_maximum_v2f32__nsz:
658 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659 ; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
660 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
661 ; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
662 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
663 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
664 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
665 ; GFX10-NEXT: s_setpc_b64 s[30:31]
667 ; GFX11-LABEL: v_maximum_v2f32__nsz:
669 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670 ; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
671 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
672 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
673 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
674 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
675 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
676 ; GFX11-NEXT: s_setpc_b64 s[30:31]
678 ; GFX12-LABEL: v_maximum_v2f32__nsz:
680 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
681 ; GFX12-NEXT: s_wait_expcnt 0x0
682 ; GFX12-NEXT: s_wait_samplecnt 0x0
683 ; GFX12-NEXT: s_wait_bvhcnt 0x0
684 ; GFX12-NEXT: s_wait_kmcnt 0x0
685 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
686 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
687 ; GFX12-NEXT: s_setpc_b64 s[30:31]
688 %op = call nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; llvm.maximum.v2f32 with both nnan and nsz on the call.
; Per the checks below, gfx7/gfx8/gfx900/gfx10 emit one bare v_max_f32 per
; element, gfx11 pairs both in a single v_dual_max_f32, gfx950 emits two
; v_maximum3_f32 and gfx12 two v_maximum_f32.
692 define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %src1) {
693 ; GFX7-LABEL: v_maximum_v2f32__nnan_nsz:
695 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
697 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
698 ; GFX7-NEXT: s_setpc_b64 s[30:31]
700 ; GFX8-LABEL: v_maximum_v2f32__nnan_nsz:
702 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
704 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
705 ; GFX8-NEXT: s_setpc_b64 s[30:31]
707 ; GFX900-LABEL: v_maximum_v2f32__nnan_nsz:
709 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
711 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
712 ; GFX900-NEXT: s_setpc_b64 s[30:31]
714 ; GFX950-LABEL: v_maximum_v2f32__nnan_nsz:
716 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
718 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
719 ; GFX950-NEXT: s_setpc_b64 s[30:31]
721 ; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
723 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
725 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
726 ; GFX10-NEXT: s_setpc_b64 s[30:31]
728 ; GFX11-LABEL: v_maximum_v2f32__nnan_nsz:
730 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
732 ; GFX11-NEXT: s_setpc_b64 s[30:31]
734 ; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
736 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
737 ; GFX12-NEXT: s_wait_expcnt 0x0
738 ; GFX12-NEXT: s_wait_samplecnt 0x0
739 ; GFX12-NEXT: s_wait_bvhcnt 0x0
740 ; GFX12-NEXT: s_wait_kmcnt 0x0
741 ; GFX12-NEXT: v_maximum_f32 v0, v0, v2
742 ; GFX12-NEXT: v_maximum_f32 v1, v1, v3
743 ; GFX12-NEXT: s_setpc_b64 s[30:31]
744 %op = call nnan nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
; llvm.maximum.v2f32 on two inreg <2 x float> arguments; the result is
; consumed by a side-effecting inline asm through an "s" constraint instead of
; being returned. Per the checks below, gfx12 computes both elements with
; s_maximum_f32 and the asm prints s[0:1], while every other target computes
; them with vector instructions reading the scalar inputs and the asm prints
; v[0:1].
; NOTE(review) - the asm constraint is "s", yet the expected asm operand on
; the targets other than gfx12 is a VGPR pair (v[0:1]); confirm this is the
; intended output and not a missing VGPR-to-SGPR transfer.
748 define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
749 ; GFX7-LABEL: s_maximum_v2f32:
751 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752 ; GFX7-NEXT: v_mov_b32_e32 v0, s19
753 ; GFX7-NEXT: v_max_f32_e32 v1, s17, v0
754 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
755 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
756 ; GFX7-NEXT: v_mov_b32_e32 v0, s18
757 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
758 ; GFX7-NEXT: v_max_f32_e32 v3, s16, v0
759 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
760 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
761 ; GFX7-NEXT: ;;#ASMSTART
762 ; GFX7-NEXT: ; use v[0:1]
763 ; GFX7-NEXT: ;;#ASMEND
764 ; GFX7-NEXT: s_setpc_b64 s[30:31]
766 ; GFX8-LABEL: s_maximum_v2f32:
768 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GFX8-NEXT: v_mov_b32_e32 v0, s19
770 ; GFX8-NEXT: v_max_f32_e32 v1, s17, v0
771 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
772 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
773 ; GFX8-NEXT: v_mov_b32_e32 v0, s18
774 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
775 ; GFX8-NEXT: v_max_f32_e32 v3, s16, v0
776 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
777 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
778 ; GFX8-NEXT: ;;#ASMSTART
779 ; GFX8-NEXT: ; use v[0:1]
780 ; GFX8-NEXT: ;;#ASMEND
781 ; GFX8-NEXT: s_setpc_b64 s[30:31]
783 ; GFX900-LABEL: s_maximum_v2f32:
785 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786 ; GFX900-NEXT: v_mov_b32_e32 v0, s19
787 ; GFX900-NEXT: v_max_f32_e32 v1, s17, v0
788 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
789 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
790 ; GFX900-NEXT: v_mov_b32_e32 v0, s18
791 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
792 ; GFX900-NEXT: v_max_f32_e32 v3, s16, v0
793 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
794 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
795 ; GFX900-NEXT: ;;#ASMSTART
796 ; GFX900-NEXT: ; use v[0:1]
797 ; GFX900-NEXT: ;;#ASMEND
798 ; GFX900-NEXT: s_setpc_b64 s[30:31]
800 ; GFX950-LABEL: s_maximum_v2f32:
802 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX950-NEXT: v_mov_b32_e32 v0, s1
804 ; GFX950-NEXT: v_maximum3_f32 v1, v0, s3, s3
805 ; GFX950-NEXT: v_mov_b32_e32 v0, s0
806 ; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2
807 ; GFX950-NEXT: ;;#ASMSTART
808 ; GFX950-NEXT: ; use v[0:1]
809 ; GFX950-NEXT: ;;#ASMEND
810 ; GFX950-NEXT: s_setpc_b64 s[30:31]
812 ; GFX10-LABEL: s_maximum_v2f32:
814 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815 ; GFX10-NEXT: v_max_f32_e64 v0, s17, s19
816 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s17, s19
817 ; GFX10-NEXT: v_max_f32_e64 v2, s16, s18
818 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
819 ; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s16, s18
820 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
821 ; GFX10-NEXT: ;;#ASMSTART
822 ; GFX10-NEXT: ; use v[0:1]
823 ; GFX10-NEXT: ;;#ASMEND
824 ; GFX10-NEXT: s_setpc_b64 s[30:31]
826 ; GFX11-LABEL: s_maximum_v2f32:
828 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829 ; GFX11-NEXT: v_max_f32_e64 v0, s1, s3
830 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
831 ; GFX11-NEXT: v_max_f32_e64 v2, s0, s2
832 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
833 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
834 ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
835 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
836 ; GFX11-NEXT: ;;#ASMSTART
837 ; GFX11-NEXT: ; use v[0:1]
838 ; GFX11-NEXT: ;;#ASMEND
839 ; GFX11-NEXT: s_setpc_b64 s[30:31]
841 ; GFX12-LABEL: s_maximum_v2f32:
843 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
844 ; GFX12-NEXT: s_wait_expcnt 0x0
845 ; GFX12-NEXT: s_wait_samplecnt 0x0
846 ; GFX12-NEXT: s_wait_bvhcnt 0x0
847 ; GFX12-NEXT: s_wait_kmcnt 0x0
848 ; GFX12-NEXT: s_maximum_f32 s1, s1, s3
849 ; GFX12-NEXT: s_maximum_f32 s0, s0, s2
850 ; GFX12-NEXT: ;;#ASMSTART
851 ; GFX12-NEXT: ; use s[0:1]
852 ; GFX12-NEXT: ;;#ASMEND
853 ; GFX12-NEXT: s_wait_alu 0xfffe
854 ; GFX12-NEXT: s_setpc_b64 s[30:31]
855 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
856 call void asm sideeffect "; use $0", "s"(<2 x float> %op)
; Codegen test: llvm.maximum on <3 x float> with no fast-math flags on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - one v_max_f32 per element, followed by
;     v_cmp_o_f32 and v_cndmask_b32 choosing between that result and the
;     constant 0x7fc00000 (the f32 quiet-NaN bit pattern).
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
860 define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
861 ; GFX7-LABEL: v_maximum_v3f32:
863 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
865 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
866 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
867 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
868 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
869 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
870 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
871 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
872 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
873 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
874 ; GFX7-NEXT: s_setpc_b64 s[30:31]
876 ; GFX8-LABEL: v_maximum_v3f32:
878 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879 ; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
880 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
881 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
882 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
883 ; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
884 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
885 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
886 ; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
887 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
888 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
889 ; GFX8-NEXT: s_setpc_b64 s[30:31]
891 ; GFX900-LABEL: v_maximum_v3f32:
893 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894 ; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
895 ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
896 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
897 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
898 ; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
899 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
900 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
901 ; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
902 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
903 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
904 ; GFX900-NEXT: s_setpc_b64 s[30:31]
906 ; GFX950-LABEL: v_maximum_v3f32:
908 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
910 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
911 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
912 ; GFX950-NEXT: s_setpc_b64 s[30:31]
914 ; GFX10-LABEL: v_maximum_v3f32:
916 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917 ; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
918 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
919 ; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
920 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
921 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
922 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
923 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
924 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
925 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
926 ; GFX10-NEXT: s_setpc_b64 s[30:31]
928 ; GFX11-LABEL: v_maximum_v3f32:
930 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931 ; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
932 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
933 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
934 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
935 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
936 ; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
937 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
938 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
939 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
940 ; GFX11-NEXT: s_setpc_b64 s[30:31]
942 ; GFX12-LABEL: v_maximum_v3f32:
944 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
945 ; GFX12-NEXT: s_wait_expcnt 0x0
946 ; GFX12-NEXT: s_wait_samplecnt 0x0
947 ; GFX12-NEXT: s_wait_bvhcnt 0x0
948 ; GFX12-NEXT: s_wait_kmcnt 0x0
949 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
950 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
951 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
952 ; GFX12-NEXT: s_setpc_b64 s[30:31]
953 %op = call <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Codegen test: llvm.maximum on <3 x float> with the nnan flag on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - only v_max_f32 per element; there is no
;     v_cmp_o_f32 / v_cndmask_b32 sequence and no 0x7fc00000 constant.
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
957 define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) {
958 ; GFX7-LABEL: v_maximum_v3f32__nnan:
960 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
962 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
963 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
964 ; GFX7-NEXT: s_setpc_b64 s[30:31]
966 ; GFX8-LABEL: v_maximum_v3f32__nnan:
968 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
970 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
971 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
972 ; GFX8-NEXT: s_setpc_b64 s[30:31]
974 ; GFX900-LABEL: v_maximum_v3f32__nnan:
976 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
978 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
979 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
980 ; GFX900-NEXT: s_setpc_b64 s[30:31]
982 ; GFX950-LABEL: v_maximum_v3f32__nnan:
984 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
986 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
987 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
988 ; GFX950-NEXT: s_setpc_b64 s[30:31]
990 ; GFX10-LABEL: v_maximum_v3f32__nnan:
992 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
994 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
995 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
996 ; GFX10-NEXT: s_setpc_b64 s[30:31]
998 ; GFX11-LABEL: v_maximum_v3f32__nnan:
1000 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1002 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
1003 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1005 ; GFX12-LABEL: v_maximum_v3f32__nnan:
1007 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1008 ; GFX12-NEXT: s_wait_expcnt 0x0
1009 ; GFX12-NEXT: s_wait_samplecnt 0x0
1010 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1011 ; GFX12-NEXT: s_wait_kmcnt 0x0
1012 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1013 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1014 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1015 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1016 %op = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Codegen test: llvm.maximum on <3 x float> with only the nsz flag on the call.
; The expected instructions below still contain the full sequence on
; gfx7, gfx8, gfx900, gfx10 and gfx11: v_max_f32 per element, then v_cmp_o_f32
; and v_cndmask_b32 choosing between that result and the constant 0x7fc00000
; (the f32 quiet-NaN bit pattern). gfx950 expects v_maximum3_f32 with the
; second source repeated, and gfx12 expects v_maximum_f32.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1020 define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
1021 ; GFX7-LABEL: v_maximum_v3f32__nsz:
1023 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1025 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1026 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1027 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1028 ; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1029 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1030 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1031 ; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1032 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1033 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1034 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1036 ; GFX8-LABEL: v_maximum_v3f32__nsz:
1038 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039 ; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
1040 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1041 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1042 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1043 ; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
1044 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1045 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1046 ; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
1047 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1048 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1049 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1051 ; GFX900-LABEL: v_maximum_v3f32__nsz:
1053 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054 ; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
1055 ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1056 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1057 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1058 ; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
1059 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1060 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1061 ; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
1062 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1063 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1064 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1066 ; GFX950-LABEL: v_maximum_v3f32__nsz:
1068 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
1070 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
1071 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
1072 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1074 ; GFX10-LABEL: v_maximum_v3f32__nsz:
1076 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077 ; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
1078 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
1079 ; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
1080 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
1081 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1082 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
1083 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
1084 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
1085 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1086 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1088 ; GFX11-LABEL: v_maximum_v3f32__nsz:
1090 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1091 ; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
1092 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
1093 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1094 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1095 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
1096 ; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
1097 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
1098 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1099 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1100 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX12-LABEL: v_maximum_v3f32__nsz:
1104 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1105 ; GFX12-NEXT: s_wait_expcnt 0x0
1106 ; GFX12-NEXT: s_wait_samplecnt 0x0
1107 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1108 ; GFX12-NEXT: s_wait_kmcnt 0x0
1109 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1110 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1111 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1112 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1113 %op = call nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Codegen test: llvm.maximum on <3 x float> with both nnan and nsz on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - only v_max_f32 per element; there is no
;     v_cmp_o_f32 / v_cndmask_b32 sequence and no 0x7fc00000 constant.
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1117 define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %src1) {
1118 ; GFX7-LABEL: v_maximum_v3f32__nnan_nsz:
1120 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1122 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1123 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
1124 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1126 ; GFX8-LABEL: v_maximum_v3f32__nnan_nsz:
1128 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
1130 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
1131 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
1132 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1134 ; GFX900-LABEL: v_maximum_v3f32__nnan_nsz:
1136 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
1138 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
1139 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
1140 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1142 ; GFX950-LABEL: v_maximum_v3f32__nnan_nsz:
1144 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
1146 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
1147 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
1148 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1150 ; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
1152 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
1154 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
1155 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
1156 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1158 ; GFX11-LABEL: v_maximum_v3f32__nnan_nsz:
1160 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1161 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1162 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
1163 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
1167 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1168 ; GFX12-NEXT: s_wait_expcnt 0x0
1169 ; GFX12-NEXT: s_wait_samplecnt 0x0
1170 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1171 ; GFX12-NEXT: s_wait_kmcnt 0x0
1172 ; GFX12-NEXT: v_maximum_f32 v0, v0, v3
1173 ; GFX12-NEXT: v_maximum_f32 v1, v1, v4
1174 ; GFX12-NEXT: v_maximum_f32 v2, v2, v5
1175 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1176 %op = call nnan nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
; Codegen test: llvm.maximum on <4 x float> with no fast-math flags on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - one v_max_f32 per element, followed by
;     v_cmp_o_f32 and v_cndmask_b32 choosing between that result and the
;     constant 0x7fc00000 (the f32 quiet-NaN bit pattern).
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1180 define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
1181 ; GFX7-LABEL: v_maximum_v4f32:
1183 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1185 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1186 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1187 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1188 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1189 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1190 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1191 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1192 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1193 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1194 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1195 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1196 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1197 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1199 ; GFX8-LABEL: v_maximum_v4f32:
1201 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
1203 ; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1204 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1205 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1206 ; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
1207 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1208 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1209 ; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
1210 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1211 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1212 ; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
1213 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1214 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1215 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1217 ; GFX900-LABEL: v_maximum_v4f32:
1219 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220 ; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
1221 ; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1222 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1223 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1224 ; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
1225 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1226 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1227 ; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
1228 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1229 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1230 ; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
1231 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1232 ; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1233 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1235 ; GFX950-LABEL: v_maximum_v4f32:
1237 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1238 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
1239 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
1240 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
1241 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
1242 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1244 ; GFX10-LABEL: v_maximum_v4f32:
1246 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247 ; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
1248 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1249 ; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
1250 ; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
1251 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1252 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1253 ; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
1254 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1255 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1256 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1257 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1258 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1259 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1261 ; GFX11-LABEL: v_maximum_v4f32:
1263 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1264 ; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1265 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1266 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1267 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1268 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1269 ; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
1270 ; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1271 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1272 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1273 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1274 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1275 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1276 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1278 ; GFX12-LABEL: v_maximum_v4f32:
1280 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1281 ; GFX12-NEXT: s_wait_expcnt 0x0
1282 ; GFX12-NEXT: s_wait_samplecnt 0x0
1283 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1284 ; GFX12-NEXT: s_wait_kmcnt 0x0
1285 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1286 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1287 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1288 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1289 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1290 %op = call <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Codegen test: llvm.maximum on <4 x float> with the nnan flag on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - only v_max_f32 per element; there is no
;     v_cmp_o_f32 / v_cndmask_b32 sequence and no 0x7fc00000 constant.
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1294 define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) {
1295 ; GFX7-LABEL: v_maximum_v4f32__nnan:
1297 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1299 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1300 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1301 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1302 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX8-LABEL: v_maximum_v4f32__nnan:
1306 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
1308 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
1309 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
1310 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
1311 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1313 ; GFX900-LABEL: v_maximum_v4f32__nnan:
1315 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
1317 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
1318 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
1319 ; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
1320 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1322 ; GFX950-LABEL: v_maximum_v4f32__nnan:
1324 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
1326 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
1327 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
1328 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
1329 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1331 ; GFX10-LABEL: v_maximum_v4f32__nnan:
1333 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
1335 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
1336 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
1337 ; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
1338 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX11-LABEL: v_maximum_v4f32__nnan:
1342 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1344 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1345 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1347 ; GFX12-LABEL: v_maximum_v4f32__nnan:
1349 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1350 ; GFX12-NEXT: s_wait_expcnt 0x0
1351 ; GFX12-NEXT: s_wait_samplecnt 0x0
1352 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1353 ; GFX12-NEXT: s_wait_kmcnt 0x0
1354 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1355 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1356 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1357 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1358 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1359 %op = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Codegen test: llvm.maximum on <4 x float> with only the nsz flag on the call.
; The expected instructions below still contain the full sequence on
; gfx7, gfx8, gfx900, gfx10 and gfx11: v_max_f32 per element, then v_cmp_o_f32
; and v_cndmask_b32 choosing between that result and the constant 0x7fc00000
; (the f32 quiet-NaN bit pattern). gfx950 expects v_maximum3_f32 with the
; second source repeated, and gfx12 expects v_maximum_f32.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1363 define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
1364 ; GFX7-LABEL: v_maximum_v4f32__nsz:
1366 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367 ; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1368 ; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1369 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1370 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1371 ; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1372 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1373 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1374 ; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1375 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1376 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1377 ; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1378 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1379 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1380 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1382 ; GFX8-LABEL: v_maximum_v4f32__nsz:
1384 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385 ; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
1386 ; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1387 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1388 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1389 ; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
1390 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1391 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1392 ; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
1393 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1394 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1395 ; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
1396 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1397 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1398 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1400 ; GFX900-LABEL: v_maximum_v4f32__nsz:
1402 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403 ; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
1404 ; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1405 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1406 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1407 ; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
1408 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1409 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1410 ; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
1411 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1412 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1413 ; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
1414 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1415 ; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1416 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1418 ; GFX950-LABEL: v_maximum_v4f32__nsz:
1420 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
1422 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
1423 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
1424 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
1425 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1427 ; GFX10-LABEL: v_maximum_v4f32__nsz:
1429 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430 ; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
1431 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1432 ; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
1433 ; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
1434 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1435 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1436 ; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
1437 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1438 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1439 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1440 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1441 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1442 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1444 ; GFX11-LABEL: v_maximum_v4f32__nsz:
1446 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1447 ; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1448 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
1449 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1450 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1451 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
1452 ; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
1453 ; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1454 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
1455 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1456 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
1457 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
1458 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1459 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1461 ; GFX12-LABEL: v_maximum_v4f32__nsz:
1463 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1464 ; GFX12-NEXT: s_wait_expcnt 0x0
1465 ; GFX12-NEXT: s_wait_samplecnt 0x0
1466 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1467 ; GFX12-NEXT: s_wait_kmcnt 0x0
1468 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1469 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1470 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1471 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1472 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1473 %op = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
; Codegen test: llvm.maximum on <4 x float> with both nnan and nsz on the call.
; What the checks below expect, per target:
;   - gfx7, gfx8, gfx900, gfx10, gfx11 - only v_max_f32 per element; there is no
;     v_cmp_o_f32 / v_cndmask_b32 sequence and no 0x7fc00000 constant.
;   - gfx950 - one v_maximum3_f32 per element with the second source repeated.
;   - gfx12 - one v_maximum_f32 per element.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1477 define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %src1) {
1478 ; GFX7-LABEL: v_maximum_v4f32__nnan_nsz:
1480 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1481 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1482 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1483 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1484 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
1485 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1487 ; GFX8-LABEL: v_maximum_v4f32__nnan_nsz:
1489 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
1491 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
1492 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
1493 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
1494 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1496 ; GFX900-LABEL: v_maximum_v4f32__nnan_nsz:
1498 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499 ; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
1500 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
1501 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
1502 ; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
1503 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1505 ; GFX950-LABEL: v_maximum_v4f32__nnan_nsz:
1507 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
1509 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
1510 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
1511 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
1512 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1514 ; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
1516 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
1518 ; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
1519 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
1520 ; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
1521 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1523 ; GFX11-LABEL: v_maximum_v4f32__nnan_nsz:
1525 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1526 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1527 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1528 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1530 ; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
1532 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1533 ; GFX12-NEXT: s_wait_expcnt 0x0
1534 ; GFX12-NEXT: s_wait_samplecnt 0x0
1535 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1536 ; GFX12-NEXT: s_wait_kmcnt 0x0
1537 ; GFX12-NEXT: v_maximum_f32 v0, v0, v4
1538 ; GFX12-NEXT: v_maximum_f32 v1, v1, v5
1539 ; GFX12-NEXT: v_maximum_f32 v2, v2, v6
1540 ; GFX12-NEXT: v_maximum_f32 v3, v3, v7
1541 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1542 %op = call nnan nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1546 define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
1547 ; GFX7-LABEL: v_maximum_v8f32:
1549 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550 ; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
1551 ; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1552 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1553 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1554 ; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
1555 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1556 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1557 ; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
1558 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1559 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1560 ; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
1561 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1562 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1563 ; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
1564 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1565 ; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1566 ; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
1567 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1568 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1569 ; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
1570 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1571 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1572 ; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
1573 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1574 ; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1575 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1577 ; GFX8-LABEL: v_maximum_v8f32:
1579 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580 ; GFX8-NEXT: v_max_f32_e32 v16, v0, v8
1581 ; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1582 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1583 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1584 ; GFX8-NEXT: v_max_f32_e32 v8, v1, v9
1585 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1586 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1587 ; GFX8-NEXT: v_max_f32_e32 v8, v2, v10
1588 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1589 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1590 ; GFX8-NEXT: v_max_f32_e32 v8, v3, v11
1591 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1592 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1593 ; GFX8-NEXT: v_max_f32_e32 v8, v4, v12
1594 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1595 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1596 ; GFX8-NEXT: v_max_f32_e32 v8, v5, v13
1597 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1598 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1599 ; GFX8-NEXT: v_max_f32_e32 v8, v6, v14
1600 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1601 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1602 ; GFX8-NEXT: v_max_f32_e32 v8, v7, v15
1603 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1604 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1605 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1607 ; GFX900-LABEL: v_maximum_v8f32:
1609 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1610 ; GFX900-NEXT: v_max_f32_e32 v16, v0, v8
1611 ; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
1612 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
1613 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
1614 ; GFX900-NEXT: v_max_f32_e32 v8, v1, v9
1615 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
1616 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
1617 ; GFX900-NEXT: v_max_f32_e32 v8, v2, v10
1618 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
1619 ; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
1620 ; GFX900-NEXT: v_max_f32_e32 v8, v3, v11
1621 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
1622 ; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
1623 ; GFX900-NEXT: v_max_f32_e32 v8, v4, v12
1624 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
1625 ; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
1626 ; GFX900-NEXT: v_max_f32_e32 v8, v5, v13
1627 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
1628 ; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
1629 ; GFX900-NEXT: v_max_f32_e32 v8, v6, v14
1630 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
1631 ; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
1632 ; GFX900-NEXT: v_max_f32_e32 v8, v7, v15
1633 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
1634 ; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
1635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1637 ; GFX950-LABEL: v_maximum_v8f32:
1639 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v8, v8
1641 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v9, v9
1642 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v10, v10
1643 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v11, v11
1644 ; GFX950-NEXT: v_maximum3_f32 v4, v4, v12, v12
1645 ; GFX950-NEXT: v_maximum3_f32 v5, v5, v13, v13
1646 ; GFX950-NEXT: v_maximum3_f32 v6, v6, v14, v14
1647 ; GFX950-NEXT: v_maximum3_f32 v7, v7, v15, v15
1648 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1650 ; GFX10-LABEL: v_maximum_v8f32:
1652 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX10-NEXT: v_max_f32_e32 v16, v0, v8
1654 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
1655 ; GFX10-NEXT: v_max_f32_e32 v17, v1, v9
1656 ; GFX10-NEXT: v_max_f32_e32 v8, v2, v10
1657 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1658 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
1659 ; GFX10-NEXT: v_max_f32_e32 v9, v3, v11
1660 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1661 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
1662 ; GFX10-NEXT: v_max_f32_e32 v10, v7, v15
1663 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1664 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
1665 ; GFX10-NEXT: v_max_f32_e32 v8, v4, v12
1666 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
1667 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
1668 ; GFX10-NEXT: v_max_f32_e32 v9, v5, v13
1669 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
1670 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
1671 ; GFX10-NEXT: v_max_f32_e32 v8, v6, v14
1672 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
1673 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
1674 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1675 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
1676 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1677 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1679 ; GFX11-LABEL: v_maximum_v8f32:
1681 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682 ; GFX11-NEXT: v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
1683 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
1684 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1685 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1686 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
1687 ; GFX11-NEXT: v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10
1688 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1689 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
1690 ; GFX11-NEXT: v_max_f32_e32 v10, v7, v15
1691 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1692 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1693 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
1694 ; GFX11-NEXT: v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
1695 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
1696 ; GFX11-NEXT: v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
1697 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
1698 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1699 ; GFX11-NEXT: v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
1700 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
1701 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1702 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
1703 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1704 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1706 ; GFX12-LABEL: v_maximum_v8f32:
1708 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1709 ; GFX12-NEXT: s_wait_expcnt 0x0
1710 ; GFX12-NEXT: s_wait_samplecnt 0x0
1711 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1712 ; GFX12-NEXT: s_wait_kmcnt 0x0
1713 ; GFX12-NEXT: v_maximum_f32 v0, v0, v8
1714 ; GFX12-NEXT: v_maximum_f32 v1, v1, v9
1715 ; GFX12-NEXT: v_maximum_f32 v2, v2, v10
1716 ; GFX12-NEXT: v_maximum_f32 v3, v3, v11
1717 ; GFX12-NEXT: v_maximum_f32 v4, v4, v12
1718 ; GFX12-NEXT: v_maximum_f32 v5, v5, v13
1719 ; GFX12-NEXT: v_maximum_f32 v6, v6, v14
1720 ; GFX12-NEXT: v_maximum_f32 v7, v7, v15
1721 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1722 %op = call <8 x float> @llvm.maximum.v8f32(<8 x float> %src0, <8 x float> %src1)
; Checks codegen of the llvm.maximum intrinsic on <16 x float> operands that
; are passed and returned by value. The assertions below are autogenerated
; (see the NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the expected output shows, grouped by check prefix:
;   * GFX7, GFX8, GFX900, GFX10 and GFX11 emit, per lane, a v_max_f32, an
;     ordered compare (v_cmp_o_f32) and a v_cndmask_b32 that chooses between
;     the v_max_f32 result and the constant 0x7fc00000 (an f32 quiet-NaN bit
;     pattern) based on that compare.
;   * GFX950 emits one v_maximum3_f32 per lane with the second operand repeated.
;   * GFX12 emits one v_maximum_f32 per lane.
; In every variant the value combined with v15 is loaded from memory addressed
; through s32 instead of arriving in a register (presumably the stack-passed
; tail of %src1; confirm against the AMDGPU calling convention).
; GFX7, GFX8 and GFX900 additionally spill v31 and keep s30/s31 in its lanes
; while s[30:31] holds a compare result, reading them back before s_setpc_b64.
1726 define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
1727 ; GFX7-LABEL: v_maximum_v16f32:
1729 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1730 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
1731 ; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1732 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1733 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1734 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
1735 ; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
1736 ; GFX7-NEXT: v_writelane_b32 v31, s30, 0
1737 ; GFX7-NEXT: v_writelane_b32 v31, s31, 1
1738 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1739 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v18
1740 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1741 ; GFX7-NEXT: v_max_f32_e32 v3, v3, v19
1742 ; GFX7-NEXT: v_mov_b32_e32 v18, 0x7fc00000
1743 ; GFX7-NEXT: v_max_f32_e32 v19, v0, v16
1744 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
1745 ; GFX7-NEXT: v_max_f32_e32 v16, v14, v30
1746 ; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
1747 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1748 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
1749 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1750 ; GFX7-NEXT: v_max_f32_e32 v5, v5, v21
1751 ; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1752 ; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
1753 ; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1754 ; GFX7-NEXT: v_max_f32_e32 v7, v7, v23
1755 ; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24
1756 ; GFX7-NEXT: v_max_f32_e32 v8, v8, v24
1757 ; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25
1758 ; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
1759 ; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26
1760 ; GFX7-NEXT: v_max_f32_e32 v10, v10, v26
1761 ; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27
1762 ; GFX7-NEXT: v_max_f32_e32 v11, v11, v27
1763 ; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28
1764 ; GFX7-NEXT: v_max_f32_e32 v12, v12, v28
1765 ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
1766 ; GFX7-NEXT: v_max_f32_e32 v13, v13, v29
1767 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
1768 ; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1769 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1770 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1771 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1772 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1773 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1774 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1775 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1776 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1777 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1778 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1779 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1780 ; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1781 ; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1782 ; GFX7-NEXT: v_readlane_b32 s31, v31, 1
1783 ; GFX7-NEXT: v_readlane_b32 s30, v31, 0
1784 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1785 ; GFX7-NEXT: v_max_f32_e32 v16, v15, v17
1786 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
1787 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
1788 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
1789 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1790 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1791 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1792 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1794 ; GFX8-LABEL: v_maximum_v16f32:
1796 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1797 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
1798 ; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1799 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
1800 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1801 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v17
1802 ; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
1803 ; GFX8-NEXT: v_writelane_b32 v31, s30, 0
1804 ; GFX8-NEXT: v_writelane_b32 v31, s31, 1
1805 ; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1806 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v18
1807 ; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1808 ; GFX8-NEXT: v_max_f32_e32 v3, v3, v19
1809 ; GFX8-NEXT: v_mov_b32_e32 v18, 0x7fc00000
1810 ; GFX8-NEXT: v_max_f32_e32 v19, v0, v16
1811 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
1812 ; GFX8-NEXT: v_max_f32_e32 v16, v14, v30
1813 ; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
1814 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1815 ; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
1816 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1817 ; GFX8-NEXT: v_max_f32_e32 v5, v5, v21
1818 ; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1819 ; GFX8-NEXT: v_max_f32_e32 v6, v6, v22
1820 ; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1821 ; GFX8-NEXT: v_max_f32_e32 v7, v7, v23
1822 ; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24
1823 ; GFX8-NEXT: v_max_f32_e32 v8, v8, v24
1824 ; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25
1825 ; GFX8-NEXT: v_max_f32_e32 v9, v9, v25
1826 ; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26
1827 ; GFX8-NEXT: v_max_f32_e32 v10, v10, v26
1828 ; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27
1829 ; GFX8-NEXT: v_max_f32_e32 v11, v11, v27
1830 ; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28
1831 ; GFX8-NEXT: v_max_f32_e32 v12, v12, v28
1832 ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
1833 ; GFX8-NEXT: v_max_f32_e32 v13, v13, v29
1834 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
1835 ; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1836 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1837 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1838 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1839 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1840 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1841 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1842 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1843 ; GFX8-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1844 ; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1845 ; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1846 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1847 ; GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1848 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1849 ; GFX8-NEXT: v_readlane_b32 s31, v31, 1
1850 ; GFX8-NEXT: v_readlane_b32 s30, v31, 0
1851 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1852 ; GFX8-NEXT: v_max_f32_e32 v16, v15, v17
1853 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
1854 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
1855 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
1856 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1857 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
1858 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1859 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1861 ; GFX900-LABEL: v_maximum_v16f32:
1863 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
1865 ; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1866 ; GFX900-NEXT: s_mov_b64 exec, s[4:5]
1867 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
1868 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v17
1869 ; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32
1870 ; GFX900-NEXT: v_writelane_b32 v31, s30, 0
1871 ; GFX900-NEXT: v_writelane_b32 v31, s31, 1
1872 ; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
1873 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v18
1874 ; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
1875 ; GFX900-NEXT: v_max_f32_e32 v3, v3, v19
1876 ; GFX900-NEXT: v_mov_b32_e32 v18, 0x7fc00000
1877 ; GFX900-NEXT: v_max_f32_e32 v19, v0, v16
1878 ; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
1879 ; GFX900-NEXT: v_max_f32_e32 v16, v14, v30
1880 ; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
1881 ; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
1882 ; GFX900-NEXT: v_max_f32_e32 v4, v4, v20
1883 ; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
1884 ; GFX900-NEXT: v_max_f32_e32 v5, v5, v21
1885 ; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
1886 ; GFX900-NEXT: v_max_f32_e32 v6, v6, v22
1887 ; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
1888 ; GFX900-NEXT: v_max_f32_e32 v7, v7, v23
1889 ; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24
1890 ; GFX900-NEXT: v_max_f32_e32 v8, v8, v24
1891 ; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25
1892 ; GFX900-NEXT: v_max_f32_e32 v9, v9, v25
1893 ; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26
1894 ; GFX900-NEXT: v_max_f32_e32 v10, v10, v26
1895 ; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27
1896 ; GFX900-NEXT: v_max_f32_e32 v11, v11, v27
1897 ; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28
1898 ; GFX900-NEXT: v_max_f32_e32 v12, v12, v28
1899 ; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
1900 ; GFX900-NEXT: v_max_f32_e32 v13, v13, v29
1901 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
1902 ; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1903 ; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1904 ; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1905 ; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1906 ; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1907 ; GFX900-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1908 ; GFX900-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1909 ; GFX900-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1910 ; GFX900-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1911 ; GFX900-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1912 ; GFX900-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1913 ; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1914 ; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1915 ; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1916 ; GFX900-NEXT: v_readlane_b32 s31, v31, 1
1917 ; GFX900-NEXT: v_readlane_b32 s30, v31, 0
1918 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1919 ; GFX900-NEXT: v_max_f32_e32 v16, v15, v17
1920 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
1921 ; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
1922 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
1923 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1924 ; GFX900-NEXT: s_mov_b64 exec, s[4:5]
1925 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1926 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1928 ; GFX950-LABEL: v_maximum_v16f32:
1930 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931 ; GFX950-NEXT: scratch_load_dword v31, off, s32
1932 ; GFX950-NEXT: v_maximum3_f32 v0, v0, v16, v16
1933 ; GFX950-NEXT: v_maximum3_f32 v1, v1, v17, v17
1934 ; GFX950-NEXT: v_maximum3_f32 v2, v2, v18, v18
1935 ; GFX950-NEXT: v_maximum3_f32 v3, v3, v19, v19
1936 ; GFX950-NEXT: v_maximum3_f32 v4, v4, v20, v20
1937 ; GFX950-NEXT: v_maximum3_f32 v5, v5, v21, v21
1938 ; GFX950-NEXT: v_maximum3_f32 v6, v6, v22, v22
1939 ; GFX950-NEXT: v_maximum3_f32 v7, v7, v23, v23
1940 ; GFX950-NEXT: v_maximum3_f32 v8, v8, v24, v24
1941 ; GFX950-NEXT: v_maximum3_f32 v9, v9, v25, v25
1942 ; GFX950-NEXT: v_maximum3_f32 v10, v10, v26, v26
1943 ; GFX950-NEXT: v_maximum3_f32 v11, v11, v27, v27
1944 ; GFX950-NEXT: v_maximum3_f32 v12, v12, v28, v28
1945 ; GFX950-NEXT: v_maximum3_f32 v13, v13, v29, v29
1946 ; GFX950-NEXT: v_maximum3_f32 v14, v14, v30, v30
1947 ; GFX950-NEXT: s_waitcnt vmcnt(0)
1948 ; GFX950-NEXT: v_maximum3_f32 v15, v15, v31, v31
1949 ; GFX950-NEXT: s_setpc_b64 s[30:31]
1951 ; GFX10-LABEL: v_maximum_v16f32:
1953 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1954 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1955 ; GFX10-NEXT: v_max_f32_e32 v32, v0, v16
1956 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
1957 ; GFX10-NEXT: v_max_f32_e32 v33, v1, v17
1958 ; GFX10-NEXT: v_max_f32_e32 v34, v2, v18
1959 ; GFX10-NEXT: v_max_f32_e32 v35, v3, v19
1960 ; GFX10-NEXT: v_max_f32_e32 v36, v4, v20
1961 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
1962 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
1963 ; GFX10-NEXT: v_max_f32_e32 v37, v5, v21
1964 ; GFX10-NEXT: v_max_f32_e32 v38, v6, v22
1965 ; GFX10-NEXT: v_max_f32_e32 v39, v7, v23
1966 ; GFX10-NEXT: v_max_f32_e32 v48, v8, v24
1967 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
1968 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
1969 ; GFX10-NEXT: v_max_f32_e32 v49, v9, v25
1970 ; GFX10-NEXT: v_max_f32_e32 v50, v10, v26
1971 ; GFX10-NEXT: v_max_f32_e32 v51, v11, v27
1972 ; GFX10-NEXT: v_max_f32_e32 v52, v12, v28
1973 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
1974 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
1975 ; GFX10-NEXT: v_max_f32_e32 v53, v13, v29
1976 ; GFX10-NEXT: v_max_f32_e32 v54, v14, v30
1977 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
1978 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
1979 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
1980 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
1981 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
1982 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
1983 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
1984 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
1985 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
1986 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
1987 ; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
1988 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
1989 ; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
1990 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
1991 ; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
1992 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
1993 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
1994 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
1995 ; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
1996 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
1997 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
1998 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
1999 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2000 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2001 ; GFX10-NEXT: v_max_f32_e32 v16, v15, v31
2002 ; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
2003 ; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2004 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2006 ; GFX11-LABEL: v_maximum_v16f32:
2008 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2009 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
2010 ; GFX11-NEXT: v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
2011 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
2012 ; GFX11-NEXT: v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
2013 ; GFX11-NEXT: v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
2014 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
2015 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
2016 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
2017 ; GFX11-NEXT: v_max_f32_e32 v54, v14, v30
2018 ; GFX11-NEXT: v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
2019 ; GFX11-NEXT: v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
2020 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
2021 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
2022 ; GFX11-NEXT: v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
2023 ; GFX11-NEXT: v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
2024 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
2025 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
2026 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
2027 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
2028 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
2029 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
2030 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
2031 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
2032 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
2033 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
2034 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
2035 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
2036 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
2037 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
2038 ; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
2039 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
2040 ; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
2041 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
2042 ; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
2043 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
2044 ; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
2045 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
2046 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
2047 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
2048 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2049 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2050 ; GFX11-NEXT: v_max_f32_e32 v16, v15, v31
2051 ; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
2052 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2053 ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2054 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2056 ; GFX12-LABEL: v_maximum_v16f32:
2058 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2059 ; GFX12-NEXT: s_wait_expcnt 0x0
2060 ; GFX12-NEXT: s_wait_samplecnt 0x0
2061 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2062 ; GFX12-NEXT: s_wait_kmcnt 0x0
2063 ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2064 ; GFX12-NEXT: v_maximum_f32 v0, v0, v16
2065 ; GFX12-NEXT: v_maximum_f32 v1, v1, v17
2066 ; GFX12-NEXT: v_maximum_f32 v2, v2, v18
2067 ; GFX12-NEXT: v_maximum_f32 v3, v3, v19
2068 ; GFX12-NEXT: v_maximum_f32 v4, v4, v20
2069 ; GFX12-NEXT: v_maximum_f32 v5, v5, v21
2070 ; GFX12-NEXT: v_maximum_f32 v6, v6, v22
2071 ; GFX12-NEXT: v_maximum_f32 v7, v7, v23
2072 ; GFX12-NEXT: v_maximum_f32 v8, v8, v24
2073 ; GFX12-NEXT: v_maximum_f32 v9, v9, v25
2074 ; GFX12-NEXT: v_maximum_f32 v10, v10, v26
2075 ; GFX12-NEXT: v_maximum_f32 v11, v11, v27
2076 ; GFX12-NEXT: v_maximum_f32 v12, v12, v28
2077 ; GFX12-NEXT: v_maximum_f32 v13, v13, v29
2078 ; GFX12-NEXT: v_maximum_f32 v14, v14, v30
2079 ; GFX12-NEXT: s_wait_loadcnt 0x0
2080 ; GFX12-NEXT: v_maximum_f32 v15, v15, v31
2081 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2082 %op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1)
2083 ret <16 x float> %op
2085 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: