1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; Baseline case: llvm.maximum.f64 on two double arguments, with no fast-math
; flags on the call.
; The GFX7/GFX8/GFX9/GFX940/GFX10/GFX11 checks expect v_max_f64 plus a
; v_cmp_u_f64 (unordered compare) feeding two v_cndmask_b32 that choose between
; the max result and the constants 0 (for v0) and 0x7ff80000 (for v1).
; The GFX12 checks expect a single v_maximum_f64 and no compare/select.
10 define double @v_maximum_f64(double %src0, double %src1) {
11 ; GFX7-LABEL: v_maximum_f64:
13 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
15 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
16 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
17 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
18 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
19 ; GFX7-NEXT: s_setpc_b64 s[30:31]
21 ; GFX8-LABEL: v_maximum_f64:
23 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
25 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
26 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
27 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
28 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
29 ; GFX8-NEXT: s_setpc_b64 s[30:31]
31 ; GFX9-LABEL: v_maximum_f64:
33 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
35 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
36 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
37 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
38 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
39 ; GFX9-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_maximum_f64:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
45 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
46 ; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
47 ; GFX940-NEXT: s_nop 0
48 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
49 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
52 ; GFX10-LABEL: v_maximum_f64:
54 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
56 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
57 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
58 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
59 ; GFX10-NEXT: s_setpc_b64 s[30:31]
61 ; GFX11-LABEL: v_maximum_f64:
63 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
65 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
66 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
67 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
68 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
69 ; GFX11-NEXT: s_setpc_b64 s[30:31]
71 ; GFX12-LABEL: v_maximum_f64:
73 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
74 ; GFX12-NEXT: s_wait_expcnt 0x0
75 ; GFX12-NEXT: s_wait_samplecnt 0x0
76 ; GFX12-NEXT: s_wait_bvhcnt 0x0
77 ; GFX12-NEXT: s_wait_kmcnt 0x0
78 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
79 ; GFX12-NEXT: s_setpc_b64 s[30:31]
80 %op = call double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 with the `nnan` fast-math flag on the call.
; Every prefix expects a single max instruction and no compare/select sequence:
; v_max_f64 for GFX7 through GFX11, v_maximum_f64 for GFX12.
84 define double @v_maximum_f64__nnan(double %src0, double %src1) {
85 ; GFX7-LABEL: v_maximum_f64__nnan:
87 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
89 ; GFX7-NEXT: s_setpc_b64 s[30:31]
91 ; GFX8-LABEL: v_maximum_f64__nnan:
93 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
95 ; GFX8-NEXT: s_setpc_b64 s[30:31]
97 ; GFX9-LABEL: v_maximum_f64__nnan:
99 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
101 ; GFX9-NEXT: s_setpc_b64 s[30:31]
103 ; GFX940-LABEL: v_maximum_f64__nnan:
105 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
107 ; GFX940-NEXT: s_setpc_b64 s[30:31]
109 ; GFX10-LABEL: v_maximum_f64__nnan:
111 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
113 ; GFX10-NEXT: s_setpc_b64 s[30:31]
115 ; GFX11-LABEL: v_maximum_f64__nnan:
117 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
119 ; GFX11-NEXT: s_setpc_b64 s[30:31]
121 ; GFX12-LABEL: v_maximum_f64__nnan:
123 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
124 ; GFX12-NEXT: s_wait_expcnt 0x0
125 ; GFX12-NEXT: s_wait_samplecnt 0x0
126 ; GFX12-NEXT: s_wait_bvhcnt 0x0
127 ; GFX12-NEXT: s_wait_kmcnt 0x0
128 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
129 ; GFX12-NEXT: s_setpc_b64 s[30:31]
130 %op = call nnan double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 with only the `nsz` fast-math flag on the call.
; The GFX7 through GFX11 checks still expect the full sequence: v_max_f64, then
; v_cmp_u_f64 and two v_cndmask_b32 selecting the constants 0 / 0x7ff80000.
; The GFX12 checks expect a single v_maximum_f64.
134 define double @v_maximum_f64__nsz(double %src0, double %src1) {
135 ; GFX7-LABEL: v_maximum_f64__nsz:
137 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
139 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
140 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
141 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
142 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
143 ; GFX7-NEXT: s_setpc_b64 s[30:31]
145 ; GFX8-LABEL: v_maximum_f64__nsz:
147 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
149 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
150 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
151 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
152 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
153 ; GFX8-NEXT: s_setpc_b64 s[30:31]
155 ; GFX9-LABEL: v_maximum_f64__nsz:
157 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
159 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
160 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
161 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
162 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
163 ; GFX9-NEXT: s_setpc_b64 s[30:31]
165 ; GFX940-LABEL: v_maximum_f64__nsz:
167 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
169 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
170 ; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
171 ; GFX940-NEXT: s_nop 0
172 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
173 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
174 ; GFX940-NEXT: s_setpc_b64 s[30:31]
176 ; GFX10-LABEL: v_maximum_f64__nsz:
178 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
180 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
181 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
182 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
183 ; GFX10-NEXT: s_setpc_b64 s[30:31]
185 ; GFX11-LABEL: v_maximum_f64__nsz:
187 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188 ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
189 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
190 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
191 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
192 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
193 ; GFX11-NEXT: s_setpc_b64 s[30:31]
195 ; GFX12-LABEL: v_maximum_f64__nsz:
197 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
198 ; GFX12-NEXT: s_wait_expcnt 0x0
199 ; GFX12-NEXT: s_wait_samplecnt 0x0
200 ; GFX12-NEXT: s_wait_bvhcnt 0x0
201 ; GFX12-NEXT: s_wait_kmcnt 0x0
202 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
203 ; GFX12-NEXT: s_setpc_b64 s[30:31]
204 %op = call nsz double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 with both `nnan` and `nsz` fast-math flags on the call.
; Every prefix expects a single max instruction and no compare/select sequence:
; v_max_f64 for GFX7 through GFX11, v_maximum_f64 for GFX12.
208 define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
209 ; GFX7-LABEL: v_maximum_f64__nnan_nsz:
211 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
213 ; GFX7-NEXT: s_setpc_b64 s[30:31]
215 ; GFX8-LABEL: v_maximum_f64__nnan_nsz:
217 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
219 ; GFX8-NEXT: s_setpc_b64 s[30:31]
221 ; GFX9-LABEL: v_maximum_f64__nnan_nsz:
223 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
227 ; GFX940-LABEL: v_maximum_f64__nnan_nsz:
229 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
231 ; GFX940-NEXT: s_setpc_b64 s[30:31]
233 ; GFX10-LABEL: v_maximum_f64__nnan_nsz:
235 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
237 ; GFX10-NEXT: s_setpc_b64 s[30:31]
239 ; GFX11-LABEL: v_maximum_f64__nnan_nsz:
241 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
243 ; GFX11-NEXT: s_setpc_b64 s[30:31]
245 ; GFX12-LABEL: v_maximum_f64__nnan_nsz:
247 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
248 ; GFX12-NEXT: s_wait_expcnt 0x0
249 ; GFX12-NEXT: s_wait_samplecnt 0x0
250 ; GFX12-NEXT: s_wait_bvhcnt 0x0
251 ; GFX12-NEXT: s_wait_kmcnt 0x0
252 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
253 ; GFX12-NEXT: s_setpc_b64 s[30:31]
254 %op = call nnan nsz double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 where only the first operand comes from an `nnan`
; instruction (`fadd nnan %arg0, 1.0`); the maximum call itself has no flags.
; After the add, the GFX7 through GFX11 checks still expect v_max_f64 plus the
; v_cmp_u_f64 / v_cndmask_b32 select sequence on both operands.
; The GFX12 checks expect the add followed by a single v_maximum_f64.
258 define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
259 ; GFX7-LABEL: v_maximum_f64__nnan_src0:
261 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GFX7-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
263 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
264 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
265 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
266 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
267 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
268 ; GFX7-NEXT: s_setpc_b64 s[30:31]
270 ; GFX8-LABEL: v_maximum_f64__nnan_src0:
272 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX8-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
274 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
275 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
276 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
277 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
278 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
279 ; GFX8-NEXT: s_setpc_b64 s[30:31]
281 ; GFX9-LABEL: v_maximum_f64__nnan_src0:
283 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284 ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
285 ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
286 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
287 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
288 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
289 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
290 ; GFX9-NEXT: s_setpc_b64 s[30:31]
292 ; GFX940-LABEL: v_maximum_f64__nnan_src0:
294 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
296 ; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
297 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
298 ; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
299 ; GFX940-NEXT: s_nop 0
300 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
301 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
302 ; GFX940-NEXT: s_setpc_b64 s[30:31]
304 ; GFX10-LABEL: v_maximum_f64__nnan_src0:
306 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
308 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
309 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
310 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
311 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
312 ; GFX10-NEXT: s_setpc_b64 s[30:31]
314 ; GFX11-LABEL: v_maximum_f64__nnan_src0:
316 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
318 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
319 ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
320 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
321 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
323 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
324 ; GFX11-NEXT: s_setpc_b64 s[30:31]
326 ; GFX12-LABEL: v_maximum_f64__nnan_src0:
328 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
329 ; GFX12-NEXT: s_wait_expcnt 0x0
330 ; GFX12-NEXT: s_wait_samplecnt 0x0
331 ; GFX12-NEXT: s_wait_bvhcnt 0x0
332 ; GFX12-NEXT: s_wait_kmcnt 0x0
333 ; GFX12-NEXT: v_add_f64_e32 v[0:1], 1.0, v[0:1]
334 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
335 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
336 ; GFX12-NEXT: s_setpc_b64 s[30:31]
337 %src0 = fadd nnan double %arg0, 1.0
338 %op = call double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 where only the second operand comes from an `nnan`
; instruction (`fadd nnan %arg1, 1.0`); the maximum call itself has no flags.
; After the add, the GFX7 through GFX11 checks still expect v_max_f64 plus the
; v_cmp_u_f64 / v_cndmask_b32 select sequence on both operands.
; The GFX12 checks expect the add followed by a single v_maximum_f64.
342 define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
343 ; GFX7-LABEL: v_maximum_f64__nnan_src1:
345 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX7-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
347 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
348 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
349 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
350 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
351 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
352 ; GFX7-NEXT: s_setpc_b64 s[30:31]
354 ; GFX8-LABEL: v_maximum_f64__nnan_src1:
356 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX8-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
358 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
359 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
360 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
361 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
362 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
363 ; GFX8-NEXT: s_setpc_b64 s[30:31]
365 ; GFX9-LABEL: v_maximum_f64__nnan_src1:
367 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368 ; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
369 ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
370 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
371 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
372 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
373 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
374 ; GFX9-NEXT: s_setpc_b64 s[30:31]
376 ; GFX940-LABEL: v_maximum_f64__nnan_src1:
378 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
380 ; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
381 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
382 ; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
383 ; GFX940-NEXT: s_nop 0
384 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
385 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
386 ; GFX940-NEXT: s_setpc_b64 s[30:31]
388 ; GFX10-LABEL: v_maximum_f64__nnan_src1:
390 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
392 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
393 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
394 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
395 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
396 ; GFX10-NEXT: s_setpc_b64 s[30:31]
398 ; GFX11-LABEL: v_maximum_f64__nnan_src1:
400 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
402 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
403 ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
404 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
405 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
406 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
407 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
408 ; GFX11-NEXT: s_setpc_b64 s[30:31]
410 ; GFX12-LABEL: v_maximum_f64__nnan_src1:
412 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
413 ; GFX12-NEXT: s_wait_expcnt 0x0
414 ; GFX12-NEXT: s_wait_samplecnt 0x0
415 ; GFX12-NEXT: s_wait_bvhcnt 0x0
416 ; GFX12-NEXT: s_wait_kmcnt 0x0
417 ; GFX12-NEXT: v_add_f64_e32 v[2:3], 1.0, v[2:3]
418 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
419 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
420 ; GFX12-NEXT: s_setpc_b64 s[30:31]
421 %src1 = fadd nnan double %arg1, 1.0
422 %op = call double @llvm.maximum.f64(double %src0, double %src1)
; llvm.maximum.f64 on two `inreg` double arguments; the result is consumed by a
; side-effecting inline asm ("; use $0") instead of being returned.
; The checks show the operands arriving in scalar registers: s[6:7] and s[16:17]
; for GFX7/GFX8/GFX9/GFX10, s[0:1] and s[2:3] for GFX940/GFX11/GFX12.
; GFX7 through GFX11 expect v_max_f64 plus the v_cmp_u_f64 / v_cndmask_b32
; select sequence; GFX12 expects a single v_maximum_f64.
; NOTE(review): the inline asm uses the "s" constraint, yet every prefix's
; expected asm text is "; use v[0:1]" (a vector register pair) -- confirm this
; is the intended output before relying on these checks.
426 define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
427 ; GFX7-LABEL: s_maximum_f64:
429 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX7-NEXT: v_mov_b32_e32 v0, s16
431 ; GFX7-NEXT: v_mov_b32_e32 v1, s17
432 ; GFX7-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
433 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
434 ; GFX7-NEXT: v_mov_b32_e32 v4, 0x7ff80000
435 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
436 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
437 ; GFX7-NEXT: ;;#ASMSTART
438 ; GFX7-NEXT: ; use v[0:1]
439 ; GFX7-NEXT: ;;#ASMEND
440 ; GFX7-NEXT: s_setpc_b64 s[30:31]
442 ; GFX8-LABEL: s_maximum_f64:
444 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX8-NEXT: v_mov_b32_e32 v0, s16
446 ; GFX8-NEXT: v_mov_b32_e32 v1, s17
447 ; GFX8-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
448 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
449 ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7ff80000
450 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
451 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
452 ; GFX8-NEXT: ;;#ASMSTART
453 ; GFX8-NEXT: ; use v[0:1]
454 ; GFX8-NEXT: ;;#ASMEND
455 ; GFX8-NEXT: s_setpc_b64 s[30:31]
457 ; GFX9-LABEL: s_maximum_f64:
459 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460 ; GFX9-NEXT: v_mov_b32_e32 v0, s16
461 ; GFX9-NEXT: v_mov_b32_e32 v1, s17
462 ; GFX9-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
463 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
464 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
465 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
466 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
467 ; GFX9-NEXT: ;;#ASMSTART
468 ; GFX9-NEXT: ; use v[0:1]
469 ; GFX9-NEXT: ;;#ASMEND
470 ; GFX9-NEXT: s_setpc_b64 s[30:31]
472 ; GFX940-LABEL: s_maximum_f64:
474 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
476 ; GFX940-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
477 ; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
478 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
479 ; GFX940-NEXT: s_nop 1
480 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
481 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
482 ; GFX940-NEXT: ;;#ASMSTART
483 ; GFX940-NEXT: ; use v[0:1]
484 ; GFX940-NEXT: ;;#ASMEND
485 ; GFX940-NEXT: s_setpc_b64 s[30:31]
487 ; GFX10-LABEL: s_maximum_f64:
489 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490 ; GFX10-NEXT: v_max_f64 v[0:1], s[6:7], s[16:17]
491 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[6:7], s[16:17]
492 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s4
493 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, 0, s4
494 ; GFX10-NEXT: ;;#ASMSTART
495 ; GFX10-NEXT: ; use v[0:1]
496 ; GFX10-NEXT: ;;#ASMEND
497 ; GFX10-NEXT: s_setpc_b64 s[30:31]
499 ; GFX11-LABEL: s_maximum_f64:
501 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502 ; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
503 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
504 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
505 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
506 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
507 ; GFX11-NEXT: ;;#ASMSTART
508 ; GFX11-NEXT: ; use v[0:1]
509 ; GFX11-NEXT: ;;#ASMEND
510 ; GFX11-NEXT: s_setpc_b64 s[30:31]
512 ; GFX12-LABEL: s_maximum_f64:
514 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
515 ; GFX12-NEXT: s_wait_expcnt 0x0
516 ; GFX12-NEXT: s_wait_samplecnt 0x0
517 ; GFX12-NEXT: s_wait_bvhcnt 0x0
518 ; GFX12-NEXT: s_wait_kmcnt 0x0
519 ; GFX12-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
520 ; GFX12-NEXT: ;;#ASMSTART
521 ; GFX12-NEXT: ; use v[0:1]
522 ; GFX12-NEXT: ;;#ASMEND
523 ; GFX12-NEXT: s_setpc_b64 s[30:31]
524 %op = call double @llvm.maximum.f64(double %src0, double %src1)
525 call void asm sideeffect "; use $0", "s"(double %op)
; Vector baseline: llvm.maximum.v2f64 on two <2 x double> arguments, with no
; fast-math flags on the call.
; The GFX7 through GFX11 checks expect the operation done per element: two
; v_max_f64, two v_cmp_u_f64, and four v_cndmask_b32 selecting the constants
; 0 / 0x7ff80000. GFX940 reuses vcc for both compares; the other targets put
; the second compare result in an SGPR (s[4:5], s4 or s0).
; The GFX12 checks expect two v_maximum_f64 and no compare/select.
529 define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
530 ; GFX7-LABEL: v_maximum_v2f64:
532 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
534 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
535 ; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
536 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
537 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
538 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
539 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
540 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
541 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
542 ; GFX7-NEXT: s_setpc_b64 s[30:31]
544 ; GFX8-LABEL: v_maximum_v2f64:
546 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
548 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
549 ; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
550 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
551 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
552 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
553 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
554 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
555 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
556 ; GFX8-NEXT: s_setpc_b64 s[30:31]
558 ; GFX9-LABEL: v_maximum_v2f64:
560 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561 ; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
562 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
563 ; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
564 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
565 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
566 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
567 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
568 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
569 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
570 ; GFX9-NEXT: s_setpc_b64 s[30:31]
572 ; GFX940-LABEL: v_maximum_v2f64:
574 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575 ; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
576 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
577 ; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
578 ; GFX940-NEXT: s_nop 0
579 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
580 ; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
581 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
582 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
583 ; GFX940-NEXT: s_nop 1
584 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
585 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
586 ; GFX940-NEXT: s_setpc_b64 s[30:31]
588 ; GFX10-LABEL: v_maximum_v2f64:
590 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
592 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
593 ; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
594 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
595 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
596 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
597 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
598 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
599 ; GFX10-NEXT: s_setpc_b64 s[30:31]
601 ; GFX11-LABEL: v_maximum_v2f64:
603 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
605 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
606 ; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
607 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
608 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
609 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
610 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
611 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
612 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
613 ; GFX11-NEXT: s_setpc_b64 s[30:31]
615 ; GFX12-LABEL: v_maximum_v2f64:
617 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
618 ; GFX12-NEXT: s_wait_expcnt 0x0
619 ; GFX12-NEXT: s_wait_samplecnt 0x0
620 ; GFX12-NEXT: s_wait_bvhcnt 0x0
621 ; GFX12-NEXT: s_wait_kmcnt 0x0
622 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
623 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
624 ; GFX12-NEXT: s_setpc_b64 s[30:31]
625 %op = call <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
; llvm.maximum.v2f64 with the `nnan` fast-math flag on the call.
; Every prefix expects exactly two max instructions (one per element) and no
; compare/select sequence: v_max_f64 for GFX7 through GFX11, v_maximum_f64 for
; GFX12.
629 define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src1) {
630 ; GFX7-LABEL: v_maximum_v2f64__nnan:
632 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
634 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
635 ; GFX7-NEXT: s_setpc_b64 s[30:31]
637 ; GFX8-LABEL: v_maximum_v2f64__nnan:
639 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
641 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
642 ; GFX8-NEXT: s_setpc_b64 s[30:31]
644 ; GFX9-LABEL: v_maximum_v2f64__nnan:
646 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
647 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
648 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
649 ; GFX9-NEXT: s_setpc_b64 s[30:31]
651 ; GFX940-LABEL: v_maximum_v2f64__nnan:
653 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
655 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
656 ; GFX940-NEXT: s_setpc_b64 s[30:31]
658 ; GFX10-LABEL: v_maximum_v2f64__nnan:
660 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
662 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
663 ; GFX10-NEXT: s_setpc_b64 s[30:31]
665 ; GFX11-LABEL: v_maximum_v2f64__nnan:
667 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
669 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
670 ; GFX11-NEXT: s_setpc_b64 s[30:31]
672 ; GFX12-LABEL: v_maximum_v2f64__nnan:
674 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
675 ; GFX12-NEXT: s_wait_expcnt 0x0
676 ; GFX12-NEXT: s_wait_samplecnt 0x0
677 ; GFX12-NEXT: s_wait_bvhcnt 0x0
678 ; GFX12-NEXT: s_wait_kmcnt 0x0
679 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
680 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
681 ; GFX12-NEXT: s_setpc_b64 s[30:31]
682 %op = call nnan <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
; llvm.maximum.v2f64 with only the `nsz` fast-math flag on the call.
; The GFX7 through GFX11 checks still expect the full per-element sequence: two
; v_max_f64, two v_cmp_u_f64, and four v_cndmask_b32 selecting the constants
; 0 / 0x7ff80000.
; The GFX12 checks expect two v_maximum_f64 and no compare/select.
686 define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1) {
687 ; GFX7-LABEL: v_maximum_v2f64__nsz:
689 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690 ; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
691 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
692 ; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
693 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
694 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
695 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
696 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
697 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
698 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
699 ; GFX7-NEXT: s_setpc_b64 s[30:31]
701 ; GFX8-LABEL: v_maximum_v2f64__nsz:
703 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704 ; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
705 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
706 ; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
707 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
708 ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
709 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
710 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
711 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
712 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
713 ; GFX8-NEXT: s_setpc_b64 s[30:31]
715 ; GFX9-LABEL: v_maximum_v2f64__nsz:
717 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718 ; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
719 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
720 ; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
721 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
722 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
723 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
724 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
725 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
726 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
727 ; GFX9-NEXT: s_setpc_b64 s[30:31]
729 ; GFX940-LABEL: v_maximum_v2f64__nsz:
731 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732 ; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
733 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
734 ; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
735 ; GFX940-NEXT: s_nop 0
736 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
737 ; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
738 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
739 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
740 ; GFX940-NEXT: s_nop 1
741 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
742 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
743 ; GFX940-NEXT: s_setpc_b64 s[30:31]
745 ; GFX10-LABEL: v_maximum_v2f64__nsz:
747 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748 ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
749 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
750 ; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
751 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
752 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
753 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
754 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
755 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
756 ; GFX10-NEXT: s_setpc_b64 s[30:31]
758 ; GFX11-LABEL: v_maximum_v2f64__nsz:
760 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761 ; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
762 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
763 ; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
764 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
765 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
766 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
767 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
768 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
769 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
770 ; GFX11-NEXT: s_setpc_b64 s[30:31]
772 ; GFX12-LABEL: v_maximum_v2f64__nsz:
774 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
775 ; GFX12-NEXT: s_wait_expcnt 0x0
776 ; GFX12-NEXT: s_wait_samplecnt 0x0
777 ; GFX12-NEXT: s_wait_bvhcnt 0x0
778 ; GFX12-NEXT: s_wait_kmcnt 0x0
779 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
780 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
781 ; GFX12-NEXT: s_setpc_b64 s[30:31]
782 %op = call nsz <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
; llvm.maximum.v2f64 with both `nnan` and `nsz` fast-math flags on the call.
; Every prefix expects exactly two max instructions (one per element) and no
; compare/select sequence: v_max_f64 for GFX7 through GFX11, v_maximum_f64 for
; GFX12.
786 define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> %src1) {
787 ; GFX7-LABEL: v_maximum_v2f64__nnan_nsz:
789 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
791 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
792 ; GFX7-NEXT: s_setpc_b64 s[30:31]
794 ; GFX8-LABEL: v_maximum_v2f64__nnan_nsz:
796 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
798 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
799 ; GFX8-NEXT: s_setpc_b64 s[30:31]
801 ; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
803 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
805 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
806 ; GFX9-NEXT: s_setpc_b64 s[30:31]
808 ; GFX940-LABEL: v_maximum_v2f64__nnan_nsz:
810 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
812 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
813 ; GFX940-NEXT: s_setpc_b64 s[30:31]
815 ; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
817 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
819 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
820 ; GFX10-NEXT: s_setpc_b64 s[30:31]
822 ; GFX11-LABEL: v_maximum_v2f64__nnan_nsz:
824 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
826 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
827 ; GFX11-NEXT: s_setpc_b64 s[30:31]
829 ; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
831 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
832 ; GFX12-NEXT: s_wait_expcnt 0x0
833 ; GFX12-NEXT: s_wait_samplecnt 0x0
834 ; GFX12-NEXT: s_wait_bvhcnt 0x0
835 ; GFX12-NEXT: s_wait_kmcnt 0x0
836 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
837 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
838 ; GFX12-NEXT: s_setpc_b64 s[30:31]
839 %op = call nnan nsz <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
; llvm.maximum on 2 x double with both operands passed inreg (in SGPRs).
; Before gfx12 each element expands to v_max_f64 plus v_cmp_u_f64, and a
; v_cndmask pair substitutes the quiet-NaN pattern (high dword 0x7ff80000,
; low dword 0) when the compare reports unordered. gfx12 selects
; v_maximum_f64 directly. The result is produced in VGPRs v[0..3], so the
; inline asm that keeps it alive takes a "v" (VGPR) operand, matching the
; "use v[0:3]" text every prefix expects.
843 define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) {
844 ; GFX7-LABEL: s_maximum_v2f64:
846 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX7-NEXT: v_mov_b32_e32 v0, s20
848 ; GFX7-NEXT: v_mov_b32_e32 v4, s18
849 ; GFX7-NEXT: v_mov_b32_e32 v1, s21
850 ; GFX7-NEXT: v_mov_b32_e32 v5, s19
851 ; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
852 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
853 ; GFX7-NEXT: v_max_f64 v[0:1], s[6:7], v[4:5]
854 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[6:7], v[4:5]
855 ; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
856 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
857 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
858 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
859 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
860 ; GFX7-NEXT: ;;#ASMSTART
861 ; GFX7-NEXT: ; use v[0:3]
862 ; GFX7-NEXT: ;;#ASMEND
863 ; GFX7-NEXT: s_setpc_b64 s[30:31]
865 ; GFX8-LABEL: s_maximum_v2f64:
867 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868 ; GFX8-NEXT: v_mov_b32_e32 v0, s20
869 ; GFX8-NEXT: v_mov_b32_e32 v4, s18
870 ; GFX8-NEXT: v_mov_b32_e32 v1, s21
871 ; GFX8-NEXT: v_mov_b32_e32 v5, s19
872 ; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
873 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
874 ; GFX8-NEXT: v_max_f64 v[0:1], s[6:7], v[4:5]
875 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[6:7], v[4:5]
876 ; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
877 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
878 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
879 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
880 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
881 ; GFX8-NEXT: ;;#ASMSTART
882 ; GFX8-NEXT: ; use v[0:3]
883 ; GFX8-NEXT: ;;#ASMEND
884 ; GFX8-NEXT: s_setpc_b64 s[30:31]
886 ; GFX9-LABEL: s_maximum_v2f64:
888 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX9-NEXT: v_mov_b32_e32 v0, s20
890 ; GFX9-NEXT: v_mov_b32_e32 v4, s18
891 ; GFX9-NEXT: v_mov_b32_e32 v1, s21
892 ; GFX9-NEXT: v_mov_b32_e32 v5, s19
893 ; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
894 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
895 ; GFX9-NEXT: v_max_f64 v[0:1], s[6:7], v[4:5]
896 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[6:7], v[4:5]
897 ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
898 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
899 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
900 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
901 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
902 ; GFX9-NEXT: ;;#ASMSTART
903 ; GFX9-NEXT: ; use v[0:3]
904 ; GFX9-NEXT: ;;#ASMEND
905 ; GFX9-NEXT: s_setpc_b64 s[30:31]
907 ; GFX940-LABEL: s_maximum_v2f64:
909 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
911 ; GFX940-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
912 ; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
913 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
914 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
915 ; GFX940-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
916 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
917 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
918 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
919 ; GFX940-NEXT: s_nop 1
920 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
921 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
922 ; GFX940-NEXT: ;;#ASMSTART
923 ; GFX940-NEXT: ; use v[0:3]
924 ; GFX940-NEXT: ;;#ASMEND
925 ; GFX940-NEXT: s_setpc_b64 s[30:31]
927 ; GFX10-LABEL: s_maximum_v2f64:
929 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930 ; GFX10-NEXT: v_max_f64 v[0:1], s[16:17], s[20:21]
931 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[16:17], s[20:21]
932 ; GFX10-NEXT: v_max_f64 v[4:5], s[6:7], s[18:19]
933 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, s[6:7], s[18:19]
934 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s4
935 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s4
936 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s5
937 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s5
938 ; GFX10-NEXT: ;;#ASMSTART
939 ; GFX10-NEXT: ; use v[0:3]
940 ; GFX10-NEXT: ;;#ASMEND
941 ; GFX10-NEXT: s_setpc_b64 s[30:31]
943 ; GFX11-LABEL: s_maximum_v2f64:
945 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
946 ; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[16:17]
947 ; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[16:17]
948 ; GFX11-NEXT: v_max_f64 v[4:5], s[0:1], s[6:7]
949 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[6:7]
950 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
951 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
952 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
953 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
954 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
955 ; GFX11-NEXT: ;;#ASMSTART
956 ; GFX11-NEXT: ; use v[0:3]
957 ; GFX11-NEXT: ;;#ASMEND
958 ; GFX11-NEXT: s_setpc_b64 s[30:31]
960 ; GFX12-LABEL: s_maximum_v2f64:
962 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
963 ; GFX12-NEXT: s_wait_expcnt 0x0
964 ; GFX12-NEXT: s_wait_samplecnt 0x0
965 ; GFX12-NEXT: s_wait_bvhcnt 0x0
966 ; GFX12-NEXT: s_wait_kmcnt 0x0
967 ; GFX12-NEXT: v_maximum_f64 v[2:3], s[2:3], s[16:17]
968 ; GFX12-NEXT: v_maximum_f64 v[0:1], s[0:1], s[6:7]
969 ; GFX12-NEXT: ;;#ASMSTART
970 ; GFX12-NEXT: ; use v[0:3]
971 ; GFX12-NEXT: ;;#ASMEND
972 ; GFX12-NEXT: s_setpc_b64 s[30:31]
973 %op = call <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
974 call void asm sideeffect "; use $0", "v"(<2 x double> %op)
; llvm.maximum on 3 x double with no fast-math flags.
; Before gfx12 each element expands to v_max_f64 plus v_cmp_u_f64, and a
; v_cndmask pair substitutes the quiet-NaN pattern (high dword 0x7ff80000,
; low dword 0) when the compare reports unordered. gfx12 selects one
; v_maximum_f64 per element.
978 define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
979 ; GFX7-LABEL: v_maximum_v3f64:
981 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982 ; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
983 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
984 ; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
985 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
986 ; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
987 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
988 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
989 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
990 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
991 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
992 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
993 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
994 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
995 ; GFX7-NEXT: s_setpc_b64 s[30:31]
997 ; GFX8-LABEL: v_maximum_v3f64:
999 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000 ; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1001 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1002 ; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1003 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
1004 ; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1005 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
1006 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
1007 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1008 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
1009 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
1010 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
1011 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
1012 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
1013 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1015 ; GFX9-LABEL: v_maximum_v3f64:
1017 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018 ; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1019 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1020 ; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1021 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
1022 ; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1023 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
1024 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
1025 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1026 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
1027 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
1028 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
1029 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
1030 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
1031 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1033 ; GFX940-LABEL: v_maximum_v3f64:
1035 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1037 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1038 ; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1039 ; GFX940-NEXT: s_nop 0
1040 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1041 ; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
1042 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
1043 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
1044 ; GFX940-NEXT: s_nop 1
1045 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
1046 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
1047 ; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
1048 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
1049 ; GFX940-NEXT: s_nop 1
1050 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
1051 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
1052 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1054 ; GFX10-LABEL: v_maximum_v3f64:
1056 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1058 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
1059 ; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1060 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
1061 ; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1062 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
1063 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
1064 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
1065 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
1066 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
1067 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
1068 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
1069 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1071 ; GFX11-LABEL: v_maximum_v3f64:
1073 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1075 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
1076 ; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1077 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
1078 ; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1079 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
1080 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
1081 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
1082 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
1083 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
1084 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
1085 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
1086 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1088 ; GFX12-LABEL: v_maximum_v3f64:
1090 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1091 ; GFX12-NEXT: s_wait_expcnt 0x0
1092 ; GFX12-NEXT: s_wait_samplecnt 0x0
1093 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1094 ; GFX12-NEXT: s_wait_kmcnt 0x0
1095 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
1096 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
1097 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
1098 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1099 %op = call <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
1100 ret <3 x double> %op
; llvm.maximum on 3 x double with the nnan fast-math flag.
; With NaN inputs excluded, every target is expected to emit just one max
; instruction per element (v_max_f64 before gfx12, v_maximum_f64 on gfx12)
; and no unordered compare or select.
1103 define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src1) {
1104 ; GFX7-LABEL: v_maximum_v3f64__nnan:
1106 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1108 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1109 ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1110 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1112 ; GFX8-LABEL: v_maximum_v3f64__nnan:
1114 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1115 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1116 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1117 ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1118 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1120 ; GFX9-LABEL: v_maximum_v3f64__nnan:
1122 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1124 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1125 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1126 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1128 ; GFX940-LABEL: v_maximum_v3f64__nnan:
1130 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1132 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1133 ; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1134 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1136 ; GFX10-LABEL: v_maximum_v3f64__nnan:
1138 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1139 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1140 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1141 ; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1142 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1144 ; GFX11-LABEL: v_maximum_v3f64__nnan:
1146 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1147 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1148 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1149 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1150 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1152 ; GFX12-LABEL: v_maximum_v3f64__nnan:
1154 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1155 ; GFX12-NEXT: s_wait_expcnt 0x0
1156 ; GFX12-NEXT: s_wait_samplecnt 0x0
1157 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1158 ; GFX12-NEXT: s_wait_kmcnt 0x0
1159 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
1160 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
1161 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
1162 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1163 %op = call nnan <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
1164 ret <3 x double> %op
; llvm.maximum on 3 x double with only the nsz fast-math flag.
; The expected code has the same shape as the unflagged 3 x double case.
; Before gfx12 each element still gets v_max_f64, v_cmp_u_f64 and a
; v_cndmask pair that inserts the quiet-NaN pattern (high dword 0x7ff80000,
; low dword 0). gfx12 selects one v_maximum_f64 per element.
1167 define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1) {
1168 ; GFX7-LABEL: v_maximum_v3f64__nsz:
1170 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1171 ; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1172 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1173 ; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1174 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
1175 ; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1176 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
1177 ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
1178 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1179 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
1180 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
1181 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
1182 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
1183 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
1184 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1186 ; GFX8-LABEL: v_maximum_v3f64__nsz:
1188 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1189 ; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1190 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1191 ; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1192 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
1193 ; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1194 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
1195 ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
1196 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1197 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
1198 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
1199 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
1200 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
1201 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
1202 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1204 ; GFX9-LABEL: v_maximum_v3f64__nsz:
1206 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1207 ; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1208 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1209 ; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1210 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
1211 ; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1212 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
1213 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
1214 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1215 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
1216 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
1217 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
1218 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
1219 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
1220 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX940-LABEL: v_maximum_v3f64__nsz:
1224 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1226 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
1227 ; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1228 ; GFX940-NEXT: s_nop 0
1229 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
1230 ; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
1231 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
1232 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
1233 ; GFX940-NEXT: s_nop 1
1234 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
1235 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
1236 ; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
1237 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
1238 ; GFX940-NEXT: s_nop 1
1239 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
1240 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
1241 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1243 ; GFX10-LABEL: v_maximum_v3f64__nsz:
1245 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1247 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
1248 ; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1249 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
1250 ; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1251 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
1252 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
1253 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
1254 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
1255 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
1256 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
1257 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
1258 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1260 ; GFX11-LABEL: v_maximum_v3f64__nsz:
1262 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263 ; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
1264 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
1265 ; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
1266 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
1267 ; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
1268 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
1269 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
1270 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
1271 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
1272 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
1273 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
1274 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
1275 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1277 ; GFX12-LABEL: v_maximum_v3f64__nsz:
1279 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1280 ; GFX12-NEXT: s_wait_expcnt 0x0
1281 ; GFX12-NEXT: s_wait_samplecnt 0x0
1282 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1283 ; GFX12-NEXT: s_wait_kmcnt 0x0
1284 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
1285 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
1286 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
1287 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1288 %op = call nsz <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
1289 ret <3 x double> %op
; llvm.maximum on 3 x double with both the nnan and nsz fast-math flags.
; The expected code is one max instruction per element on every target
; (v_max_f64 before gfx12, v_maximum_f64 on gfx12) with no compare/select
; NaN fixup after it.
1292 define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double> %src1) {
1293 ; GFX7-LABEL: v_maximum_v3f64__nnan_nsz:
1295 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1297 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1298 ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1299 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1301 ; GFX8-LABEL: v_maximum_v3f64__nnan_nsz:
1303 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1305 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1306 ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1307 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1309 ; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
1311 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1312 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1313 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1314 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1315 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1317 ; GFX940-LABEL: v_maximum_v3f64__nnan_nsz:
1319 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1321 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1322 ; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1323 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1325 ; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
1327 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1328 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1329 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1330 ; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1331 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1333 ; GFX11-LABEL: v_maximum_v3f64__nnan_nsz:
1335 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
1337 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
1338 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
1339 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1341 ; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
1343 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1344 ; GFX12-NEXT: s_wait_expcnt 0x0
1345 ; GFX12-NEXT: s_wait_samplecnt 0x0
1346 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1347 ; GFX12-NEXT: s_wait_kmcnt 0x0
1348 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
1349 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
1350 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
1351 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1352 %op = call nnan nsz <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
1353 ret <3 x double> %op
; llvm.maximum on 4 x double with no fast-math flags.
; Before gfx12 each element expands to v_max_f64 plus v_cmp_u_f64, and a
; v_cndmask pair substitutes the quiet-NaN pattern (high dword 0x7ff80000,
; low dword 0) when the compare reports unordered. gfx12 selects one
; v_maximum_f64 per element.
1356 define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
1357 ; GFX7-LABEL: v_maximum_v4f64:
1359 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360 ; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1361 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1362 ; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1363 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1364 ; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1365 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1366 ; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1367 ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1368 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1369 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1370 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1371 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1372 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1373 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1374 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1375 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1376 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1377 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1379 ; GFX8-LABEL: v_maximum_v4f64:
1381 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1382 ; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1383 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1384 ; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1385 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1386 ; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1387 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1388 ; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1389 ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1390 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1391 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1392 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1393 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1394 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1395 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1396 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1397 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1398 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1399 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1401 ; GFX9-LABEL: v_maximum_v4f64:
1403 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1405 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1406 ; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1407 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1408 ; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1409 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1410 ; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1411 ; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1412 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1413 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1414 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1415 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1416 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1417 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1418 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1419 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1420 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1421 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1423 ; GFX940-LABEL: v_maximum_v4f64:
1425 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426 ; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1427 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1428 ; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1429 ; GFX940-NEXT: s_nop 0
1430 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1431 ; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
1432 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
1433 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
1434 ; GFX940-NEXT: s_nop 1
1435 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
1436 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
1437 ; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
1438 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
1439 ; GFX940-NEXT: s_nop 1
1440 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
1441 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
1442 ; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
1443 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
1444 ; GFX940-NEXT: s_nop 1
1445 ; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
1446 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
1447 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1449 ; GFX10-LABEL: v_maximum_v4f64:
1451 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452 ; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1453 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
1454 ; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1455 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
1456 ; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1457 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
1458 ; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1459 ; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
1460 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
1461 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
1462 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
1463 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
1464 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
1465 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
1466 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
1467 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
1468 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX11-LABEL: v_maximum_v4f64:
1472 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1474 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
1475 ; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1476 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
1477 ; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1478 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
1479 ; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1480 ; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
1481 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
1482 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
1483 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
1484 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
1485 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
1486 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
1487 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
1488 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
1489 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1491 ; GFX12-LABEL: v_maximum_v4f64:
1493 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1494 ; GFX12-NEXT: s_wait_expcnt 0x0
1495 ; GFX12-NEXT: s_wait_samplecnt 0x0
1496 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1497 ; GFX12-NEXT: s_wait_kmcnt 0x0
1498 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
1499 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
1500 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
1501 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
1502 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1503 %op = call <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
1504 ret <4 x double> %op
; llvm.maximum on 4 x double with the nnan fast-math flag.
; With NaN inputs excluded, every target is expected to emit just one max
; instruction per element (v_max_f64 before gfx12, v_maximum_f64 on gfx12)
; and no unordered compare or select.
1507 define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src1) {
1508 ; GFX7-LABEL: v_maximum_v4f64__nnan:
1510 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1512 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1513 ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1514 ; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1515 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1517 ; GFX8-LABEL: v_maximum_v4f64__nnan:
1519 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1521 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1522 ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1523 ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1524 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1526 ; GFX9-LABEL: v_maximum_v4f64__nnan:
1528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1529 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1530 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1531 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1532 ; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1533 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1535 ; GFX940-LABEL: v_maximum_v4f64__nnan:
1537 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1539 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1540 ; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1541 ; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1542 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1544 ; GFX10-LABEL: v_maximum_v4f64__nnan:
1546 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1548 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1549 ; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1550 ; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1551 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX11-LABEL: v_maximum_v4f64__nnan:
1555 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1557 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1558 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1559 ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1560 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1562 ; GFX12-LABEL: v_maximum_v4f64__nnan:
1564 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1565 ; GFX12-NEXT: s_wait_expcnt 0x0
1566 ; GFX12-NEXT: s_wait_samplecnt 0x0
1567 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1568 ; GFX12-NEXT: s_wait_kmcnt 0x0
1569 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
1570 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
1571 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
1572 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
1573 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1574 %op = call nnan <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
1575 ret <4 x double> %op
; Checks lowering of llvm.maximum.v4f64 when the call carries only the nsz
; fast-math flag. Expected output below, per run line at the top of the file:
; for gfx703 through gfx1100 every element is a v_max_f64 paired with a
; v_cmp_u_f64 (unordered compare) whose result drives two v_cndmask_b32 that
; substitute 0 for the low dword and 0x7ff80000 for the high dword (the f64
; quiet-NaN bit pattern); for gfx1200 every element is a single v_maximum_f64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1578 define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1) {
1579 ; GFX7-LABEL: v_maximum_v4f64__nsz:
1581 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582 ; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1583 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1584 ; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1585 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1586 ; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1587 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1588 ; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1589 ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1590 ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1591 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1592 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1593 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1594 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1595 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1596 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1597 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1598 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1599 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1601 ; GFX8-LABEL: v_maximum_v4f64__nsz:
1603 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1604 ; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1605 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1606 ; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1607 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1608 ; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1609 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1610 ; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1611 ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1612 ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1613 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1614 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1615 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1616 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1617 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1618 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1619 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1620 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1621 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1623 ; GFX9-LABEL: v_maximum_v4f64__nsz:
1625 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1627 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1628 ; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1629 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
1630 ; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1631 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
1632 ; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1633 ; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
1634 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
1635 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1636 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
1637 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
1638 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
1639 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
1640 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
1641 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
1642 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
1643 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1645 ; GFX940-LABEL: v_maximum_v4f64__nsz:
1647 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648 ; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1649 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
1650 ; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1651 ; GFX940-NEXT: s_nop 0
1652 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
1653 ; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
1654 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
1655 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
1656 ; GFX940-NEXT: s_nop 1
1657 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
1658 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
1659 ; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
1660 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
1661 ; GFX940-NEXT: s_nop 1
1662 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
1663 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
1664 ; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
1665 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
1666 ; GFX940-NEXT: s_nop 1
1667 ; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
1668 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
1669 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1671 ; GFX10-LABEL: v_maximum_v4f64__nsz:
1673 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1674 ; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1675 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
1676 ; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1677 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
1678 ; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1679 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
1680 ; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1681 ; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
1682 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
1683 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
1684 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
1685 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
1686 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
1687 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
1688 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
1689 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
1690 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1692 ; GFX11-LABEL: v_maximum_v4f64__nsz:
1694 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695 ; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
1696 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
1697 ; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
1698 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
1699 ; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
1700 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
1701 ; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
1702 ; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
1703 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
1704 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
1705 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
1706 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
1707 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
1708 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
1709 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
1710 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
1711 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1713 ; GFX12-LABEL: v_maximum_v4f64__nsz:
1715 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1716 ; GFX12-NEXT: s_wait_expcnt 0x0
1717 ; GFX12-NEXT: s_wait_samplecnt 0x0
1718 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1719 ; GFX12-NEXT: s_wait_kmcnt 0x0
1720 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
1721 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
1722 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
1723 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
1724 ; GFX12-NEXT: s_setpc_b64 s[30:31]
; Only nsz is set on this call (no nnan), which is the case the unordered
; compare/select sequences expected above correspond to.
1725 %op = call nsz <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
1726 ret <4 x double> %op
; Checks lowering of llvm.maximum.v4f64 when the call carries both the nnan
; and nsz fast-math flags. Expected output below, per run line at the top of
; the file: no unordered compare or select is emitted; gfx703 through gfx1100
; produce one v_max_f64 per element and gfx1200 produces one v_maximum_f64 per
; element. The check lines are autogenerated (see the NOTE on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
1729 define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> %src1) {
1730 ; GFX7-LABEL: v_maximum_v4f64__nnan_nsz:
1732 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1733 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1734 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1735 ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1736 ; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1737 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1739 ; GFX8-LABEL: v_maximum_v4f64__nnan_nsz:
1741 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1742 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1743 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1744 ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1745 ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1746 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1748 ; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
1750 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1751 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1752 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1753 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1754 ; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1755 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1757 ; GFX940-LABEL: v_maximum_v4f64__nnan_nsz:
1759 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1761 ; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1762 ; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1763 ; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1764 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1766 ; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
1768 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1769 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1770 ; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1771 ; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1772 ; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1773 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1775 ; GFX11-LABEL: v_maximum_v4f64__nnan_nsz:
1777 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1778 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
1779 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
1780 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
1781 ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
1782 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1784 ; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
1786 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1787 ; GFX12-NEXT: s_wait_expcnt 0x0
1788 ; GFX12-NEXT: s_wait_samplecnt 0x0
1789 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1790 ; GFX12-NEXT: s_wait_kmcnt 0x0
1791 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
1792 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
1793 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
1794 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
1795 ; GFX12-NEXT: s_setpc_b64 s[30:31]
; Both nnan and nsz are set on this call; the expectations above contain no
; NaN fix-up sequence for any run line.
1796 %op = call nnan nsz <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
1797 ret <4 x double> %op
; Checks lowering of llvm.maximum.v8f64 with no fast-math flags. Expected
; output below, per run line at the top of the file: for gfx703 through
; gfx1100 every element is a v_max_f64 paired with a v_cmp_u_f64 (unordered
; compare) whose result drives two v_cndmask_b32 that substitute 0 for the low
; dword and 0x7ff80000 for the high dword (the f64 quiet-NaN bit pattern); for
; gfx1200 every element is a single v_maximum_f64.
; In every variant v31 is first loaded from the stack at s32, and the element
; that reads v[30:31] is only processed after a wait on that load.
; NOTE(review): presumably v31 comes from the stack because the two vector
; arguments need more dwords than are passed in VGPRs; confirm against the
; AMDGPU calling convention.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1800 define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
1801 ; GFX7-LABEL: v_maximum_v8f64:
1803 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
1805 ; GFX7-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
1806 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
1807 ; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
1808 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
1809 ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
1810 ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
1811 ; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
1812 ; GFX7-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
1813 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
1814 ; GFX7-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
1815 ; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
1816 ; GFX7-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
1817 ; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
1818 ; GFX7-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
1819 ; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
1820 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
1821 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
1822 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
1823 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
1824 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
1825 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
1826 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
1827 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
1828 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
1829 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
1830 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
1831 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
1832 ; GFX7-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
1833 ; GFX7-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
1834 ; GFX7-NEXT: s_waitcnt vmcnt(0)
1835 ; GFX7-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
1836 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
1837 ; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
1838 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
1839 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1841 ; GFX8-LABEL: v_maximum_v8f64:
1843 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1844 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
1845 ; GFX8-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
1846 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
1847 ; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
1848 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
1849 ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
1850 ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
1851 ; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
1852 ; GFX8-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
1853 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
1854 ; GFX8-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
1855 ; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
1856 ; GFX8-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
1857 ; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
1858 ; GFX8-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
1859 ; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
1860 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
1861 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
1862 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
1863 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
1864 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
1865 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
1866 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
1867 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
1868 ; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
1869 ; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
1870 ; GFX8-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
1871 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
1872 ; GFX8-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
1873 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
1874 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1875 ; GFX8-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
1876 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
1877 ; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
1878 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
1879 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1881 ; GFX9-LABEL: v_maximum_v8f64:
1883 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
1885 ; GFX9-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
1886 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
1887 ; GFX9-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
1888 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
1889 ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
1890 ; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
1891 ; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
1892 ; GFX9-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
1893 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
1894 ; GFX9-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
1895 ; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
1896 ; GFX9-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
1897 ; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
1898 ; GFX9-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
1899 ; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
1900 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
1901 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
1902 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
1903 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
1904 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
1905 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
1906 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
1907 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
1908 ; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
1909 ; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
1910 ; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
1911 ; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
1912 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
1913 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
1914 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1915 ; GFX9-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
1916 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
1917 ; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
1918 ; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
1919 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1921 ; GFX940-LABEL: v_maximum_v8f64:
1923 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924 ; GFX940-NEXT: scratch_load_dword v31, off, s32
1925 ; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
1926 ; GFX940-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
1927 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
1928 ; GFX940-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
1929 ; GFX940-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
1930 ; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
1931 ; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
1932 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
1933 ; GFX940-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
1934 ; GFX940-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
1935 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
1936 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
1937 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
1938 ; GFX940-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
1939 ; GFX940-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
1940 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
1941 ; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
1942 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
1943 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1944 ; GFX940-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
1945 ; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
1946 ; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
1947 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
1948 ; GFX940-NEXT: s_nop 1
1949 ; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
1950 ; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
1951 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
1952 ; GFX940-NEXT: s_nop 1
1953 ; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
1954 ; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
1955 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
1956 ; GFX940-NEXT: s_nop 1
1957 ; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
1958 ; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
1959 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
1960 ; GFX940-NEXT: s_nop 1
1961 ; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
1962 ; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
1963 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1965 ; GFX10-LABEL: v_maximum_v8f64:
1967 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1968 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1969 ; GFX10-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
1970 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
1971 ; GFX10-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
1972 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
1973 ; GFX10-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
1974 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
1975 ; GFX10-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
1976 ; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
1977 ; GFX10-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
1978 ; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
1979 ; GFX10-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
1980 ; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
1981 ; GFX10-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
1982 ; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
1983 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
1984 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
1985 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
1986 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
1987 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
1988 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
1989 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
1990 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
1991 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
1992 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
1993 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
1994 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
1995 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
1996 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
1997 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1998 ; GFX10-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
1999 ; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
2000 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
2001 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
2002 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2004 ; GFX11-LABEL: v_maximum_v8f64:
2006 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
2008 ; GFX11-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
2009 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
2010 ; GFX11-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
2011 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
2012 ; GFX11-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
2013 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
2014 ; GFX11-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
2015 ; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
2016 ; GFX11-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
2017 ; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
2018 ; GFX11-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
2019 ; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
2020 ; GFX11-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
2021 ; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
2022 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
2023 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
2024 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
2025 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
2026 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
2027 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
2028 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
2029 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
2030 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
2031 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
2032 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
2033 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
2034 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
2035 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
2036 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2037 ; GFX11-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
2038 ; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
2039 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2040 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
2041 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
2042 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2044 ; GFX12-LABEL: v_maximum_v8f64:
2046 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2047 ; GFX12-NEXT: s_wait_expcnt 0x0
2048 ; GFX12-NEXT: s_wait_samplecnt 0x0
2049 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2050 ; GFX12-NEXT: s_wait_kmcnt 0x0
2051 ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2052 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
2053 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[18:19]
2054 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[20:21]
2055 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[22:23]
2056 ; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[24:25]
2057 ; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[26:27]
2058 ; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[28:29]
2059 ; GFX12-NEXT: s_wait_loadcnt 0x0
2060 ; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[30:31]
2061 ; GFX12-NEXT: s_setpc_b64 s[30:31]
; No fast-math flags on this call, so the full NaN-handling sequence is
; expected wherever the run line does not produce v_maximum_f64.
2062 %op = call <8 x double> @llvm.maximum.v8f64(<8 x double> %src0, <8 x double> %src1)
2063 ret <8 x double> %op
2066 define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) {
2067 ; GFX7-LABEL: v_maximum_v16f64:
2069 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
2071 ; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
2072 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
2073 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
2074 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
2075 ; GFX7-NEXT: v_writelane_b32 v34, s30, 0
2076 ; GFX7-NEXT: v_writelane_b32 v34, s31, 1
2077 ; GFX7-NEXT: v_writelane_b32 v34, s34, 2
2078 ; GFX7-NEXT: v_writelane_b32 v34, s35, 3
2079 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
2081 ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
2082 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
2083 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
2084 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
2085 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2086 ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
2087 ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
2088 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
2089 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
2090 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
2091 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2092 ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
2093 ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
2094 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
2095 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
2096 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
2097 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2098 ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
2099 ; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
2100 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
2101 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
2102 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
2103 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2104 ; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
2105 ; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
2106 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
2107 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
2108 ; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
2109 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2110 ; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
2111 ; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
2112 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
2113 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
2114 ; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
2115 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2116 ; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
2117 ; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
2118 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
2119 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
2120 ; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
2121 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2122 ; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
2123 ; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
2124 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
2125 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
2126 ; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
2127 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2128 ; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
2129 ; GFX7-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
2130 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
2131 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
2132 ; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
2133 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2134 ; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
2135 ; GFX7-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
2136 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
2137 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
2138 ; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
2139 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2140 ; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
2141 ; GFX7-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
2142 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
2143 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
2144 ; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
2145 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2146 ; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
2147 ; GFX7-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
2148 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
2149 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
2150 ; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
2151 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2152 ; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
2153 ; GFX7-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
2154 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
2155 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
2156 ; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
2157 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2158 ; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
2159 ; GFX7-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
2160 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
2161 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
2162 ; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
2163 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2164 ; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
2165 ; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
2166 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
2167 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
2168 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
2169 ; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
2170 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2171 ; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
2172 ; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
2173 ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
2174 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
2175 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
2176 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
2177 ; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
2178 ; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
2179 ; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
2180 ; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
2181 ; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
2182 ; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
2183 ; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
2184 ; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
2185 ; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
2186 ; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
2187 ; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
2188 ; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
2189 ; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
2190 ; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
2191 ; GFX7-NEXT: v_readlane_b32 s35, v34, 3
2192 ; GFX7-NEXT: v_readlane_b32 s34, v34, 2
2193 ; GFX7-NEXT: v_readlane_b32 s31, v34, 1
2194 ; GFX7-NEXT: v_readlane_b32 s30, v34, 0
2195 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
2196 ; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
2197 ; GFX7-NEXT: s_mov_b64 exec, s[4:5]
2198 ; GFX7-NEXT: s_waitcnt vmcnt(0)
2199 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2201 ; GFX8-LABEL: v_maximum_v16f64:
2203 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
2205 ; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
2206 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
2207 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
2208 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
2209 ; GFX8-NEXT: v_writelane_b32 v34, s30, 0
2210 ; GFX8-NEXT: v_writelane_b32 v34, s31, 1
2211 ; GFX8-NEXT: v_writelane_b32 v34, s34, 2
2212 ; GFX8-NEXT: v_writelane_b32 v34, s35, 3
2213 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2214 ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
2215 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
2216 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
2217 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
2218 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
2219 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2220 ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
2221 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
2222 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
2223 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
2224 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
2225 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2226 ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
2227 ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
2228 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
2229 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
2230 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
2231 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2232 ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
2233 ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
2234 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
2235 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
2236 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
2237 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2238 ; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
2239 ; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
2240 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
2241 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
2242 ; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
2243 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2244 ; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
2245 ; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
2246 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
2247 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
2248 ; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
2249 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2250 ; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
2251 ; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
2252 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
2253 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
2254 ; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
2255 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2256 ; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
2257 ; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
2258 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
2259 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
2260 ; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
2261 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2262 ; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
2263 ; GFX8-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
2264 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
2265 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
2266 ; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
2267 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2268 ; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
2269 ; GFX8-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
2270 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
2271 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
2272 ; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
2273 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2274 ; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
2275 ; GFX8-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
2276 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
2277 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
2278 ; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
2279 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2280 ; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
2281 ; GFX8-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
2282 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
2283 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
2284 ; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
2285 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2286 ; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
2287 ; GFX8-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
2288 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
2289 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
2290 ; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
2291 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2292 ; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
2293 ; GFX8-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
2294 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
2295 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
2296 ; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
2297 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2298 ; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
2299 ; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
2300 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
2301 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
2302 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
2303 ; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
2304 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2305 ; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
2306 ; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
2307 ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
2308 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
2309 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
2310 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
2311 ; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
2312 ; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
2313 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
2314 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
2315 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
2316 ; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
2317 ; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
2318 ; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
2319 ; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
2320 ; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
2321 ; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
2322 ; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
2323 ; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
2324 ; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
2325 ; GFX8-NEXT: v_readlane_b32 s35, v34, 3
2326 ; GFX8-NEXT: v_readlane_b32 s34, v34, 2
2327 ; GFX8-NEXT: v_readlane_b32 s31, v34, 1
2328 ; GFX8-NEXT: v_readlane_b32 s30, v34, 0
2329 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
2330 ; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
2331 ; GFX8-NEXT: s_mov_b64 exec, s[4:5]
2332 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2333 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2335 ; GFX9-LABEL: v_maximum_v16f64:
2337 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2338 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
2339 ; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
2340 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2341 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
2342 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
2343 ; GFX9-NEXT: v_writelane_b32 v34, s30, 0
2344 ; GFX9-NEXT: v_writelane_b32 v34, s31, 1
2345 ; GFX9-NEXT: v_writelane_b32 v34, s34, 2
2346 ; GFX9-NEXT: v_writelane_b32 v34, s35, 3
2347 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2348 ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
2349 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
2350 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
2351 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
2352 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
2353 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2354 ; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
2355 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
2356 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
2357 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
2358 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
2359 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2360 ; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
2361 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
2362 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
2363 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
2364 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
2365 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2366 ; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
2367 ; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
2368 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
2369 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
2370 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
2371 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2372 ; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
2373 ; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
2374 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
2375 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
2376 ; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
2377 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2378 ; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
2379 ; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
2380 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
2381 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
2382 ; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
2383 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2384 ; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
2385 ; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
2386 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
2387 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
2388 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
2389 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
2391 ; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
2392 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
2393 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
2394 ; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
2395 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2396 ; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
2397 ; GFX9-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
2398 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
2399 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
2400 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
2401 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2402 ; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
2403 ; GFX9-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
2404 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
2405 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
2406 ; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
2407 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2408 ; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
2409 ; GFX9-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
2410 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
2411 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
2412 ; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
2413 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2414 ; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
2415 ; GFX9-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
2416 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
2417 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
2418 ; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
2419 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2420 ; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
2421 ; GFX9-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
2422 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
2423 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
2424 ; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
2425 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2426 ; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
2427 ; GFX9-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
2428 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
2429 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
2430 ; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
2431 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2432 ; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
2433 ; GFX9-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
2434 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
2435 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
2436 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
2437 ; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
2438 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2439 ; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
2440 ; GFX9-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
2441 ; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
2442 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
2443 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
2444 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
2445 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
2446 ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
2447 ; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
2448 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
2449 ; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
2450 ; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
2451 ; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
2452 ; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
2453 ; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
2454 ; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
2455 ; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
2456 ; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
2457 ; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
2458 ; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
2459 ; GFX9-NEXT: v_readlane_b32 s35, v34, 3
2460 ; GFX9-NEXT: v_readlane_b32 s34, v34, 2
2461 ; GFX9-NEXT: v_readlane_b32 s31, v34, 1
2462 ; GFX9-NEXT: v_readlane_b32 s30, v34, 0
2463 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
2464 ; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
2465 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2466 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2467 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2469 ; GFX940-LABEL: v_maximum_v16f64:
2471 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472 ; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
2473 ; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
2474 ; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
2475 ; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
2476 ; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
2477 ; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
2478 ; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
2479 ; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
2480 ; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
2481 ; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
2482 ; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
2483 ; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
2484 ; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
2485 ; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
2486 ; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
2487 ; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
2488 ; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
2489 ; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
2490 ; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
2491 ; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
2492 ; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
2493 ; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
2494 ; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
2495 ; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
2496 ; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
2497 ; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
2498 ; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
2499 ; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
2500 ; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
2501 ; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
2502 ; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
2503 ; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
2504 ; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
2505 ; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
2506 ; GFX940-NEXT: scratch_load_dword v31, off, s32
2507 ; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
2508 ; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
2509 ; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
2510 ; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
2511 ; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
2512 ; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
2513 ; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
2514 ; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
2515 ; GFX940-NEXT: s_waitcnt vmcnt(25)
2516 ; GFX940-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
2517 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
2518 ; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
2519 ; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
2520 ; GFX940-NEXT: s_waitcnt vmcnt(25)
2521 ; GFX940-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
2522 ; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
2523 ; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
2524 ; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
2525 ; GFX940-NEXT: s_waitcnt vmcnt(25)
2526 ; GFX940-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
2527 ; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
2528 ; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
2529 ; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
2530 ; GFX940-NEXT: s_waitcnt vmcnt(25)
2531 ; GFX940-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
2532 ; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
2533 ; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
2534 ; GFX940-NEXT: s_waitcnt vmcnt(23)
2535 ; GFX940-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
2536 ; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
2537 ; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
2538 ; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
2539 ; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
2540 ; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
2541 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
2542 ; GFX940-NEXT: s_waitcnt vmcnt(21)
2543 ; GFX940-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
2544 ; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
2545 ; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
2546 ; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
2547 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
2548 ; GFX940-NEXT: s_waitcnt vmcnt(19)
2549 ; GFX940-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
2550 ; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
2551 ; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
2552 ; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
2553 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
2554 ; GFX940-NEXT: s_waitcnt vmcnt(17)
2555 ; GFX940-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
2556 ; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
2557 ; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
2558 ; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
2559 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
2560 ; GFX940-NEXT: s_waitcnt vmcnt(15)
2561 ; GFX940-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
2562 ; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
2563 ; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
2564 ; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
2565 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
2566 ; GFX940-NEXT: s_waitcnt vmcnt(13)
2567 ; GFX940-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
2568 ; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
2569 ; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
2570 ; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
2571 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
2572 ; GFX940-NEXT: s_waitcnt vmcnt(11)
2573 ; GFX940-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
2574 ; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
2575 ; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
2576 ; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
2577 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
2578 ; GFX940-NEXT: s_waitcnt vmcnt(9)
2579 ; GFX940-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
2580 ; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
2581 ; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
2582 ; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
2583 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
2584 ; GFX940-NEXT: s_waitcnt vmcnt(6)
2585 ; GFX940-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
2586 ; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
2587 ; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
2588 ; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
2589 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
2590 ; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
2591 ; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
2592 ; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
2593 ; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
2594 ; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
2595 ; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
2596 ; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
2597 ; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
2598 ; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
2599 ; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
2600 ; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
2601 ; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
2602 ; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
2603 ; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
2604 ; GFX940-NEXT: s_waitcnt vmcnt(4)
2605 ; GFX940-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
2606 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
2607 ; GFX940-NEXT: s_nop 1
2608 ; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
2609 ; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
2610 ; GFX940-NEXT: s_waitcnt vmcnt(2)
2611 ; GFX940-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
2612 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
2613 ; GFX940-NEXT: s_nop 1
2614 ; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
2615 ; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
2616 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2617 ; GFX940-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
2618 ; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
2619 ; GFX940-NEXT: s_nop 1
2620 ; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
2621 ; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
2622 ; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
2623 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2625 ; GFX10-LABEL: v_maximum_v16f64:
2627 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628 ; GFX10-NEXT: s_clause 0x19
2629 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
2630 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
2631 ; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24
2632 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
2633 ; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:32
2634 ; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:28
2635 ; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
2636 ; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68
2637 ; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:64
2638 ; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:60
2639 ; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:56
2640 ; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:52
2641 ; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:48
2642 ; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:44
2643 ; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
2644 ; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:8
2645 ; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:4
2646 ; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
2647 ; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
2648 ; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
2649 ; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
2650 ; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
2651 ; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
2652 ; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
2653 ; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:72
2654 ; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
2655 ; GFX10-NEXT: s_waitcnt vmcnt(24)
2656 ; GFX10-NEXT: v_max_f64 v[82:83], v[2:3], v[31:32]
2657 ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[31:32]
2658 ; GFX10-NEXT: s_waitcnt vmcnt(22)
2659 ; GFX10-NEXT: v_max_f64 v[84:85], v[4:5], v[33:34]
2660 ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[4:5], v[33:34]
2661 ; GFX10-NEXT: s_clause 0x3
2662 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
2663 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
2664 ; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:112
2665 ; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:108
2666 ; GFX10-NEXT: s_waitcnt vmcnt(24)
2667 ; GFX10-NEXT: v_max_f64 v[32:33], v[6:7], v[35:36]
2668 ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[6:7], v[35:36]
2669 ; GFX10-NEXT: s_clause 0x2
2670 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
2671 ; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128
2672 ; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124
2673 ; GFX10-NEXT: s_waitcnt vmcnt(23)
2674 ; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[50:51]
2675 ; GFX10-NEXT: s_waitcnt vmcnt(21)
2676 ; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[52:53]
2677 ; GFX10-NEXT: s_waitcnt vmcnt(19)
2678 ; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[10:11], v[54:55]
2679 ; GFX10-NEXT: s_waitcnt vmcnt(18)
2680 ; GFX10-NEXT: v_max_f64 v[34:35], v[8:9], v[37:38]
2681 ; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[8:9], v[37:38]
2682 ; GFX10-NEXT: s_waitcnt vmcnt(16)
2683 ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[64:65]
2684 ; GFX10-NEXT: v_max_f64 v[36:37], v[10:11], v[54:55]
2685 ; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[0:1], v[64:65]
2686 ; GFX10-NEXT: v_max_f64 v[38:39], v[12:13], v[52:53]
2687 ; GFX10-NEXT: v_max_f64 v[52:53], v[14:15], v[50:51]
2688 ; GFX10-NEXT: s_waitcnt vmcnt(11)
2689 ; GFX10-NEXT: v_max_f64 v[54:55], v[20:21], v[70:71]
2690 ; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
2691 ; GFX10-NEXT: s_waitcnt vmcnt(9)
2692 ; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
2693 ; GFX10-NEXT: s_waitcnt vmcnt(8)
2694 ; GFX10-NEXT: v_max_f64 v[50:51], v[16:17], v[48:49]
2695 ; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[48:49]
2696 ; GFX10-NEXT: v_max_f64 v[48:49], v[18:19], v[80:81]
2697 ; GFX10-NEXT: v_max_f64 v[64:65], v[22:23], v[68:69]
2698 ; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
2699 ; GFX10-NEXT: s_waitcnt vmcnt(7)
2700 ; GFX10-NEXT: v_max_f64 v[68:69], v[24:25], v[66:67]
2701 ; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
2702 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s7
2703 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, s8
2704 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, s8
2705 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v34, 0, s6
2706 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v35, 0x7ff80000, s6
2707 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s7
2708 ; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
2709 ; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
2710 ; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, 0, s10
2711 ; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s10
2712 ; GFX10-NEXT: v_cndmask_b32_e64 v16, v50, 0, s11
2713 ; GFX10-NEXT: v_cndmask_b32_e64 v17, v51, 0x7ff80000, s11
2714 ; GFX10-NEXT: v_cndmask_b32_e64 v18, v48, 0, s12
2715 ; GFX10-NEXT: v_cndmask_b32_e64 v19, v49, 0x7ff80000, s12
2716 ; GFX10-NEXT: v_cndmask_b32_e64 v20, v54, 0, s13
2717 ; GFX10-NEXT: v_cndmask_b32_e64 v21, v55, 0x7ff80000, s13
2718 ; GFX10-NEXT: v_cndmask_b32_e64 v22, v64, 0, s14
2719 ; GFX10-NEXT: v_cndmask_b32_e64 v23, v65, 0x7ff80000, s14
2720 ; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
2721 ; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
2722 ; GFX10-NEXT: s_waitcnt vmcnt(5)
2723 ; GFX10-NEXT: v_max_f64 v[70:71], v[28:29], v[2:3]
2724 ; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
2725 ; GFX10-NEXT: s_waitcnt vmcnt(3)
2726 ; GFX10-NEXT: v_max_f64 v[66:67], v[26:27], v[4:5]
2727 ; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[4:5]
2728 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v82, 0, vcc_lo
2729 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2730 ; GFX10-NEXT: v_max_f64 v[80:81], v[30:31], v[6:7]
2731 ; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[6:7]
2732 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v83, 0x7ff80000, vcc_lo
2733 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v84, 0, s4
2734 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v85, 0x7ff80000, s4
2735 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v32, 0, s5
2736 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v33, 0x7ff80000, s5
2737 ; GFX10-NEXT: v_cndmask_b32_e64 v28, v70, 0, s17
2738 ; GFX10-NEXT: v_cndmask_b32_e64 v29, v71, 0x7ff80000, s17
2739 ; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
2740 ; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
2741 ; GFX10-NEXT: v_cndmask_b32_e64 v30, v80, 0, s18
2742 ; GFX10-NEXT: v_cndmask_b32_e64 v31, v81, 0x7ff80000, s18
2743 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2745 ; GFX11-LABEL: v_maximum_v16f64:
2747 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2748 ; GFX11-NEXT: s_clause 0x1f
2749 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
2750 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
2751 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
2752 ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
2753 ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
2754 ; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
2755 ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
2756 ; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
2757 ; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
2758 ; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
2759 ; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
2760 ; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
2761 ; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
2762 ; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
2763 ; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
2764 ; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
2765 ; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
2766 ; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
2767 ; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
2768 ; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
2769 ; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
2770 ; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
2771 ; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
2772 ; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
2773 ; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
2774 ; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
2775 ; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
2776 ; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
2777 ; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
2778 ; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
2779 ; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
2780 ; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
2781 ; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
2782 ; GFX11-NEXT: s_waitcnt vmcnt(30)
2783 ; GFX11-NEXT: v_max_f64 v[96:97], v[0:1], v[32:33]
2784 ; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
2785 ; GFX11-NEXT: s_waitcnt vmcnt(28)
2786 ; GFX11-NEXT: v_max_f64 v[32:33], v[2:3], v[34:35]
2787 ; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
2788 ; GFX11-NEXT: s_waitcnt vmcnt(26)
2789 ; GFX11-NEXT: v_max_f64 v[34:35], v[4:5], v[36:37]
2790 ; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
2791 ; GFX11-NEXT: s_waitcnt vmcnt(24)
2792 ; GFX11-NEXT: v_max_f64 v[36:37], v[6:7], v[38:39]
2793 ; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
2794 ; GFX11-NEXT: s_waitcnt vmcnt(22)
2795 ; GFX11-NEXT: v_max_f64 v[38:39], v[8:9], v[48:49]
2796 ; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
2797 ; GFX11-NEXT: s_waitcnt vmcnt(20)
2798 ; GFX11-NEXT: v_max_f64 v[48:49], v[10:11], v[50:51]
2799 ; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
2800 ; GFX11-NEXT: s_waitcnt vmcnt(18)
2801 ; GFX11-NEXT: v_max_f64 v[50:51], v[12:13], v[52:53]
2802 ; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
2803 ; GFX11-NEXT: s_waitcnt vmcnt(16)
2804 ; GFX11-NEXT: v_max_f64 v[52:53], v[14:15], v[54:55]
2805 ; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
2806 ; GFX11-NEXT: s_waitcnt vmcnt(14)
2807 ; GFX11-NEXT: v_max_f64 v[54:55], v[16:17], v[64:65]
2808 ; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
2809 ; GFX11-NEXT: s_waitcnt vmcnt(12)
2810 ; GFX11-NEXT: v_max_f64 v[64:65], v[18:19], v[66:67]
2811 ; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
2812 ; GFX11-NEXT: s_waitcnt vmcnt(10)
2813 ; GFX11-NEXT: v_max_f64 v[66:67], v[20:21], v[68:69]
2814 ; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
2815 ; GFX11-NEXT: s_waitcnt vmcnt(8)
2816 ; GFX11-NEXT: v_max_f64 v[68:69], v[22:23], v[70:71]
2817 ; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
2818 ; GFX11-NEXT: s_waitcnt vmcnt(6)
2819 ; GFX11-NEXT: v_max_f64 v[70:71], v[24:25], v[80:81]
2820 ; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
2821 ; GFX11-NEXT: s_waitcnt vmcnt(4)
2822 ; GFX11-NEXT: v_max_f64 v[80:81], v[26:27], v[82:83]
2823 ; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
2824 ; GFX11-NEXT: s_waitcnt vmcnt(2)
2825 ; GFX11-NEXT: v_max_f64 v[82:83], v[28:29], v[84:85]
2826 ; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
2827 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2828 ; GFX11-NEXT: v_max_f64 v[84:85], v[30:31], v[86:87]
2829 ; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
2830 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
2831 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
2832 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
2833 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
2834 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
2835 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
2836 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
2837 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
2838 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
2839 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
2840 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
2841 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
2842 ; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
2843 ; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
2844 ; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
2845 ; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
2846 ; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
2847 ; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
2848 ; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
2849 ; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
2850 ; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
2851 ; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
2852 ; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
2853 ; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
2854 ; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
2855 ; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
2856 ; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
2857 ; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
2858 ; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
2859 ; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
2860 ; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
2861 ; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
2862 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2864 ; GFX12-LABEL: v_maximum_v16f64:
2866 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2867 ; GFX12-NEXT: s_wait_expcnt 0x0
2868 ; GFX12-NEXT: s_wait_samplecnt 0x0
2869 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2870 ; GFX12-NEXT: s_wait_kmcnt 0x0
2871 ; GFX12-NEXT: s_clause 0x1f
2872 ; GFX12-NEXT: scratch_load_b32 v31, off, s32
2873 ; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:8
2874 ; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:4
2875 ; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:16
2876 ; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:12
2877 ; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:24
2878 ; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:20
2879 ; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:32
2880 ; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:28
2881 ; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:40
2882 ; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:36
2883 ; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:48
2884 ; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:44
2885 ; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:56
2886 ; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:52
2887 ; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:64
2888 ; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:60
2889 ; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:72
2890 ; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:68
2891 ; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:80
2892 ; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:76
2893 ; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:88
2894 ; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:84
2895 ; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:96
2896 ; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:92
2897 ; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:104
2898 ; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:100
2899 ; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:112
2900 ; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:108
2901 ; GFX12-NEXT: scratch_load_b32 v85, off, s32 offset:120
2902 ; GFX12-NEXT: scratch_load_b32 v84, off, s32 offset:116
2903 ; GFX12-NEXT: scratch_load_b32 v87, off, s32 offset:128
2904 ; GFX12-NEXT: scratch_load_b32 v86, off, s32 offset:124
2905 ; GFX12-NEXT: s_wait_loadcnt 0x1e
2906 ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
2907 ; GFX12-NEXT: s_wait_loadcnt 0x1c
2908 ; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
2909 ; GFX12-NEXT: s_wait_loadcnt 0x1a
2910 ; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[36:37]
2911 ; GFX12-NEXT: s_wait_loadcnt 0x18
2912 ; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[38:39]
2913 ; GFX12-NEXT: s_wait_loadcnt 0x16
2914 ; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[48:49]
2915 ; GFX12-NEXT: s_wait_loadcnt 0x14
2916 ; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[50:51]
2917 ; GFX12-NEXT: s_wait_loadcnt 0x12
2918 ; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[52:53]
2919 ; GFX12-NEXT: s_wait_loadcnt 0x10
2920 ; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[54:55]
2921 ; GFX12-NEXT: s_wait_loadcnt 0xe
2922 ; GFX12-NEXT: v_maximum_f64 v[16:17], v[16:17], v[64:65]
2923 ; GFX12-NEXT: s_wait_loadcnt 0xc
2924 ; GFX12-NEXT: v_maximum_f64 v[18:19], v[18:19], v[66:67]
2925 ; GFX12-NEXT: s_wait_loadcnt 0xa
2926 ; GFX12-NEXT: v_maximum_f64 v[20:21], v[20:21], v[68:69]
2927 ; GFX12-NEXT: s_wait_loadcnt 0x8
2928 ; GFX12-NEXT: v_maximum_f64 v[22:23], v[22:23], v[70:71]
2929 ; GFX12-NEXT: s_wait_loadcnt 0x6
2930 ; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[80:81]
2931 ; GFX12-NEXT: s_wait_loadcnt 0x4
2932 ; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[82:83]
2933 ; GFX12-NEXT: s_wait_loadcnt 0x2
2934 ; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[84:85]
2935 ; GFX12-NEXT: s_wait_loadcnt 0x0
2936 ; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[86:87]
2937 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2938 %op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1)
2939 ret <16 x double> %op
2941 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: