1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
9 ; We want to undo these canonicalizations to enable mad matching:
10 ; (x * y) + x --> x * (y + 1)
11 ; (x * y) - x --> x * (y - 1)
; i32 multiply of %x by %add. The definition of %add is not visible in this
; excerpt; the function name suggests %y + 1 (TODO confirm).
; Expected codegen per the checks below: v_mul_lo_u32 followed by an add on
; gfx6/gfx7/gfx8, and a single v_mad_u64_u32 on gfx9 and gfx10.
13 define i32 @v_mul_add_1_i32(i32 %x, i32 %y) {
14 ; GFX67-LABEL: v_mul_add_1_i32:
16 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
18 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
19 ; GFX67-NEXT: s_setpc_b64 s[30:31]
21 ; GFX8-LABEL: v_mul_add_1_i32:
23 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
25 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
26 ; GFX8-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-LABEL: v_mul_add_1_i32:
30 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
32 ; GFX9-NEXT: s_setpc_b64 s[30:31]
34 ; GFX10-LABEL: v_mul_add_1_i32:
36 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
38 ; GFX10-NEXT: s_setpc_b64 s[30:31]
40 %mul = mul i32 %x, %add
; i32 multiply with %add as the left-hand operand (mul i32 %add, %x). The
; definition of %add is not visible in this excerpt; the function name
; suggests %y + 1 (TODO confirm).
; Expected codegen per the checks below: v_mul_lo_u32 followed by an add on
; gfx6/gfx7/gfx8, and a single v_mad_u64_u32 on gfx9 and gfx10.
44 define i32 @v_mul_add_1_i32_commute(i32 %x, i32 %y) {
45 ; GFX67-LABEL: v_mul_add_1_i32_commute:
47 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
49 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
50 ; GFX67-NEXT: s_setpc_b64 s[30:31]
52 ; GFX8-LABEL: v_mul_add_1_i32_commute:
54 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
56 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
57 ; GFX8-NEXT: s_setpc_b64 s[30:31]
59 ; GFX9-LABEL: v_mul_add_1_i32_commute:
61 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
63 ; GFX9-NEXT: s_setpc_b64 s[30:31]
65 ; GFX10-LABEL: v_mul_add_1_i32_commute:
67 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
69 ; GFX10-NEXT: s_setpc_b64 s[30:31]
71 %mul = mul i32 %add, %x
; Already-expanded i32 form: %x + (%x * %y).
; Expected codegen per the checks below: v_mul_lo_u32 followed by an add on
; gfx6/gfx7/gfx8, and a single v_mad_u64_u32 on gfx9 and gfx10.
75 define i32 @v_mul_add_x_i32(i32 %x, i32 %y) {
76 ; GFX67-LABEL: v_mul_add_x_i32:
78 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
80 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v1
81 ; GFX67-NEXT: s_setpc_b64 s[30:31]
83 ; GFX8-LABEL: v_mul_add_x_i32:
85 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
87 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
88 ; GFX8-NEXT: s_setpc_b64 s[30:31]
90 ; GFX9-LABEL: v_mul_add_x_i32:
92 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
94 ; GFX9-NEXT: s_setpc_b64 s[30:31]
96 ; GFX10-LABEL: v_mul_add_x_i32:
98 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
100 ; GFX10-NEXT: s_setpc_b64 s[30:31]
101 %mul = mul i32 %x, %y
102 %add = add i32 %x, %mul
; i32 multiply of %x by %sub. The definition of %sub is not visible in this
; excerpt; the function name suggests %y - 1 (TODO confirm).
; The checks below expect an add of -1 followed by v_mul_lo_u32 on every
; target, i.e. no mad instruction is formed here.
106 define i32 @v_mul_sub_1_i32(i32 %x, i32 %y) {
107 ; GFX67-LABEL: v_mul_sub_1_i32:
109 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
111 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
112 ; GFX67-NEXT: s_setpc_b64 s[30:31]
114 ; GFX8-LABEL: v_mul_sub_1_i32:
116 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
118 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
119 ; GFX8-NEXT: s_setpc_b64 s[30:31]
121 ; GFX9-LABEL: v_mul_sub_1_i32:
123 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
125 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
126 ; GFX9-NEXT: s_setpc_b64 s[30:31]
128 ; GFX10-LABEL: v_mul_sub_1_i32:
130 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
132 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
133 ; GFX10-NEXT: s_setpc_b64 s[30:31]
135 %mul = mul i32 %x, %sub
; i32 multiply with %sub as the left-hand operand (mul i32 %sub, %x). The
; definition of %sub is not visible in this excerpt; the function name
; suggests %y - 1 (TODO confirm).
; The checks below expect an add of -1 followed by v_mul_lo_u32 (with the
; adjusted value as the first source operand) on every target.
139 define i32 @v_mul_sub_1_i32_commute(i32 %x, i32 %y) {
140 ; GFX67-LABEL: v_mul_sub_1_i32_commute:
142 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
144 ; GFX67-NEXT: v_mul_lo_u32 v0, v1, v0
145 ; GFX67-NEXT: s_setpc_b64 s[30:31]
147 ; GFX8-LABEL: v_mul_sub_1_i32_commute:
149 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
151 ; GFX8-NEXT: v_mul_lo_u32 v0, v1, v0
152 ; GFX8-NEXT: s_setpc_b64 s[30:31]
154 ; GFX9-LABEL: v_mul_sub_1_i32_commute:
156 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
158 ; GFX9-NEXT: v_mul_lo_u32 v0, v1, v0
159 ; GFX9-NEXT: s_setpc_b64 s[30:31]
161 ; GFX10-LABEL: v_mul_sub_1_i32_commute:
163 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
165 ; GFX10-NEXT: v_mul_lo_u32 v0, v1, v0
166 ; GFX10-NEXT: s_setpc_b64 s[30:31]
168 %mul = mul i32 %sub, %x
; Already-expanded i32 form: (%x * %y) - %x.
; The checks below expect v_mul_lo_u32 followed by a subtract on every target.
172 define i32 @v_mul_sub_x_i32(i32 %x, i32 %y) {
173 ; GFX67-LABEL: v_mul_sub_x_i32:
175 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
177 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
178 ; GFX67-NEXT: s_setpc_b64 s[30:31]
180 ; GFX8-LABEL: v_mul_sub_x_i32:
182 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
184 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
185 ; GFX8-NEXT: s_setpc_b64 s[30:31]
187 ; GFX9-LABEL: v_mul_sub_x_i32:
189 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
191 ; GFX9-NEXT: v_sub_u32_e32 v0, v1, v0
192 ; GFX9-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10-LABEL: v_mul_sub_x_i32:
196 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
198 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v1, v0
199 ; GFX10-NEXT: s_setpc_b64 s[30:31]
200 %mul = mul i32 %x, %y
201 %sub = sub i32 %mul, %x
; i32 multiply of %x by %add. The definition of %add is not visible in this
; excerpt; the function name suggests %y + 2 (TODO confirm).
; The checks below expect an add of 2 followed by v_mul_lo_u32 on every
; target, i.e. no mad instruction is formed for this constant.
205 define i32 @v_mul_add_2_i32(i32 %x, i32 %y) {
206 ; GFX67-LABEL: v_mul_add_2_i32:
208 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 2, v1
210 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
211 ; GFX67-NEXT: s_setpc_b64 s[30:31]
213 ; GFX8-LABEL: v_mul_add_2_i32:
215 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 2, v1
217 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
218 ; GFX8-NEXT: s_setpc_b64 s[30:31]
220 ; GFX9-LABEL: v_mul_add_2_i32:
222 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX9-NEXT: v_add_u32_e32 v1, 2, v1
224 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
227 ; GFX10-LABEL: v_mul_add_2_i32:
229 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 2, v1
231 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
232 ; GFX10-NEXT: s_setpc_b64 s[30:31]
234 %mul = mul i32 %x, %add
; i32 multiply of %x by %sub. The definition of %sub is not visible in this
; excerpt; the function name suggests %y - 2 (TODO confirm).
; The checks below expect an add of -2 followed by v_mul_lo_u32 on every target.
238 define i32 @v_mul_sub_2_i32(i32 %x, i32 %y) {
239 ; GFX67-LABEL: v_mul_sub_2_i32:
241 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -2, v1
243 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
244 ; GFX67-NEXT: s_setpc_b64 s[30:31]
246 ; GFX8-LABEL: v_mul_sub_2_i32:
248 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -2, v1
250 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
251 ; GFX8-NEXT: s_setpc_b64 s[30:31]
253 ; GFX9-LABEL: v_mul_sub_2_i32:
255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX9-NEXT: v_add_u32_e32 v1, -2, v1
257 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
258 ; GFX9-NEXT: s_setpc_b64 s[30:31]
260 ; GFX10-LABEL: v_mul_sub_2_i32:
262 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -2, v1
264 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
265 ; GFX10-NEXT: s_setpc_b64 s[30:31]
267 %mul = mul i32 %x, %sub
; i32 form %x * (%y + 65).
; The checks below expect an add of the literal 0x41 (65) followed by
; v_mul_lo_u32 on every target.
271 define i32 @v_mul_add_65_i32(i32 %x, i32 %y) {
272 ; GFX67-LABEL: v_mul_add_65_i32:
274 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x41, v1
276 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
277 ; GFX67-NEXT: s_setpc_b64 s[30:31]
279 ; GFX8-LABEL: v_mul_add_65_i32:
281 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x41, v1
283 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
284 ; GFX8-NEXT: s_setpc_b64 s[30:31]
286 ; GFX9-LABEL: v_mul_add_65_i32:
288 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX9-NEXT: v_add_u32_e32 v1, 0x41, v1
290 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
291 ; GFX9-NEXT: s_setpc_b64 s[30:31]
293 ; GFX10-LABEL: v_mul_add_65_i32:
295 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x41, v1
297 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
298 ; GFX10-NEXT: s_setpc_b64 s[30:31]
299 %add = add i32 %y, 65
300 %mul = mul i32 %x, %add
; i32 form %x * (%y - 65).
; The checks below expect the subtraction to appear as an add of the literal
; 0xffffffbf (-65 as a 32-bit value), followed by v_mul_lo_u32, on every target.
304 define i32 @v_mul_sub_65_i32(i32 %x, i32 %y) {
305 ; GFX67-LABEL: v_mul_sub_65_i32:
307 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0xffffffbf, v1
309 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
310 ; GFX67-NEXT: s_setpc_b64 s[30:31]
312 ; GFX8-LABEL: v_mul_sub_65_i32:
314 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xffffffbf, v1
316 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
317 ; GFX8-NEXT: s_setpc_b64 s[30:31]
319 ; GFX9-LABEL: v_mul_sub_65_i32:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
323 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
324 ; GFX9-NEXT: s_setpc_b64 s[30:31]
326 ; GFX10-LABEL: v_mul_sub_65_i32:
328 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
330 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
331 ; GFX10-NEXT: s_setpc_b64 s[30:31]
332 %sub = sub i32 %y, 65
333 %mul = mul i32 %x, %sub
; i24 multiply of %x by %add with zeroext arguments and return. The definition
; of %add is not visible in this excerpt; the function name suggests %y + 1
; (TODO confirm).
; The checks below expect a single v_mad_u32_u24 on every target.
337 define i24 @v_mul_add_1_i24_zext(i24 zeroext %x, i24 zeroext %y) {
338 ; GFX67-LABEL: v_mul_add_1_i24_zext:
340 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
342 ; GFX67-NEXT: s_setpc_b64 s[30:31]
344 ; GFX8-LABEL: v_mul_add_1_i24_zext:
346 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
348 ; GFX8-NEXT: s_setpc_b64 s[30:31]
350 ; GFX9-LABEL: v_mul_add_1_i24_zext:
352 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
354 ; GFX9-NEXT: s_setpc_b64 s[30:31]
356 ; GFX10-LABEL: v_mul_add_1_i24_zext:
358 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
360 ; GFX10-NEXT: s_setpc_b64 s[30:31]
362 %mul = mul i24 %x, %add
; i24 multiply of %x by %sub with zeroext arguments. The definition of %sub is
; not visible in this excerpt; the function name suggests %y - 1 (TODO confirm).
; The checks below expect an add of -1 followed by v_mul_u32_u24 on every
; target (no mad is formed).
366 define i24 @v_mul_sub_1_i24_zext(i24 zeroext %x, i24 zeroext %y) {
367 ; GFX67-LABEL: v_mul_sub_1_i24_zext:
369 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
371 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
372 ; GFX67-NEXT: s_setpc_b64 s[30:31]
374 ; GFX8-LABEL: v_mul_sub_1_i24_zext:
376 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
378 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
379 ; GFX8-NEXT: s_setpc_b64 s[30:31]
381 ; GFX9-LABEL: v_mul_sub_1_i24_zext:
383 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
385 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
386 ; GFX9-NEXT: s_setpc_b64 s[30:31]
388 ; GFX10-LABEL: v_mul_sub_1_i24_zext:
390 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
392 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
393 ; GFX10-NEXT: s_setpc_b64 s[30:31]
395 %mul = mul i24 %x, %sub
; Already-expanded i24 form with zeroext arguments: (%x * %y) + %x.
; The checks below expect a single v_mad_u32_u24 on every target.
399 define i24 @v_add_mul_i24_zext_1(i24 zeroext %x, i24 zeroext %y) {
400 ; GFX67-LABEL: v_add_mul_i24_zext_1:
402 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
404 ; GFX67-NEXT: s_setpc_b64 s[30:31]
406 ; GFX8-LABEL: v_add_mul_i24_zext_1:
408 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
410 ; GFX8-NEXT: s_setpc_b64 s[30:31]
412 ; GFX9-LABEL: v_add_mul_i24_zext_1:
414 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
416 ; GFX9-NEXT: s_setpc_b64 s[30:31]
418 ; GFX10-LABEL: v_add_mul_i24_zext_1:
420 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
422 ; GFX10-NEXT: s_setpc_b64 s[30:31]
423 %mul = mul i24 %x, %y
424 %add = add i24 %mul, %x
; i24 multiply of %x by %add with signext arguments and return. The definition
; of %add is not visible in this excerpt; the function name suggests %y + 1
; (TODO confirm).
; The checks below expect a single v_mad_u32_u24 on every target.
428 define i24 @v_mul_add_1_i24_sext(i24 signext %x, i24 signext %y) {
429 ; GFX67-LABEL: v_mul_add_1_i24_sext:
431 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
433 ; GFX67-NEXT: s_setpc_b64 s[30:31]
435 ; GFX8-LABEL: v_mul_add_1_i24_sext:
437 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
439 ; GFX8-NEXT: s_setpc_b64 s[30:31]
441 ; GFX9-LABEL: v_mul_add_1_i24_sext:
443 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
445 ; GFX9-NEXT: s_setpc_b64 s[30:31]
447 ; GFX10-LABEL: v_mul_add_1_i24_sext:
449 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
451 ; GFX10-NEXT: s_setpc_b64 s[30:31]
453 %mul = mul i24 %x, %add
; Already-expanded i24 form with signext arguments: (%x * %y) + %x.
; The checks below expect a single v_mad_u32_u24 on every target.
457 define i24 @v_add_mul_i24_sext_1(i24 signext %x, i24 signext %y) {
458 ; GFX67-LABEL: v_add_mul_i24_sext_1:
460 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
462 ; GFX67-NEXT: s_setpc_b64 s[30:31]
464 ; GFX8-LABEL: v_add_mul_i24_sext_1:
466 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
468 ; GFX8-NEXT: s_setpc_b64 s[30:31]
470 ; GFX9-LABEL: v_add_mul_i24_sext_1:
472 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
474 ; GFX9-NEXT: s_setpc_b64 s[30:31]
476 ; GFX10-LABEL: v_add_mul_i24_sext_1:
478 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
479 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
480 ; GFX10-NEXT: s_setpc_b64 s[30:31]
481 %mul = mul i24 %x, %y
482 %add = add i24 %mul, %x
; i24 multiply of %x by %sub with signext arguments. The definition of %sub is
; not visible in this excerpt; the function name suggests %y - 1 (TODO confirm).
; The checks below expect an add of -1 followed by v_mul_u32_u24 on every
; target (no mad is formed).
486 define i24 @v_mul_sub_1_i24_sext(i24 signext %x, i24 signext %y) {
487 ; GFX67-LABEL: v_mul_sub_1_i24_sext:
489 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
491 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
492 ; GFX67-NEXT: s_setpc_b64 s[30:31]
494 ; GFX8-LABEL: v_mul_sub_1_i24_sext:
496 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
498 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
499 ; GFX8-NEXT: s_setpc_b64 s[30:31]
501 ; GFX9-LABEL: v_mul_sub_1_i24_sext:
503 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
505 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
506 ; GFX9-NEXT: s_setpc_b64 s[30:31]
508 ; GFX10-LABEL: v_mul_sub_1_i24_sext:
510 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
512 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
513 ; GFX10-NEXT: s_setpc_b64 s[30:31]
515 %mul = mul i24 %x, %sub
; i25 multiply of %x by %add with zeroext arguments. The definition of %add is
; not visible in this excerpt; the function name suggests %y + 1 (TODO confirm).
; The checks below expect the full 32-bit path rather than the 24-bit mad:
; v_mul_lo_u32 followed by an add on gfx6/gfx7/gfx8, v_mad_u64_u32 on gfx9/gfx10.
519 define i25 @v_mul_add_1_i25_zext(i25 zeroext %x, i25 zeroext %y) {
520 ; GFX67-LABEL: v_mul_add_1_i25_zext:
522 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
524 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
525 ; GFX67-NEXT: s_setpc_b64 s[30:31]
527 ; GFX8-LABEL: v_mul_add_1_i25_zext:
529 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
531 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
532 ; GFX8-NEXT: s_setpc_b64 s[30:31]
534 ; GFX9-LABEL: v_mul_add_1_i25_zext:
536 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
538 ; GFX9-NEXT: s_setpc_b64 s[30:31]
540 ; GFX10-LABEL: v_mul_add_1_i25_zext:
542 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
544 ; GFX10-NEXT: s_setpc_b64 s[30:31]
546 %mul = mul i25 %x, %add
; i25 multiply of %x by %sub with zeroext arguments. The definition of %sub is
; not visible in this excerpt; the function name suggests %y - 1 (TODO confirm).
; The checks below expect an add of the literal 0x1ffffff (all 25 low bits
; set) followed by v_mul_lo_u32 on every target.
550 define i25 @v_mul_sub_1_i25_zext(i25 zeroext %x, i25 zeroext %y) {
551 ; GFX67-LABEL: v_mul_sub_1_i25_zext:
553 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x1ffffff, v1
555 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
556 ; GFX67-NEXT: s_setpc_b64 s[30:31]
558 ; GFX8-LABEL: v_mul_sub_1_i25_zext:
560 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x1ffffff, v1
562 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
563 ; GFX8-NEXT: s_setpc_b64 s[30:31]
565 ; GFX9-LABEL: v_mul_sub_1_i25_zext:
567 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568 ; GFX9-NEXT: v_add_u32_e32 v1, 0x1ffffff, v1
569 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
570 ; GFX9-NEXT: s_setpc_b64 s[30:31]
572 ; GFX10-LABEL: v_mul_sub_1_i25_zext:
574 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
576 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
577 ; GFX10-NEXT: s_setpc_b64 s[30:31]
579 %mul = mul i25 %x, %sub
; i25 multiply of %x by %add with signext arguments. The definition of %add is
; not visible in this excerpt; the function name suggests %y + 1 (TODO confirm).
; The checks below expect the full 32-bit path rather than the 24-bit mad:
; v_mul_lo_u32 followed by an add on gfx6/gfx7/gfx8, v_mad_u64_u32 on gfx9/gfx10.
583 define i25 @v_mul_add_1_i25_sext(i25 signext %x, i25 signext %y) {
584 ; GFX67-LABEL: v_mul_add_1_i25_sext:
586 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
588 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
589 ; GFX67-NEXT: s_setpc_b64 s[30:31]
591 ; GFX8-LABEL: v_mul_add_1_i25_sext:
593 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
594 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
595 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
596 ; GFX8-NEXT: s_setpc_b64 s[30:31]
598 ; GFX9-LABEL: v_mul_add_1_i25_sext:
600 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
602 ; GFX9-NEXT: s_setpc_b64 s[30:31]
604 ; GFX10-LABEL: v_mul_add_1_i25_sext:
606 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
608 ; GFX10-NEXT: s_setpc_b64 s[30:31]
610 %mul = mul i25 %x, %add
; i25 multiply of %x by %sub with signext arguments. The definition of %sub is
; not visible in this excerpt; the function name suggests %y - 1 (TODO confirm).
; The checks below expect an add of the literal 0x1ffffff (all 25 low bits
; set) followed by v_mul_lo_u32 on every target.
614 define i25 @v_mul_sub_1_i25_sext(i25 signext %x, i25 signext %y) {
615 ; GFX67-LABEL: v_mul_sub_1_i25_sext:
617 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x1ffffff, v1
619 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
620 ; GFX67-NEXT: s_setpc_b64 s[30:31]
622 ; GFX8-LABEL: v_mul_sub_1_i25_sext:
624 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x1ffffff, v1
626 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
627 ; GFX8-NEXT: s_setpc_b64 s[30:31]
629 ; GFX9-LABEL: v_mul_sub_1_i25_sext:
631 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632 ; GFX9-NEXT: v_add_u32_e32 v1, 0x1ffffff, v1
633 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
634 ; GFX9-NEXT: s_setpc_b64 s[30:31]
636 ; GFX10-LABEL: v_mul_sub_1_i25_sext:
638 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
640 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
641 ; GFX10-NEXT: s_setpc_b64 s[30:31]
643 %mul = mul i25 %x, %sub
; i16 multiply of %x by %add. The definition of %add is not visible in this
; excerpt; the function name suggests %y + 1 (TODO confirm).
; Per the checks below: gfx6/gfx7 add 1, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8 and gfx10 use v_mad_u16; gfx9 uses v_mad_legacy_u16.
647 define i16 @v_mul_add_1_i16(i16 %x, i16 %y) {
648 ; GFX67-LABEL: v_mul_add_1_i16:
650 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
652 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
653 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
654 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
655 ; GFX67-NEXT: s_setpc_b64 s[30:31]
657 ; GFX8-LABEL: v_mul_add_1_i16:
659 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
661 ; GFX8-NEXT: s_setpc_b64 s[30:31]
663 ; GFX9-LABEL: v_mul_add_1_i16:
665 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
667 ; GFX9-NEXT: s_setpc_b64 s[30:31]
669 ; GFX10-LABEL: v_mul_add_1_i16:
671 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
673 ; GFX10-NEXT: s_setpc_b64 s[30:31]
675 %mul = mul i16 %x, %add
; i16 multiply of %x by %add whose result is zero-extended to i32. The
; definition of %add is not visible in this excerpt; the function name
; suggests %y + 1 (TODO confirm).
; Per the checks below: gfx6/gfx7 and gfx10 emit an explicit v_and_b32 with
; 0xffff after the multiply/mad, while the gfx8 and gfx9 checks contain only
; the 16-bit mad with no separate mask instruction.
679 define i32 @v_mul_add_1_i16_zext_result(i16 %x, i16 %y) {
680 ; GFX67-LABEL: v_mul_add_1_i16_zext_result:
682 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
684 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
685 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
686 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
687 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
688 ; GFX67-NEXT: s_setpc_b64 s[30:31]
690 ; GFX8-LABEL: v_mul_add_1_i16_zext_result:
692 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
693 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
694 ; GFX8-NEXT: s_setpc_b64 s[30:31]
696 ; GFX9-LABEL: v_mul_add_1_i16_zext_result:
698 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
700 ; GFX9-NEXT: s_setpc_b64 s[30:31]
702 ; GFX10-LABEL: v_mul_add_1_i16_zext_result:
704 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
706 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
707 ; GFX10-NEXT: s_setpc_b64 s[30:31]
709 %mul = mul i16 %x, %add
710 %zext = zext i16 %mul to i32
; i16 multiply with %add as the left-hand operand (mul i16 %add, %x). The
; definition of %add is not visible in this excerpt; the function name
; suggests %y + 1 (TODO confirm).
; Per the checks below: gfx6/gfx7 add 1, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8 and gfx10 use v_mad_u16; gfx9 uses v_mad_legacy_u16.
714 define i16 @v_mul_add_1_i16_commute(i16 %x, i16 %y) {
715 ; GFX67-LABEL: v_mul_add_1_i16_commute:
717 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
719 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
720 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
721 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
722 ; GFX67-NEXT: s_setpc_b64 s[30:31]
724 ; GFX8-LABEL: v_mul_add_1_i16_commute:
726 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
728 ; GFX8-NEXT: s_setpc_b64 s[30:31]
730 ; GFX9-LABEL: v_mul_add_1_i16_commute:
732 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
734 ; GFX9-NEXT: s_setpc_b64 s[30:31]
736 ; GFX10-LABEL: v_mul_add_1_i16_commute:
738 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
740 ; GFX10-NEXT: s_setpc_b64 s[30:31]
742 %mul = mul i16 %add, %x
; Already-expanded i16 form: %x + (%x * %y).
; Per the checks below: gfx6/gfx7 mask both multiplicands with 0xffff and use
; v_mad_u32_u24 with the unmasked v0 as addend; gfx8 and gfx10 use v_mad_u16;
; gfx9 uses v_mad_legacy_u16.
746 define i16 @v_mul_add_x_i16(i16 %x, i16 %y) {
747 ; GFX67-LABEL: v_mul_add_x_i16:
749 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v0
751 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
752 ; GFX67-NEXT: v_mad_u32_u24 v0, v2, v1, v0
753 ; GFX67-NEXT: s_setpc_b64 s[30:31]
755 ; GFX8-LABEL: v_mul_add_x_i16:
757 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
759 ; GFX8-NEXT: s_setpc_b64 s[30:31]
761 ; GFX9-LABEL: v_mul_add_x_i16:
763 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
765 ; GFX9-NEXT: s_setpc_b64 s[30:31]
767 ; GFX10-LABEL: v_mul_add_x_i16:
769 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
770 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
771 ; GFX10-NEXT: s_setpc_b64 s[30:31]
772 %mul = mul i16 %x, %y
773 %add = add i16 %x, %mul
; i16 multiply of %x by %sub. The definition of %sub is not visible in this
; excerpt; the function name suggests %y - 1 (TODO confirm).
; Per the checks below: gfx6/gfx7 add -1, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8/gfx9/gfx10 use a 16-bit add of -1 followed by a
; 16-bit v_mul_lo_u16 (no mad is formed).
777 define i16 @v_mul_sub_1_i16(i16 %x, i16 %y) {
778 ; GFX67-LABEL: v_mul_sub_1_i16:
780 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
781 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
782 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
783 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
784 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
785 ; GFX67-NEXT: s_setpc_b64 s[30:31]
787 ; GFX8-LABEL: v_mul_sub_1_i16:
789 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX8-NEXT: v_add_u16_e32 v1, -1, v1
791 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
792 ; GFX8-NEXT: s_setpc_b64 s[30:31]
794 ; GFX9-LABEL: v_mul_sub_1_i16:
796 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797 ; GFX9-NEXT: v_add_u16_e32 v1, -1, v1
798 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
799 ; GFX9-NEXT: s_setpc_b64 s[30:31]
801 ; GFX10-LABEL: v_mul_sub_1_i16:
803 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
805 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
806 ; GFX10-NEXT: s_setpc_b64 s[30:31]
808 %mul = mul i16 %x, %sub
; i16 multiply with %sub as the left-hand operand (mul i16 %sub, %x). The
; definition of %sub is not visible in this excerpt; the function name
; suggests %y - 1 (TODO confirm).
; Per the checks below: gfx6/gfx7 add -1, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8/gfx9/gfx10 use a 16-bit add of -1 followed by a
; 16-bit v_mul_lo_u16 with the adjusted value as the first source operand.
812 define i16 @v_mul_sub_1_i16_commute(i16 %x, i16 %y) {
813 ; GFX67-LABEL: v_mul_sub_1_i16_commute:
815 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
817 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
818 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
819 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
820 ; GFX67-NEXT: s_setpc_b64 s[30:31]
822 ; GFX8-LABEL: v_mul_sub_1_i16_commute:
824 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX8-NEXT: v_add_u16_e32 v1, -1, v1
826 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v1, v0
827 ; GFX8-NEXT: s_setpc_b64 s[30:31]
829 ; GFX9-LABEL: v_mul_sub_1_i16_commute:
831 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; GFX9-NEXT: v_add_u16_e32 v1, -1, v1
833 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v1, v0
834 ; GFX9-NEXT: s_setpc_b64 s[30:31]
836 ; GFX10-LABEL: v_mul_sub_1_i16_commute:
838 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
840 ; GFX10-NEXT: v_mul_lo_u16 v0, v1, v0
841 ; GFX10-NEXT: s_setpc_b64 s[30:31]
843 %mul = mul i16 %sub, %x
; Already-expanded i16 form: (%x * %y) - %x.
; Per the checks below: gfx6/gfx7 mask both multiplicands with 0xffff, use
; v_mul_u32_u24 and then a 32-bit subtract of the unmasked v0; gfx8/gfx9/gfx10
; use a 16-bit v_mul_lo_u16 followed by a 16-bit subtract.
847 define i16 @v_mul_sub_x_i16(i16 %x, i16 %y) {
848 ; GFX67-LABEL: v_mul_sub_x_i16:
850 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
851 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v0
852 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
853 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v2, v1
854 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
855 ; GFX67-NEXT: s_setpc_b64 s[30:31]
857 ; GFX8-LABEL: v_mul_sub_x_i16:
859 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860 ; GFX8-NEXT: v_mul_lo_u16_e32 v1, v0, v1
861 ; GFX8-NEXT: v_sub_u16_e32 v0, v1, v0
862 ; GFX8-NEXT: s_setpc_b64 s[30:31]
864 ; GFX9-LABEL: v_mul_sub_x_i16:
866 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX9-NEXT: v_mul_lo_u16_e32 v1, v0, v1
868 ; GFX9-NEXT: v_sub_u16_e32 v0, v1, v0
869 ; GFX9-NEXT: s_setpc_b64 s[30:31]
871 ; GFX10-LABEL: v_mul_sub_x_i16:
873 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
874 ; GFX10-NEXT: v_mul_lo_u16 v1, v0, v1
875 ; GFX10-NEXT: v_sub_nc_u16 v0, v1, v0
876 ; GFX10-NEXT: s_setpc_b64 s[30:31]
877 %mul = mul i16 %x, %y
878 %sub = sub i16 %mul, %x
; i16 multiply of %x by %add. The definition of %add is not visible in this
; excerpt; the function name suggests %y + 2 (TODO confirm).
; Per the checks below: gfx6/gfx7 add 2, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8/gfx9/gfx10 use a 16-bit add of 2 followed by a
; 16-bit v_mul_lo_u16 (no mad is formed for this constant).
882 define i16 @v_mul_add_2_i16(i16 %x, i16 %y) {
883 ; GFX67-LABEL: v_mul_add_2_i16:
885 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
886 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 2, v1
887 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
888 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
889 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
890 ; GFX67-NEXT: s_setpc_b64 s[30:31]
892 ; GFX8-LABEL: v_mul_add_2_i16:
894 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
895 ; GFX8-NEXT: v_add_u16_e32 v1, 2, v1
896 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
897 ; GFX8-NEXT: s_setpc_b64 s[30:31]
899 ; GFX9-LABEL: v_mul_add_2_i16:
901 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902 ; GFX9-NEXT: v_add_u16_e32 v1, 2, v1
903 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
904 ; GFX9-NEXT: s_setpc_b64 s[30:31]
906 ; GFX10-LABEL: v_mul_add_2_i16:
908 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909 ; GFX10-NEXT: v_add_nc_u16 v1, v1, 2
910 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
911 ; GFX10-NEXT: s_setpc_b64 s[30:31]
913 %mul = mul i16 %x, %add
; i16 multiply of %x by %sub. The definition of %sub is not visible in this
; excerpt; the function name suggests %y - 2 (TODO confirm).
; Per the checks below: gfx6/gfx7 add -2, mask both operands with 0xffff and
; use v_mul_u32_u24; gfx8/gfx9/gfx10 use a 16-bit add of -2 followed by a
; 16-bit v_mul_lo_u16.
917 define i16 @v_mul_sub_2_i16(i16 %x, i16 %y) {
918 ; GFX67-LABEL: v_mul_sub_2_i16:
920 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -2, v1
922 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
923 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
924 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
925 ; GFX67-NEXT: s_setpc_b64 s[30:31]
927 ; GFX8-LABEL: v_mul_sub_2_i16:
929 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930 ; GFX8-NEXT: v_add_u16_e32 v1, -2, v1
931 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
932 ; GFX8-NEXT: s_setpc_b64 s[30:31]
934 ; GFX9-LABEL: v_mul_sub_2_i16:
936 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX9-NEXT: v_add_u16_e32 v1, -2, v1
938 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
939 ; GFX9-NEXT: s_setpc_b64 s[30:31]
941 ; GFX10-LABEL: v_mul_sub_2_i16:
943 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -2
945 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
946 ; GFX10-NEXT: s_setpc_b64 s[30:31]
948 %mul = mul i16 %x, %sub
; i64 form %x * (%y + 1). This function is checked with separate gfx6 and gfx7
; prefixes because their expected output differs.
; Per the checks below: gfx6 builds the product from v_mul_lo_u32/v_mul_hi_u32
; and finishes with an add/addc pair; gfx7 and gfx8 use v_mad_u64_u32 for the
; low-half product plus %x, then add the two cross products into the high
; half with two adds; gfx9 and gfx10 do the same but merge the cross products
; with a single v_add3_u32.
952 define i64 @v_mul_add_1_i64(i64 %x, i64 %y) {
953 ; GFX6-LABEL: v_mul_add_1_i64:
955 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
957 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
958 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
959 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
960 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
961 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
962 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
963 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
964 ; GFX6-NEXT: s_setpc_b64 s[30:31]
966 ; GFX7-LABEL: v_mul_add_1_i64:
968 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
970 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
971 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
972 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
973 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
974 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
975 ; GFX7-NEXT: s_setpc_b64 s[30:31]
977 ; GFX8-LABEL: v_mul_add_1_i64:
979 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
980 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
981 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
982 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
983 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
984 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
985 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
986 ; GFX8-NEXT: s_setpc_b64 s[30:31]
988 ; GFX9-LABEL: v_mul_add_1_i64:
990 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
992 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
993 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
994 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
995 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
996 ; GFX9-NEXT: s_setpc_b64 s[30:31]
998 ; GFX10-LABEL: v_mul_add_1_i64:
1000 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1002 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1003 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1004 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1005 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1006 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1007 %add = add i64 %y, 1
1008 %mul = mul i64 %x, %add
; i64 form (%y + 1) * %x, i.e. the multiply operands are swapped relative to
; %x * (%y + 1). This function is checked with separate gfx6 and gfx7 prefixes.
; Per the checks below: gfx6 builds the product from v_mul_lo_u32/v_mul_hi_u32
; and finishes with an add/addc pair; gfx7 and gfx8 use v_mad_u64_u32 for the
; low-half product plus %x, then add the two cross products into the high
; half with two adds; gfx9 and gfx10 merge the cross products with v_add3_u32.
1012 define i64 @v_mul_add_1_i64_commute(i64 %x, i64 %y) {
1013 ; GFX6-LABEL: v_mul_add_1_i64_commute:
1015 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1017 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1018 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1019 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1020 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1021 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1022 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1023 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
1024 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1026 ; GFX7-LABEL: v_mul_add_1_i64_commute:
1028 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1029 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1030 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
1031 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
1032 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
1033 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
1034 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
1035 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1037 ; GFX8-LABEL: v_mul_add_1_i64_commute:
1039 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1041 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
1042 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
1043 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
1044 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
1045 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
1046 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1048 ; GFX9-LABEL: v_mul_add_1_i64_commute:
1050 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1052 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
1053 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1054 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
1055 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1056 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1058 ; GFX10-LABEL: v_mul_add_1_i64_commute:
1060 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1061 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1062 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1063 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1064 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1065 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1066 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1067 %add = add i64 %y, 1
1068 %mul = mul i64 %add, %x
; i64 input written directly as x + (x * y), i.e. the multiply-then-add form
; with x as the first add operand. The GFX6 checks multiply via
; v_mul_lo_u32/v_mul_hi_u32 and then add x with v_add_i32 + v_addc_u32; the
; GFX7 and newer checks use one v_mad_u64_u32 with v[0:1] (x) as the addend.
1072 define i64 @v_mul_add_x_i64(i64 %x, i64 %y) {
1073 ; GFX6-LABEL: v_mul_add_x_i64:
1075 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1077 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1078 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1079 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1080 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1081 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1082 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1083 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1084 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1086 ; GFX7-LABEL: v_mul_add_x_i64:
1088 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1089 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1090 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
1091 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
1092 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
1093 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
1094 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
1095 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1097 ; GFX8-LABEL: v_mul_add_x_i64:
1099 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1101 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
1102 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
1103 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
1104 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
1105 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
1106 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1108 ; GFX9-LABEL: v_mul_add_x_i64:
1110 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1111 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1112 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
1113 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1114 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
1115 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1116 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1118 ; GFX10-LABEL: v_mul_add_x_i64:
1120 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1122 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1123 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1124 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1125 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1126 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1127 %mul = mul i64 %x, %y
1128 %add = add i64 %x, %mul
; i64 form of x * (y - 1). In every check block y - 1 is formed first as a
; 64-bit add of -1 (an add followed by an add-with-carry) and then multiplied.
; Where v_mad_u64_u32 is used (GFX7 and newer) its addend is the constant 0,
; so the subtract is not folded into the mad here.
1132 define i64 @v_mul_sub_1_i64(i64 %x, i64 %y) {
1133 ; GFX6-LABEL: v_mul_sub_1_i64:
1135 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1137 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1138 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1139 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1140 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1141 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1142 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1143 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1144 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1146 ; GFX7-LABEL: v_mul_sub_1_i64:
1148 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1150 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1151 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1152 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1153 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1154 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1155 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1156 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1157 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1159 ; GFX8-LABEL: v_mul_sub_1_i64:
1161 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1162 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
1163 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1164 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1165 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1166 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1167 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1168 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1169 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1170 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1172 ; GFX9-LABEL: v_mul_sub_1_i64:
1174 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1175 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -1, v2
1176 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1177 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1178 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1179 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1180 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1181 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1183 ; GFX10-LABEL: v_mul_sub_1_i64:
1185 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
1187 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1188 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1189 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1190 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1191 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1192 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1193 %sub = sub i64 %y, 1
1194 %mul = mul i64 %x, %sub
; i64 form of (y - 1) * x, with the subtract result as the first mul operand.
; As in the checks below, y - 1 is computed up front as a 64-bit add of -1 and
; the multiply follows with its operands in the commuted order; any
; v_mad_u64_u32 that appears has a 0 addend.
1198 define i64 @v_mul_sub_1_i64_commute(i64 %x, i64 %y) {
1199 ; GFX6-LABEL: v_mul_sub_1_i64_commute:
1201 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1203 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1204 ; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1
1205 ; GFX6-NEXT: v_mul_hi_u32 v4, v2, v0
1206 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, v0
1207 ; GFX6-NEXT: v_mul_lo_u32 v0, v2, v0
1208 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v4, v1
1209 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1210 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1212 ; GFX7-LABEL: v_mul_sub_1_i64_commute:
1214 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215 ; GFX7-NEXT: v_mov_b32_e32 v4, v0
1216 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, -1, v2
1217 ; GFX7-NEXT: v_addc_u32_e32 v2, vcc, -1, v3, vcc
1218 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1219 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
1220 ; GFX7-NEXT: v_mul_lo_u32 v2, v2, v4
1221 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1222 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1223 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1225 ; GFX8-LABEL: v_mul_sub_1_i64_commute:
1227 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
1229 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, -1, v2
1230 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, -1, v3, vcc
1231 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1232 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
1233 ; GFX8-NEXT: v_mul_lo_u32 v2, v2, v4
1234 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1235 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1236 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1238 ; GFX9-LABEL: v_mul_sub_1_i64_commute:
1240 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -1, v2
1242 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1243 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, v0
1244 ; GFX9-NEXT: v_mul_lo_u32 v4, v2, v1
1245 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0
1246 ; GFX9-NEXT: v_add3_u32 v1, v1, v4, v3
1247 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1249 ; GFX10-LABEL: v_mul_sub_1_i64_commute:
1251 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
1253 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1254 ; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1
1255 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, v0
1256 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, 0
1257 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v3
1258 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1259 %sub = sub i64 %y, 1
1260 %mul = mul i64 %sub, %x
; i64 input written directly as (x * y) - x. Every check block computes the
; full 64-bit product first and then subtracts x with a subtract followed by a
; subtract-with-borrow; where v_mad_u64_u32 appears its addend is 0.
1264 define i64 @v_mul_sub_x_i64(i64 %x, i64 %y) {
1265 ; GFX6-LABEL: v_mul_sub_x_i64:
1267 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1268 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1269 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1270 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1271 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1272 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1273 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1274 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
1275 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc
1276 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1278 ; GFX7-LABEL: v_mul_sub_x_i64:
1280 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281 ; GFX7-NEXT: v_mul_lo_u32 v5, v0, v3
1282 ; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v2, 0
1283 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, v2
1284 ; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1285 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, v4, v2
1286 ; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v3, v0
1287 ; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
1288 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1290 ; GFX8-LABEL: v_mul_sub_x_i64:
1292 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293 ; GFX8-NEXT: v_mul_lo_u32 v5, v0, v3
1294 ; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v2, 0
1295 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, v2
1296 ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5
1297 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2
1298 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v3, v0
1299 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
1300 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1302 ; GFX9-LABEL: v_mul_sub_x_i64:
1304 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1306 ; GFX9-NEXT: v_mul_lo_u32 v5, v0, v3
1307 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, 0
1308 ; GFX9-NEXT: v_add3_u32 v3, v3, v5, v4
1309 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1310 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
1311 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1313 ; GFX10-LABEL: v_mul_sub_x_i64:
1315 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1317 ; GFX10-NEXT: v_mul_lo_u32 v5, v0, v3
1318 ; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, v0, v2, 0
1319 ; GFX10-NEXT: v_add3_u32 v3, v3, v5, v4
1320 ; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v2, v0
1321 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
1322 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1323 %mul = mul i64 %x, %y
1324 %sub = sub i64 %mul, %x
; i64 form of x * (y + 2): the added constant is 2 rather than 1. In every
; check block the 64-bit add of 2 stays ahead of the multiply, and any
; v_mad_u64_u32 that appears has a 0 addend.
1328 define i64 @v_mul_add_2_i64(i64 %x, i64 %y) {
1329 ; GFX6-LABEL: v_mul_add_2_i64:
1331 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1333 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1334 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1335 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1336 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1337 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1338 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1339 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1340 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1342 ; GFX7-LABEL: v_mul_add_2_i64:
1344 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1346 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1347 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1348 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1349 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1350 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1351 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1352 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1353 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1355 ; GFX8-LABEL: v_mul_add_2_i64:
1357 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1358 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
1359 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1360 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1361 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1362 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1363 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1364 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1365 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1366 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1368 ; GFX9-LABEL: v_mul_add_2_i64:
1370 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v2
1372 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
1373 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1374 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1375 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1376 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1377 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1379 ; GFX10-LABEL: v_mul_add_2_i64:
1381 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1382 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 2
1383 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1384 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1385 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1386 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1387 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1388 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1389 %add = add i64 %y, 2
1390 %mul = mul i64 %x, %add
; i64 form of x * (y - 2): the subtracted constant is 2 rather than 1. In
; every check block y - 2 is formed as a 64-bit add of -2 (low add of -2, high
; add-with-carry of -1) before the multiply; any v_mad_u64_u32 has a 0 addend.
1394 define i64 @v_mul_sub_2_i64(i64 %x, i64 %y) {
1395 ; GFX6-LABEL: v_mul_sub_2_i64:
1397 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1399 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1400 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1401 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1402 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1403 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1404 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1405 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1406 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1408 ; GFX7-LABEL: v_mul_sub_2_i64:
1410 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1411 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1412 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1413 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1414 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1415 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1416 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1417 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1418 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1419 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1421 ; GFX8-LABEL: v_mul_sub_2_i64:
1423 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
1425 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1426 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1427 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1428 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1429 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1430 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1431 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1432 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1434 ; GFX9-LABEL: v_mul_sub_2_i64:
1436 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -2, v2
1438 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1439 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1440 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1441 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1442 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1443 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1445 ; GFX10-LABEL: v_mul_sub_2_i64:
1447 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1448 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -2
1449 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1450 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1451 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1452 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1453 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1454 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1455 %sub = sub i64 %y, 2
1456 %mul = mul i64 %x, %sub
; The single value y + 1 is multiplied by both x and z, and the two products
; are returned as a <2 x i32>. No check block keeps a standalone add of 1; the
; GFX67 and GFX8 checks use two v_mul_lo_u32 each followed by an add of the
; other factor, and the GFX900, GFX90A and GFX10 checks use two v_mad_u64_u32.
; The GFX90A checks write the second mad to v[2:3] and copy v2 into v1 with a
; v_mov_b32 afterwards.
1460 define <2 x i32> @v_mul_add_1_i32_multiple(i32 %x, i32 %y, i32 %z) {
1461 ; GFX67-LABEL: v_mul_add_1_i32_multiple:
1463 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464 ; GFX67-NEXT: v_mul_lo_u32 v3, v0, v1
1465 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
1466 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1467 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1468 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX8-LABEL: v_mul_add_1_i32_multiple:
1472 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1474 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
1475 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
1476 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1477 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1479 ; GFX900-LABEL: v_mul_add_1_i32_multiple:
1481 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1482 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1483 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
1484 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v3, v[2:3]
1485 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1487 ; GFX90A-LABEL: v_mul_add_1_i32_multiple:
1489 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1491 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
1492 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v3, v[2:3]
1493 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
1494 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1496 ; GFX10-LABEL: v_mul_add_1_i32_multiple:
1498 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
1500 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
1501 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, v3, v[2:3]
1502 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1503 %add = add i32 %y, 1
1504 %mul0 = mul i32 %x, %add
1505 %mul1 = mul i32 %z, %add
1506 %insert.0 = insertelement <2 x i32> poison, i32 %mul0, i32 0
1507 %insert.1 = insertelement <2 x i32> %insert.0, i32 %mul1, i32 1
1508 ret <2 x i32> %insert.1
; The value y + 1 has a use other than a multiply: it is itself inserted as
; element 1 of the returned vector, next to x * (y + 1) in element 0. Every
; check block keeps the add of 1 and follows it with a plain v_mul_lo_u32.
; NOTE(review): %mul1 is not used by the returned vector in the visible body;
; confirm whether that is intentional.
1511 define <2 x i32> @v_mul_add_1_i32_other_use(i32 %x, i32 %y, i32 %z) {
1512 ; GFX67-LABEL: v_mul_add_1_i32_other_use:
1514 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1515 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1516 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
1517 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1519 ; GFX8-LABEL: v_mul_add_1_i32_other_use:
1521 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
1523 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
1524 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1526 ; GFX9-LABEL: v_mul_add_1_i32_other_use:
1528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1529 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
1530 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
1531 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1533 ; GFX10-LABEL: v_mul_add_1_i32_other_use:
1535 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1536 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
1537 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
1538 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1539 %add = add i32 %y, 1
1540 %mul0 = mul i32 %x, %add
1541 %mul1 = mul i32 %z, %add
1542 %insert.0 = insertelement <2 x i32> poison, i32 %mul0, i32 0
1543 %insert.1 = insertelement <2 x i32> %insert.0, i32 %add, i32 1
1544 ret <2 x i32> %insert.1
; Chained case: %i2 = arg0 + 1 is used by both a mul and an add, and %i3 is
; used both in %i4 and in %i6 = %i3 + 1, which feeds the final mul.
; In every check block the first add of 1 (to arg0) is kept as an add. Only
; the final %i5 * (%i3 + 1) is emitted as multiply-then-add-%i5; it is a
; v_mul_lo_u32 plus add in the GFX67 and GFX8 checks and a v_mad_u64_u32 in the
; GFX9 and GFX10 checks.
1547 define i32 @v_mul_add_1_i32_chain(i32 %arg0, i32 %arg1, i32 %arg2) {
1548 ; GFX67-LABEL: v_mul_add_1_i32_chain:
1550 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1552 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
1553 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v1, v2
1554 ; GFX67-NEXT: v_mul_lo_u32 v0, v2, v0
1555 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
1556 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1557 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1559 ; GFX8-LABEL: v_mul_add_1_i32_chain:
1561 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1562 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
1563 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
1564 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v2
1565 ; GFX8-NEXT: v_mul_lo_u32 v0, v2, v0
1566 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1567 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
1568 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1570 ; GFX9-LABEL: v_mul_add_1_i32_chain:
1572 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
1574 ; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
1575 ; GFX9-NEXT: v_add_u32_e32 v2, v1, v2
1576 ; GFX9-NEXT: v_mul_lo_u32 v0, v2, v0
1577 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
1578 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1580 ; GFX10-LABEL: v_mul_add_1_i32_chain:
1582 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
1584 ; GFX10-NEXT: v_mul_lo_u32 v1, v2, v1
1585 ; GFX10-NEXT: v_add_nc_u32_e32 v2, v1, v2
1586 ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0
1587 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
1588 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1589 %i2 = add i32 %arg0, 1
1590 %i3 = mul i32 %i2, %arg1
1591 %i4 = add i32 %i3, %i2
1592 %i5 = mul i32 %i4, %arg0
1593 %i6 = add i32 %i3, 1
1594 %i7 = mul i32 %i5, %i6
; <2 x i16> form of x * (y + 1). The GFX67 checks add 1 to v2 and v3, mask all
; four inputs to 16 bits and multiply with v_mul_u32_u24. The GFX8 checks use
; one v_mad_u16 per 16-bit half. The GFX9 and GFX10 checks keep the add as a
; packed v_pk_sub_u16 of -1 followed by v_pk_mul_lo_u16.
1597 define <2 x i16> @v_mul_add_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
1598 ; GFX67-LABEL: v_mul_add_1_v2i16:
1600 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1602 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1603 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1604 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1605 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1606 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1607 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1608 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1609 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1611 ; GFX8-LABEL: v_mul_add_1_v2i16:
1613 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1614 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1615 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1616 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1617 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1618 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1619 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1620 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1622 ; GFX9-LABEL: v_mul_add_1_v2i16:
1624 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1626 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1627 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX10-LABEL: v_mul_add_1_v2i16:
1631 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1633 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1634 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1635 %add = add <2 x i16> %y, <i16 1, i16 1>
1636 %mul = mul <2 x i16> %x, %add
; <2 x i16> form of (y + 1) * x, with the add result as the first mul operand.
; The GFX8 checks use one v_mad_u16 per 16-bit half. The GFX67, GFX9 and GFX10
; checks keep a separate add (packed v_pk_sub_u16 of -1 on GFX9 and GFX10) and
; multiply with the operands in the commuted order.
1640 define <2 x i16> @v_mul_add_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
1641 ; GFX67-LABEL: v_mul_add_1_v2i16_commute:
1643 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1645 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1646 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1647 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1648 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1649 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1650 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
1651 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
1652 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1654 ; GFX8-LABEL: v_mul_add_1_v2i16_commute:
1656 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1657 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1658 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1659 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1660 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1661 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1662 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1663 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1665 ; GFX9-LABEL: v_mul_add_1_v2i16_commute:
1667 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668 ; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1669 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1670 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1672 ; GFX10-LABEL: v_mul_add_1_v2i16_commute:
1674 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1675 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1676 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1677 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1678 %add = add <2 x i16> %y, <i16 1, i16 1>
1679 %mul = mul <2 x i16> %add, %x
; <2 x i16> input written directly as x + (x * y). The GFX67 checks use one
; v_mad_u32_u24 per element and then repack the two 16-bit results. The GFX8
; checks use one v_mad_u16 per 16-bit half. The GFX9 and GFX10 checks use a
; packed v_pk_mul_lo_u16 followed by v_pk_add_u16.
1683 define <2 x i16> @v_mul_add_x_v2i16(<2 x i16> %x, <2 x i16> %y) {
1684 ; GFX67-LABEL: v_mul_add_x_v2i16:
1686 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1687 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
1688 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1689 ; GFX67-NEXT: v_and_b32_e32 v5, 0xffff, v1
1690 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1691 ; GFX67-NEXT: v_mad_u32_u24 v1, v5, v3, v1
1692 ; GFX67-NEXT: v_mad_u32_u24 v0, v4, v2, v0
1693 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
1694 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1695 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
1696 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1697 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1699 ; GFX8-LABEL: v_mul_add_x_v2i16:
1701 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1702 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1703 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1704 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1705 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1706 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1707 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1708 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX9-LABEL: v_mul_add_x_v2i16:
1712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1714 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
1715 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1717 ; GFX10-LABEL: v_mul_add_x_v2i16:
1719 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1720 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1721 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
1722 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1723 %mul = mul <2 x i16> %x, %y
1724 %add = add <2 x i16> %x, %mul
; <2 x i16> form of x * (y - 1). Every check block forms y - 1 first and then
; multiplies. The GFX67 checks add -1 per element, the GFX8 checks use
; v_add_u16 with -1 (the high half through SDWA), and the GFX9 and GFX10
; checks use a packed v_pk_sub_i16 of 1. No mad instruction appears.
1728 define <2 x i16> @v_mul_sub_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
1729 ; GFX67-LABEL: v_mul_sub_1_v2i16:
1731 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1732 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1733 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
1734 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1735 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1736 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1737 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1738 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1739 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1740 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1742 ; GFX8-LABEL: v_mul_sub_1_v2i16:
1744 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; GFX8-NEXT: v_mov_b32_e32 v3, -1
1746 ; GFX8-NEXT: v_add_u16_e32 v2, -1, v1
1747 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1748 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1749 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1750 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1751 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1753 ; GFX9-LABEL: v_mul_sub_1_v2i16:
1755 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1756 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1757 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1758 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1760 ; GFX10-LABEL: v_mul_sub_1_v2i16:
1762 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1763 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1764 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1765 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1766 %sub = sub <2 x i16> %y, <i16 1, i16 1>
1767 %mul = mul <2 x i16> %x, %sub
; <2 x i16> form of (y - 1) * x, with the subtract result as the first mul
; operand. Every check block forms y - 1 first (an add of -1 on GFX67 and
; GFX8, a packed v_pk_sub_i16 of 1 on GFX9 and GFX10) and then multiplies with
; the operands in the commuted order. No mad instruction appears.
1771 define <2 x i16> @v_mul_sub_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
1772 ; GFX67-LABEL: v_mul_sub_1_v2i16_commute:
1774 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1776 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
1777 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1778 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1779 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1780 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1781 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
1782 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
1783 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1785 ; GFX8-LABEL: v_mul_sub_1_v2i16_commute:
1787 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788 ; GFX8-NEXT: v_mov_b32_e32 v3, -1
1789 ; GFX8-NEXT: v_add_u16_e32 v2, -1, v1
1790 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1791 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1792 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v2, v0
1793 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1794 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1796 ; GFX9-LABEL: v_mul_sub_1_v2i16_commute:
1798 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1799 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1800 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1801 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1803 ; GFX10-LABEL: v_mul_sub_1_v2i16_commute:
1805 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1806 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1807 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1808 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1809 %sub = sub <2 x i16> %y, <i16 1, i16 1>
1810 %mul = mul <2 x i16> %sub, %x
; <2 x i16> input written directly as (x * y) - x. Every check block
; multiplies first and subtracts x afterwards. The GFX67 checks use
; v_mul_u32_u24 then v_sub_i32 and repack, the GFX8 checks use v_mul_lo_u16
; then v_sub_u16, and the GFX9 and GFX10 checks use v_pk_mul_lo_u16 then
; v_pk_sub_i16.
1814 define <2 x i16> @v_mul_sub_x_v2i16(<2 x i16> %x, <2 x i16> %y) {
1815 ; GFX67-LABEL: v_mul_sub_x_v2i16:
1817 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1818 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
1819 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1820 ; GFX67-NEXT: v_mul_u32_u24_e32 v2, v4, v2
1821 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v1
1822 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1823 ; GFX67-NEXT: v_mul_u32_u24_e32 v3, v4, v3
1824 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1825 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
1826 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
1827 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1828 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
1829 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1830 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1832 ; GFX8-LABEL: v_mul_sub_x_v2i16:
1834 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1835 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
1836 ; GFX8-NEXT: v_mul_lo_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1837 ; GFX8-NEXT: v_mul_lo_u16_e32 v1, v0, v1
1838 ; GFX8-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1839 ; GFX8-NEXT: v_sub_u16_e32 v0, v1, v0
1840 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1841 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1843 ; GFX9-LABEL: v_mul_sub_x_v2i16:
1845 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1846 ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1847 ; GFX9-NEXT: v_pk_sub_i16 v0, v1, v0
1848 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1850 ; GFX10-LABEL: v_mul_sub_x_v2i16:
1852 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1853 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1854 ; GFX10-NEXT: v_pk_sub_i16 v0, v1, v0
1855 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1856 %mul = mul <2 x i16> %x, %y
1857 %sub = sub <2 x i16> %mul, %x
; <2 x i16> form of x * (y + 2): the added constant is 2 rather than 1. Every
; check block keeps the add of 2 ahead of the multiply (written as a packed
; v_pk_sub_u16 of -2 in the GFX9 and GFX10 checks). No mad instruction appears.
1861 define <2 x i16> @v_mul_add_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
1862 ; GFX67-LABEL: v_mul_add_2_v2i16:
1864 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1866 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
1867 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1868 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1869 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1870 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1871 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1872 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1873 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1875 ; GFX8-LABEL: v_mul_add_2_v2i16:
1877 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1878 ; GFX8-NEXT: v_mov_b32_e32 v3, 2
1879 ; GFX8-NEXT: v_add_u16_e32 v2, 2, v1
1880 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1881 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1882 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1883 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1884 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1886 ; GFX9-LABEL: v_mul_add_2_v2i16:
1888 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1889 ; GFX9-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
1890 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1891 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1893 ; GFX10-LABEL: v_mul_add_2_v2i16:
1895 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1896 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
1897 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1898 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1899 %add = add <2 x i16> %y, <i16 2, i16 2>
1900 %mul = mul <2 x i16> %x, %add
; Covers x * (y - 2) on <2 x i16>. The expected output on every target keeps
; the subtract of 2 (emitted as an add of -2 on GFX6 through GFX8, and as a
; packed subtract on GFX9 and GFX10) followed by a multiply.
1904 define <2 x i16> @v_mul_sub_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
1905 ; GFX67-LABEL: v_mul_sub_2_v2i16:
1907 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1908 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1909 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
1910 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1911 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1912 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1913 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1914 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1915 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1916 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1918 ; GFX8-LABEL: v_mul_sub_2_v2i16:
1920 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1921 ; GFX8-NEXT: v_mov_b32_e32 v3, -2
1922 ; GFX8-NEXT: v_add_u16_e32 v2, -2, v1
1923 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1924 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1925 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1926 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1927 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1929 ; GFX9-LABEL: v_mul_sub_2_v2i16:
1931 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1932 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0]
1933 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1934 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1936 ; GFX10-LABEL: v_mul_sub_2_v2i16:
1938 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0]
1940 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1941 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1942 %sub = sub <2 x i16> %y, <i16 2, i16 2>
1943 %mul = mul <2 x i16> %x, %sub
; Covers x * (y + 1) on <2 x i32>. The expected output uses the (x * y) + x
; form: a multiply plus an add of x per element on GFX6 through GFX8, and one
; v_mad_u64_u32 per element (with x as the addend) on GFX9 and GFX10.
1947 define <2 x i32> @v_mul_add_1_v2i32(<2 x i32> %x, <2 x i32> %y) {
1948 ; GFX67-LABEL: v_mul_add_1_v2i32:
1950 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
1952 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
1953 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1954 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1955 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1957 ; GFX8-LABEL: v_mul_add_1_v2i32:
1959 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
1961 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
1962 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
1963 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
1964 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1966 ; GFX900-LABEL: v_mul_add_1_v2i32:
1968 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1970 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
1971 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
1972 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1974 ; GFX90A-LABEL: v_mul_add_1_v2i32:
1976 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1978 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
1979 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
1980 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
1981 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1983 ; GFX10-LABEL: v_mul_add_1_v2i32:
1985 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1986 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1987 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
1988 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1989 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1990 %add = add <2 x i32> %y, <i32 1, i32 1>
1991 %mul = mul <2 x i32> %x, %add
; Covers (y + 1) * x on <2 x i32>, i.e. the add result is the first multiply
; operand. The expected output still uses the (x * y) + x form: multiply plus
; add on GFX6 through GFX8, v_mad_u64_u32 per element on GFX9 and GFX10.
1995 define <2 x i32> @v_mul_add_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) {
1996 ; GFX67-LABEL: v_mul_add_1_v2i32_commute:
1998 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1999 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2000 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2001 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2002 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
2003 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2005 ; GFX8-LABEL: v_mul_add_1_v2i32_commute:
2007 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2009 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2010 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
2011 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
2012 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2014 ; GFX900-LABEL: v_mul_add_1_v2i32_commute:
2016 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2017 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
2018 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
2019 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
2020 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2022 ; GFX90A-LABEL: v_mul_add_1_v2i32_commute:
2024 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2025 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2026 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
2027 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
2028 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2029 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2031 ; GFX10-LABEL: v_mul_add_1_v2i32_commute:
2033 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
2035 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
2036 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
2037 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2038 %add = add <2 x i32> %y, <i32 1, i32 1>
2039 %mul = mul <2 x i32> %add, %x
; Covers x + (x * y) on <2 x i32>, where the input is already in
; multiply-then-add form. Expected output: multiply plus add per element on
; GFX6 through GFX8, and one v_mad_u64_u32 per element on GFX9 and GFX10.
2043 define <2 x i32> @v_mul_add_x_v2i32(<2 x i32> %x, <2 x i32> %y) {
2044 ; GFX67-LABEL: v_mul_add_x_v2i32:
2046 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2047 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2048 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2049 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2
2050 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v3
2051 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2053 ; GFX8-LABEL: v_mul_add_x_v2i32:
2055 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2056 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2057 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2058 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
2059 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
2060 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2062 ; GFX900-LABEL: v_mul_add_x_v2i32:
2064 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
2066 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
2067 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
2068 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2070 ; GFX90A-LABEL: v_mul_add_x_v2i32:
2072 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2073 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2074 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
2075 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
2076 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2077 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2079 ; GFX10-LABEL: v_mul_add_x_v2i32:
2081 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2082 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
2083 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
2084 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
2085 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2086 %mul = mul <2 x i32> %x, %y
2087 %add = add <2 x i32> %x, %mul
; Covers x * (y - 1) on <2 x i32>. The expected output on every target keeps
; an add of -1 to y followed by the multiply; it is not rewritten into
; (x * y) - x.
2091 define <2 x i32> @v_mul_sub_1_v2i32(<2 x i32> %x, <2 x i32> %y) {
2092 ; GFX67-LABEL: v_mul_sub_1_v2i32:
2094 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2096 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2097 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2098 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2099 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2101 ; GFX8-LABEL: v_mul_sub_1_v2i32:
2103 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2104 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2105 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2106 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2107 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2108 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2110 ; GFX9-LABEL: v_mul_sub_1_v2i32:
2112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2113 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2114 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2115 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2116 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2117 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2119 ; GFX10-LABEL: v_mul_sub_1_v2i32:
2121 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2122 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2123 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2124 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2125 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2126 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2127 %sub = sub <2 x i32> %y, <i32 1, i32 1>
2128 %mul = mul <2 x i32> %x, %sub
; Covers (y - 1) * x on <2 x i32>, i.e. the subtract result is the first
; multiply operand. The expected output on every target is an add of -1 to y
; followed by the multiply, with the decremented value as the first source.
2132 define <2 x i32> @v_mul_sub_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) {
2133 ; GFX67-LABEL: v_mul_sub_1_v2i32_commute:
2135 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2136 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2137 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2138 ; GFX67-NEXT: v_mul_lo_u32 v0, v2, v0
2139 ; GFX67-NEXT: v_mul_lo_u32 v1, v3, v1
2140 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2142 ; GFX8-LABEL: v_mul_sub_1_v2i32_commute:
2144 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2145 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2146 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2147 ; GFX8-NEXT: v_mul_lo_u32 v0, v2, v0
2148 ; GFX8-NEXT: v_mul_lo_u32 v1, v3, v1
2149 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2151 ; GFX9-LABEL: v_mul_sub_1_v2i32_commute:
2153 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2154 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2155 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2156 ; GFX9-NEXT: v_mul_lo_u32 v0, v2, v0
2157 ; GFX9-NEXT: v_mul_lo_u32 v1, v3, v1
2158 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2160 ; GFX10-LABEL: v_mul_sub_1_v2i32_commute:
2162 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2164 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2165 ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0
2166 ; GFX10-NEXT: v_mul_lo_u32 v1, v3, v1
2167 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2168 %sub = sub <2 x i32> %y, <i32 1, i32 1>
2169 %mul = mul <2 x i32> %sub, %x
; Covers (x * y) - x on <2 x i32>. The expected output on every target is a
; multiply followed by a subtract of x for each element; GFX900 and GFX90A
; differ only in the order of the two multiplies.
2173 define <2 x i32> @v_mul_sub_x_v2i32(<2 x i32> %x, <2 x i32> %y) {
2174 ; GFX67-LABEL: v_mul_sub_x_v2i32:
2176 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2177 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2178 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2179 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
2180 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
2181 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2183 ; GFX8-LABEL: v_mul_sub_x_v2i32:
2185 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2187 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2188 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v2, v0
2189 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1
2190 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2192 ; GFX900-LABEL: v_mul_sub_x_v2i32:
2194 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195 ; GFX900-NEXT: v_mul_lo_u32 v2, v0, v2
2196 ; GFX900-NEXT: v_mul_lo_u32 v3, v1, v3
2197 ; GFX900-NEXT: v_sub_u32_e32 v0, v2, v0
2198 ; GFX900-NEXT: v_sub_u32_e32 v1, v3, v1
2199 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2201 ; GFX90A-LABEL: v_mul_sub_x_v2i32:
2203 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204 ; GFX90A-NEXT: v_mul_lo_u32 v3, v1, v3
2205 ; GFX90A-NEXT: v_mul_lo_u32 v2, v0, v2
2206 ; GFX90A-NEXT: v_sub_u32_e32 v0, v2, v0
2207 ; GFX90A-NEXT: v_sub_u32_e32 v1, v3, v1
2208 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2210 ; GFX10-LABEL: v_mul_sub_x_v2i32:
2212 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2213 ; GFX10-NEXT: v_mul_lo_u32 v2, v0, v2
2214 ; GFX10-NEXT: v_mul_lo_u32 v3, v1, v3
2215 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
2216 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1
2217 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2218 %mul = mul <2 x i32> %x, %y
2219 %sub = sub <2 x i32> %mul, %x
; Covers x * (y + 2) on <2 x i32>. With an addend of 2 the expected output on
; every target keeps the add of 2 to y followed by the multiply; no mad is
; expected.
2223 define <2 x i32> @v_mul_add_2_v2i32(<2 x i32> %x, <2 x i32> %y) {
2224 ; GFX67-LABEL: v_mul_add_2_v2i32:
2226 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2227 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
2228 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
2229 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2230 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2231 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2233 ; GFX8-LABEL: v_mul_add_2_v2i32:
2235 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2236 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v3
2237 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
2238 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2239 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2240 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2242 ; GFX9-LABEL: v_mul_add_2_v2i32:
2244 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2245 ; GFX9-NEXT: v_add_u32_e32 v3, 2, v3
2246 ; GFX9-NEXT: v_add_u32_e32 v2, 2, v2
2247 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2248 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2249 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2251 ; GFX10-LABEL: v_mul_add_2_v2i32:
2253 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2254 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
2255 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
2256 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2257 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2258 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2259 %add = add <2 x i32> %y, <i32 2, i32 2>
2260 %mul = mul <2 x i32> %x, %add
; Covers x * (y - 2) on <2 x i32>. The expected output on every target is an
; add of -2 to y followed by the multiply.
2264 define <2 x i32> @v_mul_sub_2_v2i32(<2 x i32> %x, <2 x i32> %y) {
2265 ; GFX67-LABEL: v_mul_sub_2_v2i32:
2267 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2268 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
2269 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
2270 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2271 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2272 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2274 ; GFX8-LABEL: v_mul_sub_2_v2i32:
2276 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2277 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -2, v3
2278 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
2279 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2280 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2281 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2283 ; GFX9-LABEL: v_mul_sub_2_v2i32:
2285 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286 ; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
2287 ; GFX9-NEXT: v_add_u32_e32 v2, -2, v2
2288 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2289 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2290 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2292 ; GFX10-LABEL: v_mul_sub_2_v2i32:
2294 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
2296 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
2297 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2298 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2299 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2300 %sub = sub <2 x i32> %y, <i32 2, i32 2>
2301 %mul = mul <2 x i32> %x, %sub
; Covers x * (y + 1) on <2 x i24>. The expected output on every target is one
; v_mad_u32_u24 per element computing x * y + x.
2305 define <2 x i24> @v_mul_add_1_v2i24(<2 x i24> %x, <2 x i24> %y) {
2306 ; GFX67-LABEL: v_mul_add_1_v2i24:
2308 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2309 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2310 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2311 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2313 ; GFX8-LABEL: v_mul_add_1_v2i24:
2315 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2317 ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2318 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2320 ; GFX9-LABEL: v_mul_add_1_v2i24:
2322 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2324 ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2325 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2327 ; GFX10-LABEL: v_mul_add_1_v2i24:
2329 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2330 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2331 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2332 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2333 %add = add <2 x i24> %y, <i24 1, i24 1>
2334 %mul = mul <2 x i24> %x, %add
; Covers (y + 1) * x on <2 x i24>, i.e. the add result is the first multiply
; operand. The expected output on every target is one v_mad_u32_u24 per
; element computing x * y + x.
2338 define <2 x i24> @v_mul_add_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) {
2339 ; GFX67-LABEL: v_mul_add_1_v2i24_commute:
2341 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2342 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2343 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2344 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2346 ; GFX8-LABEL: v_mul_add_1_v2i24_commute:
2348 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2350 ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2351 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2353 ; GFX9-LABEL: v_mul_add_1_v2i24_commute:
2355 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2357 ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2358 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2360 ; GFX10-LABEL: v_mul_add_1_v2i24_commute:
2362 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2363 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2364 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2365 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2366 %add = add <2 x i24> %y, <i24 1, i24 1>
2367 %mul = mul <2 x i24> %add, %x
; Covers x + (x * y) on <2 x i24>, where the input is already in
; multiply-then-add form. The expected output on every target is one
; v_mad_u32_u24 per element.
2371 define <2 x i24> @v_mul_add_x_v2i24(<2 x i24> %x, <2 x i24> %y) {
2372 ; GFX67-LABEL: v_mul_add_x_v2i24:
2374 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2376 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2377 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2379 ; GFX8-LABEL: v_mul_add_x_v2i24:
2381 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2383 ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2384 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2386 ; GFX9-LABEL: v_mul_add_x_v2i24:
2388 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2389 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2390 ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2391 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2393 ; GFX10-LABEL: v_mul_add_x_v2i24:
2395 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2396 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2397 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2398 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2399 %mul = mul <2 x i24> %x, %y
2400 %add = add <2 x i24> %x, %mul
; Covers x * (y - 1) on <2 x i24>. The expected output on every target is an
; add of -1 to y followed by v_mul_u32_u24; no mad is expected.
2404 define <2 x i24> @v_mul_sub_1_v2i24(<2 x i24> %x, <2 x i24> %y) {
2405 ; GFX67-LABEL: v_mul_sub_1_v2i24:
2407 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2408 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2409 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2410 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2411 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2412 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2414 ; GFX8-LABEL: v_mul_sub_1_v2i24:
2416 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2417 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2418 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2419 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2420 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2421 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2423 ; GFX9-LABEL: v_mul_sub_1_v2i24:
2425 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2426 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2427 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2428 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2429 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2430 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2432 ; GFX10-LABEL: v_mul_sub_1_v2i24:
2434 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2436 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2437 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2438 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2439 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2440 %sub = sub <2 x i24> %y, <i24 1, i24 1>
2441 %mul = mul <2 x i24> %x, %sub
; Covers (y - 1) * x on <2 x i24>, i.e. the subtract result is the first
; multiply operand. The expected output on every target is an add of -1 to y
; followed by v_mul_u32_u24 with the decremented value as the first source.
2445 define <2 x i24> @v_mul_sub_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) {
2446 ; GFX67-LABEL: v_mul_sub_1_v2i24_commute:
2448 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2450 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2451 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2452 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2453 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2455 ; GFX8-LABEL: v_mul_sub_1_v2i24_commute:
2457 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2458 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2459 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2460 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2461 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2462 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2464 ; GFX9-LABEL: v_mul_sub_1_v2i24_commute:
2466 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2467 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2468 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2469 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2470 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2471 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2473 ; GFX10-LABEL: v_mul_sub_1_v2i24_commute:
2475 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2476 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2477 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2478 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2479 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2480 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2481 %sub = sub <2 x i24> %y, <i24 1, i24 1>
2482 %mul = mul <2 x i24> %sub, %x
; Covers (x * y) - x on <2 x i24>. The expected output on every target is
; v_mul_u32_u24 followed by a 32-bit subtract of x for each element.
2486 define <2 x i24> @v_mul_sub_x_v2i24(<2 x i24> %x, <2 x i24> %y) {
2487 ; GFX67-LABEL: v_mul_sub_x_v2i24:
2489 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2490 ; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2491 ; GFX67-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2492 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
2493 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
2494 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2496 ; GFX8-LABEL: v_mul_sub_x_v2i24:
2498 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2499 ; GFX8-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2500 ; GFX8-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2501 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v2, v0
2502 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1
2503 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2505 ; GFX9-LABEL: v_mul_sub_x_v2i24:
2507 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508 ; GFX9-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2509 ; GFX9-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2510 ; GFX9-NEXT: v_sub_u32_e32 v0, v2, v0
2511 ; GFX9-NEXT: v_sub_u32_e32 v1, v3, v1
2512 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2514 ; GFX10-LABEL: v_mul_sub_x_v2i24:
2516 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2517 ; GFX10-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2518 ; GFX10-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2519 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
2520 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1
2521 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2522 %mul = mul <2 x i24> %x, %y
2523 %sub = sub <2 x i24> %mul, %x
; Covers x * (y + 2) on <2 x i24>. With an addend of 2 the expected output on
; every target is an add of 2 to y followed by v_mul_u32_u24; no mad is
; expected.
2527 define <2 x i24> @v_mul_add_2_v2i24(<2 x i24> %x, <2 x i24> %y) {
2528 ; GFX67-LABEL: v_mul_add_2_v2i24:
2530 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2531 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
2532 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
2533 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2534 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2535 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2537 ; GFX8-LABEL: v_mul_add_2_v2i24:
2539 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2540 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v3
2541 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
2542 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2543 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2544 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2546 ; GFX9-LABEL: v_mul_add_2_v2i24:
2548 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2549 ; GFX9-NEXT: v_add_u32_e32 v3, 2, v3
2550 ; GFX9-NEXT: v_add_u32_e32 v2, 2, v2
2551 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2552 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2553 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2555 ; GFX10-LABEL: v_mul_add_2_v2i24:
2557 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2558 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
2559 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
2560 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2561 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2562 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2563 %add = add <2 x i24> %y, <i24 2, i24 2>
2564 %mul = mul <2 x i24> %x, %add
; Covers x * (y - 2) on <2 x i24>. The expected output on every target is an
; add of -2 to y followed by v_mul_u32_u24.
2568 define <2 x i24> @v_mul_sub_2_v2i24(<2 x i24> %x, <2 x i24> %y) {
2569 ; GFX67-LABEL: v_mul_sub_2_v2i24:
2571 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2572 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
2573 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
2574 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2575 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2576 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2578 ; GFX8-LABEL: v_mul_sub_2_v2i24:
2580 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2581 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -2, v3
2582 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
2583 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2584 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2585 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2587 ; GFX9-LABEL: v_mul_sub_2_v2i24:
2589 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2590 ; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
2591 ; GFX9-NEXT: v_add_u32_e32 v2, -2, v2
2592 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2593 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2594 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2596 ; GFX10-LABEL: v_mul_sub_2_v2i24:
2598 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2599 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
2600 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
2601 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2602 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2603 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2604 %sub = sub <2 x i24> %y, <i24 2, i24 2>
2605 %mul = mul <2 x i24> %x, %sub
; Covers arg * 9 + 52 on i32 with both constants as immediates. Expected
; output: v_mul_lo_u32 plus an add on GFX6 through GFX8, and a single
; v_mad_u64_u32 taking 9 and 52 directly as operands on GFX9 and GFX10.
2609 define i32 @v_mul_9_add_52_i32(i32 %arg) {
2610 ; GFX67-LABEL: v_mul_9_add_52_i32:
2612 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2613 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, 9
2614 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 52, v0
2615 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2617 ; GFX8-LABEL: v_mul_9_add_52_i32:
2619 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2620 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 9
2621 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 52, v0
2622 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2624 ; GFX9-LABEL: v_mul_9_add_52_i32:
2626 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2627 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2628 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2630 ; GFX10-LABEL: v_mul_9_add_52_i32:
2632 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2633 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
2634 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2635 %mul = mul i32 %arg, 9
2636 %add = add i32 %mul, 52
; Covers arg * 9 + 52 on i16. Expected output: GFX6 and GFX7 mask the input to
; 16 bits and use v_mad_u32_u24; GFX8 and GFX10 use v_mad_u16; GFX9 uses
; v_mad_legacy_u16.
2640 define i16 @v_mul_9_add_52_i16(i16 %arg) {
2641 ; GFX67-LABEL: v_mul_9_add_52_i16:
2643 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2644 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2645 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 9, 52
2646 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2648 ; GFX8-LABEL: v_mul_9_add_52_i16:
2650 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2651 ; GFX8-NEXT: v_mad_u16 v0, v0, 9, 52
2652 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2654 ; GFX9-LABEL: v_mul_9_add_52_i16:
2656 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2657 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, 9, 52
2658 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2660 ; GFX10-LABEL: v_mul_9_add_52_i16:
2662 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2663 ; GFX10-NEXT: v_mad_u16 v0, v0, 9, 52
2664 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2665 %mul = mul i16 %arg, 9
2666 %add = add i16 %mul, 52
; Covers arg * 9 + 52 on <2 x i16> with splat constants. Expected output:
; GFX6 and GFX7 use one v_mad_u32_u24 per element and then re-pack the halves;
; GFX8 uses one v_mad_u16 per half joined with a shift and an or; GFX9 and
; GFX10 use a packed multiply followed by a packed add (no packed mad).
2670 define <2 x i16> @v_mul_9_add_52_v2i16(<2 x i16> %arg) {
2671 ; GFX67-LABEL: v_mul_9_add_52_v2i16:
2673 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2674 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2675 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2676 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, 9, 52
2677 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 9, 52
2678 ; GFX67-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2679 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2680 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v2
2681 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2682 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2684 ; GFX8-LABEL: v_mul_9_add_52_v2i16:
2686 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2687 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2688 ; GFX8-NEXT: v_mad_u16 v1, v1, 9, 52
2689 ; GFX8-NEXT: v_mad_u16 v0, v0, 9, 52
2690 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2691 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2692 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2694 ; GFX9-LABEL: v_mul_9_add_52_v2i16:
2696 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2697 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, 9 op_sel_hi:[1,0]
2698 ; GFX9-NEXT: v_pk_add_u16 v0, v0, 52 op_sel_hi:[1,0]
2699 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2701 ; GFX10-LABEL: v_mul_9_add_52_v2i16:
2703 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2704 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 9 op_sel_hi:[1,0]
2705 ; GFX10-NEXT: v_pk_add_u16 v0, v0, 52 op_sel_hi:[1,0]
2706 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2707 %mul = mul <2 x i16> %arg, <i16 9, i16 9>
2708 %add = add <2 x i16> %mul, <i16 52, i16 52>
; Covers arg * 9 + 52 on i64. Expected output: GFX6 expands to
; v_mul_lo_u32/v_mul_hi_u32 plus an add-with-carry pair; GFX7 and GFX8 use one
; v_mad_u64_u32 for the low half with 52 folded in, plus a v_mul_lo_u32 and an
; add for the high half; GFX9 and GFX10 use two v_mad_u64_u32 instructions.
2712 define i64 @v_mul_9_add_52_i64(i64 %arg) {
2713 ; GFX6-LABEL: v_mul_9_add_52_i64:
2715 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2716 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 9
2717 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, 9
2718 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 9
2719 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
2720 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 52, v0
2721 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2722 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2724 ; GFX7-LABEL: v_mul_9_add_52_i64:
2726 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2727 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, 9
2728 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2729 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
2730 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2732 ; GFX8-LABEL: v_mul_9_add_52_i64:
2734 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2735 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, 9
2736 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2737 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
2738 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2740 ; GFX900-LABEL: v_mul_9_add_52_i64:
2742 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2743 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
2744 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2745 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 9, v[1:2]
2746 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2748 ; GFX90A-LABEL: v_mul_9_add_52_i64:
2750 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2751 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
2752 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2753 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2754 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 9, v[4:5]
2755 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2756 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2758 ; GFX10-LABEL: v_mul_9_add_52_i64:
2760 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2761 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
2762 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
2763 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2]
2764 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2765 %mul = mul i64 %arg, 9
2766 %add = add i64 %mul, 52
; Covers arg * 5 + 1 on i32. Expected output: v_mul_lo_u32 plus an add on GFX6
; through GFX8, and a single v_mad_u64_u32 taking 5 and 1 directly as operands
; on GFX9 and GFX10.
2770 define i32 @v_mul_5_add_1_i32(i32 %arg) {
2771 ; GFX67-LABEL: v_mul_5_add_1_i32:
2773 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2774 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, 5
2775 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2776 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2778 ; GFX8-LABEL: v_mul_5_add_1_i32:
2780 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2781 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 5
2782 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2783 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2785 ; GFX9-LABEL: v_mul_5_add_1_i32:
2787 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2788 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
2789 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2791 ; GFX10-LABEL: v_mul_5_add_1_i32:
2793 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2794 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
2795 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2796 %mul = mul i32 %arg, 5
2797 %add = add i32 %mul, 1
; Covers arg * 284 + 82 on i32. In the expected output the multiplier 284
; (0x11c) is first moved into s4 on every target and 82 appears as the
; literal 0x52. GFX6 through GFX9 then use v_mul_lo_u32 plus an add; only
; GFX10 forms a v_mad_u64_u32.
2801 define i32 @v_mul_284_add_82_i32(i32 %arg) {
2802 ; GFX67-LABEL: v_mul_284_add_82_i32:
2804 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2805 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2806 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, s4
2807 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 0x52, v0
2808 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2810 ; GFX8-LABEL: v_mul_284_add_82_i32:
2812 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2814 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
2815 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x52, v0
2816 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2818 ; GFX9-LABEL: v_mul_284_add_82_i32:
2820 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2821 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2822 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
2823 ; GFX9-NEXT: v_add_u32_e32 v0, 0x52, v0
2824 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2826 ; GFX10-LABEL: v_mul_284_add_82_i32:
2828 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2829 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
2830 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
2831 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2832 %mul = mul i32 %arg, 284
2833 %add = add i32 %mul, 82
; i16 variant of multiply-by-5 / add-1.
; gfx6/gfx7 mask the input to 16 bits and use v_mad_u32_u24; gfx8 and gfx10 use
; v_mad_u16, and the gfx9 runs use v_mad_legacy_u16.
2837 define i16 @v_mul_5_add_1_i16(i16 %arg) {
2838 ; GFX67-LABEL: v_mul_5_add_1_i16:
2840 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2841 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2842 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 5, 1
2843 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2845 ; GFX8-LABEL: v_mul_5_add_1_i16:
2847 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848 ; GFX8-NEXT: v_mad_u16 v0, v0, 5, 1
2849 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2851 ; GFX9-LABEL: v_mul_5_add_1_i16:
2853 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2854 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, 5, 1
2855 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2857 ; GFX10-LABEL: v_mul_5_add_1_i16:
2859 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2860 ; GFX10-NEXT: v_mad_u16 v0, v0, 5, 1
2861 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2862 %mul = mul i16 %arg, 5
2863 %add = add i16 %mul, 1
; i16 variant with multiplier 284 (0x11c) and addend 82 (0x52).
; Every target still forms a single mad. gfx6 through gfx9 first move 0x52 into
; a VGPR, whereas the gfx10 output passes 0x52 directly as an operand of
; v_mad_u16.
2867 define i16 @v_mul_284_add_82_i16(i16 %arg) {
2868 ; GFX67-LABEL: v_mul_284_add_82_i16:
2870 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2871 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2872 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2873 ; GFX67-NEXT: v_mov_b32_e32 v1, 0x52
2874 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, s4, v1
2875 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2877 ; GFX8-LABEL: v_mul_284_add_82_i16:
2879 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2880 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2881 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x52
2882 ; GFX8-NEXT: v_mad_u16 v0, v0, s4, v1
2883 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2885 ; GFX9-LABEL: v_mul_284_add_82_i16:
2887 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2888 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2889 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x52
2890 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, s4, v1
2891 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2893 ; GFX10-LABEL: v_mul_284_add_82_i16:
2895 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2896 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
2897 ; GFX10-NEXT: v_mad_u16 v0, v0, s4, 0x52
2898 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2899 %mul = mul i16 %arg, 284
2900 %add = add i16 %mul, 82
; <2 x i16> splat multiply by 5 followed by a splat add of 1.
; gfx6/gfx7 and gfx8 are expected to handle each element with its own scalar
; mad and then repack with shift/or. gfx9 and gfx10 stay packed and emit
; v_pk_mul_lo_u16 followed by v_pk_sub_u16 with -1 rather than a fused mad.
2904 define <2 x i16> @v_mul_5_add_1_v2i16(<2 x i16> %arg) {
2905 ; GFX67-LABEL: v_mul_5_add_1_v2i16:
2907 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2909 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2910 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, 5, 1
2911 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 5, 1
2912 ; GFX67-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2913 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2914 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v2
2915 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2916 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2918 ; GFX8-LABEL: v_mul_5_add_1_v2i16:
2920 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2921 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2922 ; GFX8-NEXT: v_mad_u16 v1, v1, 5, 1
2923 ; GFX8-NEXT: v_mad_u16 v0, v0, 5, 1
2924 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2925 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2926 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2928 ; GFX9-LABEL: v_mul_5_add_1_v2i16:
2930 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2931 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
2932 ; GFX9-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
2933 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2935 ; GFX10-LABEL: v_mul_5_add_1_v2i16:
2937 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2938 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
2939 ; GFX10-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
2940 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2941 %mul = mul <2 x i16> %arg, <i16 5, i16 5>
2942 %add = add <2 x i16> %mul, <i16 1, i16 1>
; <2 x i16> splat multiply by 284 (0x11c) followed by a splat add of 82 (0x52).
; gfx6/gfx7 and gfx8 use one scalar mad per element and repack with shift/or;
; gfx9 and gfx10 emit packed v_pk_mul_lo_u16 followed by v_pk_add_u16.
; NOTE(review): the gfx6/gfx7 result masks are 0xfffe rather than 0xffff,
; presumably because 284*x + 82 is always even - confirm before hand-editing
; these autogenerated checks.
2946 define <2 x i16> @v_mul_284_add_82_v2i16(<2 x i16> %arg) {
2947 ; GFX67-LABEL: v_mul_284_add_82_v2i16:
2949 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2950 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2951 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2952 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2953 ; GFX67-NEXT: v_mov_b32_e32 v2, 0x52
2954 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, s4, v2
2955 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, s4, v2
2956 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
2957 ; GFX67-NEXT: v_and_b32_e32 v0, 0xfffe, v0
2958 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
2959 ; GFX67-NEXT: v_and_b32_e32 v1, 0xfffe, v1
2960 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2962 ; GFX8-LABEL: v_mul_284_add_82_v2i16:
2964 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2965 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2966 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2967 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x52
2968 ; GFX8-NEXT: v_mad_u16 v1, v1, s4, v2
2969 ; GFX8-NEXT: v_mad_u16 v0, v0, s4, v2
2970 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2971 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2972 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2974 ; GFX9-LABEL: v_mul_284_add_82_v2i16:
2976 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2978 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, s4 op_sel_hi:[1,0]
2979 ; GFX9-NEXT: s_movk_i32 s4, 0x52
2980 ; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
2981 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2983 ; GFX10-LABEL: v_mul_284_add_82_v2i16:
2985 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2986 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, 0x11c, v0 op_sel_hi:[0,1]
2987 ; GFX10-NEXT: v_pk_add_u16 v0, 0x52, v0 op_sel_hi:[0,1]
2988 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2989 %mul = mul <2 x i16> %arg, <i16 284, i16 284>
2990 %add = add <2 x i16> %mul, <i16 82, i16 82>
; i64 multiply by 5 followed by an add of 1.
; Expected lowering per target:
;   gfx6        - v_mul_lo_u32 / v_mul_hi_u32 plus an add / addc pair.
;   gfx7, gfx8  - one v_mad_u64_u32 on the low input half, with a separate
;                 v_mul_lo_u32 of the high input half added into v1.
;   gfx900      - two chained v_mad_u64_u32.
;   gfx90a      - two v_mad_u64_u32 with extra v_mov_b32 copies around them.
;   gfx10       - two chained v_mad_u64_u32 (null carry-out operand).
2994 define i64 @v_mul_5_add_1_i64(i64 %arg) {
2995 ; GFX6-LABEL: v_mul_5_add_1_i64:
2997 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2998 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 5
2999 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, 5
3000 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 5
3001 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3002 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3003 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3004 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3006 ; GFX7-LABEL: v_mul_5_add_1_i64:
3008 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3009 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, 5
3010 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3011 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3012 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3014 ; GFX8-LABEL: v_mul_5_add_1_i64:
3016 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3017 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, 5
3018 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3019 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
3020 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3022 ; GFX900-LABEL: v_mul_5_add_1_i64:
3024 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3025 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3026 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3027 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 5, v[1:2]
3028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3030 ; GFX90A-LABEL: v_mul_5_add_1_i64:
3032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3034 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3035 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3036 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 5, v[4:5]
3037 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3038 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3040 ; GFX10-LABEL: v_mul_5_add_1_i64:
3042 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3043 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3044 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
3045 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2]
3046 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3047 %mul = mul i64 %arg, 5
3048 %add = add i64 %mul, 1
; i64 multiply by 284 (0x11c) followed by an add of 82 (0x52).
; gfx6 expands to v_mul_lo_u32 / v_mul_hi_u32 plus add / addc. gfx7 and gfx8
; use one v_mad_u64_u32 plus a separate v_mul_lo_u32 and add for the high half.
; gfx900, gfx90a and gfx10 use two v_mad_u64_u32.
; On gfx7, gfx8, gfx900 and gfx90a the addend is first built as a 64-bit VGPR
; pair (0x52, 0); the gfx10 output passes 0x52 and 0x11c as literal operands.
3052 define i64 @v_mul_284_add_82_i64(i64 %arg) {
3053 ; GFX6-LABEL: v_mul_284_add_82_i64:
3055 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3056 ; GFX6-NEXT: s_movk_i32 s4, 0x11c
3057 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s4
3058 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s4
3059 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4
3060 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3061 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0x52, v0
3062 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3063 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3065 ; GFX7-LABEL: v_mul_284_add_82_i64:
3067 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3068 ; GFX7-NEXT: s_movk_i32 s4, 0x11c
3069 ; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
3070 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x52
3071 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
3072 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3073 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3074 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3076 ; GFX8-LABEL: v_mul_284_add_82_i64:
3078 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3079 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
3080 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
3081 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x52
3082 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
3083 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3084 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3085 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3087 ; GFX900-LABEL: v_mul_284_add_82_i64:
3089 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3090 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x52
3091 ; GFX900-NEXT: s_movk_i32 s6, 0x11c
3092 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3093 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3094 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
3095 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
3096 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3098 ; GFX90A-LABEL: v_mul_284_add_82_i64:
3100 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3101 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0x52
3102 ; GFX90A-NEXT: s_movk_i32 s6, 0x11c
3103 ; GFX90A-NEXT: v_mov_b32_e32 v5, 0
3104 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3105 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
3106 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3107 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
3108 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3109 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3111 ; GFX10-LABEL: v_mul_284_add_82_i64:
3113 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3114 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
3115 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3116 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
3117 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[1:2]
3118 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3119 %mul = mul i64 %arg, 284
3120 %add = add i64 %mul, 82
; i64 multiply by 934584645 (0x37b4a145) followed by an add of 8234599
; (0x7da667). The multiplier is loaded with s_mov_b32 on every target.
; The instruction pattern per target matches the other i64 mul+add tests:
; gfx6 expands to mul_lo / mul_hi plus add / addc, gfx7 and gfx8 use one
; v_mad_u64_u32 plus a separate v_mul_lo_u32 and add, and gfx900, gfx90a and
; gfx10 use two v_mad_u64_u32.
3124 define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
3125 ; GFX6-LABEL: v_mul_934584645_add_8234599_i64:
3127 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3128 ; GFX6-NEXT: s_mov_b32 s4, 0x37b4a145
3129 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s4
3130 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s4
3131 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4
3132 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3133 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0x7da667, v0
3134 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3135 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3137 ; GFX7-LABEL: v_mul_934584645_add_8234599_i64:
3139 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3140 ; GFX7-NEXT: s_mov_b32 s4, 0x37b4a145
3141 ; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
3142 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7da667
3143 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
3144 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3145 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3146 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3148 ; GFX8-LABEL: v_mul_934584645_add_8234599_i64:
3150 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3151 ; GFX8-NEXT: s_mov_b32 s4, 0x37b4a145
3152 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
3153 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7da667
3154 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
3155 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3156 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3157 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3159 ; GFX900-LABEL: v_mul_934584645_add_8234599_i64:
3161 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3162 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7da667
3163 ; GFX900-NEXT: s_mov_b32 s6, 0x37b4a145
3164 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3165 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3166 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
3167 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
3168 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3170 ; GFX90A-LABEL: v_mul_934584645_add_8234599_i64:
3172 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3173 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0x7da667
3174 ; GFX90A-NEXT: s_mov_b32 s6, 0x37b4a145
3175 ; GFX90A-NEXT: v_mov_b32_e32 v5, 0
3176 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3177 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
3178 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3179 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
3180 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3181 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3183 ; GFX10-LABEL: v_mul_934584645_add_8234599_i64:
3185 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3186 ; GFX10-NEXT: s_mov_b32 s4, 0x37b4a145
3187 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3188 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x7da667
3189 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[1:2]
3190 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3191 %mul = mul i64 %arg, 934584645
3192 %add = add i64 %mul, 8234599
; Kernel test with a long chain of interleaved i32 mul / add operations built
; from the workitem id and %arg1 (several multiplies consume an "add ..., 1"
; result). The final product %i17 is stored to %arg at an index computed from
; an i64 loaded through %i18, the workgroup id, an i16 loaded 4 bytes past
; %i21, and the workitem id.
; In the expected output, gfx6/gfx7 and gfx8 use only v_mul_lo_u32 and adds for
; the arithmetic chain; gfx900, gfx90a and gfx10 additionally form
; v_mad_u64_u32 for the last steps of the chain.
3196 define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %i21, ptr addrspace(1) nocapture noundef writeonly align 4 %arg, i32 noundef %arg1) #1 {
3197 ; GFX67-LABEL: compute_mad:
3198 ; GFX67: ; %bb.0: ; %bb
3199 ; GFX67-NEXT: s_load_dword s3, s[0:1], 0x6
3200 ; GFX67-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
3201 ; GFX67-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4
3202 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
3203 ; GFX67-NEXT: s_load_dword s6, s[6:7], 0x1
3204 ; GFX67-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3205 ; GFX67-NEXT: s_add_i32 s3, s3, 1
3206 ; GFX67-NEXT: v_mul_lo_u32 v1, s3, v0
3207 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
3208 ; GFX67-NEXT: s_and_b32 s6, s6, 0xffff
3209 ; GFX67-NEXT: s_mul_i32 s2, s2, s6
3210 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, s3, v1
3211 ; GFX67-NEXT: v_mul_lo_u32 v2, v2, v0
3212 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3213 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, s2, v0
3214 ; GFX67-NEXT: v_mul_lo_u32 v3, v2, v1
3215 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
3216 ; GFX67-NEXT: s_mov_b32 s2, 0
3217 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3218 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v2
3219 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v3
3220 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v2
3221 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v3, v2
3222 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
3223 ; GFX67-NEXT: v_mov_b32_e32 v2, s5
3224 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
3225 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, v3, v1
3226 ; GFX67-NEXT: v_mul_lo_u32 v4, v3, v1
3227 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, s4, v0
3228 ; GFX67-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
3229 ; GFX67-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
3230 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v4, v3
3231 ; GFX67-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3232 ; GFX67-NEXT: s_endpgm
3234 ; GFX8-LABEL: compute_mad:
3235 ; GFX8: ; %bb.0: ; %bb
3236 ; GFX8-NEXT: s_load_dword s3, s[0:1], 0x18
3237 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
3238 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x10
3239 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3240 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3241 ; GFX8-NEXT: s_add_i32 s3, s3, 1
3242 ; GFX8-NEXT: v_mul_lo_u32 v1, s3, v0
3243 ; GFX8-NEXT: v_mov_b32_e32 v4, s1
3244 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s3, v1
3245 ; GFX8-NEXT: v_mul_lo_u32 v2, v2, v0
3246 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
3247 ; GFX8-NEXT: s_load_dword s3, s[6:7], 0x4
3248 ; GFX8-NEXT: v_mul_lo_u32 v3, v2, v1
3249 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3250 ; GFX8-NEXT: s_and_b32 s1, s3, 0xffff
3251 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3252 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
3253 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v3
3254 ; GFX8-NEXT: s_mul_i32 s2, s2, s1
3255 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2
3256 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
3257 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
3258 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
3259 ; GFX8-NEXT: v_mov_b32_e32 v2, s5
3260 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
3261 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v1
3262 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
3263 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v0
3264 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
3265 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
3266 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v3
3267 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
3268 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
3269 ; GFX8-NEXT: flat_store_dword v[0:1], v2
3270 ; GFX8-NEXT: s_endpgm
3272 ; GFX900-LABEL: compute_mad:
3273 ; GFX900: ; %bb.0: ; %bb
3274 ; GFX900-NEXT: s_load_dword s3, s[0:1], 0x18
3275 ; GFX900-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
3276 ; GFX900-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x10
3277 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3278 ; GFX900-NEXT: s_add_i32 s3, s3, 1
3279 ; GFX900-NEXT: v_mul_lo_u32 v1, s3, v0
3280 ; GFX900-NEXT: v_mov_b32_e32 v5, s9
3281 ; GFX900-NEXT: v_add_u32_e32 v2, s3, v1
3282 ; GFX900-NEXT: v_mul_lo_u32 v2, v2, v0
3283 ; GFX900-NEXT: v_add_u32_e32 v1, 1, v1
3284 ; GFX900-NEXT: s_load_dword s3, s[6:7], 0x4
3285 ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3286 ; GFX900-NEXT: v_mul_lo_u32 v3, v2, v1
3287 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3288 ; GFX900-NEXT: s_and_b32 s3, s3, 0xffff
3289 ; GFX900-NEXT: s_mul_i32 s2, s2, s3
3290 ; GFX900-NEXT: v_add_u32_e32 v1, v3, v1
3291 ; GFX900-NEXT: v_mul_lo_u32 v1, v1, v2
3292 ; GFX900-NEXT: v_add_u32_e32 v2, 1, v3
3293 ; GFX900-NEXT: v_add_u32_e32 v0, s2, v0
3294 ; GFX900-NEXT: v_mov_b32_e32 v4, s1
3295 ; GFX900-NEXT: v_mul_lo_u32 v3, v1, v2
3296 ; GFX900-NEXT: v_add_u32_e32 v2, v3, v2
3297 ; GFX900-NEXT: v_mul_lo_u32 v1, v2, v1
3298 ; GFX900-NEXT: v_mad_u64_u32 v[2:3], s[2:3], v1, v3, v[1:2]
3299 ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s0, v0
3300 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
3301 ; GFX900-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
3302 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, v1, v[2:3]
3303 ; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s8, v3
3304 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v4, vcc
3305 ; GFX900-NEXT: global_store_dword v[1:2], v0, off
3306 ; GFX900-NEXT: s_endpgm
3308 ; GFX90A-LABEL: compute_mad:
3309 ; GFX90A: ; %bb.0: ; %bb
3310 ; GFX90A-NEXT: s_load_dword s3, s[0:1], 0x18
3311 ; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
3312 ; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x10
3313 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
3314 ; GFX90A-NEXT: s_add_i32 s3, s3, 1
3315 ; GFX90A-NEXT: v_mul_lo_u32 v1, s3, v0
3316 ; GFX90A-NEXT: v_add_u32_e32 v2, s3, v1
3317 ; GFX90A-NEXT: v_mul_lo_u32 v2, v2, v0
3318 ; GFX90A-NEXT: v_add_u32_e32 v1, 1, v1
3319 ; GFX90A-NEXT: v_mul_lo_u32 v3, v2, v1
3320 ; GFX90A-NEXT: v_add_u32_e32 v1, v3, v1
3321 ; GFX90A-NEXT: v_mul_lo_u32 v1, v1, v2
3322 ; GFX90A-NEXT: v_add_u32_e32 v2, 1, v3
3323 ; GFX90A-NEXT: v_mul_lo_u32 v3, v1, v2
3324 ; GFX90A-NEXT: v_add_u32_e32 v2, v3, v2
3325 ; GFX90A-NEXT: s_load_dword s3, s[6:7], 0x4
3326 ; GFX90A-NEXT: v_mul_lo_u32 v2, v2, v1
3327 ; GFX90A-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, v3, v[2:3]
3328 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3329 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
3330 ; GFX90A-NEXT: s_and_b32 s3, s3, 0xffff
3331 ; GFX90A-NEXT: s_mul_i32 s2, s2, s3
3332 ; GFX90A-NEXT: v_add_u32_e32 v0, s2, v0
3333 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
3334 ; GFX90A-NEXT: v_mov_b32_e32 v1, s1
3335 ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
3336 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
3337 ; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
3338 ; GFX90A-NEXT: v_mov_b32_e32 v3, s9
3339 ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s8, v0
3340 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
3341 ; GFX90A-NEXT: global_store_dword v[0:1], v2, off
3342 ; GFX90A-NEXT: s_endpgm
3344 ; GFX10-LABEL: compute_mad:
3345 ; GFX10: ; %bb.0: ; %bb
3346 ; GFX10-NEXT: s_clause 0x2
3347 ; GFX10-NEXT: s_load_dword s3, s[0:1], 0x18
3348 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
3349 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x10
3350 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3351 ; GFX10-NEXT: s_add_i32 s3, s3, 1
3352 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3353 ; GFX10-NEXT: v_mul_lo_u32 v1, s3, v0
3354 ; GFX10-NEXT: v_add_nc_u32_e32 v2, s3, v1
3355 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
3356 ; GFX10-NEXT: s_load_dword s3, s[6:7], 0x4
3357 ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v0
3358 ; GFX10-NEXT: v_mul_lo_u32 v3, v2, v1
3359 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v3, v1
3360 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3361 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
3362 ; GFX10-NEXT: v_mul_lo_u32 v2, v1, v2
3363 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v3
3364 ; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1
3365 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v1
3366 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s2, s3, v[0:1]
3367 ; GFX10-NEXT: v_mul_lo_u32 v1, v3, v2
3368 ; GFX10-NEXT: v_add_co_u32 v2, s2, s4, v0
3369 ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s5, 0, s2
3370 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v1, v4, v[1:2]
3371 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
3372 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v1, v[4:5]
3373 ; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, s0, v2
3374 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, s1, v3, vcc_lo
3375 ; GFX10-NEXT: global_store_dword v[1:2], v0, off
3376 ; GFX10-NEXT: s_endpgm
3378 %i = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
3379 %i2 = add i32 %arg1, 1
3380 %i3 = mul i32 %i2, %i
3381 %i4 = add i32 %i3, %i2
3382 %i5 = mul i32 %i4, %i
3383 %i6 = add i32 %i3, 1
3384 %i7 = mul i32 %i5, %i6
3385 %i8 = add i32 %i7, %i6
3386 %i9 = mul i32 %i8, %i5
3387 %i10 = add i32 %i7, 1
3388 %i11 = mul i32 %i9, %i10
3389 %i12 = add i32 %i11, %i10
3390 %i13 = mul i32 %i12, %i9
3391 %i14 = add i32 %i11, 1
3392 %i15 = add i32 %i13, 1
3393 %i16 = mul i32 %i13, %i14
3394 %i17 = mul i32 %i16, %i15
3395 %i19 = load i64, ptr addrspace(4) %i18, align 8
3396 %i20 = tail call i32 @llvm.amdgcn.workgroup.id.x()
3397 %i22 = getelementptr i8, ptr addrspace(4) %i21, i64 4
3398 %i23 = load i16, ptr addrspace(4) %i22, align 4
3399 %i24 = zext i16 %i23 to i32
3400 %i25 = mul i32 %i20, %i24
3401 %i26 = add i32 %i25, %i
3402 %i27 = zext i32 %i26 to i64
3403 %i28 = add i64 %i19, %i27
3404 %i29 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i28
3405 store i32 %i17, ptr addrspace(1) %i29, align 4
; Scalar (inreg argument, amdgpu_ps) form of x * (y + 1).
; Every target is expected to keep this as s_add_i32 followed by s_mul_i32;
; no mad instruction appears in the expected output.
3409 define amdgpu_ps i32 @s_mul_add_1_i32(i32 inreg %x, i32 inreg %y) {
3410 ; GFX67-LABEL: s_mul_add_1_i32:
3412 ; GFX67-NEXT: s_add_i32 s1, s1, 1
3413 ; GFX67-NEXT: s_mul_i32 s0, s0, s1
3414 ; GFX67-NEXT: ; return to shader part epilog
3416 ; GFX8-LABEL: s_mul_add_1_i32:
3418 ; GFX8-NEXT: s_add_i32 s1, s1, 1
3419 ; GFX8-NEXT: s_mul_i32 s0, s0, s1
3420 ; GFX8-NEXT: ; return to shader part epilog
3422 ; GFX9-LABEL: s_mul_add_1_i32:
3424 ; GFX9-NEXT: s_add_i32 s1, s1, 1
3425 ; GFX9-NEXT: s_mul_i32 s0, s0, s1
3426 ; GFX9-NEXT: ; return to shader part epilog
3428 ; GFX10-LABEL: s_mul_add_1_i32:
3430 ; GFX10-NEXT: s_add_i32 s1, s1, 1
3431 ; GFX10-NEXT: s_mul_i32 s0, s0, s1
3432 ; GFX10-NEXT: ; return to shader part epilog
3433 %add = add i32 %y, 1
3434 %mul = mul i32 %x, %add
; Scalar (inreg argument, amdgpu_ps) form of (y + 1) * x, i.e. the multiply
; operands are swapped relative to s_mul_add_1_i32.
; Every target is still expected to emit s_add_i32 followed by s_mul_i32, with
; the s_mul_i32 source operands in the swapped order (s1, s0).
3438 define amdgpu_ps i32 @s_mul_add_1_i32_commute(i32 inreg %x, i32 inreg %y) {
3439 ; GFX67-LABEL: s_mul_add_1_i32_commute:
3441 ; GFX67-NEXT: s_add_i32 s1, s1, 1
3442 ; GFX67-NEXT: s_mul_i32 s0, s1, s0
3443 ; GFX67-NEXT: ; return to shader part epilog
3445 ; GFX8-LABEL: s_mul_add_1_i32_commute:
3447 ; GFX8-NEXT: s_add_i32 s1, s1, 1
3448 ; GFX8-NEXT: s_mul_i32 s0, s1, s0
3449 ; GFX8-NEXT: ; return to shader part epilog
3451 ; GFX9-LABEL: s_mul_add_1_i32_commute:
3453 ; GFX9-NEXT: s_add_i32 s1, s1, 1
3454 ; GFX9-NEXT: s_mul_i32 s0, s1, s0
3455 ; GFX9-NEXT: ; return to shader part epilog
3457 ; GFX10-LABEL: s_mul_add_1_i32_commute:
3459 ; GFX10-NEXT: s_add_i32 s1, s1, 1
3460 ; GFX10-NEXT: s_mul_i32 s0, s1, s0
3461 ; GFX10-NEXT: ; return to shader part epilog
3462 %add = add i32 %y, 1
3463 %mul = mul i32 %add, %x
; i8 multiply of %x by %add. The gfx6/gfx7 checks show %add as the second
; argument plus 1: they add 1 to v1, mask both operands to 8 bits and use
; v_mul_u32_u24. gfx8, gfx9 and gfx10 are expected to fold the whole
; expression into a single 16-bit mad computing x*y + x.
3467 define i8 @v_mul_add_1_i8(i8 %x, i8 %y) {
3468 ; GFX67-LABEL: v_mul_add_1_i8:
3470 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3471 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3472 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3473 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3474 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3475 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3477 ; GFX8-LABEL: v_mul_add_1_i8:
3479 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3480 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3481 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3483 ; GFX9-LABEL: v_mul_add_1_i8:
3485 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3486 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3487 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3489 ; GFX10-LABEL: v_mul_add_1_i8:
3491 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3492 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3493 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3495 %mul = mul i8 %x, %add
; Commuted form of the i8 test: the multiply is %add * %x. The gfx6/gfx7
; checks add 1 to v1, mask both operands and emit v_mul_u32_u24 with the
; operands in swapped order (v1, v0). gfx8, gfx9 and gfx10 are expected to
; emit a single 16-bit mad computing x*y + x.
3499 define i8 @v_mul_add_1_i8_commute(i8 %x, i8 %y) {
3500 ; GFX67-LABEL: v_mul_add_1_i8_commute:
3502 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3503 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3504 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3505 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3506 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3507 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3509 ; GFX8-LABEL: v_mul_add_1_i8_commute:
3511 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3512 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3513 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3515 ; GFX9-LABEL: v_mul_add_1_i8_commute:
3517 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3519 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3521 ; GFX10-LABEL: v_mul_add_1_i8_commute:
3523 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3524 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3525 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3527 %mul = mul i8 %add, %x
; i8 multiply of %x by %add with both arguments marked zeroext.
; Compared with the non-zeroext i8 test, the gfx6/gfx7 checks only mask v1
; (after adding 1 to it) and do not mask v0 before v_mul_u32_u24. gfx8, gfx9
; and gfx10 are expected to emit a single 16-bit mad computing x*y + x.
3531 define i8 @v_mul_add_1_i8_zext(i8 zeroext %x, i8 zeroext %y) {
3532 ; GFX67-LABEL: v_mul_add_1_i8_zext:
3534 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3536 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3537 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3538 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3540 ; GFX8-LABEL: v_mul_add_1_i8_zext:
3542 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3543 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3544 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3546 ; GFX9-LABEL: v_mul_add_1_i8_zext:
3548 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3549 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3550 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3552 ; GFX10-LABEL: v_mul_add_1_i8_zext:
3554 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3555 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3556 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3558 %mul = mul i8 %x, %add
; Commuted zeroext i8 test: the multiply is %add * %x with both arguments
; marked zeroext. The gfx6/gfx7 checks add 1 to v1, mask only v1, and emit
; v_mul_u32_u24 with the operands in swapped order (v1, v0). gfx8, gfx9 and
; gfx10 are expected to emit a single 16-bit mad computing x*y + x.
3562 define i8 @v_mul_add_1_i8_zext_commute(i8 zeroext %x, i8 zeroext %y) {
3563 ; GFX67-LABEL: v_mul_add_1_i8_zext_commute:
3565 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3566 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3567 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3568 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3569 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3571 ; GFX8-LABEL: v_mul_add_1_i8_zext_commute:
3573 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3574 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3575 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3577 ; GFX9-LABEL: v_mul_add_1_i8_zext_commute:
3579 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3580 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3581 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3583 ; GFX10-LABEL: v_mul_add_1_i8_zext_commute:
3585 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3586 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3587 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3589 %mul = mul i8 %add, %x
; <2 x i8> version: %x * (%y + <1, 1>).
; gfx6/gfx7 are expected to do the adds, masking and byte extraction in 32-bit
; registers and finish with two v_mul_u32_u24. gfx8, gfx9 and gfx10 use one
; 16-bit mad per element (x*y + x) and then combine the two bytes with
; v_lshlrev_b16 and an SDWA v_or_b32.
3593 define <2 x i8> @v_mul_add_1_v2i8(<2 x i8> %x, <2 x i8> %y) {
3594 ; GFX67-LABEL: v_mul_add_1_v2i8:
3596 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3597 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3598 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 8, v3
3599 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3600 ; GFX67-NEXT: v_or_b32_e32 v2, v3, v2
3601 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 0x100, v2
3602 ; GFX67-NEXT: v_bfe_u32 v3, v2, 8, 8
3603 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3604 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3605 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3606 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3607 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
3608 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3610 ; GFX8-LABEL: v_mul_add_1_v2i8:
3612 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3613 ; GFX8-NEXT: v_mad_u16 v1, v1, v3, v1
3614 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3615 ; GFX8-NEXT: v_mad_u16 v0, v0, v2, v0
3616 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3617 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
3618 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3620 ; GFX9-LABEL: v_mul_add_1_v2i8:
3622 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3623 ; GFX9-NEXT: v_mad_legacy_u16 v1, v1, v3, v1
3624 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3625 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v2, v0
3626 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3627 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
3628 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3630 ; GFX10-LABEL: v_mul_add_1_v2i8:
3632 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3633 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
3634 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
3635 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
3636 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
3637 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3638 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3639 %add = add <2 x i8> %y, <i8 1, i8 1>
3640 %mul = mul <2 x i8> %x, %add
; Commuted <2 x i8> version: (%y + <1, 1>) * %x.
; The gfx6/gfx7 checks use the same 32-bit add / mask sequence as the
; non-commuted test but emit the two v_mul_u32_u24 with swapped source
; operands. The gfx8, gfx9 and gfx10 checks are one 16-bit mad per element
; (x*y + x) followed by v_lshlrev_b16 and an SDWA v_or_b32 to combine the bytes.
3644 define <2 x i8> @v_mul_add_1_v2i8_commute(<2 x i8> %x, <2 x i8> %y) {
3645 ; GFX67-LABEL: v_mul_add_1_v2i8_commute:
3647 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3648 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3649 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 8, v3
3650 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3651 ; GFX67-NEXT: v_or_b32_e32 v2, v3, v2
3652 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 0x100, v2
3653 ; GFX67-NEXT: v_bfe_u32 v3, v2, 8, 8
3654 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3655 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3656 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3657 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
3658 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
3659 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3661 ; GFX8-LABEL: v_mul_add_1_v2i8_commute:
3663 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3664 ; GFX8-NEXT: v_mad_u16 v1, v1, v3, v1
3665 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3666 ; GFX8-NEXT: v_mad_u16 v0, v0, v2, v0
3667 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3668 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
3669 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3671 ; GFX9-LABEL: v_mul_add_1_v2i8_commute:
3673 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3674 ; GFX9-NEXT: v_mad_legacy_u16 v1, v1, v3, v1
3675 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3676 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v2, v0
3677 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3678 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
3679 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3681 ; GFX10-LABEL: v_mul_add_1_v2i8_commute:
3683 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3684 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
3685 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
3686 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
3687 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
3688 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3689 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3690 %add = add <2 x i8> %y, <i8 1, i8 1>
3691 %mul = mul <2 x i8> %add, %x
; Intrinsic declarations, attribute groups and metadata.
; compute_mad calls workitem.id.x and workgroup.id.x, carries attribute group
; #1, and attaches !0 as the !range of its workitem.id.x call. All four
; declarations below use attribute group #2.
3695 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
3696 declare i32 @llvm.amdgcn.workitem.id.x() #2
3697 declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2
3698 declare i32 @llvm.amdgcn.workgroup.id.x() #2
3700 attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
3701 attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) }
3702 attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
3704 !0 = !{i32 0, i32 1024}