1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
9 ; We want to undo these canonicalizations to enable mad matching:
10 ; (x * y) + x --> x * (y + 1)
11 ; (x * y) - x --> x * (y - 1)
; i32 case: multiplies %x by %add. The definition of %add is not visible in
; this excerpt (presumably %y + 1, going by the function name -- TODO confirm).
; The GFX67 and GFX8 checks expect v_mul_lo_u32 followed by an add; the GFX9
; and GFX10 checks expect a single v_mad_u64_u32.
13 define i32 @v_mul_add_1_i32(i32 %x, i32 %y) {
14 ; GFX67-LABEL: v_mul_add_1_i32:
16 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
18 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
19 ; GFX67-NEXT: s_setpc_b64 s[30:31]
21 ; GFX8-LABEL: v_mul_add_1_i32:
23 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
25 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
26 ; GFX8-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-LABEL: v_mul_add_1_i32:
30 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
32 ; GFX9-NEXT: s_setpc_b64 s[30:31]
34 ; GFX10-LABEL: v_mul_add_1_i32:
36 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
38 ; GFX10-NEXT: s_setpc_b64 s[30:31]
40 %mul = mul i32 %x, %add
; i32 case with %add as the left-hand multiply operand and %x on the right.
; The definition of %add is not visible in this excerpt (presumably %y + 1,
; going by the function name -- TODO confirm).
; The GFX67 and GFX8 checks expect v_mul_lo_u32 followed by an add; the GFX9
; and GFX10 checks expect a single v_mad_u64_u32.
44 define i32 @v_mul_add_1_i32_commute(i32 %x, i32 %y) {
45 ; GFX67-LABEL: v_mul_add_1_i32_commute:
47 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
49 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
50 ; GFX67-NEXT: s_setpc_b64 s[30:31]
52 ; GFX8-LABEL: v_mul_add_1_i32_commute:
54 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
56 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
57 ; GFX8-NEXT: s_setpc_b64 s[30:31]
59 ; GFX9-LABEL: v_mul_add_1_i32_commute:
61 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
63 ; GFX9-NEXT: s_setpc_b64 s[30:31]
65 ; GFX10-LABEL: v_mul_add_1_i32_commute:
67 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
69 ; GFX10-NEXT: s_setpc_b64 s[30:31]
71 %mul = mul i32 %add, %x
; i32 case written in the already-expanded form: %x + (%x * %y).
; The GFX67 and GFX8 checks expect v_mul_lo_u32 followed by an add; the GFX9
; and GFX10 checks expect a single v_mad_u64_u32.
75 define i32 @v_mul_add_x_i32(i32 %x, i32 %y) {
76 ; GFX67-LABEL: v_mul_add_x_i32:
78 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
80 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v1
81 ; GFX67-NEXT: s_setpc_b64 s[30:31]
83 ; GFX8-LABEL: v_mul_add_x_i32:
85 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
87 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
88 ; GFX8-NEXT: s_setpc_b64 s[30:31]
90 ; GFX9-LABEL: v_mul_add_x_i32:
92 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
94 ; GFX9-NEXT: s_setpc_b64 s[30:31]
96 ; GFX10-LABEL: v_mul_add_x_i32:
98 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
100 ; GFX10-NEXT: s_setpc_b64 s[30:31]
101 %mul = mul i32 %x, %y
102 %add = add i32 %x, %mul
; i32 case: multiplies %x by %sub. The definition of %sub is not visible in
; this excerpt (presumably %y - 1, going by the function name -- TODO confirm).
; Every check prefix expects the unexpanded form: an add of -1 to v1 followed
; by v_mul_lo_u32; no mad instruction is expected on any target.
106 define i32 @v_mul_sub_1_i32(i32 %x, i32 %y) {
107 ; GFX67-LABEL: v_mul_sub_1_i32:
109 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
111 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
112 ; GFX67-NEXT: s_setpc_b64 s[30:31]
114 ; GFX8-LABEL: v_mul_sub_1_i32:
116 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
118 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
119 ; GFX8-NEXT: s_setpc_b64 s[30:31]
121 ; GFX9-LABEL: v_mul_sub_1_i32:
123 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
125 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
126 ; GFX9-NEXT: s_setpc_b64 s[30:31]
128 ; GFX10-LABEL: v_mul_sub_1_i32:
130 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
132 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
133 ; GFX10-NEXT: s_setpc_b64 s[30:31]
135 %mul = mul i32 %x, %sub
; i32 case with %sub as the left-hand multiply operand and %x on the right.
; The definition of %sub is not visible in this excerpt (presumably %y - 1,
; going by the function name -- TODO confirm).
; Every check prefix expects an add of -1 to v1 followed by v_mul_lo_u32 with
; v1 as the first source operand; no mad instruction is expected.
139 define i32 @v_mul_sub_1_i32_commute(i32 %x, i32 %y) {
140 ; GFX67-LABEL: v_mul_sub_1_i32_commute:
142 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
144 ; GFX67-NEXT: v_mul_lo_u32 v0, v1, v0
145 ; GFX67-NEXT: s_setpc_b64 s[30:31]
147 ; GFX8-LABEL: v_mul_sub_1_i32_commute:
149 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
151 ; GFX8-NEXT: v_mul_lo_u32 v0, v1, v0
152 ; GFX8-NEXT: s_setpc_b64 s[30:31]
154 ; GFX9-LABEL: v_mul_sub_1_i32_commute:
156 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
158 ; GFX9-NEXT: v_mul_lo_u32 v0, v1, v0
159 ; GFX9-NEXT: s_setpc_b64 s[30:31]
161 ; GFX10-LABEL: v_mul_sub_1_i32_commute:
163 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
165 ; GFX10-NEXT: v_mul_lo_u32 v0, v1, v0
166 ; GFX10-NEXT: s_setpc_b64 s[30:31]
168 %mul = mul i32 %sub, %x
; i32 case written in the already-expanded form: (%x * %y) - %x.
; Every check prefix expects v_mul_lo_u32 followed by a subtract; no mad
; instruction is expected on any target.
172 define i32 @v_mul_sub_x_i32(i32 %x, i32 %y) {
173 ; GFX67-LABEL: v_mul_sub_x_i32:
175 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
177 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
178 ; GFX67-NEXT: s_setpc_b64 s[30:31]
180 ; GFX8-LABEL: v_mul_sub_x_i32:
182 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
184 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
185 ; GFX8-NEXT: s_setpc_b64 s[30:31]
187 ; GFX9-LABEL: v_mul_sub_x_i32:
189 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
191 ; GFX9-NEXT: v_sub_u32_e32 v0, v1, v0
192 ; GFX9-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10-LABEL: v_mul_sub_x_i32:
196 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
198 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v1, v0
199 ; GFX10-NEXT: s_setpc_b64 s[30:31]
200 %mul = mul i32 %x, %y
201 %sub = sub i32 %mul, %x
; i32 case with a constant other than 1: multiplies %x by %add. The definition
; of %add is not visible in this excerpt (presumably %y + 2, going by the
; function name and the constant in the checks -- TODO confirm).
; Every check prefix expects an add of 2 to v1 followed by v_mul_lo_u32; the
; expression is not expected to be expanded.
205 define i32 @v_mul_add_2_i32(i32 %x, i32 %y) {
206 ; GFX67-LABEL: v_mul_add_2_i32:
208 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 2, v1
210 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
211 ; GFX67-NEXT: s_setpc_b64 s[30:31]
213 ; GFX8-LABEL: v_mul_add_2_i32:
215 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 2, v1
217 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
218 ; GFX8-NEXT: s_setpc_b64 s[30:31]
220 ; GFX9-LABEL: v_mul_add_2_i32:
222 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX9-NEXT: v_add_u32_e32 v1, 2, v1
224 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
227 ; GFX10-LABEL: v_mul_add_2_i32:
229 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 2, v1
231 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
232 ; GFX10-NEXT: s_setpc_b64 s[30:31]
234 %mul = mul i32 %x, %add
; i32 case with a constant other than 1: multiplies %x by %sub. The definition
; of %sub is not visible in this excerpt (presumably %y - 2, going by the
; function name and the constant in the checks -- TODO confirm).
; Every check prefix expects an add of -2 to v1 followed by v_mul_lo_u32.
238 define i32 @v_mul_sub_2_i32(i32 %x, i32 %y) {
239 ; GFX67-LABEL: v_mul_sub_2_i32:
241 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -2, v1
243 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
244 ; GFX67-NEXT: s_setpc_b64 s[30:31]
246 ; GFX8-LABEL: v_mul_sub_2_i32:
248 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -2, v1
250 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
251 ; GFX8-NEXT: s_setpc_b64 s[30:31]
253 ; GFX9-LABEL: v_mul_sub_2_i32:
255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX9-NEXT: v_add_u32_e32 v1, -2, v1
257 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
258 ; GFX9-NEXT: s_setpc_b64 s[30:31]
260 ; GFX10-LABEL: v_mul_sub_2_i32:
262 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -2, v1
264 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
265 ; GFX10-NEXT: s_setpc_b64 s[30:31]
267 %mul = mul i32 %x, %sub
; i32 case with the constant 65: computes %x * (%y + 65).
; Every check prefix expects an add of the literal 0x41 (65) to v1 followed by
; v_mul_lo_u32; the expression is not expected to be expanded.
271 define i32 @v_mul_add_65_i32(i32 %x, i32 %y) {
272 ; GFX67-LABEL: v_mul_add_65_i32:
274 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x41, v1
276 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
277 ; GFX67-NEXT: s_setpc_b64 s[30:31]
279 ; GFX8-LABEL: v_mul_add_65_i32:
281 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x41, v1
283 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
284 ; GFX8-NEXT: s_setpc_b64 s[30:31]
286 ; GFX9-LABEL: v_mul_add_65_i32:
288 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX9-NEXT: v_add_u32_e32 v1, 0x41, v1
290 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
291 ; GFX9-NEXT: s_setpc_b64 s[30:31]
293 ; GFX10-LABEL: v_mul_add_65_i32:
295 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x41, v1
297 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
298 ; GFX10-NEXT: s_setpc_b64 s[30:31]
299 %add = add i32 %y, 65
300 %mul = mul i32 %x, %add
; i32 case with the constant 65: computes %x * (%y - 65).
; Every check prefix expects an add of the literal 0xffffffbf (-65 as a 32-bit
; two's-complement value) to v1 followed by v_mul_lo_u32.
304 define i32 @v_mul_sub_65_i32(i32 %x, i32 %y) {
305 ; GFX67-LABEL: v_mul_sub_65_i32:
307 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0xffffffbf, v1
309 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
310 ; GFX67-NEXT: s_setpc_b64 s[30:31]
312 ; GFX8-LABEL: v_mul_sub_65_i32:
314 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xffffffbf, v1
316 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
317 ; GFX8-NEXT: s_setpc_b64 s[30:31]
319 ; GFX9-LABEL: v_mul_sub_65_i32:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
323 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
324 ; GFX9-NEXT: s_setpc_b64 s[30:31]
326 ; GFX10-LABEL: v_mul_sub_65_i32:
328 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
330 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
331 ; GFX10-NEXT: s_setpc_b64 s[30:31]
332 %sub = sub i32 %y, 65
333 %mul = mul i32 %x, %sub
; i24 case with zeroext arguments and return: multiplies %x by %add. The
; definition of %add is not visible in this excerpt (presumably %y + 1, going
; by the function name -- TODO confirm).
; Every check prefix expects an add of 1 to v1 followed by v_mul_u32_u24; no
; mad instruction is expected.
337 define i24 @v_mul_add_1_i24_zext(i24 zeroext %x, i24 zeroext %y) {
338 ; GFX67-LABEL: v_mul_add_1_i24_zext:
340 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
342 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
343 ; GFX67-NEXT: s_setpc_b64 s[30:31]
345 ; GFX8-LABEL: v_mul_add_1_i24_zext:
347 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
349 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
350 ; GFX8-NEXT: s_setpc_b64 s[30:31]
352 ; GFX9-LABEL: v_mul_add_1_i24_zext:
354 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
356 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
357 ; GFX9-NEXT: s_setpc_b64 s[30:31]
359 ; GFX10-LABEL: v_mul_add_1_i24_zext:
361 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
363 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
364 ; GFX10-NEXT: s_setpc_b64 s[30:31]
366 %mul = mul i24 %x, %add
; i24 case with zeroext arguments and return: multiplies %x by %sub. The
; definition of %sub is not visible in this excerpt (presumably %y - 1, going
; by the function name -- TODO confirm).
; Every check prefix expects an add of -1 to v1 followed by v_mul_u32_u24.
370 define i24 @v_mul_sub_1_i24_zext(i24 zeroext %x, i24 zeroext %y) {
371 ; GFX67-LABEL: v_mul_sub_1_i24_zext:
373 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
375 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
376 ; GFX67-NEXT: s_setpc_b64 s[30:31]
378 ; GFX8-LABEL: v_mul_sub_1_i24_zext:
380 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
382 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
383 ; GFX8-NEXT: s_setpc_b64 s[30:31]
385 ; GFX9-LABEL: v_mul_sub_1_i24_zext:
387 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
389 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
390 ; GFX9-NEXT: s_setpc_b64 s[30:31]
392 ; GFX10-LABEL: v_mul_sub_1_i24_zext:
394 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
396 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
397 ; GFX10-NEXT: s_setpc_b64 s[30:31]
399 %mul = mul i24 %x, %sub
; i24 case with zeroext arguments, written in the already-expanded form:
; (%x * %y) + %x.
; Every check prefix expects a single v_mad_u32_u24.
403 define i24 @v_add_mul_i24_zext_1(i24 zeroext %x, i24 zeroext %y) {
404 ; GFX67-LABEL: v_add_mul_i24_zext_1:
406 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
408 ; GFX67-NEXT: s_setpc_b64 s[30:31]
410 ; GFX8-LABEL: v_add_mul_i24_zext_1:
412 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
414 ; GFX8-NEXT: s_setpc_b64 s[30:31]
416 ; GFX9-LABEL: v_add_mul_i24_zext_1:
418 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
420 ; GFX9-NEXT: s_setpc_b64 s[30:31]
422 ; GFX10-LABEL: v_add_mul_i24_zext_1:
424 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
426 ; GFX10-NEXT: s_setpc_b64 s[30:31]
427 %mul = mul i24 %x, %y
428 %add = add i24 %mul, %x
; i24 case with signext arguments and return: multiplies %x by %add. The
; definition of %add is not visible in this excerpt (presumably %y + 1, going
; by the function name -- TODO confirm).
; Every check prefix expects an add of 1 to v1 followed by v_mul_u32_u24; no
; mad instruction is expected.
432 define i24 @v_mul_add_1_i24_sext(i24 signext %x, i24 signext %y) {
433 ; GFX67-LABEL: v_mul_add_1_i24_sext:
435 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
437 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
438 ; GFX67-NEXT: s_setpc_b64 s[30:31]
440 ; GFX8-LABEL: v_mul_add_1_i24_sext:
442 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
444 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
445 ; GFX8-NEXT: s_setpc_b64 s[30:31]
447 ; GFX9-LABEL: v_mul_add_1_i24_sext:
449 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
451 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
452 ; GFX9-NEXT: s_setpc_b64 s[30:31]
454 ; GFX10-LABEL: v_mul_add_1_i24_sext:
456 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
458 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
459 ; GFX10-NEXT: s_setpc_b64 s[30:31]
461 %mul = mul i24 %x, %add
; i24 case with signext arguments, written in the already-expanded form:
; (%x * %y) + %x.
; Every check prefix expects a single v_mad_u32_u24.
465 define i24 @v_add_mul_i24_sext_1(i24 signext %x, i24 signext %y) {
466 ; GFX67-LABEL: v_add_mul_i24_sext_1:
468 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v0
470 ; GFX67-NEXT: s_setpc_b64 s[30:31]
472 ; GFX8-LABEL: v_add_mul_i24_sext_1:
474 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v0
476 ; GFX8-NEXT: s_setpc_b64 s[30:31]
478 ; GFX9-LABEL: v_add_mul_i24_sext_1:
480 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v1, v0
482 ; GFX9-NEXT: s_setpc_b64 s[30:31]
484 ; GFX10-LABEL: v_add_mul_i24_sext_1:
486 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
488 ; GFX10-NEXT: s_setpc_b64 s[30:31]
489 %mul = mul i24 %x, %y
490 %add = add i24 %mul, %x
; i24 case with signext arguments and return: multiplies %x by %sub. The
; definition of %sub is not visible in this excerpt (presumably %y - 1, going
; by the function name -- TODO confirm).
; Every check prefix expects an add of -1 to v1 followed by v_mul_u32_u24.
494 define i24 @v_mul_sub_1_i24_sext(i24 signext %x, i24 signext %y) {
495 ; GFX67-LABEL: v_mul_sub_1_i24_sext:
497 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
499 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
500 ; GFX67-NEXT: s_setpc_b64 s[30:31]
502 ; GFX8-LABEL: v_mul_sub_1_i24_sext:
504 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
505 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -1, v1
506 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
507 ; GFX8-NEXT: s_setpc_b64 s[30:31]
509 ; GFX9-LABEL: v_mul_sub_1_i24_sext:
511 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
513 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
514 ; GFX9-NEXT: s_setpc_b64 s[30:31]
516 ; GFX10-LABEL: v_mul_sub_1_i24_sext:
518 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
520 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
521 ; GFX10-NEXT: s_setpc_b64 s[30:31]
523 %mul = mul i24 %x, %sub
; i25 case (one bit wider than i24) with zeroext arguments: multiplies %x by
; %add. The definition of %add is not visible in this excerpt (presumably
; %y + 1, going by the function name -- TODO confirm).
; The GFX67 and GFX8 checks expect v_mul_lo_u32 followed by an add; the GFX9
; and GFX10 checks expect a single v_mad_u64_u32. No 24-bit multiply is used.
527 define i25 @v_mul_add_1_i25_zext(i25 zeroext %x, i25 zeroext %y) {
528 ; GFX67-LABEL: v_mul_add_1_i25_zext:
530 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
532 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
533 ; GFX67-NEXT: s_setpc_b64 s[30:31]
535 ; GFX8-LABEL: v_mul_add_1_i25_zext:
537 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
539 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
540 ; GFX8-NEXT: s_setpc_b64 s[30:31]
542 ; GFX9-LABEL: v_mul_add_1_i25_zext:
544 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
546 ; GFX9-NEXT: s_setpc_b64 s[30:31]
548 ; GFX10-LABEL: v_mul_add_1_i25_zext:
550 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
552 ; GFX10-NEXT: s_setpc_b64 s[30:31]
554 %mul = mul i25 %x, %add
; i25 case with zeroext arguments: multiplies %x by %sub. The definition of
; %sub is not visible in this excerpt (presumably %y - 1, going by the
; function name -- TODO confirm).
; Every check prefix expects an add of the literal 0x1ffffff (all 25 low bits
; set) to v1 followed by v_mul_lo_u32.
558 define i25 @v_mul_sub_1_i25_zext(i25 zeroext %x, i25 zeroext %y) {
559 ; GFX67-LABEL: v_mul_sub_1_i25_zext:
561 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x1ffffff, v1
563 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
564 ; GFX67-NEXT: s_setpc_b64 s[30:31]
566 ; GFX8-LABEL: v_mul_sub_1_i25_zext:
568 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x1ffffff, v1
570 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
571 ; GFX8-NEXT: s_setpc_b64 s[30:31]
573 ; GFX9-LABEL: v_mul_sub_1_i25_zext:
575 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX9-NEXT: v_add_u32_e32 v1, 0x1ffffff, v1
577 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
578 ; GFX9-NEXT: s_setpc_b64 s[30:31]
580 ; GFX10-LABEL: v_mul_sub_1_i25_zext:
582 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
584 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
585 ; GFX10-NEXT: s_setpc_b64 s[30:31]
587 %mul = mul i25 %x, %sub
; i25 case with signext arguments: multiplies %x by %add. The definition of
; %add is not visible in this excerpt (presumably %y + 1, going by the
; function name -- TODO confirm).
; The GFX67 and GFX8 checks expect v_mul_lo_u32 followed by an add; the GFX9
; and GFX10 checks expect a single v_mad_u64_u32.
591 define i25 @v_mul_add_1_i25_sext(i25 signext %x, i25 signext %y) {
592 ; GFX67-LABEL: v_mul_add_1_i25_sext:
594 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
596 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
597 ; GFX67-NEXT: s_setpc_b64 s[30:31]
599 ; GFX8-LABEL: v_mul_add_1_i25_sext:
601 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
603 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
604 ; GFX8-NEXT: s_setpc_b64 s[30:31]
606 ; GFX9-LABEL: v_mul_add_1_i25_sext:
608 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
610 ; GFX9-NEXT: s_setpc_b64 s[30:31]
612 ; GFX10-LABEL: v_mul_add_1_i25_sext:
614 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
616 ; GFX10-NEXT: s_setpc_b64 s[30:31]
618 %mul = mul i25 %x, %add
; i25 case with signext arguments: multiplies %x by %sub. The definition of
; %sub is not visible in this excerpt (presumably %y - 1, going by the
; function name -- TODO confirm).
; Every check prefix expects an add of the literal 0x1ffffff (all 25 low bits
; set) to v1 followed by v_mul_lo_u32.
622 define i25 @v_mul_sub_1_i25_sext(i25 signext %x, i25 signext %y) {
623 ; GFX67-LABEL: v_mul_sub_1_i25_sext:
625 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 0x1ffffff, v1
627 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
628 ; GFX67-NEXT: s_setpc_b64 s[30:31]
630 ; GFX8-LABEL: v_mul_sub_1_i25_sext:
632 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x1ffffff, v1
634 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
635 ; GFX8-NEXT: s_setpc_b64 s[30:31]
637 ; GFX9-LABEL: v_mul_sub_1_i25_sext:
639 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX9-NEXT: v_add_u32_e32 v1, 0x1ffffff, v1
641 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
642 ; GFX9-NEXT: s_setpc_b64 s[30:31]
644 ; GFX10-LABEL: v_mul_sub_1_i25_sext:
646 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
647 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
648 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
649 ; GFX10-NEXT: s_setpc_b64 s[30:31]
651 %mul = mul i25 %x, %sub
; i16 case: multiplies %x by %add. The definition of %add is not visible in
; this excerpt (presumably %y + 1, going by the function name -- TODO confirm).
; The GFX67 checks expect an add of 1, both operands masked with 0xffff, then
; v_mul_u32_u24. The GFX8 and GFX10 checks expect a single v_mad_u16; the GFX9
; checks expect a single v_mad_legacy_u16.
655 define i16 @v_mul_add_1_i16(i16 %x, i16 %y) {
656 ; GFX67-LABEL: v_mul_add_1_i16:
658 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
660 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
661 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
662 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
663 ; GFX67-NEXT: s_setpc_b64 s[30:31]
665 ; GFX8-LABEL: v_mul_add_1_i16:
667 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
669 ; GFX8-NEXT: s_setpc_b64 s[30:31]
671 ; GFX9-LABEL: v_mul_add_1_i16:
673 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
675 ; GFX9-NEXT: s_setpc_b64 s[30:31]
677 ; GFX10-LABEL: v_mul_add_1_i16:
679 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
681 ; GFX10-NEXT: s_setpc_b64 s[30:31]
683 %mul = mul i16 %x, %add
; i16 multiply of %x by %add whose result is zero-extended to i32. The
; definition of %add is not visible in this excerpt (presumably %y + 1, going
; by the function name -- TODO confirm).
; The GFX67 checks expect add, masks, v_mul_u32_u24, and a final 0xffff mask.
; The GFX8 checks expect only v_mad_u16 and the GFX9 checks only
; v_mad_legacy_u16, with no trailing mask. The GFX10 checks expect v_mad_u16
; followed by an explicit 0xffff mask.
687 define i32 @v_mul_add_1_i16_zext_result(i16 %x, i16 %y) {
688 ; GFX67-LABEL: v_mul_add_1_i16_zext_result:
690 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
692 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
693 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
694 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
695 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
696 ; GFX67-NEXT: s_setpc_b64 s[30:31]
698 ; GFX8-LABEL: v_mul_add_1_i16_zext_result:
700 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
702 ; GFX8-NEXT: s_setpc_b64 s[30:31]
704 ; GFX9-LABEL: v_mul_add_1_i16_zext_result:
706 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
708 ; GFX9-NEXT: s_setpc_b64 s[30:31]
710 ; GFX10-LABEL: v_mul_add_1_i16_zext_result:
712 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
714 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
715 ; GFX10-NEXT: s_setpc_b64 s[30:31]
717 %mul = mul i16 %x, %add
718 %zext = zext i16 %mul to i32
; i16 case with %add as the left-hand multiply operand and %x on the right.
; The definition of %add is not visible in this excerpt (presumably %y + 1,
; going by the function name -- TODO confirm).
; The GFX67 checks expect add, masks, and v_mul_u32_u24 with v1 as the first
; source. The GFX8 and GFX10 checks expect a single v_mad_u16; the GFX9 checks
; expect a single v_mad_legacy_u16.
722 define i16 @v_mul_add_1_i16_commute(i16 %x, i16 %y) {
723 ; GFX67-LABEL: v_mul_add_1_i16_commute:
725 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
726 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
727 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
728 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
729 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
730 ; GFX67-NEXT: s_setpc_b64 s[30:31]
732 ; GFX8-LABEL: v_mul_add_1_i16_commute:
734 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
736 ; GFX8-NEXT: s_setpc_b64 s[30:31]
738 ; GFX9-LABEL: v_mul_add_1_i16_commute:
740 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
741 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
742 ; GFX9-NEXT: s_setpc_b64 s[30:31]
744 ; GFX10-LABEL: v_mul_add_1_i16_commute:
746 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
748 ; GFX10-NEXT: s_setpc_b64 s[30:31]
750 %mul = mul i16 %add, %x
; i16 case written in the already-expanded form: %x + (%x * %y).
; The GFX67 checks expect both multiply operands masked with 0xffff and then a
; single v_mad_u32_u24 that adds the unmasked v0. The GFX8 and GFX10 checks
; expect a single v_mad_u16; the GFX9 checks expect a single v_mad_legacy_u16.
754 define i16 @v_mul_add_x_i16(i16 %x, i16 %y) {
755 ; GFX67-LABEL: v_mul_add_x_i16:
757 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v0
759 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
760 ; GFX67-NEXT: v_mad_u32_u24 v0, v2, v1, v0
761 ; GFX67-NEXT: s_setpc_b64 s[30:31]
763 ; GFX8-LABEL: v_mul_add_x_i16:
765 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
767 ; GFX8-NEXT: s_setpc_b64 s[30:31]
769 ; GFX9-LABEL: v_mul_add_x_i16:
771 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
773 ; GFX9-NEXT: s_setpc_b64 s[30:31]
775 ; GFX10-LABEL: v_mul_add_x_i16:
777 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
779 ; GFX10-NEXT: s_setpc_b64 s[30:31]
780 %mul = mul i16 %x, %y
781 %add = add i16 %x, %mul
; i16 case: multiplies %x by %sub. The definition of %sub is not visible in
; this excerpt (presumably %y - 1, going by the function name -- TODO confirm).
; The GFX67 checks expect an add of -1, both operands masked with 0xffff, then
; v_mul_u32_u24. The GFX8, GFX9 and GFX10 checks expect a 16-bit add of -1
; followed by a 16-bit v_mul_lo_u16; no mad instruction is expected.
785 define i16 @v_mul_sub_1_i16(i16 %x, i16 %y) {
786 ; GFX67-LABEL: v_mul_sub_1_i16:
788 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
789 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
790 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
791 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
792 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
793 ; GFX67-NEXT: s_setpc_b64 s[30:31]
795 ; GFX8-LABEL: v_mul_sub_1_i16:
797 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; GFX8-NEXT: v_add_u16_e32 v1, -1, v1
799 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
800 ; GFX8-NEXT: s_setpc_b64 s[30:31]
802 ; GFX9-LABEL: v_mul_sub_1_i16:
804 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805 ; GFX9-NEXT: v_add_u16_e32 v1, -1, v1
806 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
807 ; GFX9-NEXT: s_setpc_b64 s[30:31]
809 ; GFX10-LABEL: v_mul_sub_1_i16:
811 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
812 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
813 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
814 ; GFX10-NEXT: s_setpc_b64 s[30:31]
816 %mul = mul i16 %x, %sub
; i16 case with %sub as the left-hand multiply operand and %x on the right.
; The definition of %sub is not visible in this excerpt (presumably %y - 1,
; going by the function name -- TODO confirm).
; The GFX67 checks expect an add of -1, masks, then v_mul_u32_u24 with v1 as
; the first source. The GFX8, GFX9 and GFX10 checks expect a 16-bit add of -1
; followed by v_mul_lo_u16 with v1 as the first source.
820 define i16 @v_mul_sub_1_i16_commute(i16 %x, i16 %y) {
821 ; GFX67-LABEL: v_mul_sub_1_i16_commute:
823 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
824 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -1, v1
825 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
826 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
827 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
828 ; GFX67-NEXT: s_setpc_b64 s[30:31]
830 ; GFX8-LABEL: v_mul_sub_1_i16_commute:
832 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
833 ; GFX8-NEXT: v_add_u16_e32 v1, -1, v1
834 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v1, v0
835 ; GFX8-NEXT: s_setpc_b64 s[30:31]
837 ; GFX9-LABEL: v_mul_sub_1_i16_commute:
839 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840 ; GFX9-NEXT: v_add_u16_e32 v1, -1, v1
841 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v1, v0
842 ; GFX9-NEXT: s_setpc_b64 s[30:31]
844 ; GFX10-LABEL: v_mul_sub_1_i16_commute:
846 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
848 ; GFX10-NEXT: v_mul_lo_u16 v0, v1, v0
849 ; GFX10-NEXT: s_setpc_b64 s[30:31]
851 %mul = mul i16 %sub, %x
; i16 case written in the already-expanded form: (%x * %y) - %x.
; The GFX67 checks expect both multiply operands masked with 0xffff, then
; v_mul_u32_u24 and a 32-bit subtract of the unmasked v0. The GFX8, GFX9 and
; GFX10 checks expect a 16-bit multiply followed by a 16-bit subtract.
855 define i16 @v_mul_sub_x_i16(i16 %x, i16 %y) {
856 ; GFX67-LABEL: v_mul_sub_x_i16:
858 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v0
860 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
861 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v2, v1
862 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
863 ; GFX67-NEXT: s_setpc_b64 s[30:31]
865 ; GFX8-LABEL: v_mul_sub_x_i16:
867 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868 ; GFX8-NEXT: v_mul_lo_u16_e32 v1, v0, v1
869 ; GFX8-NEXT: v_sub_u16_e32 v0, v1, v0
870 ; GFX8-NEXT: s_setpc_b64 s[30:31]
872 ; GFX9-LABEL: v_mul_sub_x_i16:
874 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
875 ; GFX9-NEXT: v_mul_lo_u16_e32 v1, v0, v1
876 ; GFX9-NEXT: v_sub_u16_e32 v0, v1, v0
877 ; GFX9-NEXT: s_setpc_b64 s[30:31]
879 ; GFX10-LABEL: v_mul_sub_x_i16:
881 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882 ; GFX10-NEXT: v_mul_lo_u16 v1, v0, v1
883 ; GFX10-NEXT: v_sub_nc_u16 v0, v1, v0
884 ; GFX10-NEXT: s_setpc_b64 s[30:31]
885 %mul = mul i16 %x, %y
886 %sub = sub i16 %mul, %x
; i16 case with a constant other than 1: multiplies %x by %add. The definition
; of %add is not visible in this excerpt (presumably %y + 2, going by the
; function name and the constant in the checks -- TODO confirm).
; The GFX67 checks expect an add of 2, both operands masked with 0xffff, then
; v_mul_u32_u24. The GFX8, GFX9 and GFX10 checks expect a 16-bit add of 2
; followed by v_mul_lo_u16; no mad instruction is expected.
890 define i16 @v_mul_add_2_i16(i16 %x, i16 %y) {
891 ; GFX67-LABEL: v_mul_add_2_i16:
893 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 2, v1
895 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
896 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
897 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
898 ; GFX67-NEXT: s_setpc_b64 s[30:31]
900 ; GFX8-LABEL: v_mul_add_2_i16:
902 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX8-NEXT: v_add_u16_e32 v1, 2, v1
904 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
905 ; GFX8-NEXT: s_setpc_b64 s[30:31]
907 ; GFX9-LABEL: v_mul_add_2_i16:
909 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910 ; GFX9-NEXT: v_add_u16_e32 v1, 2, v1
911 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
912 ; GFX9-NEXT: s_setpc_b64 s[30:31]
914 ; GFX10-LABEL: v_mul_add_2_i16:
916 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917 ; GFX10-NEXT: v_add_nc_u16 v1, v1, 2
918 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
919 ; GFX10-NEXT: s_setpc_b64 s[30:31]
921 %mul = mul i16 %x, %add
; i16 case with a constant other than 1: multiplies %x by %sub. The definition
; of %sub is not visible in this excerpt (presumably %y - 2, going by the
; function name and the constant in the checks -- TODO confirm).
; The GFX67 checks expect an add of -2, both operands masked with 0xffff, then
; v_mul_u32_u24. The GFX8, GFX9 and GFX10 checks expect a 16-bit add of -2
; followed by v_mul_lo_u16.
925 define i16 @v_mul_sub_2_i16(i16 %x, i16 %y) {
926 ; GFX67-LABEL: v_mul_sub_2_i16:
928 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, -2, v1
930 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
931 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
932 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
933 ; GFX67-NEXT: s_setpc_b64 s[30:31]
935 ; GFX8-LABEL: v_mul_sub_2_i16:
937 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
938 ; GFX8-NEXT: v_add_u16_e32 v1, -2, v1
939 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
940 ; GFX8-NEXT: s_setpc_b64 s[30:31]
942 ; GFX9-LABEL: v_mul_sub_2_i16:
944 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945 ; GFX9-NEXT: v_add_u16_e32 v1, -2, v1
946 ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
947 ; GFX9-NEXT: s_setpc_b64 s[30:31]
949 ; GFX10-LABEL: v_mul_sub_2_i16:
951 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -2
953 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
954 ; GFX10-NEXT: s_setpc_b64 s[30:31]
956 %mul = mul i16 %x, %sub
; i64 case: computes %x * (%y + 1). The gfx600 and gfx700 expectations differ
; here, so the separate GFX6 and GFX7 prefixes are used instead of GFX67.
; The GFX6 checks expect three v_mul_lo_u32 and one v_mul_hi_u32, with the
; final v_add_i32 / v_addc_u32 pair adding v0 and v1 into the product.
; The GFX7 and GFX8 checks expect one v_mad_u64_u32 with v[0:1] as the addend,
; two v_mul_lo_u32, and two adds into v1. The GFX9 and GFX10 checks expect the
; same shape with the two adds merged into one v_add3_u32.
960 define i64 @v_mul_add_1_i64(i64 %x, i64 %y) {
961 ; GFX6-LABEL: v_mul_add_1_i64:
963 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
965 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
966 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
967 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
968 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
969 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
970 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
971 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
972 ; GFX6-NEXT: s_setpc_b64 s[30:31]
974 ; GFX7-LABEL: v_mul_add_1_i64:
976 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
978 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
979 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
980 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
981 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
982 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
983 ; GFX7-NEXT: s_setpc_b64 s[30:31]
985 ; GFX8-LABEL: v_mul_add_1_i64:
987 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
989 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
990 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
991 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
992 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
993 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
994 ; GFX8-NEXT: s_setpc_b64 s[30:31]
996 ; GFX9-LABEL: v_mul_add_1_i64:
998 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
999 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1000 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
1001 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1002 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
1003 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1004 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1006 ; GFX10-LABEL: v_mul_add_1_i64:
1008 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1010 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1011 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1012 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1013 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1014 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1015 %add = add i64 %y, 1
1016 %mul = mul i64 %x, %add
; i64 case with the multiply operands swapped: computes (%y + 1) * %x. The
; gfx600 and gfx700 expectations differ here, so the separate GFX6 and GFX7
; prefixes are used instead of GFX67.
; The GFX6 checks expect three v_mul_lo_u32 and one v_mul_hi_u32, with the
; final v_add_i32 / v_addc_u32 pair adding v0 and v1 into the product.
; The GFX7 and GFX8 checks expect one v_mad_u64_u32 with v[0:1] as the addend,
; two v_mul_lo_u32, and two adds into v1. The GFX9 and GFX10 checks expect the
; same shape with the two adds merged into one v_add3_u32.
1020 define i64 @v_mul_add_1_i64_commute(i64 %x, i64 %y) {
1021 ; GFX6-LABEL: v_mul_add_1_i64_commute:
1023 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1025 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1026 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1027 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1028 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1029 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1030 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1031 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
1032 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1034 ; GFX7-LABEL: v_mul_add_1_i64_commute:
1036 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1038 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
1039 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
1040 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
1041 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
1042 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
1043 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1045 ; GFX8-LABEL: v_mul_add_1_i64_commute:
1047 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1049 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
1050 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
1051 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
1052 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
1053 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
1054 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1056 ; GFX9-LABEL: v_mul_add_1_i64_commute:
1058 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1059 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1060 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
1061 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1062 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
1063 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1064 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1066 ; GFX10-LABEL: v_mul_add_1_i64_commute:
1068 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1070 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1071 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1072 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1073 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1074 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1075 %add = add i64 %y, 1
1076 %mul = mul i64 %add, %x
; i64 x + (x * y): the input is already written as a multiply followed by an
; add of x, rather than as x * (y + 1).
; The gfx600 checks multiply and then add with v_add/v_addc; the other
; targets use v_mad_u64_u32 with v[0:1] as the addend for the low product.
1080 define i64 @v_mul_add_x_i64(i64 %x, i64 %y) {
1081 ; GFX6-LABEL: v_mul_add_x_i64:
1083 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1085 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1086 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1087 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1088 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1089 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1090 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1091 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
1092 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1094 ; GFX7-LABEL: v_mul_add_x_i64:
1096 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1097 ; GFX7-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1098 ; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2
1099 ; GFX7-NEXT: v_mul_lo_u32 v0, v0, v3
1100 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v5
1101 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v0, v1
1102 ; GFX7-NEXT: v_mov_b32_e32 v0, v4
1103 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1105 ; GFX8-LABEL: v_mul_add_x_i64:
1107 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1109 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
1110 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v3
1111 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
1112 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
1113 ; GFX8-NEXT: v_mov_b32_e32 v0, v4
1114 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1116 ; GFX9-LABEL: v_mul_add_x_i64:
1118 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1120 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v3
1121 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1122 ; GFX9-NEXT: v_add3_u32 v1, v1, v5, v0
1123 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1124 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1126 ; GFX10-LABEL: v_mul_add_x_i64:
1128 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1130 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
1131 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
1132 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0
1133 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1134 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1135 %mul = mul i64 %x, %y
1136 %add = add i64 %x, %mul
; i64 x * (y - 1).
; Every expected sequence still computes y - 1 up front (an add of -1 with
; carry into the high half) and multiplies afterwards; on the targets that
; use v_mad_u64_u32 its addend is the constant 0.
1140 define i64 @v_mul_sub_1_i64(i64 %x, i64 %y) {
1141 ; GFX6-LABEL: v_mul_sub_1_i64:
1143 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1145 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1146 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1147 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1148 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1149 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1150 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1151 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1152 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1154 ; GFX7-LABEL: v_mul_sub_1_i64:
1156 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1158 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1159 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1160 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1161 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1162 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1163 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1164 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1165 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1167 ; GFX8-LABEL: v_mul_sub_1_i64:
1169 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1170 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
1171 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1172 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1173 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1174 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1175 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1176 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1177 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1178 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1180 ; GFX9-LABEL: v_mul_sub_1_i64:
1182 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -1, v2
1184 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1185 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1186 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1187 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1188 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1189 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1191 ; GFX10-LABEL: v_mul_sub_1_i64:
1193 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
1195 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1196 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1197 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1198 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1199 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1200 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1201 %sub = sub i64 %y, 1
1202 %mul = mul i64 %x, %sub
; i64 (y - 1) * x, i.e. the subtract result is the first multiply operand.
; As in the non-commuted form, every expected sequence computes y - 1 first
; (add of -1 with carry) and then multiplies; the v_mad_u64_u32 addend is 0.
1206 define i64 @v_mul_sub_1_i64_commute(i64 %x, i64 %y) {
1207 ; GFX6-LABEL: v_mul_sub_1_i64_commute:
1209 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1210 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1211 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1212 ; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1
1213 ; GFX6-NEXT: v_mul_hi_u32 v4, v2, v0
1214 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, v0
1215 ; GFX6-NEXT: v_mul_lo_u32 v0, v2, v0
1216 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v4, v1
1217 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1218 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1220 ; GFX7-LABEL: v_mul_sub_1_i64_commute:
1222 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223 ; GFX7-NEXT: v_mov_b32_e32 v4, v0
1224 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, -1, v2
1225 ; GFX7-NEXT: v_addc_u32_e32 v2, vcc, -1, v3, vcc
1226 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1227 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
1228 ; GFX7-NEXT: v_mul_lo_u32 v2, v2, v4
1229 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1230 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1231 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1233 ; GFX8-LABEL: v_mul_sub_1_i64_commute:
1235 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
1237 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, -1, v2
1238 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, -1, v3, vcc
1239 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1240 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
1241 ; GFX8-NEXT: v_mul_lo_u32 v2, v2, v4
1242 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1243 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1244 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1246 ; GFX9-LABEL: v_mul_sub_1_i64_commute:
1248 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -1, v2
1250 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1251 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, v0
1252 ; GFX9-NEXT: v_mul_lo_u32 v4, v2, v1
1253 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0
1254 ; GFX9-NEXT: v_add3_u32 v1, v1, v4, v3
1255 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1257 ; GFX10-LABEL: v_mul_sub_1_i64_commute:
1259 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
1261 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1262 ; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1
1263 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, v0
1264 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, 0
1265 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v3
1266 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1267 %sub = sub i64 %y, 1
1268 %mul = mul i64 %sub, %x
; i64 (x * y) - x: the input is already a multiply followed by a subtract
; of x.
; All expected sequences form the full 64-bit product first and then subtract
; v[0:1] with a sub / sub-with-borrow pair.
1272 define i64 @v_mul_sub_x_i64(i64 %x, i64 %y) {
1273 ; GFX6-LABEL: v_mul_sub_x_i64:
1275 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1277 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1278 ; GFX6-NEXT: v_mul_lo_u32 v5, v1, v2
1279 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
1280 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1281 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1282 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
1283 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc
1284 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1286 ; GFX7-LABEL: v_mul_sub_x_i64:
1288 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289 ; GFX7-NEXT: v_mul_lo_u32 v5, v0, v3
1290 ; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v2, 0
1291 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, v2
1292 ; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1293 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, v4, v2
1294 ; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v3, v0
1295 ; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
1296 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1298 ; GFX8-LABEL: v_mul_sub_x_i64:
1300 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301 ; GFX8-NEXT: v_mul_lo_u32 v5, v0, v3
1302 ; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v2, 0
1303 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, v2
1304 ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5
1305 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2
1306 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v3, v0
1307 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
1308 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX9-LABEL: v_mul_sub_x_i64:
1312 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1314 ; GFX9-NEXT: v_mul_lo_u32 v5, v0, v3
1315 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, 0
1316 ; GFX9-NEXT: v_add3_u32 v3, v3, v5, v4
1317 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1318 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
1319 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1321 ; GFX10-LABEL: v_mul_sub_x_i64:
1323 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1325 ; GFX10-NEXT: v_mul_lo_u32 v5, v0, v3
1326 ; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, v0, v2, 0
1327 ; GFX10-NEXT: v_add3_u32 v3, v3, v5, v4
1328 ; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v2, v0
1329 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
1330 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1331 %mul = mul i64 %x, %y
1332 %sub = sub i64 %mul, %x
; i64 x * (y + 2). The constant is 2 rather than 1, so this is not one of the
; two patterns listed at the top of the file.
; Every expected sequence keeps the add of 2 (with carry) ahead of the
; multiply; where v_mad_u64_u32 is used its addend is 0.
1336 define i64 @v_mul_add_2_i64(i64 %x, i64 %y) {
1337 ; GFX6-LABEL: v_mul_add_2_i64:
1339 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1340 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1341 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1342 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1343 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1344 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1345 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1346 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1347 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1348 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1350 ; GFX7-LABEL: v_mul_add_2_i64:
1352 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1354 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1355 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1356 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1357 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1358 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1359 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1360 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1361 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1363 ; GFX8-LABEL: v_mul_add_2_i64:
1365 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1366 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
1367 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1368 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1369 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1370 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1371 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1372 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1373 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1374 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1376 ; GFX9-LABEL: v_mul_add_2_i64:
1378 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v2
1380 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
1381 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1382 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1383 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1384 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1385 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1387 ; GFX10-LABEL: v_mul_add_2_i64:
1389 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1390 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 2
1391 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1392 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1393 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1394 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1395 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1396 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1397 %add = add i64 %y, 2
1398 %mul = mul i64 %x, %add
; i64 x * (y - 2). The constant is 2 rather than 1, so this is not one of the
; two patterns listed at the top of the file.
; Every expected sequence keeps the add of -2 (with carry) ahead of the
; multiply; where v_mad_u64_u32 is used its addend is 0.
1402 define i64 @v_mul_sub_2_i64(i64 %x, i64 %y) {
1403 ; GFX6-LABEL: v_mul_sub_2_i64:
1405 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1406 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1407 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, -1, v3, vcc
1408 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, v3
1409 ; GFX6-NEXT: v_mul_hi_u32 v4, v0, v2
1410 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1411 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v2
1412 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v3
1413 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1414 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1416 ; GFX7-LABEL: v_mul_sub_2_i64:
1418 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1420 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
1421 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1422 ; GFX7-NEXT: v_mul_lo_u32 v3, v0, v1
1423 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1424 ; GFX7-NEXT: v_mul_lo_u32 v2, v4, v2
1425 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
1426 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1427 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1429 ; GFX8-LABEL: v_mul_sub_2_i64:
1431 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
1433 ; GFX8-NEXT: v_mov_b32_e32 v4, v1
1434 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v3, vcc
1435 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1436 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1437 ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v2
1438 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1439 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1440 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1442 ; GFX9-LABEL: v_mul_sub_2_i64:
1444 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, -2, v2
1446 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
1447 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, v2
1448 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3
1449 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
1450 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4
1451 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1453 ; GFX10-LABEL: v_mul_sub_2_i64:
1455 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -2
1457 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
1458 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
1459 ; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3
1460 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0
1461 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4
1462 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1463 %sub = sub i64 %y, 2
1464 %mul = mul i64 %x, %sub
; i32 case where the same (y + 1) value feeds two multiplies, x * (y + 1) and
; z * (y + 1); both products are returned as the two lanes of a <2 x i32>.
; No expected sequence contains an add of the constant 1. The gfx600/gfx700
; and gfx803 checks use two v_mul_lo_u32 each followed by an add of the other
; factor; the gfx9 and gfx10 checks use two v_mad_u64_u32.
1468 define <2 x i32> @v_mul_add_1_i32_multiple(i32 %x, i32 %y, i32 %z) {
1469 ; GFX67-LABEL: v_mul_add_1_i32_multiple:
1471 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; GFX67-NEXT: v_mul_lo_u32 v3, v0, v1
1473 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
1474 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1475 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1476 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1478 ; GFX8-LABEL: v_mul_add_1_i32_multiple:
1480 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1481 ; GFX8-NEXT: v_mul_lo_u32 v3, v0, v1
1482 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
1483 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
1484 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
1485 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1487 ; GFX900-LABEL: v_mul_add_1_i32_multiple:
1489 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; GFX900-NEXT: v_mov_b32_e32 v3, v1
1491 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
1492 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v3, v[2:3]
1493 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1495 ; GFX90A-LABEL: v_mul_add_1_i32_multiple:
1497 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1498 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1
1499 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
1500 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v3, v[2:3]
1501 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
1502 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1504 ; GFX10-LABEL: v_mul_add_1_i32_multiple:
1506 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
1508 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
1509 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, v3, v[2:3]
1510 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1511 %add = add i32 %y, 1
1512 %mul0 = mul i32 %x, %add
1513 %mul1 = mul i32 %z, %add
1514 %insert.0 = insertelement <2 x i32> poison, i32 %mul0, i32 0
1515 %insert.1 = insertelement <2 x i32> %insert.0, i32 %mul1, i32 1
1516 ret <2 x i32> %insert.1
; i32 case where (y + 1) has a use other than a multiply: lane 0 of the
; result is x * (y + 1) and lane 1 is the (y + 1) value itself. %mul1 is
; defined but is not referenced by the returned vector.
; Every expected sequence keeps the explicit add of 1 and emits a single
; v_mul_lo_u32.
1519 define <2 x i32> @v_mul_add_1_i32_other_use(i32 %x, i32 %y, i32 %z) {
1520 ; GFX67-LABEL: v_mul_add_1_i32_other_use:
1522 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1524 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v1
1525 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1527 ; GFX8-LABEL: v_mul_add_1_i32_other_use:
1529 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
1531 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
1532 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1534 ; GFX9-LABEL: v_mul_add_1_i32_other_use:
1536 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
1538 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
1539 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1541 ; GFX10-LABEL: v_mul_add_1_i32_other_use:
1543 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1544 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
1545 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
1546 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1547 %add = add i32 %y, 1
1548 %mul0 = mul i32 %x, %add
1549 %mul1 = mul i32 %z, %add
1550 %insert.0 = insertelement <2 x i32> poison, i32 %mul0, i32 0
1551 %insert.1 = insertelement <2 x i32> %insert.0, i32 %add, i32 1
1552 ret <2 x i32> %insert.1
; Chained i32 case mixing several add-of-1 and multiply steps:
;   %i2 = arg0 + 1      (used by both a multiply and a later add)
;   %i3 = %i2 * arg1
;   %i4 = %i3 + %i2
;   %i5 = %i4 * arg0
;   %i6 = %i3 + 1
;   %i7 = %i5 * %i6
; In every expected sequence the first add of 1 stays explicit. The final
; multiply is checked as v_mul_lo_u32 plus an add on gfx600/gfx700/gfx803 and
; as a single v_mad_u64_u32 on gfx9/gfx10.
1555 define i32 @v_mul_add_1_i32_chain(i32 %arg0, i32 %arg1, i32 %arg2) {
1556 ; GFX67-LABEL: v_mul_add_1_i32_chain:
1558 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1559 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1560 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
1561 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v1, v2
1562 ; GFX67-NEXT: v_mul_lo_u32 v0, v2, v0
1563 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
1564 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1565 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1567 ; GFX8-LABEL: v_mul_add_1_i32_chain:
1569 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1570 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
1571 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
1572 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v1, v2
1573 ; GFX8-NEXT: v_mul_lo_u32 v0, v2, v0
1574 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1575 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
1576 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1578 ; GFX9-LABEL: v_mul_add_1_i32_chain:
1580 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
1582 ; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
1583 ; GFX9-NEXT: v_add_u32_e32 v2, v1, v2
1584 ; GFX9-NEXT: v_mul_lo_u32 v0, v2, v0
1585 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[0:1]
1586 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1588 ; GFX10-LABEL: v_mul_add_1_i32_chain:
1590 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
1592 ; GFX10-NEXT: v_mul_lo_u32 v1, v2, v1
1593 ; GFX10-NEXT: v_add_nc_u32_e32 v2, v1, v2
1594 ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0
1595 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
1596 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1597 %i2 = add i32 %arg0, 1
1598 %i3 = mul i32 %i2, %arg1
1599 %i4 = add i32 %i3, %i2
1600 %i5 = mul i32 %i4, %arg0
1601 %i6 = add i32 %i3, 1
1602 %i7 = mul i32 %i5, %i6
; <2 x i16> x * (y + splat(1)).
; Expected code differs by target: the gfx600/gfx700 checks add 1 to each
; element, mask to 16 bits and use v_mul_u32_u24; the gfx803 checks use one
; v_mad_u16 per half with x as the addend; the gfx9/gfx10 checks keep a
; v_pk_add_u16 followed by v_pk_mul_lo_u16.
1605 define <2 x i16> @v_mul_add_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
1606 ; GFX67-LABEL: v_mul_add_1_v2i16:
1608 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1609 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1610 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1611 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1612 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1613 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1614 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1615 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1616 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1617 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1619 ; GFX8-LABEL: v_mul_add_1_v2i16:
1621 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1623 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1624 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1625 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1626 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1627 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1628 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1630 ; GFX9-LABEL: v_mul_add_1_v2i16:
1632 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633 ; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1634 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1635 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1637 ; GFX10-LABEL: v_mul_add_1_v2i16:
1639 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640 ; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1641 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1642 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1643 %add = add <2 x i16> %y, <i16 1, i16 1>
1644 %mul = mul <2 x i16> %x, %add
; <2 x i16> (y + splat(1)) * x, with the add as the first multiply operand.
; Expected code differs by target: the gfx600/gfx700 checks add 1 to each
; element, mask to 16 bits and use v_mul_u32_u24; the gfx803 checks use one
; v_mad_u16 per half with x as the addend; the gfx9/gfx10 checks keep a
; v_pk_add_u16 followed by v_pk_mul_lo_u16.
1648 define <2 x i16> @v_mul_add_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
1649 ; GFX67-LABEL: v_mul_add_1_v2i16_commute:
1651 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1653 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1654 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1655 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1656 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1657 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1658 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
1659 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
1660 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1662 ; GFX8-LABEL: v_mul_add_1_v2i16_commute:
1664 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1666 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1667 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1668 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1669 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1670 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1671 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1673 ; GFX9-LABEL: v_mul_add_1_v2i16_commute:
1675 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676 ; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1677 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1678 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1680 ; GFX10-LABEL: v_mul_add_1_v2i16_commute:
1682 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683 ; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1684 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1685 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1686 %add = add <2 x i16> %y, <i16 1, i16 1>
1687 %mul = mul <2 x i16> %add, %x
; <2 x i16> x + (x * y): the input is already a multiply followed by an add
; of x.
; Every target is checked for a multiply-add form: v_mad_u32_u24 per element
; on gfx600/gfx700, v_mad_u16 per half on gfx803, and a single v_pk_mad_u16
; on gfx9/gfx10.
1691 define <2 x i16> @v_mul_add_x_v2i16(<2 x i16> %x, <2 x i16> %y) {
1692 ; GFX67-LABEL: v_mul_add_x_v2i16:
1694 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
1696 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1697 ; GFX67-NEXT: v_and_b32_e32 v5, 0xffff, v1
1698 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1699 ; GFX67-NEXT: v_mad_u32_u24 v1, v5, v3, v1
1700 ; GFX67-NEXT: v_mad_u32_u24 v0, v4, v2, v0
1701 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
1702 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1703 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
1704 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1705 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1707 ; GFX8-LABEL: v_mul_add_x_v2i16:
1709 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1710 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1711 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1712 ; GFX8-NEXT: v_mad_u16 v2, v3, v2, v3
1713 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1714 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
1715 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1716 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1718 ; GFX9-LABEL: v_mul_add_x_v2i16:
1720 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721 ; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, v0
1722 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1724 ; GFX10-LABEL: v_mul_add_x_v2i16:
1726 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727 ; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, v0
1728 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1729 %mul = mul <2 x i16> %x, %y
1730 %add = add <2 x i16> %x, %mul
; <2 x i16> x * (y - splat(1)).
; Every expected sequence computes y - 1 first and multiplies afterwards:
; adds of -1 on gfx600/gfx700 and gfx803, v_pk_sub_i16 on gfx9/gfx10.
1734 define <2 x i16> @v_mul_sub_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
1735 ; GFX67-LABEL: v_mul_sub_1_v2i16:
1737 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1739 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
1740 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1741 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1742 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1743 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1744 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1745 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1746 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1748 ; GFX8-LABEL: v_mul_sub_1_v2i16:
1750 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1751 ; GFX8-NEXT: v_mov_b32_e32 v3, -1
1752 ; GFX8-NEXT: v_add_u16_e32 v2, -1, v1
1753 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1754 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1755 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1756 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1757 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1759 ; GFX9-LABEL: v_mul_sub_1_v2i16:
1761 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1763 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1764 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1766 ; GFX10-LABEL: v_mul_sub_1_v2i16:
1768 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1769 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1770 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1771 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1772 %sub = sub <2 x i16> %y, <i16 1, i16 1>
1773 %mul = mul <2 x i16> %x, %sub
; <2 x i16> (y - splat(1)) * x, with the subtract as the first multiply
; operand.
; Every expected sequence computes y - 1 first and multiplies afterwards:
; adds of -1 on gfx600/gfx700 and gfx803, v_pk_sub_i16 on gfx9/gfx10.
1777 define <2 x i16> @v_mul_sub_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
1778 ; GFX67-LABEL: v_mul_sub_1_v2i16_commute:
1780 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
1782 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
1783 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1784 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1785 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1786 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1787 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
1788 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
1789 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1791 ; GFX8-LABEL: v_mul_sub_1_v2i16_commute:
1793 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794 ; GFX8-NEXT: v_mov_b32_e32 v3, -1
1795 ; GFX8-NEXT: v_add_u16_e32 v2, -1, v1
1796 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1797 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1798 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v2, v0
1799 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1800 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1802 ; GFX9-LABEL: v_mul_sub_1_v2i16_commute:
1804 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1805 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1806 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1807 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1809 ; GFX10-LABEL: v_mul_sub_1_v2i16_commute:
1811 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
1813 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
1814 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1815 %sub = sub <2 x i16> %y, <i16 1, i16 1>
1816 %mul = mul <2 x i16> %sub, %x
; <2 x i16> (x * y) - x: the input is already a multiply followed by a
; subtract of x.
; Every expected sequence performs the multiply and then a separate subtract
; of x (v_sub_i32 on gfx600/gfx700, v_sub_u16 on gfx803, v_pk_sub_i16 on
; gfx9/gfx10).
1820 define <2 x i16> @v_mul_sub_x_v2i16(<2 x i16> %x, <2 x i16> %y) {
1821 ; GFX67-LABEL: v_mul_sub_x_v2i16:
1823 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
1825 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1826 ; GFX67-NEXT: v_mul_u32_u24_e32 v2, v4, v2
1827 ; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v1
1828 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1829 ; GFX67-NEXT: v_mul_u32_u24_e32 v3, v4, v3
1830 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1831 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
1832 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
1833 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1834 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
1835 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1836 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1838 ; GFX8-LABEL: v_mul_sub_x_v2i16:
1840 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; GFX8-NEXT: v_mul_lo_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1842 ; GFX8-NEXT: v_mul_lo_u16_e32 v1, v0, v1
1843 ; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1844 ; GFX8-NEXT: v_sub_u16_e32 v0, v1, v0
1845 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1846 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1848 ; GFX9-LABEL: v_mul_sub_x_v2i16:
1850 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851 ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1852 ; GFX9-NEXT: v_pk_sub_i16 v0, v1, v0
1853 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1855 ; GFX10-LABEL: v_mul_sub_x_v2i16:
1857 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1858 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
1859 ; GFX10-NEXT: v_pk_sub_i16 v0, v1, v0
1860 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1861 %mul = mul <2 x i16> %x, %y
1862 %sub = sub <2 x i16> %mul, %x
; <2 x i16> x * (y + splat(2)). The constant is 2 rather than 1, so this is
; not one of the two patterns listed at the top of the file.
; Every expected sequence keeps the add of 2 ahead of the multiply.
1866 define <2 x i16> @v_mul_add_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
1867 ; GFX67-LABEL: v_mul_add_2_v2i16:
1869 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
1871 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
1872 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1873 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1874 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1875 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1876 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1877 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1878 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1880 ; GFX8-LABEL: v_mul_add_2_v2i16:
1882 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883 ; GFX8-NEXT: v_mov_b32_e32 v3, 2
1884 ; GFX8-NEXT: v_add_u16_e32 v2, 2, v1
1885 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1886 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1887 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1888 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1889 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1891 ; GFX9-LABEL: v_mul_add_2_v2i16:
1893 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1894 ; GFX9-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
1895 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1896 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1898 ; GFX10-LABEL: v_mul_add_2_v2i16:
1900 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1901 ; GFX10-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
1902 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1903 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1904 %add = add <2 x i16> %y, <i16 2, i16 2>
1905 %mul = mul <2 x i16> %x, %add
; <2 x i16> case with a splat constant of 2: %x * (%y - 2).
; GFX6/7/8 checks show the subtract as an add of -2; GFX9/GFX10 checks use v_pk_sub_i16.
1909 define <2 x i16> @v_mul_sub_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
1910 ; GFX67-LABEL: v_mul_sub_2_v2i16:
1912 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1913 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
1914 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
1915 ; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
1916 ; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
1917 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
1918 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
1919 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1920 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1921 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1923 ; GFX8-LABEL: v_mul_sub_2_v2i16:
1925 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926 ; GFX8-NEXT: v_mov_b32_e32 v3, -2
1927 ; GFX8-NEXT: v_add_u16_e32 v2, -2, v1
1928 ; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1929 ; GFX8-NEXT: v_mul_lo_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1930 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1931 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1932 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1934 ; GFX9-LABEL: v_mul_sub_2_v2i16:
1936 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1937 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0]
1938 ; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1939 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1941 ; GFX10-LABEL: v_mul_sub_2_v2i16:
1943 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0]
1945 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1946 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1947 %sub = sub <2 x i16> %y, <i16 2, i16 2>
1948 %mul = mul <2 x i16> %x, %sub
; <2 x i32> case of x * (y + 1).
; GFX6/7/8 checks expect a multiply followed by an add per lane; the GFX9 and
; GFX10 checks expect one v_mad_u64_u32 per lane instead.
1952 define <2 x i32> @v_mul_add_1_v2i32(<2 x i32> %x, <2 x i32> %y) {
1953 ; GFX67-LABEL: v_mul_add_1_v2i32:
1955 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1956 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
1957 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
1958 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1959 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1960 ; GFX67-NEXT: s_setpc_b64 s[30:31]
1962 ; GFX8-LABEL: v_mul_add_1_v2i32:
1964 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
1966 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
1967 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
1968 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
1969 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1971 ; GFX900-LABEL: v_mul_add_1_v2i32:
1973 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
1975 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
1976 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
1977 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1979 ; GFX90A-LABEL: v_mul_add_1_v2i32:
1981 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
1983 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
1984 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
1985 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
1986 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1988 ; GFX10-LABEL: v_mul_add_1_v2i32:
1990 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1991 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
1992 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
1993 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1994 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1995 %add = add <2 x i32> %y, <i32 1, i32 1>
1996 %mul = mul <2 x i32> %x, %add
; <2 x i32> case of (y + 1) * x, i.e. the multiply operands are swapped
; relative to x * (y + 1). GFX6/7/8 checks expect multiply + add per lane;
; GFX9 and GFX10 checks expect one v_mad_u64_u32 per lane.
2000 define <2 x i32> @v_mul_add_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) {
2001 ; GFX67-LABEL: v_mul_add_1_v2i32_commute:
2003 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2004 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2005 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2006 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2007 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
2008 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2010 ; GFX8-LABEL: v_mul_add_1_v2i32_commute:
2012 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2013 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2014 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2015 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
2016 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
2017 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2019 ; GFX900-LABEL: v_mul_add_1_v2i32_commute:
2021 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2022 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
2023 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
2024 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
2025 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2027 ; GFX90A-LABEL: v_mul_add_1_v2i32_commute:
2029 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2031 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
2032 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
2033 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2034 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2036 ; GFX10-LABEL: v_mul_add_1_v2i32_commute:
2038 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
2040 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
2041 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
2042 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2043 %add = add <2 x i32> %y, <i32 1, i32 1>
2044 %mul = mul <2 x i32> %add, %x
; <2 x i32> input already written as x + (x * y).
; GFX6/7/8 checks expect multiply + add per lane; GFX9 and GFX10 checks expect
; one v_mad_u64_u32 per lane.
2048 define <2 x i32> @v_mul_add_x_v2i32(<2 x i32> %x, <2 x i32> %y) {
2049 ; GFX67-LABEL: v_mul_add_x_v2i32:
2051 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2053 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2054 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2
2055 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v3
2056 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2058 ; GFX8-LABEL: v_mul_add_x_v2i32:
2060 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2062 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2063 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
2064 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
2065 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2067 ; GFX900-LABEL: v_mul_add_x_v2i32:
2069 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX900-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, v[0:1]
2071 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v3, v[1:2]
2072 ; GFX900-NEXT: v_mov_b32_e32 v0, v4
2073 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2075 ; GFX90A-LABEL: v_mul_add_x_v2i32:
2077 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2079 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
2080 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, v[4:5]
2081 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2082 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2084 ; GFX10-LABEL: v_mul_add_x_v2i32:
2086 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2087 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
2088 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
2089 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
2090 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2091 %mul = mul <2 x i32> %x, %y
2092 %add = add <2 x i32> %x, %mul
; <2 x i32> case of x * (y - 1).
; The checks on every target expect an add of -1 followed by v_mul_lo_u32 per lane.
2096 define <2 x i32> @v_mul_sub_1_v2i32(<2 x i32> %x, <2 x i32> %y) {
2097 ; GFX67-LABEL: v_mul_sub_1_v2i32:
2099 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2100 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2101 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2102 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2103 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2104 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2106 ; GFX8-LABEL: v_mul_sub_1_v2i32:
2108 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2110 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2111 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2112 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2113 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2115 ; GFX9-LABEL: v_mul_sub_1_v2i32:
2117 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2118 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2119 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2120 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2121 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2122 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2124 ; GFX10-LABEL: v_mul_sub_1_v2i32:
2126 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2128 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2129 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2130 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2131 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2132 %sub = sub <2 x i32> %y, <i32 1, i32 1>
2133 %mul = mul <2 x i32> %x, %sub
; <2 x i32> case of (y - 1) * x, with the multiply operands swapped.
; The checks on every target expect an add of -1 followed by v_mul_lo_u32 per lane.
2137 define <2 x i32> @v_mul_sub_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) {
2138 ; GFX67-LABEL: v_mul_sub_1_v2i32_commute:
2140 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2142 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2143 ; GFX67-NEXT: v_mul_lo_u32 v0, v2, v0
2144 ; GFX67-NEXT: v_mul_lo_u32 v1, v3, v1
2145 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2147 ; GFX8-LABEL: v_mul_sub_1_v2i32_commute:
2149 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2151 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2152 ; GFX8-NEXT: v_mul_lo_u32 v0, v2, v0
2153 ; GFX8-NEXT: v_mul_lo_u32 v1, v3, v1
2154 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2156 ; GFX9-LABEL: v_mul_sub_1_v2i32_commute:
2158 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2160 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2161 ; GFX9-NEXT: v_mul_lo_u32 v0, v2, v0
2162 ; GFX9-NEXT: v_mul_lo_u32 v1, v3, v1
2163 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2165 ; GFX10-LABEL: v_mul_sub_1_v2i32_commute:
2167 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2168 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2169 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2170 ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0
2171 ; GFX10-NEXT: v_mul_lo_u32 v1, v3, v1
2172 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2173 %sub = sub <2 x i32> %y, <i32 1, i32 1>
2174 %mul = mul <2 x i32> %sub, %x
; <2 x i32> input already written as (x * y) - x.
; The checks on every target expect v_mul_lo_u32 followed by a subtract per lane.
2178 define <2 x i32> @v_mul_sub_x_v2i32(<2 x i32> %x, <2 x i32> %y) {
2179 ; GFX67-LABEL: v_mul_sub_x_v2i32:
2181 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182 ; GFX67-NEXT: v_mul_lo_u32 v2, v0, v2
2183 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
2184 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
2185 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
2186 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2188 ; GFX8-LABEL: v_mul_sub_x_v2i32:
2190 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2191 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
2192 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
2193 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v2, v0
2194 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1
2195 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2197 ; GFX900-LABEL: v_mul_sub_x_v2i32:
2199 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2200 ; GFX900-NEXT: v_mul_lo_u32 v2, v0, v2
2201 ; GFX900-NEXT: v_mul_lo_u32 v3, v1, v3
2202 ; GFX900-NEXT: v_sub_u32_e32 v0, v2, v0
2203 ; GFX900-NEXT: v_sub_u32_e32 v1, v3, v1
2204 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2206 ; GFX90A-LABEL: v_mul_sub_x_v2i32:
2208 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2209 ; GFX90A-NEXT: v_mul_lo_u32 v3, v1, v3
2210 ; GFX90A-NEXT: v_mul_lo_u32 v2, v0, v2
2211 ; GFX90A-NEXT: v_sub_u32_e32 v0, v2, v0
2212 ; GFX90A-NEXT: v_sub_u32_e32 v1, v3, v1
2213 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2215 ; GFX10-LABEL: v_mul_sub_x_v2i32:
2217 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2218 ; GFX10-NEXT: v_mul_lo_u32 v2, v0, v2
2219 ; GFX10-NEXT: v_mul_lo_u32 v3, v1, v3
2220 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
2221 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1
2222 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2223 %mul = mul <2 x i32> %x, %y
2224 %sub = sub <2 x i32> %mul, %x
; <2 x i32> case with a constant other than 1: x * (y + 2).
; The checks on every target expect an add of 2 followed by v_mul_lo_u32 per lane.
2228 define <2 x i32> @v_mul_add_2_v2i32(<2 x i32> %x, <2 x i32> %y) {
2229 ; GFX67-LABEL: v_mul_add_2_v2i32:
2231 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2232 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
2233 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
2234 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2235 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2236 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2238 ; GFX8-LABEL: v_mul_add_2_v2i32:
2240 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2241 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v3
2242 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
2243 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2244 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2245 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2247 ; GFX9-LABEL: v_mul_add_2_v2i32:
2249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250 ; GFX9-NEXT: v_add_u32_e32 v3, 2, v3
2251 ; GFX9-NEXT: v_add_u32_e32 v2, 2, v2
2252 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2253 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2254 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2256 ; GFX10-LABEL: v_mul_add_2_v2i32:
2258 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
2260 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
2261 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2262 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2263 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2264 %add = add <2 x i32> %y, <i32 2, i32 2>
2265 %mul = mul <2 x i32> %x, %add
; <2 x i32> case with a constant other than 1: x * (y - 2).
; The checks on every target expect an add of -2 followed by v_mul_lo_u32 per lane.
2269 define <2 x i32> @v_mul_sub_2_v2i32(<2 x i32> %x, <2 x i32> %y) {
2270 ; GFX67-LABEL: v_mul_sub_2_v2i32:
2272 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2273 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
2274 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
2275 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, v2
2276 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v3
2277 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2279 ; GFX8-LABEL: v_mul_sub_2_v2i32:
2281 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2282 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -2, v3
2283 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
2284 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v2
2285 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v3
2286 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2288 ; GFX9-LABEL: v_mul_sub_2_v2i32:
2290 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291 ; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
2292 ; GFX9-NEXT: v_add_u32_e32 v2, -2, v2
2293 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2
2294 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3
2295 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2297 ; GFX10-LABEL: v_mul_sub_2_v2i32:
2299 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2300 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
2301 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
2302 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
2303 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3
2304 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2305 %sub = sub <2 x i32> %y, <i32 2, i32 2>
2306 %mul = mul <2 x i32> %x, %sub
; <2 x i24> case of x * (y + 1).
; The checks on every target expect an add of 1 followed by v_mul_u32_u24 per lane.
2310 define <2 x i24> @v_mul_add_1_v2i24(<2 x i24> %x, <2 x i24> %y) {
2311 ; GFX67-LABEL: v_mul_add_1_v2i24:
2313 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2314 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2315 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2316 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2317 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2318 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2320 ; GFX8-LABEL: v_mul_add_1_v2i24:
2322 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2324 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2325 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2326 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2327 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2329 ; GFX9-LABEL: v_mul_add_1_v2i24:
2331 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2332 ; GFX9-NEXT: v_add_u32_e32 v3, 1, v3
2333 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v2
2334 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2335 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2336 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2338 ; GFX10-LABEL: v_mul_add_1_v2i24:
2340 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2341 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v2
2342 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 1, v3
2343 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2344 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2345 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2346 %add = add <2 x i24> %y, <i24 1, i24 1>
2347 %mul = mul <2 x i24> %x, %add
; <2 x i24> case of (y + 1) * x, with the multiply operands swapped.
; The checks on every target expect an add of 1 followed by v_mul_u32_u24 per lane.
2351 define <2 x i24> @v_mul_add_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) {
2352 ; GFX67-LABEL: v_mul_add_1_v2i24_commute:
2354 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2355 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2356 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2357 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2358 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2359 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2361 ; GFX8-LABEL: v_mul_add_1_v2i24_commute:
2363 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2364 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2365 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2366 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2367 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2368 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2370 ; GFX9-LABEL: v_mul_add_1_v2i24_commute:
2372 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373 ; GFX9-NEXT: v_add_u32_e32 v3, 1, v3
2374 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v2
2375 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2376 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2377 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2379 ; GFX10-LABEL: v_mul_add_1_v2i24_commute:
2381 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v2
2383 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 1, v3
2384 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2385 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2386 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2387 %add = add <2 x i24> %y, <i24 1, i24 1>
2388 %mul = mul <2 x i24> %add, %x
; <2 x i24> input already written as x + (x * y).
; The checks on every target expect a single v_mad_u32_u24 per lane.
2392 define <2 x i24> @v_mul_add_x_v2i24(<2 x i24> %x, <2 x i24> %y) {
2393 ; GFX67-LABEL: v_mul_add_x_v2i24:
2395 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2396 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2397 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2398 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2400 ; GFX8-LABEL: v_mul_add_x_v2i24:
2402 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2403 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2404 ; GFX8-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2405 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2407 ; GFX9-LABEL: v_mul_add_x_v2i24:
2409 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2410 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2411 ; GFX9-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2412 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2414 ; GFX10-LABEL: v_mul_add_x_v2i24:
2416 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2417 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
2418 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
2419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2420 %mul = mul <2 x i24> %x, %y
2421 %add = add <2 x i24> %x, %mul
; <2 x i24> case of x * (y - 1).
; The checks on every target expect an add of -1 followed by v_mul_u32_u24 per lane.
2425 define <2 x i24> @v_mul_sub_1_v2i24(<2 x i24> %x, <2 x i24> %y) {
2426 ; GFX67-LABEL: v_mul_sub_1_v2i24:
2428 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2429 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2430 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2431 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2432 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2433 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2435 ; GFX8-LABEL: v_mul_sub_1_v2i24:
2437 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2438 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2439 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2440 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2441 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2442 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2444 ; GFX9-LABEL: v_mul_sub_1_v2i24:
2446 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2448 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2449 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2450 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2451 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2453 ; GFX10-LABEL: v_mul_sub_1_v2i24:
2455 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2457 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2458 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2459 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2460 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2461 %sub = sub <2 x i24> %y, <i24 1, i24 1>
2462 %mul = mul <2 x i24> %x, %sub
; <2 x i24> case of (y - 1) * x, with the multiply operands swapped.
; The checks on every target expect an add of -1 followed by v_mul_u32_u24 per lane.
2466 define <2 x i24> @v_mul_sub_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) {
2467 ; GFX67-LABEL: v_mul_sub_1_v2i24_commute:
2469 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2470 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -1, v3
2471 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -1, v2
2472 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2473 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2474 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2476 ; GFX8-LABEL: v_mul_sub_1_v2i24_commute:
2478 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2479 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2480 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -1, v2
2481 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2482 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2483 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2485 ; GFX9-LABEL: v_mul_sub_1_v2i24_commute:
2487 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
2489 ; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
2490 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2491 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2492 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2494 ; GFX10-LABEL: v_mul_sub_1_v2i24_commute:
2496 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
2498 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2499 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0
2500 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1
2501 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2502 %sub = sub <2 x i24> %y, <i24 1, i24 1>
2503 %mul = mul <2 x i24> %sub, %x
; <2 x i24> input already written as (x * y) - x.
; The checks on every target expect v_mul_u32_u24 followed by a subtract per lane.
2507 define <2 x i24> @v_mul_sub_x_v2i24(<2 x i24> %x, <2 x i24> %y) {
2508 ; GFX67-LABEL: v_mul_sub_x_v2i24:
2510 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2511 ; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2512 ; GFX67-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2513 ; GFX67-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
2514 ; GFX67-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
2515 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2517 ; GFX8-LABEL: v_mul_sub_x_v2i24:
2519 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520 ; GFX8-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2521 ; GFX8-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2522 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v2, v0
2523 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1
2524 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2526 ; GFX9-LABEL: v_mul_sub_x_v2i24:
2528 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2529 ; GFX9-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2530 ; GFX9-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2531 ; GFX9-NEXT: v_sub_u32_e32 v0, v2, v0
2532 ; GFX9-NEXT: v_sub_u32_e32 v1, v3, v1
2533 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2535 ; GFX10-LABEL: v_mul_sub_x_v2i24:
2537 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2538 ; GFX10-NEXT: v_mul_u32_u24_e32 v2, v0, v2
2539 ; GFX10-NEXT: v_mul_u32_u24_e32 v3, v1, v3
2540 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
2541 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1
2542 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2543 %mul = mul <2 x i24> %x, %y
2544 %sub = sub <2 x i24> %mul, %x
; <2 x i24> case with a constant other than 1: x * (y + 2).
; The checks on every target expect an add of 2 followed by v_mul_u32_u24 per lane.
2548 define <2 x i24> @v_mul_add_2_v2i24(<2 x i24> %x, <2 x i24> %y) {
2549 ; GFX67-LABEL: v_mul_add_2_v2i24:
2551 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, 2, v3
2553 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 2, v2
2554 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2555 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2556 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2558 ; GFX8-LABEL: v_mul_add_2_v2i24:
2560 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2561 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v3
2562 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 2, v2
2563 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2564 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2565 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2567 ; GFX9-LABEL: v_mul_add_2_v2i24:
2569 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2570 ; GFX9-NEXT: v_add_u32_e32 v3, 2, v3
2571 ; GFX9-NEXT: v_add_u32_e32 v2, 2, v2
2572 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2573 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2574 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2576 ; GFX10-LABEL: v_mul_add_2_v2i24:
2578 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2579 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
2580 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
2581 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2582 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2583 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2584 %add = add <2 x i24> %y, <i24 2, i24 2>
2585 %mul = mul <2 x i24> %x, %add
; <2 x i24> case with a constant other than 1: x * (y - 2).
; The checks on every target expect an add of -2 followed by v_mul_u32_u24 per lane.
2589 define <2 x i24> @v_mul_sub_2_v2i24(<2 x i24> %x, <2 x i24> %y) {
2590 ; GFX67-LABEL: v_mul_sub_2_v2i24:
2592 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2593 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, -2, v3
2594 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, -2, v2
2595 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2596 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2597 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2599 ; GFX8-LABEL: v_mul_sub_2_v2i24:
2601 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2602 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, -2, v3
2603 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, -2, v2
2604 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2605 ; GFX8-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2606 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2608 ; GFX9-LABEL: v_mul_sub_2_v2i24:
2610 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2611 ; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
2612 ; GFX9-NEXT: v_add_u32_e32 v2, -2, v2
2613 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2614 ; GFX9-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2615 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2617 ; GFX10-LABEL: v_mul_sub_2_v2i24:
2619 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2620 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
2621 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
2622 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
2623 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3
2624 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2625 %sub = sub <2 x i24> %y, <i24 2, i24 2>
2626 %mul = mul <2 x i24> %x, %sub
; Scalar i32 multiply-add with two constants: (arg * 9) + 52.
; GFX6/7/8 checks expect v_mul_lo_u32 followed by an add; GFX9 and GFX10 checks
; expect a single v_mad_u64_u32 taking both constants as operands.
2630 define i32 @v_mul_9_add_52_i32(i32 %arg) {
2631 ; GFX67-LABEL: v_mul_9_add_52_i32:
2633 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2634 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, 9
2635 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 52, v0
2636 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2638 ; GFX8-LABEL: v_mul_9_add_52_i32:
2640 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2641 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 9
2642 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 52, v0
2643 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2645 ; GFX9-LABEL: v_mul_9_add_52_i32:
2647 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2648 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2649 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2651 ; GFX10-LABEL: v_mul_9_add_52_i32:
2653 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2654 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
2655 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2656 %mul = mul i32 %arg, 9
2657 %add = add i32 %mul, 52
; Scalar i16 multiply-add with two constants: (arg * 9) + 52.
; Every target's checks expect a single mad: v_mad_u32_u24 after masking to
; 16 bits on GFX6/7, v_mad_u16 on GFX8/GFX10, v_mad_legacy_u16 on GFX9.
2661 define i16 @v_mul_9_add_52_i16(i16 %arg) {
2662 ; GFX67-LABEL: v_mul_9_add_52_i16:
2664 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2665 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2666 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 9, 52
2667 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2669 ; GFX8-LABEL: v_mul_9_add_52_i16:
2671 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2672 ; GFX8-NEXT: v_mad_u16 v0, v0, 9, 52
2673 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2675 ; GFX9-LABEL: v_mul_9_add_52_i16:
2677 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2678 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, 9, 52
2679 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2681 ; GFX10-LABEL: v_mul_9_add_52_i16:
2683 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2684 ; GFX10-NEXT: v_mad_u16 v0, v0, 9, 52
2685 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2686 %mul = mul i16 %arg, 9
2687 %add = add i16 %mul, 52
; <2 x i16> multiply-add with splat constants: (arg * 9) + 52.
; GFX6/7 checks expect a v_mad_u32_u24 per lane plus masking/repacking; GFX8
; checks expect two v_mad_u16 and a repack; GFX9/GFX10 checks expect one v_pk_mad_u16.
2691 define <2 x i16> @v_mul_9_add_52_v2i16(<2 x i16> %arg) {
2692 ; GFX67-LABEL: v_mul_9_add_52_v2i16:
2694 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2695 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2696 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2697 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, 9, 52
2698 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 9, 52
2699 ; GFX67-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2700 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2701 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v2
2702 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2703 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2705 ; GFX8-LABEL: v_mul_9_add_52_v2i16:
2707 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2708 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2709 ; GFX8-NEXT: v_mad_u16 v1, v1, 9, 52
2710 ; GFX8-NEXT: v_mad_u16 v0, v0, 9, 52
2711 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2712 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2713 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2715 ; GFX9-LABEL: v_mul_9_add_52_v2i16:
2717 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2718 ; GFX9-NEXT: v_pk_mad_u16 v0, v0, 9, 52 op_sel_hi:[1,0,0]
2719 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2721 ; GFX10-LABEL: v_mul_9_add_52_v2i16:
2723 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2724 ; GFX10-NEXT: v_pk_mad_u16 v0, v0, 9, 52 op_sel_hi:[1,0,0]
2725 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2726 %mul = mul <2 x i16> %arg, <i16 9, i16 9>
2727 %add = add <2 x i16> %mul, <i16 52, i16 52>
; Scalar i64 multiply-add with two constants: (arg * 9) + 52.
; GFX6 checks expect mul_lo/mul_hi plus an add-with-carry pair; GFX7/GFX8
; checks expect one v_mad_u64_u32 plus a mul_lo and add; the GFX900, GFX90A
; and GFX10 checks expect two v_mad_u64_u32.
2731 define i64 @v_mul_9_add_52_i64(i64 %arg) {
2732 ; GFX6-LABEL: v_mul_9_add_52_i64:
2734 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2735 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 9
2736 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, 9
2737 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 9
2738 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
2739 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 52, v0
2740 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2741 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2743 ; GFX7-LABEL: v_mul_9_add_52_i64:
2745 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2746 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, 9
2747 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2748 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
2749 ; GFX7-NEXT: s_setpc_b64 s[30:31]
2751 ; GFX8-LABEL: v_mul_9_add_52_i64:
2753 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2754 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, 9
2755 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2756 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
2757 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2759 ; GFX900-LABEL: v_mul_9_add_52_i64:
2761 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2762 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
2763 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2764 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 9, v[1:2]
2765 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2767 ; GFX90A-LABEL: v_mul_9_add_52_i64:
2769 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2770 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
2771 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
2772 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
2773 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 9, v[4:5]
2774 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
2775 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2777 ; GFX10-LABEL: v_mul_9_add_52_i64:
2779 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2780 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
2781 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
2782 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2]
2783 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2784 %mul = mul i64 %arg, 9
2785 %add = add i64 %mul, 52
; Scalar i32 multiply-add with two constants: (arg * 5) + 1.
; GFX6/7/8 checks expect v_mul_lo_u32 followed by an add; GFX9 and GFX10 checks
; expect a single v_mad_u64_u32 taking both constants as operands.
2789 define i32 @v_mul_5_add_1_i32(i32 %arg) {
2790 ; GFX67-LABEL: v_mul_5_add_1_i32:
2792 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2793 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, 5
2794 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2795 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2797 ; GFX8-LABEL: v_mul_5_add_1_i32:
2799 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2800 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 5
2801 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2802 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2804 ; GFX9-LABEL: v_mul_5_add_1_i32:
2806 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2807 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
2808 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2810 ; GFX10-LABEL: v_mul_5_add_1_i32:
2812 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
2814 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2815 %mul = mul i32 %arg, 5
2816 %add = add i32 %mul, 1
; (x * 284) + 82 on i32. In the recorded output 284 (0x11c) is first placed in
; s4 with s_movk_i32 on every target. Only the gfx10 checks form a
; v_mad_u64_u32 (with 0x52 as a direct operand); the gfx6/7, gfx8 and gfx9
; checks keep v_mul_lo_u32 followed by an add of 0x52.
2820 define i32 @v_mul_284_add_82_i32(i32 %arg) {
2821 ; GFX67-LABEL: v_mul_284_add_82_i32:
2823 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2824 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2825 ; GFX67-NEXT: v_mul_lo_u32 v0, v0, s4
2826 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, 0x52, v0
2827 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2829 ; GFX8-LABEL: v_mul_284_add_82_i32:
2831 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2832 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2833 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
2834 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x52, v0
2835 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2837 ; GFX9-LABEL: v_mul_284_add_82_i32:
2839 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2840 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2841 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
2842 ; GFX9-NEXT: v_add_u32_e32 v0, 0x52, v0
2843 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2845 ; GFX10-LABEL: v_mul_284_add_82_i32:
2847 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
2849 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
2850 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2851 %mul = mul i32 %arg, 284
2852 %add = add i32 %mul, 82
; (x * 5) + 1 on i16. Every target's recorded output contains a single
; multiply-add: v_mad_u32_u24 after masking the input to 16 bits on gfx6/7,
; v_mad_u16 on gfx8 and gfx10, and v_mad_legacy_u16 on gfx9.
2856 define i16 @v_mul_5_add_1_i16(i16 %arg) {
2857 ; GFX67-LABEL: v_mul_5_add_1_i16:
2859 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2860 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2861 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 5, 1
2862 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2864 ; GFX8-LABEL: v_mul_5_add_1_i16:
2866 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2867 ; GFX8-NEXT: v_mad_u16 v0, v0, 5, 1
2868 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2870 ; GFX9-LABEL: v_mul_5_add_1_i16:
2872 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, 5, 1
2874 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2876 ; GFX10-LABEL: v_mul_5_add_1_i16:
2878 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2879 ; GFX10-NEXT: v_mad_u16 v0, v0, 5, 1
2880 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2881 %mul = mul i16 %arg, 5
2882 %add = add i16 %mul, 1
; (x * 284) + 82 on i16. As in the 5/1 variant, each target's recorded output
; has one multiply-add, but 284 (0x11c) is first moved into s4. On gfx6/7,
; gfx8 and gfx9 the addend 0x52 is also moved into v1 beforehand; the gfx10
; checks pass 0x52 directly as the third operand.
2886 define i16 @v_mul_284_add_82_i16(i16 %arg) {
2887 ; GFX67-LABEL: v_mul_284_add_82_i16:
2889 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2890 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2891 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2892 ; GFX67-NEXT: v_mov_b32_e32 v1, 0x52
2893 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, s4, v1
2894 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2896 ; GFX8-LABEL: v_mul_284_add_82_i16:
2898 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2899 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2900 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x52
2901 ; GFX8-NEXT: v_mad_u16 v0, v0, s4, v1
2902 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2904 ; GFX9-LABEL: v_mul_284_add_82_i16:
2906 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2907 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2908 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x52
2909 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, s4, v1
2910 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2912 ; GFX10-LABEL: v_mul_284_add_82_i16:
2914 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2915 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
2916 ; GFX10-NEXT: v_mad_u16 v0, v0, s4, 0x52
2917 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2918 %mul = mul i16 %arg, 284
2919 %add = add i16 %mul, 82
; Element-wise (x * 5) + 1 on <2 x i16>. In the recorded output gfx6/7 and
; gfx8 compute each half with its own multiply-add (v_mad_u32_u24 / v_mad_u16)
; and then repack with shift/or, whereas the gfx9 and gfx10 checks use one
; packed v_pk_mad_u16.
2923 define <2 x i16> @v_mul_5_add_1_v2i16(<2 x i16> %arg) {
2924 ; GFX67-LABEL: v_mul_5_add_1_v2i16:
2926 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2927 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2928 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2929 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, 5, 1
2930 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, 5, 1
2931 ; GFX67-NEXT: v_lshlrev_b32_e32 v2, 16, v1
2932 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2933 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v2
2934 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2935 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2937 ; GFX8-LABEL: v_mul_5_add_1_v2i16:
2939 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2940 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2941 ; GFX8-NEXT: v_mad_u16 v1, v1, 5, 1
2942 ; GFX8-NEXT: v_mad_u16 v0, v0, 5, 1
2943 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2944 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2945 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2947 ; GFX9-LABEL: v_mul_5_add_1_v2i16:
2949 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2950 ; GFX9-NEXT: v_pk_mad_u16 v0, v0, 5, 1 op_sel_hi:[1,0,0]
2951 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2953 ; GFX10-LABEL: v_mul_5_add_1_v2i16:
2955 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2956 ; GFX10-NEXT: v_pk_mad_u16 v0, v0, 5, 1 op_sel_hi:[1,0,0]
2957 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2958 %mul = mul <2 x i16> %arg, <i16 5, i16 5>
2959 %add = add <2 x i16> %mul, <i16 1, i16 1>
; Element-wise (x * 284) + 82 on <2 x i16>. Same shape as the 5/1 vector
; variant: per-half multiply-adds plus repacking on gfx6/7 and gfx8, one
; v_pk_mad_u16 on gfx9 and gfx10. 284 (0x11c) is placed in s4 everywhere; only
; the gfx10 checks use 0x52 as a direct operand.
; NOTE(review): the gfx6/7 repacking masks are 0xfffe here rather than the
; 0xffff seen in the 5/1 variant - presumably because 284 * x + 82 is always
; even, so bit 0 is known zero; confirm before hand-editing these checks.
2963 define <2 x i16> @v_mul_284_add_82_v2i16(<2 x i16> %arg) {
2964 ; GFX67-LABEL: v_mul_284_add_82_v2i16:
2966 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2967 ; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
2968 ; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
2969 ; GFX67-NEXT: s_movk_i32 s4, 0x11c
2970 ; GFX67-NEXT: v_mov_b32_e32 v2, 0x52
2971 ; GFX67-NEXT: v_mad_u32_u24 v1, v1, s4, v2
2972 ; GFX67-NEXT: v_mad_u32_u24 v0, v0, s4, v2
2973 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 16, v1
2974 ; GFX67-NEXT: v_and_b32_e32 v0, 0xfffe, v0
2975 ; GFX67-NEXT: v_or_b32_e32 v0, v0, v3
2976 ; GFX67-NEXT: v_and_b32_e32 v1, 0xfffe, v1
2977 ; GFX67-NEXT: s_setpc_b64 s[30:31]
2979 ; GFX8-LABEL: v_mul_284_add_82_v2i16:
2981 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2982 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2983 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
2984 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x52
2985 ; GFX8-NEXT: v_mad_u16 v1, v1, s4, v2
2986 ; GFX8-NEXT: v_mad_u16 v0, v0, s4, v2
2987 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2988 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2989 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2991 ; GFX9-LABEL: v_mul_284_add_82_v2i16:
2993 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2994 ; GFX9-NEXT: s_movk_i32 s4, 0x11c
2995 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x52
2996 ; GFX9-NEXT: v_pk_mad_u16 v0, v0, s4, v1 op_sel_hi:[1,0,0]
2997 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2999 ; GFX10-LABEL: v_mul_284_add_82_v2i16:
3001 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3002 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
3003 ; GFX10-NEXT: v_pk_mad_u16 v0, v0, s4, 0x52 op_sel_hi:[1,0,0]
3004 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3005 %mul = mul <2 x i16> %arg, <i16 284, i16 284>
3006 %add = add <2 x i16> %mul, <i16 82, i16 82>
; (x * 5) + 1 on i64 (value in v[0:1]). Recorded output per target:
;  - gfx6 has no mad here: v_mul_lo_u32 / v_mul_hi_u32 pieces are combined and
;    the constant is added with an add + addc carry chain.
;  - gfx7 and gfx8 use one v_mad_u64_u32 for the low half (including the +1)
;    and a separate v_mul_lo_u32 + add for the high half.
;  - gfx900 and gfx10 use two v_mad_u64_u32; gfx90a does the same with extra
;    v_mov_b32 copies around the second one.
3010 define i64 @v_mul_5_add_1_i64(i64 %arg) {
3011 ; GFX6-LABEL: v_mul_5_add_1_i64:
3013 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3014 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 5
3015 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, 5
3016 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 5
3017 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3018 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3019 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3020 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3022 ; GFX7-LABEL: v_mul_5_add_1_i64:
3024 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3025 ; GFX7-NEXT: v_mul_lo_u32 v2, v1, 5
3026 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3027 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3028 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3030 ; GFX8-LABEL: v_mul_5_add_1_i64:
3032 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, 5
3034 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3035 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
3036 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3038 ; GFX900-LABEL: v_mul_5_add_1_i64:
3040 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3041 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3042 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3043 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 5, v[1:2]
3044 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3046 ; GFX90A-LABEL: v_mul_5_add_1_i64:
3048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3049 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3050 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
3051 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3052 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 5, v[4:5]
3053 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3054 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3056 ; GFX10-LABEL: v_mul_5_add_1_i64:
3058 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3059 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3060 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
3061 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2]
3062 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3063 %mul = mul i64 %arg, 5
3064 %add = add i64 %mul, 1
; (x * 284) + 82 on i64. Same per-target structure as the i64 5/1 variant, but
; the constants are set up before use in the recorded output:
;  - 284 (0x11c) is placed in an SGPR (s4 or s6) on every target.
;  - gfx7, gfx8, gfx900 and gfx90a build the 64-bit addend {0x52, 0} in a VGPR
;    pair for the first v_mad_u64_u32; gfx10 passes 0x52 directly.
;  - gfx6 adds 0x52 with an add + addc carry chain and forms no mad.
3068 define i64 @v_mul_284_add_82_i64(i64 %arg) {
3069 ; GFX6-LABEL: v_mul_284_add_82_i64:
3071 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3072 ; GFX6-NEXT: s_movk_i32 s4, 0x11c
3073 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s4
3074 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s4
3075 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4
3076 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3077 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0x52, v0
3078 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3079 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3081 ; GFX7-LABEL: v_mul_284_add_82_i64:
3083 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3084 ; GFX7-NEXT: s_movk_i32 s4, 0x11c
3085 ; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
3086 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x52
3087 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
3088 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3089 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3090 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3092 ; GFX8-LABEL: v_mul_284_add_82_i64:
3094 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3095 ; GFX8-NEXT: s_movk_i32 s4, 0x11c
3096 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
3097 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x52
3098 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
3099 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3100 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3101 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3103 ; GFX900-LABEL: v_mul_284_add_82_i64:
3105 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3106 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x52
3107 ; GFX900-NEXT: s_movk_i32 s6, 0x11c
3108 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3109 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3110 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
3111 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
3112 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3114 ; GFX90A-LABEL: v_mul_284_add_82_i64:
3116 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3117 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0x52
3118 ; GFX90A-NEXT: s_movk_i32 s6, 0x11c
3119 ; GFX90A-NEXT: v_mov_b32_e32 v5, 0
3120 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3121 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
3122 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3123 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
3124 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3125 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3127 ; GFX10-LABEL: v_mul_284_add_82_i64:
3129 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3130 ; GFX10-NEXT: s_movk_i32 s4, 0x11c
3131 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3132 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
3133 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[1:2]
3134 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3135 %mul = mul i64 %arg, 284
3136 %add = add i64 %mul, 82
; (x * 934584645) + 8234599 on i64, i.e. multiplier 0x37b4a145 and addend
; 0x7da667 as they appear in the checks. The recorded instruction sequences
; mirror the 284/82 i64 variant on every target; the only visible difference
; is that the multiplier is loaded with s_mov_b32 instead of s_movk_i32.
3140 define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
3141 ; GFX6-LABEL: v_mul_934584645_add_8234599_i64:
3143 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3144 ; GFX6-NEXT: s_mov_b32 s4, 0x37b4a145
3145 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s4
3146 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s4
3147 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4
3148 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3149 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0x7da667, v0
3150 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3151 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3153 ; GFX7-LABEL: v_mul_934584645_add_8234599_i64:
3155 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3156 ; GFX7-NEXT: s_mov_b32 s4, 0x37b4a145
3157 ; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
3158 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7da667
3159 ; GFX7-NEXT: v_mov_b32_e32 v2, 0
3160 ; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3161 ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3162 ; GFX7-NEXT: s_setpc_b64 s[30:31]
3164 ; GFX8-LABEL: v_mul_934584645_add_8234599_i64:
3166 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3167 ; GFX8-NEXT: s_mov_b32 s4, 0x37b4a145
3168 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
3169 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7da667
3170 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
3171 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
3172 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3173 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3175 ; GFX900-LABEL: v_mul_934584645_add_8234599_i64:
3177 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3178 ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7da667
3179 ; GFX900-NEXT: s_mov_b32 s6, 0x37b4a145
3180 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3181 ; GFX900-NEXT: v_mov_b32_e32 v2, v1
3182 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
3183 ; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
3184 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3186 ; GFX90A-LABEL: v_mul_934584645_add_8234599_i64:
3188 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3189 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0x7da667
3190 ; GFX90A-NEXT: s_mov_b32 s6, 0x37b4a145
3191 ; GFX90A-NEXT: v_mov_b32_e32 v5, 0
3192 ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
3193 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
3194 ; GFX90A-NEXT: v_mov_b32_e32 v4, v1
3195 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
3196 ; GFX90A-NEXT: v_mov_b32_e32 v1, v2
3197 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3199 ; GFX10-LABEL: v_mul_934584645_add_8234599_i64:
3201 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3202 ; GFX10-NEXT: s_mov_b32 s4, 0x37b4a145
3203 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
3204 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x7da667
3205 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[1:2]
3206 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3207 %mul = mul i64 %arg, 934584645
3208 %add = add i64 %mul, 8234599
; Kernel exercising a long chain of dependent i32 multiplies and adds (several
; of the adds are "+ 1" or "+ other operand of the multiply"), seeded by the
; workitem id and %arg1 + 1. The final product is stored through %arg at an
; element index computed from a loaded i64 base plus
; workgroup_id.x * zext(i16 load) + workitem_id.x.
; In the recorded output the gfx6/7 and gfx8 checks contain only v_mul_lo_u32
; plus adds for the chain, while the gfx900, gfx90a and gfx10 checks fold the
; last steps into v_mad_u64_u32 (gfx10 also uses one for the index math).
3212 define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %i21, ptr addrspace(1) nocapture noundef writeonly align 4 %arg, i32 noundef %arg1) #1 {
3213 ; GFX67-LABEL: compute_mad:
3214 ; GFX67: ; %bb.0: ; %bb
3215 ; GFX67-NEXT: s_load_dword s0, s[2:3], 0x6
3216 ; GFX67-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
3217 ; GFX67-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x4
3218 ; GFX67-NEXT: s_mov_b32 s7, 0xf000
3219 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
3220 ; GFX67-NEXT: s_add_i32 s0, s0, 1
3221 ; GFX67-NEXT: v_mul_lo_u32 v1, s0, v0
3222 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, s0, v1
3223 ; GFX67-NEXT: v_mul_lo_u32 v2, v2, v0
3224 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3225 ; GFX67-NEXT: s_load_dword s2, s[10:11], 0x1
3226 ; GFX67-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
3227 ; GFX67-NEXT: v_mul_lo_u32 v3, v2, v1
3228 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
3229 ; GFX67-NEXT: s_and_b32 s2, s2, 0xffff
3230 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3231 ; GFX67-NEXT: v_mul_lo_u32 v1, v1, v2
3232 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v3
3233 ; GFX67-NEXT: s_mul_i32 s6, s6, s2
3234 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v2
3235 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, s6, v0
3236 ; GFX67-NEXT: s_mov_b32 s6, 0
3237 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v3, v2
3238 ; GFX67-NEXT: v_mul_lo_u32 v1, v2, v1
3239 ; GFX67-NEXT: v_mov_b32_e32 v2, s1
3240 ; GFX67-NEXT: v_mul_lo_u32 v3, v1, v3
3241 ; GFX67-NEXT: v_add_i32_e32 v3, vcc, v3, v1
3242 ; GFX67-NEXT: v_mul_lo_u32 v4, v3, v1
3243 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, s0, v0
3244 ; GFX67-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
3245 ; GFX67-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
3246 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, v4, v3
3247 ; GFX67-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
3248 ; GFX67-NEXT: s_endpgm
3250 ; GFX8-LABEL: compute_mad:
3251 ; GFX8: ; %bb.0: ; %bb
3252 ; GFX8-NEXT: s_load_dword s0, s[2:3], 0x18
3253 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3254 ; GFX8-NEXT: s_add_i32 s0, s0, 1
3255 ; GFX8-NEXT: v_mul_lo_u32 v1, s0, v0
3256 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s0, v1
3257 ; GFX8-NEXT: v_mul_lo_u32 v2, v2, v0
3258 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v1
3259 ; GFX8-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
3260 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x10
3261 ; GFX8-NEXT: v_mul_lo_u32 v3, v2, v1
3262 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3263 ; GFX8-NEXT: s_load_dword s4, s[10:11], 0x4
3264 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3265 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
3266 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v3
3267 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
3268 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2
3269 ; GFX8-NEXT: v_mov_b32_e32 v4, s1
3270 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3271 ; GFX8-NEXT: s_and_b32 s1, s4, 0xffff
3272 ; GFX8-NEXT: s_mul_i32 s6, s6, s1
3273 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
3274 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
3275 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s6, v0
3276 ; GFX8-NEXT: v_mov_b32_e32 v2, s3
3277 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, v3
3278 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v1
3279 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
3280 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
3281 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
3282 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
3283 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v3
3284 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
3285 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
3286 ; GFX8-NEXT: flat_store_dword v[0:1], v2
3287 ; GFX8-NEXT: s_endpgm
3289 ; GFX900-LABEL: compute_mad:
3290 ; GFX900: ; %bb.0: ; %bb
3291 ; GFX900-NEXT: s_load_dword s0, s[2:3], 0x18
3292 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3293 ; GFX900-NEXT: s_add_i32 s0, s0, 1
3294 ; GFX900-NEXT: v_mul_lo_u32 v1, s0, v0
3295 ; GFX900-NEXT: v_add_u32_e32 v2, s0, v1
3296 ; GFX900-NEXT: v_mul_lo_u32 v2, v2, v0
3297 ; GFX900-NEXT: v_add_u32_e32 v1, 1, v1
3298 ; GFX900-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
3299 ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x10
3300 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3301 ; GFX900-NEXT: s_load_dword s4, s[10:11], 0x4
3302 ; GFX900-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
3303 ; GFX900-NEXT: v_mul_lo_u32 v3, v2, v1
3304 ; GFX900-NEXT: v_mov_b32_e32 v5, s1
3305 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3306 ; GFX900-NEXT: s_and_b32 s1, s4, 0xffff
3307 ; GFX900-NEXT: v_add_u32_e32 v1, v3, v1
3308 ; GFX900-NEXT: v_mul_lo_u32 v1, v1, v2
3309 ; GFX900-NEXT: v_add_u32_e32 v2, 1, v3
3310 ; GFX900-NEXT: s_mul_i32 s6, s6, s1
3311 ; GFX900-NEXT: v_add_u32_e32 v0, s6, v0
3312 ; GFX900-NEXT: v_mul_lo_u32 v3, v1, v2
3313 ; GFX900-NEXT: v_mov_b32_e32 v4, s3
3314 ; GFX900-NEXT: v_add_u32_e32 v2, v3, v2
3315 ; GFX900-NEXT: v_mul_lo_u32 v1, v2, v1
3316 ; GFX900-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v3, v[1:2]
3317 ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s2, v0
3318 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
3319 ; GFX900-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
3320 ; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, v1, v[2:3]
3321 ; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s0, v3
3322 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v4, vcc
3323 ; GFX900-NEXT: global_store_dword v[1:2], v0, off
3324 ; GFX900-NEXT: s_endpgm
3326 ; GFX90A-LABEL: compute_mad:
3327 ; GFX90A: ; %bb.0: ; %bb
3328 ; GFX90A-NEXT: s_load_dword s4, s[2:3], 0x18
3329 ; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
3330 ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x10
3331 ; GFX90A-NEXT: v_and_b32_e32 v4, 0x3ff, v0
3332 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
3333 ; GFX90A-NEXT: s_add_i32 s4, s4, 1
3334 ; GFX90A-NEXT: v_mul_lo_u32 v0, s4, v4
3335 ; GFX90A-NEXT: v_add_u32_e32 v1, s4, v0
3336 ; GFX90A-NEXT: v_mul_lo_u32 v1, v1, v4
3337 ; GFX90A-NEXT: v_add_u32_e32 v0, 1, v0
3338 ; GFX90A-NEXT: v_mul_lo_u32 v2, v1, v0
3339 ; GFX90A-NEXT: v_add_u32_e32 v0, v2, v0
3340 ; GFX90A-NEXT: v_mul_lo_u32 v0, v0, v1
3341 ; GFX90A-NEXT: v_add_u32_e32 v1, 1, v2
3342 ; GFX90A-NEXT: v_mul_lo_u32 v2, v0, v1
3343 ; GFX90A-NEXT: v_add_u32_e32 v1, v2, v1
3344 ; GFX90A-NEXT: s_load_dword s7, s[10:11], 0x4
3345 ; GFX90A-NEXT: v_mul_lo_u32 v0, v1, v0
3346 ; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[2:3], v0, v2, v[0:1]
3347 ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
3348 ; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
3349 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
3350 ; GFX90A-NEXT: s_and_b32 s4, s7, 0xffff
3351 ; GFX90A-NEXT: s_mul_i32 s6, s6, s4
3352 ; GFX90A-NEXT: v_add_u32_e32 v1, s6, v4
3353 ; GFX90A-NEXT: v_mov_b32_e32 v3, s3
3354 ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s2, v1
3355 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
3356 ; GFX90A-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
3357 ; GFX90A-NEXT: v_mov_b32_e32 v1, s1
3358 ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
3359 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
3360 ; GFX90A-NEXT: global_store_dword v[2:3], v0, off
3361 ; GFX90A-NEXT: s_endpgm
3363 ; GFX10-LABEL: compute_mad:
3364 ; GFX10: ; %bb.0: ; %bb
3365 ; GFX10-NEXT: s_clause 0x1
3366 ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x18
3367 ; GFX10-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
3368 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3369 ; GFX10-NEXT: s_add_i32 s0, s0, 1
3370 ; GFX10-NEXT: v_mul_lo_u32 v1, s0, v0
3371 ; GFX10-NEXT: v_add_nc_u32_e32 v2, s0, v1
3372 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x10
3373 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
3374 ; GFX10-NEXT: s_load_dword s4, s[10:11], 0x4
3375 ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
3376 ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v0
3377 ; GFX10-NEXT: v_mul_lo_u32 v3, v2, v1
3378 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v3, v1
3379 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3380 ; GFX10-NEXT: s_and_b32 s4, s4, 0xffff
3381 ; GFX10-NEXT: v_mul_lo_u32 v2, v1, v2
3382 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v3
3383 ; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1
3384 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v1
3385 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s6, s4, v[0:1]
3386 ; GFX10-NEXT: v_mul_lo_u32 v1, v3, v2
3387 ; GFX10-NEXT: v_add_co_u32 v2, s2, s2, v0
3388 ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s3, 0, s2
3389 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v1, v4, v[1:2]
3390 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
3391 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v1, v[4:5]
3392 ; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, s0, v2
3393 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, s1, v3, vcc_lo
3394 ; GFX10-NEXT: global_store_dword v[1:2], v0, off
3395 ; GFX10-NEXT: s_endpgm
; Seed values: the workitem id and (%arg1 + 1).
3397 %i = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
3398 %i2 = add i32 %arg1, 1
; Dependent multiply/add chain; each product feeds both a later multiply and
; a later add, ending in %i17.
3399 %i3 = mul i32 %i2, %i
3400 %i4 = add i32 %i3, %i2
3401 %i5 = mul i32 %i4, %i
3402 %i6 = add i32 %i3, 1
3403 %i7 = mul i32 %i5, %i6
3404 %i8 = add i32 %i7, %i6
3405 %i9 = mul i32 %i8, %i5
3406 %i10 = add i32 %i7, 1
3407 %i11 = mul i32 %i9, %i10
3408 %i12 = add i32 %i11, %i10
3409 %i13 = mul i32 %i12, %i9
3410 %i14 = add i32 %i11, 1
3411 %i15 = add i32 %i13, 1
3412 %i16 = mul i32 %i13, %i14
3413 %i17 = mul i32 %i16, %i15
; Store index: i64 loaded from %i18, plus the zero-extended value of
; workgroup_id.x * (i16 loaded at byte offset 4 of %i21) + workitem_id.x.
3414 %i19 = load i64, ptr addrspace(4) %i18, align 8
3415 %i20 = tail call i32 @llvm.amdgcn.workgroup.id.x()
3416 %i22 = getelementptr i8, ptr addrspace(4) %i21, i64 4
3417 %i23 = load i16, ptr addrspace(4) %i22, align 4
3418 %i24 = zext i16 %i23 to i32
3419 %i25 = mul i32 %i20, %i24
3420 %i26 = add i32 %i25, %i
3421 %i27 = zext i32 %i26 to i64
3422 %i28 = add i64 %i19, %i27
3423 %i29 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i28
; Write the end of the chain to the computed i32 slot of %arg.
3424 store i32 %i17, ptr addrspace(1) %i29, align 4
; Scalar variant of x * (y + 1): both arguments are inreg (s0 and s1 in the
; checks). The recorded output is identical on every target - s_add_i32 of 1
; followed by s_mul_i32 - so no multiply-add is formed here.
3428 define amdgpu_ps i32 @s_mul_add_1_i32(i32 inreg %x, i32 inreg %y) {
3429 ; GFX67-LABEL: s_mul_add_1_i32:
3431 ; GFX67-NEXT: s_add_i32 s1, s1, 1
3432 ; GFX67-NEXT: s_mul_i32 s0, s0, s1
3433 ; GFX67-NEXT: ; return to shader part epilog
3435 ; GFX8-LABEL: s_mul_add_1_i32:
3437 ; GFX8-NEXT: s_add_i32 s1, s1, 1
3438 ; GFX8-NEXT: s_mul_i32 s0, s0, s1
3439 ; GFX8-NEXT: ; return to shader part epilog
3441 ; GFX9-LABEL: s_mul_add_1_i32:
3443 ; GFX9-NEXT: s_add_i32 s1, s1, 1
3444 ; GFX9-NEXT: s_mul_i32 s0, s0, s1
3445 ; GFX9-NEXT: ; return to shader part epilog
3447 ; GFX10-LABEL: s_mul_add_1_i32:
3449 ; GFX10-NEXT: s_add_i32 s1, s1, 1
3450 ; GFX10-NEXT: s_mul_i32 s0, s0, s1
3451 ; GFX10-NEXT: ; return to shader part epilog
3452 %add = add i32 %y, 1
3453 %mul = mul i32 %x, %add
; Scalar (y + 1) * x with inreg arguments: the multiply operands are swapped
; relative to s_mul_add_1_i32. The recorded output on every target is again
; s_add_i32 followed by s_mul_i32, with the s_mul_i32 source order swapped too.
3457 define amdgpu_ps i32 @s_mul_add_1_i32_commute(i32 inreg %x, i32 inreg %y) {
3458 ; GFX67-LABEL: s_mul_add_1_i32_commute:
3460 ; GFX67-NEXT: s_add_i32 s1, s1, 1
3461 ; GFX67-NEXT: s_mul_i32 s0, s1, s0
3462 ; GFX67-NEXT: ; return to shader part epilog
3464 ; GFX8-LABEL: s_mul_add_1_i32_commute:
3466 ; GFX8-NEXT: s_add_i32 s1, s1, 1
3467 ; GFX8-NEXT: s_mul_i32 s0, s1, s0
3468 ; GFX8-NEXT: ; return to shader part epilog
3470 ; GFX9-LABEL: s_mul_add_1_i32_commute:
3472 ; GFX9-NEXT: s_add_i32 s1, s1, 1
3473 ; GFX9-NEXT: s_mul_i32 s0, s1, s0
3474 ; GFX9-NEXT: ; return to shader part epilog
3476 ; GFX10-LABEL: s_mul_add_1_i32_commute:
3478 ; GFX10-NEXT: s_add_i32 s1, s1, 1
3479 ; GFX10-NEXT: s_mul_i32 s0, s1, s0
3480 ; GFX10-NEXT: ; return to shader part epilog
3481 %add = add i32 %y, 1
3482 %mul = mul i32 %add, %x
; i8 multiply of %x by %add. NOTE(review): the definition of %add is not
; visible in this excerpt; going by the function name and the gfx6/7 checks
; (which add 1 to the second argument) it is presumably %y + 1 - confirm.
; Recorded output: gfx6/7 add 1, mask both operands to 8 bits and use
; v_mul_u32_u24; gfx8, gfx9 and gfx10 emit a single 16-bit mad computing
; v0 * v1 + v0.
3486 define i8 @v_mul_add_1_i8(i8 %x, i8 %y) {
3487 ; GFX67-LABEL: v_mul_add_1_i8:
3489 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3490 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3491 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3492 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3493 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3494 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3496 ; GFX8-LABEL: v_mul_add_1_i8:
3498 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3499 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3500 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3502 ; GFX9-LABEL: v_mul_add_1_i8:
3504 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3505 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3506 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3508 ; GFX10-LABEL: v_mul_add_1_i8:
3510 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3511 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3512 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3514 %mul = mul i8 %x, %add
; Commuted form of v_mul_add_1_i8: the multiply is %add * %x. NOTE(review):
; %add is defined outside this excerpt, presumably as %y + 1 - confirm.
; Recorded output: gfx6/7 still use add + masks + v_mul_u32_u24 (with the
; multiply sources swapped); the gfx8, gfx9 and gfx10 checks show the same
; single mad (v0 * v1 + v0) as the non-commuted test.
3518 define i8 @v_mul_add_1_i8_commute(i8 %x, i8 %y) {
3519 ; GFX67-LABEL: v_mul_add_1_i8_commute:
3521 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3522 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3523 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3524 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3525 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3526 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3528 ; GFX8-LABEL: v_mul_add_1_i8_commute:
3530 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3531 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3532 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3534 ; GFX9-LABEL: v_mul_add_1_i8_commute:
3536 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3537 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3538 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3540 ; GFX10-LABEL: v_mul_add_1_i8_commute:
3542 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3543 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3544 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3546 %mul = mul i8 %add, %x
; Same multiply as v_mul_add_1_i8 but with zeroext arguments. NOTE(review):
; %add is defined outside this excerpt, presumably as %y + 1 - confirm.
; Visible effect in the recorded output: the gfx6/7 sequence no longer masks
; v0 before v_mul_u32_u24 (only the incremented v1 is masked). The gfx8, gfx9
; and gfx10 checks are a single mad, v0 * v1 + v0.
3550 define i8 @v_mul_add_1_i8_zext(i8 zeroext %x, i8 zeroext %y) {
3551 ; GFX67-LABEL: v_mul_add_1_i8_zext:
3553 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3554 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3555 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3556 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3557 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3559 ; GFX8-LABEL: v_mul_add_1_i8_zext:
3561 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3562 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3563 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3565 ; GFX9-LABEL: v_mul_add_1_i8_zext:
3567 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3568 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3569 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3571 ; GFX10-LABEL: v_mul_add_1_i8_zext:
3573 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3574 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3575 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3577 %mul = mul i8 %x, %add
; Commuted form of v_mul_add_1_i8_zext: the multiply is %add * %x with
; zeroext arguments. NOTE(review): %add is defined outside this excerpt,
; presumably as %y + 1 - confirm.
; Recorded output: gfx6/7 add 1, mask v1 and multiply with swapped sources;
; the gfx8, gfx9 and gfx10 checks are a single mad, v0 * v1 + v0.
3581 define i8 @v_mul_add_1_i8_zext_commute(i8 zeroext %x, i8 zeroext %y) {
3582 ; GFX67-LABEL: v_mul_add_1_i8_zext_commute:
3584 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3585 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3586 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3587 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3588 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3590 ; GFX8-LABEL: v_mul_add_1_i8_zext_commute:
3592 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3593 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v0
3594 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3596 ; GFX9-LABEL: v_mul_add_1_i8_zext_commute:
3598 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3599 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3600 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3602 ; GFX10-LABEL: v_mul_add_1_i8_zext_commute:
3604 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3605 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
3606 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3608 %mul = mul i8 %add, %x
; Element-wise x * (y + 1) on <2 x i8>; the checks show x in v0/v1 and y in
; v2/v3. Recorded output:
;  - gfx6/7 form the two incremented y elements through a pack / add 0x100 /
;    unpack sequence, then use two v_mul_u32_u24; no mad is formed.
;  - gfx8, gfx9 and gfx10 use one 16-bit mad per element (x * y + x) and then
;    repack the two bytes with a shift and an SDWA or.
3612 define <2 x i8> @v_mul_add_1_v2i8(<2 x i8> %x, <2 x i8> %y) {
3613 ; GFX67-LABEL: v_mul_add_1_v2i8:
3615 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3616 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3617 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 8, v3
3618 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3619 ; GFX67-NEXT: v_or_b32_e32 v2, v3, v2
3620 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 0x100, v2
3621 ; GFX67-NEXT: v_bfe_u32 v3, v2, 8, 8
3622 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3623 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3624 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3625 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3626 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v1, v3
3627 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3629 ; GFX8-LABEL: v_mul_add_1_v2i8:
3631 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3632 ; GFX8-NEXT: v_mad_u16 v1, v1, v3, v1
3633 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3634 ; GFX8-NEXT: v_mad_u16 v0, v0, v2, v0
3635 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3636 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
3637 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3639 ; GFX9-LABEL: v_mul_add_1_v2i8:
3641 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3642 ; GFX9-NEXT: v_mad_legacy_u16 v1, v1, v3, v1
3643 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3644 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v2, v0
3645 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3646 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
3647 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3649 ; GFX10-LABEL: v_mul_add_1_v2i8:
3651 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3652 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
3653 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
3654 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
3655 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
3656 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3657 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3658 %add = add <2 x i8> %y, <i8 1, i8 1>
3659 %mul = mul <2 x i8> %x, %add
; Element-wise (y + 1) * x on <2 x i8>: the multiply operands are swapped
; relative to v_mul_add_1_v2i8. Recorded output:
;  - gfx6/7 use the same pack / add 0x100 / unpack sequence followed by two
;    v_mul_u32_u24, here with the multiply sources swapped.
;  - gfx8, gfx9 and gfx10 show the same per-element 16-bit mads (x * y + x)
;    and byte repacking as the non-commuted test.
3663 define <2 x i8> @v_mul_add_1_v2i8_commute(<2 x i8> %x, <2 x i8> %y) {
3664 ; GFX67-LABEL: v_mul_add_1_v2i8_commute:
3666 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3667 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3668 ; GFX67-NEXT: v_lshlrev_b32_e32 v3, 8, v3
3669 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3670 ; GFX67-NEXT: v_or_b32_e32 v2, v3, v2
3671 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 0x100, v2
3672 ; GFX67-NEXT: v_bfe_u32 v3, v2, 8, 8
3673 ; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v2
3674 ; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
3675 ; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
3676 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v2, v0
3677 ; GFX67-NEXT: v_mul_u32_u24_e32 v1, v3, v1
3678 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3680 ; GFX8-LABEL: v_mul_add_1_v2i8_commute:
3682 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683 ; GFX8-NEXT: v_mad_u16 v1, v1, v3, v1
3684 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3685 ; GFX8-NEXT: v_mad_u16 v0, v0, v2, v0
3686 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3687 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
3688 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3690 ; GFX9-LABEL: v_mul_add_1_v2i8_commute:
3692 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3693 ; GFX9-NEXT: v_mad_legacy_u16 v1, v1, v3, v1
3694 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3695 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v2, v0
3696 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3697 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
3698 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3700 ; GFX10-LABEL: v_mul_add_1_v2i8_commute:
3702 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3703 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
3704 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
3705 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
3706 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
3707 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3708 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3709 %add = add <2 x i8> %y, <i8 1, i8 1>
3710 %mul = mul <2 x i8> %add, %x
3714 ; Test the llvm.amdgcn.mul.u24 intrinsic in its (i32, i32) -> i64 form.
; Calls llvm.amdgcn.mul.u24 with the operands (%c + 1, %c). Every checked
; target is expected to emit one v_mul_u32_u24 for the low half of the i64
; result and to materialize the high half (v1) as the constant 0, with no
; high-half multiply.
; NOTE(review): %c appears to be %z masked to its low bit (the expectations
; start with a v_and_b32 against 1) - TODO confirm against the IR body.
3715 define i64 @mul_u24_with_uneven_operands(i32 %z) {
3716 ; GFX67-LABEL: mul_u24_with_uneven_operands:
3717 ; GFX67: ; %bb.0: ; %entry
3718 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3719 ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3720 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v0
3721 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3722 ; GFX67-NEXT: v_mov_b32_e32 v1, 0
3723 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3725 ; GFX8-LABEL: mul_u24_with_uneven_operands:
3726 ; GFX8: ; %bb.0: ; %entry
3727 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3728 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3729 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v0
3730 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3731 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
3732 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3734 ; GFX9-LABEL: mul_u24_with_uneven_operands:
3735 ; GFX9: ; %bb.0: ; %entry
3736 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3737 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3738 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v0
3739 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3740 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3741 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3743 ; GFX10-LABEL: mul_u24_with_uneven_operands:
3744 ; GFX10: ; %bb.0: ; %entry
3745 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
3747 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v0
3748 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v1, v0
3749 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3750 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; The incremented value is operand 0 of the intrinsic, the original operand 1.
3753 %d = add nuw nsw i32 %c, 1
3754 %f = call i64 @llvm.amdgcn.mul.u24(i32 %d, i32 %c)
; Same as mul_u24_with_uneven_operands but with the intrinsic operands in the
; opposite order: (%c, %c + 1). The expectations differ only in the source
; operand order of v_mul_u32_u24; the high half (v1) is still the constant 0.
; NOTE(review): %c appears to be %z masked to its low bit (the expectations
; start with a v_and_b32 against 1) - TODO confirm against the IR body.
3758 define i64 @mul_u24_with_uneven_operands_swapped(i32 %z) {
3759 ; GFX67-LABEL: mul_u24_with_uneven_operands_swapped:
3760 ; GFX67: ; %bb.0: ; %entry
3761 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3762 ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3763 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v0
3764 ; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3765 ; GFX67-NEXT: v_mov_b32_e32 v1, 0
3766 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3768 ; GFX8-LABEL: mul_u24_with_uneven_operands_swapped:
3769 ; GFX8: ; %bb.0: ; %entry
3770 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3771 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3772 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v0
3773 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3774 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
3775 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3777 ; GFX9-LABEL: mul_u24_with_uneven_operands_swapped:
3778 ; GFX9: ; %bb.0: ; %entry
3779 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3780 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3781 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v0
3782 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3783 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3784 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3786 ; GFX10-LABEL: mul_u24_with_uneven_operands_swapped:
3787 ; GFX10: ; %bb.0: ; %entry
3788 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3789 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
3790 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v0
3791 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3792 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3793 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; The original value is operand 0 of the intrinsic, the incremented operand 1.
3796 %d = add nuw nsw i32 %c, 1
3797 %f = call i64 @llvm.amdgcn.mul.u24(i32 %c, i32 %d)
3801 ; Test the llvm.amdgcn.mul.i24 intrinsic in its (i32, i32) -> i64 form.
; Calls llvm.amdgcn.mul.i24 with the operands (%c + 1, %c). Unlike the u24
; tests, every checked target is expected to compute both halves of the i64
; result: v_mul_i32_i24 for the low half (v0) and v_mul_hi_i32_i24 for the
; high half (v1). The gfx10 expectations keep the masked value in v1 and emit
; the low multiply before the high one.
; NOTE(review): %c appears to be %z masked to its low bit (the expectations
; start with a v_and_b32 against 1) - TODO confirm against the IR body.
3802 define i64 @mul_i24_with_uneven_operands(i32 %z) {
3803 ; GFX67-LABEL: mul_i24_with_uneven_operands:
3804 ; GFX67: ; %bb.0: ; %entry
3805 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3806 ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3807 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
3808 ; GFX67-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3809 ; GFX67-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3810 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3812 ; GFX8-LABEL: mul_i24_with_uneven_operands:
3813 ; GFX8: ; %bb.0: ; %entry
3814 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3815 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3816 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
3817 ; GFX8-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3818 ; GFX8-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3819 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3821 ; GFX9-LABEL: mul_i24_with_uneven_operands:
3822 ; GFX9: ; %bb.0: ; %entry
3823 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3824 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3825 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
3826 ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v0
3827 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v2, v0
3828 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3830 ; GFX10-LABEL: mul_i24_with_uneven_operands:
3831 ; GFX10: ; %bb.0: ; %entry
3832 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3833 ; GFX10-NEXT: v_and_b32_e32 v1, 1, v0
3834 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v1
3835 ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v2, v1
3836 ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v1
3837 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; The incremented value is operand 0 of the intrinsic, the original operand 1.
3840 %d = add nuw nsw i32 %c, 1
3841 %f = call i64 @llvm.amdgcn.mul.i24(i32 %d, i32 %c)
; Same as mul_i24_with_uneven_operands but with the intrinsic operands in the
; opposite order: (%c, %c + 1). The expectations still contain both a
; v_mul_i32_i24 (low half, v0) and a v_mul_hi_i32_i24 (high half, v1); only
; the source operand order of the two multiplies changes.
; NOTE(review): %c appears to be %z masked to its low bit (the expectations
; start with a v_and_b32 against 1) - TODO confirm against the IR body.
3845 define i64 @mul_i24_with_uneven_operands_swapped(i32 %z) {
3846 ; GFX67-LABEL: mul_i24_with_uneven_operands_swapped:
3847 ; GFX67: ; %bb.0: ; %entry
3848 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3849 ; GFX67-NEXT: v_and_b32_e32 v0, 1, v0
3850 ; GFX67-NEXT: v_add_i32_e32 v2, vcc, 1, v0
3851 ; GFX67-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3852 ; GFX67-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3853 ; GFX67-NEXT: s_setpc_b64 s[30:31]
3855 ; GFX8-LABEL: mul_i24_with_uneven_operands_swapped:
3856 ; GFX8: ; %bb.0: ; %entry
3857 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3858 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
3859 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
3860 ; GFX8-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3861 ; GFX8-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3862 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3864 ; GFX9-LABEL: mul_i24_with_uneven_operands_swapped:
3865 ; GFX9: ; %bb.0: ; %entry
3866 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3867 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
3868 ; GFX9-NEXT: v_add_u32_e32 v2, 1, v0
3869 ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
3870 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v2
3871 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3873 ; GFX10-LABEL: mul_i24_with_uneven_operands_swapped:
3874 ; GFX10: ; %bb.0: ; %entry
3875 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3876 ; GFX10-NEXT: v_and_b32_e32 v1, 1, v0
3877 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v1
3878 ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v1, v2
3879 ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v1, v2
3880 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; The original value is operand 0 of the intrinsic, the incremented operand 1.
3883 %d = add nuw nsw i32 %c, 1
3884 %f = call i64 @llvm.amdgcn.mul.i24(i32 %c, i32 %d)
; Intrinsic declarations. The four below all carry attribute group #2; the two
; pointer-returning ones yield an addrspace(4) pointer with align 4.
3888 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
3889 declare i32 @llvm.amdgcn.workitem.id.x() #2
3890 declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2
3891 declare i32 @llvm.amdgcn.workgroup.id.x() #2
; 24-bit multiply intrinsics called by the mul_u24_* / mul_i24_* tests: two i32
; operands in, one i64 result out. No attribute group is attached to them.
3892 declare i64 @llvm.amdgcn.mul.u24(i32, i32)
3893 declare i64 @llvm.amdgcn.mul.i24(i32, i32)
; Attribute groups. #2 is the group referenced by the declarations above.
; NOTE(review): the users of #0 and #1 are not next to these lines; presumably
; they are attached to functions earlier in the file - confirm before pruning.
3895 attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
3896 attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) }
3897 attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; NOTE(review): !0 looks like a [0, 1024) bound (e.g. a !range operand); its
; user is not next to this line - confirm at the use site.
3899 !0 = !{i32 0, i32 1024}