1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s
10 ; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok.
; Signed 32x32->64 multiply-add: both i32 arguments are sign-extended to i64,
; multiplied, and the i64 %arg2 is added (product is the left operand of the
; add).
; The checks expect a single v_mad_i64_i32 (v_mad_co_i64_i32 under the GFX12
; prefix) on every prefix except SI, where they expect v_mul_lo_u32 +
; v_mul_hi_i32 followed by an add/addc pair. The GFX1100 checks additionally
; copy v0/v1 into v5/v4 before the multiply-add writes v[0:1].
12 define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
13 ; CI-LABEL: mad_i64_i32_sextops:
15 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
17 ; CI-NEXT: s_setpc_b64 s[30:31]
19 ; SI-LABEL: mad_i64_i32_sextops:
21 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
23 ; SI-NEXT: v_mul_hi_i32 v1, v0, v1
24 ; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v2
25 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
26 ; SI-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-LABEL: mad_i64_i32_sextops:
30 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
32 ; GFX9-NEXT: s_setpc_b64 s[30:31]
34 ; GFX1100-LABEL: mad_i64_i32_sextops:
36 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
38 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
39 ; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
40 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
42 ; GFX1150-LABEL: mad_i64_i32_sextops:
44 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
46 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
48 ; GFX12-LABEL: mad_i64_i32_sextops:
50 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
51 ; GFX12-NEXT: s_wait_expcnt 0x0
52 ; GFX12-NEXT: s_wait_samplecnt 0x0
53 ; GFX12-NEXT: s_wait_bvhcnt 0x0
54 ; GFX12-NEXT: s_wait_kmcnt 0x0
55 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
56 ; GFX12-NEXT: s_setpc_b64 s[30:31]
57 %sext0 = sext i32 %arg0 to i64
58 %sext1 = sext i32 %arg1 to i64
59 %mul = mul i64 %sext0, %sext1
60 %mad = add i64 %mul, %arg2
; Same signed 32x32->64 multiply-add as the sext-operand case, but with the
; add operands swapped: the IR computes `add i64 %arg2, %mul`.
; The checks expect the same single v_mad_i64_i32 / v_mad_co_i64_i32 on the
; CI, GFX9, GFX11 and GFX12 prefixes; only the SI expansion shows the
; commuted operand order in its add/addc pair (v2, v4 and v3, v1).
64 define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
65 ; CI-LABEL: mad_i64_i32_sextops_commute:
67 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
69 ; CI-NEXT: s_setpc_b64 s[30:31]
71 ; SI-LABEL: mad_i64_i32_sextops_commute:
73 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
75 ; SI-NEXT: v_mul_hi_i32 v1, v0, v1
76 ; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v4
77 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
78 ; SI-NEXT: s_setpc_b64 s[30:31]
80 ; GFX9-LABEL: mad_i64_i32_sextops_commute:
82 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
84 ; GFX9-NEXT: s_setpc_b64 s[30:31]
86 ; GFX1100-LABEL: mad_i64_i32_sextops_commute:
88 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
90 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
91 ; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
92 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
94 ; GFX1150-LABEL: mad_i64_i32_sextops_commute:
96 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
98 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
100 ; GFX12-LABEL: mad_i64_i32_sextops_commute:
102 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
103 ; GFX12-NEXT: s_wait_expcnt 0x0
104 ; GFX12-NEXT: s_wait_samplecnt 0x0
105 ; GFX12-NEXT: s_wait_bvhcnt 0x0
106 ; GFX12-NEXT: s_wait_kmcnt 0x0
107 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
108 ; GFX12-NEXT: s_setpc_b64 s[30:31]
109 %sext0 = sext i32 %arg0 to i64
110 %sext1 = sext i32 %arg1 to i64
111 %mul = mul i64 %sext0, %sext1
112 %mad = add i64 %arg2, %mul
; Unsigned 32x32->64 multiply-add: both i32 arguments are zero-extended to
; i64, multiplied, and the i64 %arg2 is added.
; NOTE: the temporaries are named %sext0/%sext1 but hold zext results.
; The checks expect a single v_mad_u64_u32 (v_mad_co_u64_u32 under the GFX12
; prefix) on every prefix except SI, where they expect v_mul_lo_u32 +
; v_mul_hi_u32 followed by an add/addc pair. The GFX1100 checks additionally
; copy v0/v1 into v5/v4 before the multiply-add writes v[0:1].
116 define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
117 ; CI-LABEL: mad_u64_u32_zextops:
119 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
121 ; CI-NEXT: s_setpc_b64 s[30:31]
123 ; SI-LABEL: mad_u64_u32_zextops:
125 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
127 ; SI-NEXT: v_mul_hi_u32 v1, v0, v1
128 ; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v2
129 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
130 ; SI-NEXT: s_setpc_b64 s[30:31]
132 ; GFX9-LABEL: mad_u64_u32_zextops:
134 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
136 ; GFX9-NEXT: s_setpc_b64 s[30:31]
138 ; GFX1100-LABEL: mad_u64_u32_zextops:
140 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
142 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
143 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
144 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
146 ; GFX1150-LABEL: mad_u64_u32_zextops:
148 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
150 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
152 ; GFX12-LABEL: mad_u64_u32_zextops:
154 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
155 ; GFX12-NEXT: s_wait_expcnt 0x0
156 ; GFX12-NEXT: s_wait_samplecnt 0x0
157 ; GFX12-NEXT: s_wait_bvhcnt 0x0
158 ; GFX12-NEXT: s_wait_kmcnt 0x0
159 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
160 ; GFX12-NEXT: s_setpc_b64 s[30:31]
161 %sext0 = zext i32 %arg0 to i64
162 %sext1 = zext i32 %arg1 to i64
163 %mul = mul i64 %sext0, %sext1
164 %mad = add i64 %mul, %arg2
; Same unsigned 32x32->64 multiply-add as the zext-operand case, but with the
; add operands swapped: the IR computes `add i64 %arg2, %mul`.
; NOTE: the temporaries are named %sext0/%sext1 but hold zext results.
; The checks expect the same single v_mad_u64_u32 / v_mad_co_u64_u32 on the
; CI, GFX9, GFX11 and GFX12 prefixes; only the SI expansion shows the
; commuted operand order in its add/addc pair (v2, v4 and v3, v1).
168 define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
169 ; CI-LABEL: mad_u64_u32_zextops_commute:
171 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
173 ; CI-NEXT: s_setpc_b64 s[30:31]
175 ; SI-LABEL: mad_u64_u32_zextops_commute:
177 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
179 ; SI-NEXT: v_mul_hi_u32 v1, v0, v1
180 ; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v4
181 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
182 ; SI-NEXT: s_setpc_b64 s[30:31]
184 ; GFX9-LABEL: mad_u64_u32_zextops_commute:
186 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
188 ; GFX9-NEXT: s_setpc_b64 s[30:31]
190 ; GFX1100-LABEL: mad_u64_u32_zextops_commute:
192 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
194 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
195 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
196 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
198 ; GFX1150-LABEL: mad_u64_u32_zextops_commute:
200 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
202 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
204 ; GFX12-LABEL: mad_u64_u32_zextops_commute:
206 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
207 ; GFX12-NEXT: s_wait_expcnt 0x0
208 ; GFX12-NEXT: s_wait_samplecnt 0x0
209 ; GFX12-NEXT: s_wait_bvhcnt 0x0
210 ; GFX12-NEXT: s_wait_kmcnt 0x0
211 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
212 ; GFX12-NEXT: s_setpc_b64 s[30:31]
213 %sext0 = zext i32 %arg0 to i64
214 %sext1 = zext i32 %arg1 to i64
215 %mul = mul i64 %sext0, %sext1
216 %mad = add i64 %arg2, %mul
; Wider-than-64-bit case: both i32 arguments are sign-extended to i128,
; multiplied as i128, and the i128 %arg2 is added.
; The checks show that this does not become a single multiply-add. Each
; prefix expects a multi-instruction expansion built from several
; v_mad_u64_u32 / v_mad_i64_i32 instructions (v_mul_lo/v_mul_hi on SI, the
; v_mad_co_* spellings on GFX12) plus v_ashrrev_i32 ..., 31 of each input,
; ending with a four-instruction add/addc carry chain that folds in v2..v5.
220 define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
221 ; CI-LABEL: mad_i64_i32_sextops_i32_i128:
223 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; CI-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
225 ; CI-NEXT: v_ashrrev_i32_e32 v12, 31, v0
226 ; CI-NEXT: v_mov_b32_e32 v8, 0
227 ; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v1, v[7:8]
228 ; CI-NEXT: v_ashrrev_i32_e32 v13, 31, v1
229 ; CI-NEXT: v_mov_b32_e32 v7, v9
230 ; CI-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v13, v[7:8]
231 ; CI-NEXT: v_add_i32_e32 v8, vcc, v10, v8
232 ; CI-NEXT: v_mad_i64_i32 v[10:11], s[4:5], v1, v12, 0
233 ; CI-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
234 ; CI-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v13, v[8:9]
235 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v13, v0, v[10:11]
236 ; CI-NEXT: v_add_i32_e32 v8, vcc, v8, v0
237 ; CI-NEXT: v_addc_u32_e32 v9, vcc, v9, v1, vcc
238 ; CI-NEXT: v_add_i32_e32 v0, vcc, v6, v2
239 ; CI-NEXT: v_addc_u32_e32 v1, vcc, v7, v3, vcc
240 ; CI-NEXT: v_addc_u32_e32 v2, vcc, v8, v4, vcc
241 ; CI-NEXT: v_addc_u32_e32 v3, vcc, v9, v5, vcc
242 ; CI-NEXT: s_setpc_b64 s[30:31]
244 ; SI-LABEL: mad_i64_i32_sextops_i32_i128:
246 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247 ; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v0
248 ; SI-NEXT: v_mul_lo_u32 v11, v6, v1
249 ; SI-NEXT: v_mul_hi_u32 v12, v0, v1
250 ; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v1
251 ; SI-NEXT: v_mul_hi_u32 v14, v6, v1
252 ; SI-NEXT: v_mul_lo_u32 v13, v0, v7
253 ; SI-NEXT: v_mul_hi_u32 v10, v0, v7
254 ; SI-NEXT: v_add_i32_e32 v12, vcc, v11, v12
255 ; SI-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc
256 ; SI-NEXT: v_mul_hi_u32 v8, v6, v7
257 ; SI-NEXT: v_add_i32_e32 v12, vcc, v13, v12
258 ; SI-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc
259 ; SI-NEXT: v_mul_i32_i24_e32 v9, v6, v7
260 ; SI-NEXT: v_add_i32_e32 v10, vcc, v14, v10
261 ; SI-NEXT: v_mul_hi_i32 v6, v1, v6
262 ; SI-NEXT: v_mul_hi_i32 v7, v7, v0
263 ; SI-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
264 ; SI-NEXT: v_add_i32_e32 v9, vcc, v9, v10
265 ; SI-NEXT: v_addc_u32_e32 v8, vcc, v8, v14, vcc
266 ; SI-NEXT: v_add_i32_e32 v10, vcc, v13, v11
267 ; SI-NEXT: v_mul_lo_u32 v0, v0, v1
268 ; SI-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc
269 ; SI-NEXT: v_add_i32_e32 v7, vcc, v9, v10
270 ; SI-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc
271 ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
272 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v12, v3, vcc
273 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v7, v4, vcc
274 ; SI-NEXT: v_addc_u32_e32 v3, vcc, v6, v5, vcc
275 ; SI-NEXT: s_setpc_b64 s[30:31]
277 ; GFX9-LABEL: mad_i64_i32_sextops_i32_i128:
279 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
281 ; GFX9-NEXT: v_ashrrev_i32_e32 v12, 31, v0
282 ; GFX9-NEXT: v_mov_b32_e32 v9, 0
283 ; GFX9-NEXT: v_mov_b32_e32 v8, v7
284 ; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v12, v1, v[8:9]
285 ; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v1
286 ; GFX9-NEXT: v_mov_b32_e32 v8, v10
287 ; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v14, v[8:9]
288 ; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v9
289 ; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, 0, vcc
290 ; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v12, v14, v[10:11]
291 ; GFX9-NEXT: v_mad_i64_i32 v[12:13], s[4:5], v1, v12, 0
292 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[12:13]
293 ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v10, v0
294 ; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v11, v1, vcc
295 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v2
296 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v8, v3, vcc
297 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v7, v4, vcc
298 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v9, v5, vcc
299 ; GFX9-NEXT: s_setpc_b64 s[30:31]
301 ; GFX1100-LABEL: mad_i64_i32_sextops_i32_i128:
303 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; GFX1100-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0
305 ; GFX1100-NEXT: v_mov_b32_e32 v8, 0
306 ; GFX1100-NEXT: v_ashrrev_i32_e32 v14, 31, v0
307 ; GFX1100-NEXT: v_ashrrev_i32_e32 v15, 31, v1
308 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
309 ; GFX1100-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
310 ; GFX1100-NEXT: v_mov_b32_e32 v7, v9
311 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
312 ; GFX1100-NEXT: v_mad_u64_u32 v[11:12], null, v0, v15, v[7:8]
313 ; GFX1100-NEXT: v_mad_i64_i32 v[7:8], null, v1, v14, 0
314 ; GFX1100-NEXT: v_add_co_u32 v9, s0, v10, v12
315 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
316 ; GFX1100-NEXT: v_add_co_ci_u32_e64 v10, null, 0, 0, s0
317 ; GFX1100-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[7:8]
318 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
319 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[9:10]
320 ; GFX1100-NEXT: v_add_co_u32 v7, vcc_lo, v0, v12
321 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
322 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v13, vcc_lo
323 ; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2
324 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v11, v3, vcc_lo
325 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v7, v4, vcc_lo
326 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4)
327 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v8, v5, vcc_lo
328 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
330 ; GFX1150-LABEL: mad_i64_i32_sextops_i32_i128:
332 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GFX1150-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0
334 ; GFX1150-NEXT: v_mov_b32_e32 v8, 0
335 ; GFX1150-NEXT: v_ashrrev_i32_e32 v13, 31, v0
336 ; GFX1150-NEXT: v_ashrrev_i32_e32 v14, 31, v1
337 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
338 ; GFX1150-NEXT: v_mad_i64_i32 v[11:12], null, v1, v13, 0
339 ; GFX1150-NEXT: v_mad_u64_u32 v[9:10], null, v13, v1, v[7:8]
340 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
341 ; GFX1150-NEXT: v_mov_b32_e32 v7, v9
342 ; GFX1150-NEXT: v_mad_u64_u32 v[7:8], null, v0, v14, v[7:8]
343 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
344 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v14, v0, v[11:12]
345 ; GFX1150-NEXT: v_add_co_u32 v8, s0, v10, v8
346 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
347 ; GFX1150-NEXT: v_add_co_ci_u32_e64 v9, null, 0, 0, s0
348 ; GFX1150-NEXT: v_mad_u64_u32 v[8:9], null, v13, v14, v[8:9]
349 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
350 ; GFX1150-NEXT: v_add_co_u32 v8, vcc_lo, v8, v0
351 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo
352 ; GFX1150-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2
353 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
354 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
355 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
356 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
357 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
359 ; GFX12-LABEL: mad_i64_i32_sextops_i32_i128:
361 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
362 ; GFX12-NEXT: s_wait_expcnt 0x0
363 ; GFX12-NEXT: s_wait_samplecnt 0x0
364 ; GFX12-NEXT: s_wait_bvhcnt 0x0
365 ; GFX12-NEXT: s_wait_kmcnt 0x0
366 ; GFX12-NEXT: v_mad_co_u64_u32 v[6:7], null, v0, v1, 0
367 ; GFX12-NEXT: v_mov_b32_e32 v8, 0
368 ; GFX12-NEXT: v_ashrrev_i32_e32 v13, 31, v0
369 ; GFX12-NEXT: v_ashrrev_i32_e32 v14, 31, v1
370 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
371 ; GFX12-NEXT: v_mad_co_i64_i32 v[11:12], null, v1, v13, 0
372 ; GFX12-NEXT: v_mad_co_u64_u32 v[9:10], null, v13, v1, v[7:8]
373 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
374 ; GFX12-NEXT: v_mov_b32_e32 v7, v9
375 ; GFX12-NEXT: v_mad_co_u64_u32 v[7:8], null, v0, v14, v[7:8]
376 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
377 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v14, v0, v[11:12]
378 ; GFX12-NEXT: v_add_co_u32 v8, s0, v10, v8
379 ; GFX12-NEXT: s_wait_alu 0xf1ff
380 ; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, 0, 0, s0
381 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
382 ; GFX12-NEXT: v_mad_co_u64_u32 v[8:9], null, v13, v14, v[8:9]
383 ; GFX12-NEXT: v_add_co_u32 v8, vcc_lo, v8, v0
384 ; GFX12-NEXT: s_wait_alu 0xfffd
385 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
386 ; GFX12-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo
387 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2
388 ; GFX12-NEXT: s_wait_alu 0xfffd
389 ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
390 ; GFX12-NEXT: s_wait_alu 0xfffd
391 ; GFX12-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
392 ; GFX12-NEXT: s_wait_alu 0xfffd
393 ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
394 ; GFX12-NEXT: s_wait_alu 0xfffd
395 ; GFX12-NEXT: s_setpc_b64 s[30:31]
396 %sext0 = sext i32 %arg0 to i128
397 %sext1 = sext i32 %arg1 to i128
398 %mul = mul i128 %sext0, %sext1
399 %mad = add i128 %mul, %arg2
; Narrower-than-64-bit result type: both i32 arguments are sign-extended to
; i63, multiplied as i63, and the i63 %arg2 is added.
; The checks expect the same code as the plain i64 sext-operand form: a
; single v_mad_i64_i32 (v_mad_co_i64_i32 under the GFX12 prefix), the SI
; v_mul_lo_u32 / v_mul_hi_i32 / add / addc expansion, and the GFX1100
; operand copies into v4/v5 ahead of the multiply-add.
403 define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
404 ; CI-LABEL: mad_i64_i32_sextops_i32_i63:
406 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
408 ; CI-NEXT: s_setpc_b64 s[30:31]
410 ; SI-LABEL: mad_i64_i32_sextops_i32_i63:
412 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
414 ; SI-NEXT: v_mul_hi_i32 v1, v0, v1
415 ; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v2
416 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
417 ; SI-NEXT: s_setpc_b64 s[30:31]
419 ; GFX9-LABEL: mad_i64_i32_sextops_i32_i63:
421 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
423 ; GFX9-NEXT: s_setpc_b64 s[30:31]
425 ; GFX1100-LABEL: mad_i64_i32_sextops_i32_i63:
427 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
429 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
430 ; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
431 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
433 ; GFX1150-LABEL: mad_i64_i32_sextops_i32_i63:
435 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
437 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
439 ; GFX12-LABEL: mad_i64_i32_sextops_i32_i63:
441 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
442 ; GFX12-NEXT: s_wait_expcnt 0x0
443 ; GFX12-NEXT: s_wait_samplecnt 0x0
444 ; GFX12-NEXT: s_wait_bvhcnt 0x0
445 ; GFX12-NEXT: s_wait_kmcnt 0x0
446 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
447 ; GFX12-NEXT: s_setpc_b64 s[30:31]
448 %sext0 = sext i32 %arg0 to i63
449 %sext1 = sext i32 %arg1 to i63
450 %mul = mul i63 %sext0, %sext1
451 %mad = add i63 %mul, %arg2
; Odd-width inputs: both i31 arguments are sign-extended to i63, multiplied
; as i63, and the i63 %arg2 is added.
; The checks expect each input to be sign-extended from 31 bits first
; (v_bfe_i32 ..., 0, 31 on the CI/GFX9/GFX11/GFX12 prefixes; a shift-left by
; 1 followed by v_ashr_i64 ..., 33 on SI) and then fed to a single
; v_mad_i64_i32 / v_mad_co_i64_i32, or to the mul_lo/mul_hi/add/addc
; expansion on SI.
455 define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
456 ; CI-LABEL: mad_i64_i32_sextops_i31_i63:
458 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; CI-NEXT: v_bfe_i32 v1, v1, 0, 31
460 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 31
461 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
462 ; CI-NEXT: s_setpc_b64 s[30:31]
464 ; SI-LABEL: mad_i64_i32_sextops_i31_i63:
466 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467 ; SI-NEXT: v_lshlrev_b32_e32 v4, 1, v0
468 ; SI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
469 ; SI-NEXT: v_ashr_i64 v[4:5], v[3:4], 33
470 ; SI-NEXT: v_ashr_i64 v[0:1], v[0:1], 33
471 ; SI-NEXT: v_mul_lo_u32 v1, v4, v0
472 ; SI-NEXT: v_mul_hi_i32 v4, v4, v0
473 ; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v2
474 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v4, v3, vcc
475 ; SI-NEXT: s_setpc_b64 s[30:31]
477 ; GFX9-LABEL: mad_i64_i32_sextops_i31_i63:
479 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 31
481 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 31
482 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
483 ; GFX9-NEXT: s_setpc_b64 s[30:31]
485 ; GFX1100-LABEL: mad_i64_i32_sextops_i31_i63:
487 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX1100-NEXT: v_bfe_i32 v4, v1, 0, 31
489 ; GFX1100-NEXT: v_bfe_i32 v5, v0, 0, 31
490 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
491 ; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
492 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
494 ; GFX1150-LABEL: mad_i64_i32_sextops_i31_i63:
496 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497 ; GFX1150-NEXT: v_bfe_i32 v1, v1, 0, 31
498 ; GFX1150-NEXT: v_bfe_i32 v0, v0, 0, 31
499 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
500 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
501 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
503 ; GFX12-LABEL: mad_i64_i32_sextops_i31_i63:
505 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
506 ; GFX12-NEXT: s_wait_expcnt 0x0
507 ; GFX12-NEXT: s_wait_samplecnt 0x0
508 ; GFX12-NEXT: s_wait_bvhcnt 0x0
509 ; GFX12-NEXT: s_wait_kmcnt 0x0
510 ; GFX12-NEXT: v_bfe_i32 v1, v1, 0, 31
511 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 31
512 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
513 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
514 ; GFX12-NEXT: s_setpc_b64 s[30:31]
515 %sext0 = sext i31 %arg0 to i63
516 %sext1 = sext i31 %arg1 to i63
517 %mul = mul i63 %sext0, %sext1
518 %mad = add i63 %mul, %arg2
; Mixed extensions: %arg0 is sign-extended and %arg1 is zero-extended to i64
; before the multiply, then the i64 %arg2 is added.
; None of the check prefixes use the signed v_mad_i64_i32 here. They expect
; an unsigned v_mad_u64_u32 (v_mad_co_u64_u32 on GFX12; v_mul_lo_u32 +
; v_mul_hi_u32 on SI) plus a separate correction that multiplies
; v_ashrrev_i32 ..., 31 of the first input by the second input and folds it
; into the upper result register (v_mul_lo_u32 + add on CI/SI, a second
; v_mad_u64_u32 on the GFX9/GFX11/GFX12 prefixes).
522 define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
523 ; CI-LABEL: mad_i64_i32_extops_i32_i64:
525 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; CI-NEXT: v_ashrrev_i32_e32 v4, 31, v0
527 ; CI-NEXT: v_mul_lo_u32 v4, v4, v1
528 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
529 ; CI-NEXT: v_add_i32_e32 v1, vcc, v4, v1
530 ; CI-NEXT: s_setpc_b64 s[30:31]
532 ; SI-LABEL: mad_i64_i32_extops_i32_i64:
534 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v0
536 ; SI-NEXT: v_mul_hi_u32 v5, v0, v1
537 ; SI-NEXT: v_mul_lo_u32 v4, v4, v1
538 ; SI-NEXT: v_mul_lo_u32 v0, v0, v1
539 ; SI-NEXT: v_add_i32_e32 v1, vcc, v5, v4
540 ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
541 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
542 ; SI-NEXT: s_setpc_b64 s[30:31]
544 ; GFX9-LABEL: mad_i64_i32_extops_i32_i64:
546 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
548 ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v0
549 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[2:3]
550 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
551 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v4, v[2:3]
552 ; GFX9-NEXT: v_mov_b32_e32 v1, v2
553 ; GFX9-NEXT: s_setpc_b64 s[30:31]
555 ; GFX1100-LABEL: mad_i64_i32_extops_i32_i64:
557 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
559 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
560 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
561 ; GFX1100-NEXT: v_ashrrev_i32_e32 v5, 31, v5
562 ; GFX1100-NEXT: v_mov_b32_e32 v3, v1
563 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
564 ; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4]
565 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
567 ; GFX1150-LABEL: mad_i64_i32_extops_i32_i64:
569 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570 ; GFX1150-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
571 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
572 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
573 ; GFX1150-NEXT: v_ashrrev_i32_e32 v2, 31, v5
574 ; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v2, v4, v[1:2]
575 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
577 ; GFX12-LABEL: mad_i64_i32_extops_i32_i64:
579 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
580 ; GFX12-NEXT: s_wait_expcnt 0x0
581 ; GFX12-NEXT: s_wait_samplecnt 0x0
582 ; GFX12-NEXT: s_wait_bvhcnt 0x0
583 ; GFX12-NEXT: s_wait_kmcnt 0x0
584 ; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
585 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
586 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v4, v[2:3]
587 ; GFX12-NEXT: v_ashrrev_i32_e32 v2, 31, v5
588 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v2, v4, v[1:2]
589 ; GFX12-NEXT: s_setpc_b64 s[30:31]
590 %ext0 = sext i32 %arg0 to i64
591 %ext1 = zext i32 %arg1 to i64
592 %mul = mul i64 %ext0, %ext1
593 %mad = add i64 %mul, %arg2
; Unsigned multiply-add where the 32-bit operands are produced by masking
; instead of zext: both i64 arguments are ANDed with 4294967295 (2^32 - 1),
; multiplied, and %arg2 is added.
; The checks expect the masks to fold away into a single v_mad_u64_u32
; (v_mad_co_u64_u32 under the GFX12 prefix) reading v0 and v2 with addend
; v[4:5]; SI expects v_mul_lo_u32 + v_mul_hi_u32 followed by add/addc. The
; GFX1100 checks copy v0 into v3 before the multiply-add writes v[0:1].
597 define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
598 ; CI-LABEL: mad_u64_u32_bitops:
600 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
602 ; CI-NEXT: s_setpc_b64 s[30:31]
604 ; SI-LABEL: mad_u64_u32_bitops:
606 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607 ; SI-NEXT: v_mul_lo_u32 v1, v0, v2
608 ; SI-NEXT: v_mul_hi_u32 v2, v0, v2
609 ; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v4
610 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v5, vcc
611 ; SI-NEXT: s_setpc_b64 s[30:31]
613 ; GFX9-LABEL: mad_u64_u32_bitops:
615 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
617 ; GFX9-NEXT: s_setpc_b64 s[30:31]
619 ; GFX1100-LABEL: mad_u64_u32_bitops:
621 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622 ; GFX1100-NEXT: v_mov_b32_e32 v3, v0
623 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
624 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5]
625 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
627 ; GFX1150-LABEL: mad_u64_u32_bitops:
629 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5]
631 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
633 ; GFX12-LABEL: mad_u64_u32_bitops:
635 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
636 ; GFX12-NEXT: s_wait_expcnt 0x0
637 ; GFX12-NEXT: s_wait_samplecnt 0x0
638 ; GFX12-NEXT: s_wait_bvhcnt 0x0
639 ; GFX12-NEXT: s_wait_kmcnt 0x0
640 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5]
641 ; GFX12-NEXT: s_setpc_b64 s[30:31]
642 %trunc.lhs = and i64 %arg0, 4294967295
643 %trunc.rhs = and i64 %arg1, 4294967295
644 %mul = mul i64 %trunc.lhs, %trunc.rhs
645 %add = add i64 %mul, %arg2
; Variant of the masked unsigned multiply-add where the left-hand mask is one
; bit too wide: %arg0 is ANDed with 8589934591 (2^33 - 1) while %arg1 keeps
; the 4294967295 (2^32 - 1) mask.
; The checks show this no longer collapses to one multiply-add. Every prefix
; expects an extra `v_and_b32 ..., 1, ...` on the register that held v1
; (presumably the upper half of %arg0 -- confirm against the calling
; convention) and a second multiply of that bit by v2 accumulated into the
; upper result register (v_mul_lo_u32 + add on CI/SI, a second
; v_mad_u64_u32 / v_mad_co_u64_u32 on the GFX9/GFX11/GFX12 prefixes).
649 define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
650 ; CI-LABEL: mad_u64_u32_bitops_lhs_mask_small:
652 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653 ; CI-NEXT: v_and_b32_e32 v3, 1, v1
654 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
655 ; CI-NEXT: v_mul_lo_u32 v2, v3, v2
656 ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
657 ; CI-NEXT: s_setpc_b64 s[30:31]
659 ; SI-LABEL: mad_u64_u32_bitops_lhs_mask_small:
661 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
662 ; SI-NEXT: v_and_b32_e32 v1, 1, v1
663 ; SI-NEXT: v_mul_hi_u32 v3, v0, v2
664 ; SI-NEXT: v_mul_lo_u32 v1, v1, v2
665 ; SI-NEXT: v_mul_lo_u32 v0, v0, v2
666 ; SI-NEXT: v_add_i32_e32 v1, vcc, v3, v1
667 ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
668 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
669 ; SI-NEXT: s_setpc_b64 s[30:31]
671 ; GFX9-LABEL: mad_u64_u32_bitops_lhs_mask_small:
673 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v1
675 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
676 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
677 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v2, v[4:5]
678 ; GFX9-NEXT: v_mov_b32_e32 v1, v2
679 ; GFX9-NEXT: s_setpc_b64 s[30:31]
681 ; GFX1100-LABEL: mad_u64_u32_bitops_lhs_mask_small:
683 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX1100-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0
685 ; GFX1100-NEXT: v_mov_b32_e32 v6, v1
686 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
687 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5]
688 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_and_b32 v5, 1, v6
689 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
690 ; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5]
691 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
693 ; GFX1150-LABEL: mad_u64_u32_bitops_lhs_mask_small:
695 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696 ; GFX1150-NEXT: v_mov_b32_e32 v3, v1
697 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5]
698 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
699 ; GFX1150-NEXT: v_and_b32_e32 v3, 1, v3
700 ; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v3, v2, v[1:2]
701 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
703 ; GFX12-LABEL: mad_u64_u32_bitops_lhs_mask_small:
705 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
706 ; GFX12-NEXT: s_wait_expcnt 0x0
707 ; GFX12-NEXT: s_wait_samplecnt 0x0
708 ; GFX12-NEXT: s_wait_bvhcnt 0x0
709 ; GFX12-NEXT: s_wait_kmcnt 0x0
710 ; GFX12-NEXT: v_mov_b32_e32 v3, v1
711 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5]
712 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
713 ; GFX12-NEXT: v_and_b32_e32 v3, 1, v3
714 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2]
715 ; GFX12-NEXT: s_setpc_b64 s[30:31]
716 %trunc.lhs = and i64 %arg0, 8589934591
717 %trunc.rhs = and i64 %arg1, 4294967295
718 %mul = mul i64 %trunc.lhs, %trunc.rhs
719 %add = add i64 %mul, %arg2
; Mirror of the too-wide-left-mask case with the wide mask on the right:
; %arg0 is ANDed with 4294967295 (2^32 - 1) and %arg1 with 8589934591
; (2^33 - 1) before the multiply and the add of %arg2.
; The checks again expect two multiplies instead of one multiply-add. Every
; prefix expects an extra `v_and_b32 ..., 1, v3` (presumably the upper half
; of %arg1 -- confirm against the calling convention) and a second multiply
; of the first input's low register by that bit, accumulated into the upper
; result register (v_mul_lo_u32 + add on CI/SI, a second v_mad_u64_u32 /
; v_mad_co_u64_u32 on the GFX9/GFX11/GFX12 prefixes).
723 define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
724 ; CI-LABEL: mad_u64_u32_bitops_rhs_mask_small:
726 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727 ; CI-NEXT: v_mov_b32_e32 v6, v0
728 ; CI-NEXT: v_and_b32_e32 v3, 1, v3
729 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, v[4:5]
730 ; CI-NEXT: v_mul_lo_u32 v2, v6, v3
731 ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
732 ; CI-NEXT: s_setpc_b64 s[30:31]
734 ; SI-LABEL: mad_u64_u32_bitops_rhs_mask_small:
736 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; SI-NEXT: v_and_b32_e32 v1, 1, v3
738 ; SI-NEXT: v_mul_hi_u32 v3, v0, v2
739 ; SI-NEXT: v_mul_lo_u32 v1, v0, v1
740 ; SI-NEXT: v_mul_lo_u32 v0, v0, v2
741 ; SI-NEXT: v_add_i32_e32 v1, vcc, v3, v1
742 ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
743 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
744 ; SI-NEXT: s_setpc_b64 s[30:31]
746 ; GFX9-LABEL: mad_u64_u32_bitops_rhs_mask_small:
748 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749 ; GFX9-NEXT: v_mov_b32_e32 v6, v0
750 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, v[4:5]
751 ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
752 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
753 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[2:3]
754 ; GFX9-NEXT: v_mov_b32_e32 v1, v2
755 ; GFX9-NEXT: s_setpc_b64 s[30:31]
757 ; GFX1100-LABEL: mad_u64_u32_bitops_rhs_mask_small:
759 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
760 ; GFX1100-NEXT: v_mov_b32_e32 v6, v0
761 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
762 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
763 ; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v4, 1, v3
764 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
765 ; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4]
766 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
768 ; GFX1150-LABEL: mad_u64_u32_bitops_rhs_mask_small:
770 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; GFX1150-NEXT: v_mov_b32_e32 v6, v0
772 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
773 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
774 ; GFX1150-NEXT: v_and_b32_e32 v2, 1, v3
775 ; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v6, v2, v[1:2]
776 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
778 ; GFX12-LABEL: mad_u64_u32_bitops_rhs_mask_small:
780 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
781 ; GFX12-NEXT: s_wait_expcnt 0x0
782 ; GFX12-NEXT: s_wait_samplecnt 0x0
783 ; GFX12-NEXT: s_wait_bvhcnt 0x0
784 ; GFX12-NEXT: s_wait_kmcnt 0x0
785 ; GFX12-NEXT: v_mov_b32_e32 v6, v0
786 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
787 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v6, v2, v[4:5]
788 ; GFX12-NEXT: v_and_b32_e32 v2, 1, v3
789 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v2, v[1:2]
790 ; GFX12-NEXT: s_setpc_b64 s[30:31]
791 %trunc.lhs = and i64 %arg0, 4294967295
792 %trunc.rhs = and i64 %arg1, 8589934591
793 %mul = mul i64 %trunc.lhs, %trunc.rhs
794 %add = add i64 %mul, %arg2
; Sign-extends the low 32 bits of %arg0 and %arg1 with shl/ashr-by-32 pairs
; (instead of trunc + sext), multiplies them and adds %arg2. The checks expect
; the whole expression to fold into one signed 32 x 32 + 64 MAD
; (v_mad_i64_i32, or v_mad_co_i64_i32 on GFX12) on every target except SI,
; which expands it to v_mul_lo_u32 / v_mul_hi_i32 plus a 64-bit add with carry.
798 define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
799 ; CI-LABEL: mad_i64_i32_bitops:
801 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v2, v[4:5]
803 ; CI-NEXT: s_setpc_b64 s[30:31]
805 ; SI-LABEL: mad_i64_i32_bitops:
807 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; SI-NEXT: v_mul_lo_u32 v1, v0, v2
809 ; SI-NEXT: v_mul_hi_i32 v2, v0, v2
810 ; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v4
811 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v5, vcc
812 ; SI-NEXT: s_setpc_b64 s[30:31]
814 ; GFX9-LABEL: mad_i64_i32_bitops:
816 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v2, v[4:5]
818 ; GFX9-NEXT: s_setpc_b64 s[30:31]
820 ; GFX1100-LABEL: mad_i64_i32_bitops:
822 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX1100-NEXT: v_mov_b32_e32 v3, v0
824 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
825 ; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5]
826 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
828 ; GFX1150-LABEL: mad_i64_i32_bitops:
830 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v2, v[4:5]
832 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
834 ; GFX12-LABEL: mad_i64_i32_bitops:
836 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
837 ; GFX12-NEXT: s_wait_expcnt 0x0
838 ; GFX12-NEXT: s_wait_samplecnt 0x0
839 ; GFX12-NEXT: s_wait_bvhcnt 0x0
840 ; GFX12-NEXT: s_wait_kmcnt 0x0
841 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v2, v[4:5]
842 ; GFX12-NEXT: s_setpc_b64 s[30:31]
843 %shl.lhs = shl i64 %arg0, 32
844 %trunc.lhs = ashr i64 %shl.lhs, 32
845 %shl.rhs = shl i64 %arg1, 32
846 %trunc.rhs = ashr i64 %shl.rhs, 32
847 %mul = mul i64 %trunc.lhs, %trunc.rhs
848 %add = add i64 %mul, %arg2
852 ; Example from bug report
; Multiplies the high 32 bits of %arg0 (lshr by 32) by its low 32 bits
; (and with 0xffffffff) and adds %arg0 itself, so both multiplicands and the
; accumulator come from the same 64-bit value. Despite the "i64_i32" in the
; name, both operands are zero-extended halves and the checks expect the
; unsigned v_mad_u64_u32 form (v_mad_co_u64_u32 on GFX12); SI expands to
; v_mul_lo_u32 / v_mul_hi_u32 plus a 64-bit add with carry.
853 define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 {
854 ; CI-LABEL: mad_i64_i32_unpack_i64ops:
856 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
857 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[0:1]
858 ; CI-NEXT: s_setpc_b64 s[30:31]
860 ; SI-LABEL: mad_i64_i32_unpack_i64ops:
862 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; SI-NEXT: v_mul_lo_u32 v2, v1, v0
864 ; SI-NEXT: v_mul_hi_u32 v3, v1, v0
865 ; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
866 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
867 ; SI-NEXT: s_setpc_b64 s[30:31]
869 ; GFX9-LABEL: mad_i64_i32_unpack_i64ops:
871 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[0:1]
873 ; GFX9-NEXT: s_setpc_b64 s[30:31]
875 ; GFX1100-LABEL: mad_i64_i32_unpack_i64ops:
877 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878 ; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
879 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
880 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
881 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
883 ; GFX1150-LABEL: mad_i64_i32_unpack_i64ops:
885 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
886 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[0:1]
887 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
889 ; GFX12-LABEL: mad_i64_i32_unpack_i64ops:
891 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
892 ; GFX12-NEXT: s_wait_expcnt 0x0
893 ; GFX12-NEXT: s_wait_samplecnt 0x0
894 ; GFX12-NEXT: s_wait_bvhcnt 0x0
895 ; GFX12-NEXT: s_wait_kmcnt 0x0
896 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[0:1]
897 ; GFX12-NEXT: s_setpc_b64 s[30:31]
898 %tmp4 = lshr i64 %arg0, 32
899 %tmp5 = and i64 %arg0, 4294967295
900 %mul = mul nuw i64 %tmp4, %tmp5
901 %mad = add i64 %mul, %arg0
; Kernel variant: the operands are kernel arguments, which the checks load
; into SGPRs. Computes zext(%arg0) * zext(%arg1) + %arg2 and stores the result
; to %out. Despite the "i64_i32" in the name the extensions are unsigned.
; Lowering expected by the checks that follow:
;  - CI moves SGPR operands into VGPRs and uses v_mad_u64_u32.
;  - SI uses v_mul_hi_u32 + s_mul_i32 and a VALU add / add-with-carry pair.
;  - GFX9 and GFX11 keep the arithmetic on the SALU (s_mul_i32,
;    s_mul_hi_u32, s_add_u32, s_addc_u32).
;  - GFX12 uses s_mul_u64 followed by s_add_nc_u64.
905 define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0, i32 %arg1, i64 %arg2) #0 {
906 ; CI-LABEL: mad_i64_i32_uniform:
908 ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
909 ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
910 ; CI-NEXT: s_mov_b32 s7, 0xf000
911 ; CI-NEXT: s_mov_b32 s6, -1
912 ; CI-NEXT: s_waitcnt lgkmcnt(0)
913 ; CI-NEXT: v_mov_b32_e32 v2, s3
914 ; CI-NEXT: v_mov_b32_e32 v0, s4
915 ; CI-NEXT: v_mov_b32_e32 v1, s5
916 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1]
917 ; CI-NEXT: s_mov_b32 s4, s0
918 ; CI-NEXT: s_mov_b32 s5, s1
919 ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
922 ; SI-LABEL: mad_i64_i32_uniform:
924 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
925 ; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
926 ; SI-NEXT: s_mov_b32 s7, 0xf000
927 ; SI-NEXT: s_mov_b32 s6, -1
928 ; SI-NEXT: s_waitcnt lgkmcnt(0)
929 ; SI-NEXT: v_mov_b32_e32 v0, s3
930 ; SI-NEXT: v_mul_hi_u32 v1, s2, v0
931 ; SI-NEXT: s_mov_b32 s4, s0
932 ; SI-NEXT: s_mul_i32 s0, s2, s3
933 ; SI-NEXT: v_mov_b32_e32 v0, s0
934 ; SI-NEXT: v_mov_b32_e32 v2, s9
935 ; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0
936 ; SI-NEXT: s_mov_b32 s5, s1
937 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
938 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
941 ; GFX9-LABEL: mad_i64_i32_uniform:
943 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
944 ; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
945 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
946 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
947 ; GFX9-NEXT: s_mul_hi_u32 s4, s2, s3
948 ; GFX9-NEXT: s_mul_i32 s2, s2, s3
949 ; GFX9-NEXT: s_add_u32 s2, s2, s6
950 ; GFX9-NEXT: s_addc_u32 s3, s4, s7
951 ; GFX9-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
952 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
953 ; GFX9-NEXT: s_endpgm
955 ; GFX11-LABEL: mad_i64_i32_uniform:
957 ; GFX11-NEXT: s_clause 0x1
958 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
959 ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
960 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
961 ; GFX11-NEXT: s_mul_i32 s6, s2, s3
962 ; GFX11-NEXT: s_mul_hi_u32 s3, s2, s3
963 ; GFX11-NEXT: s_add_u32 s2, s6, s4
964 ; GFX11-NEXT: s_addc_u32 s3, s3, s5
965 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
966 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
967 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
968 ; GFX11-NEXT: s_endpgm
970 ; GFX12-LABEL: mad_i64_i32_uniform:
972 ; GFX12-NEXT: s_clause 0x1
973 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
974 ; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
975 ; GFX12-NEXT: s_mov_b32 s7, 0
976 ; GFX12-NEXT: s_wait_kmcnt 0x0
977 ; GFX12-NEXT: s_mov_b32 s6, s2
978 ; GFX12-NEXT: s_mov_b32 s2, s3
979 ; GFX12-NEXT: s_mov_b32 s3, s7
980 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
981 ; GFX12-NEXT: s_mul_u64 s[2:3], s[6:7], s[2:3]
982 ; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
983 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
984 ; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
985 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
986 ; GFX12-NEXT: s_endpgm
987 %ext0 = zext i32 %arg0 to i64
988 %ext1 = zext i32 %arg1 to i64
989 %mul = mul i64 %ext0, %ext1
990 %mad = add i64 %mul, %arg2
991 store i64 %mad, ptr addrspace(1) %out
; The product sext(%arg0) * sext(%arg1) has two users: it is added to %arg2
; and to %arg3, and the two sums are xor'ed together. With two adds the checks
; still expect one signed MAD per add (two v_mad_i64_i32, or v_mad_co_i64_i32
; on GFX12) on every target except SI, which computes the product once with
; v_mul_lo_u32 / v_mul_hi_i32 and then performs two 64-bit adds with carry.
995 define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 {
996 ; CI-LABEL: mad_i64_i32_twice:
998 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
999 ; CI-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3]
1000 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[4:5]
1001 ; CI-NEXT: v_xor_b32_e32 v1, v3, v1
1002 ; CI-NEXT: v_xor_b32_e32 v0, v2, v0
1003 ; CI-NEXT: s_setpc_b64 s[30:31]
1005 ; SI-LABEL: mad_i64_i32_twice:
1007 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; SI-NEXT: v_mul_lo_u32 v6, v0, v1
1009 ; SI-NEXT: v_mul_hi_i32 v0, v0, v1
1010 ; SI-NEXT: v_add_i32_e32 v2, vcc, v6, v2
1011 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v0, v3, vcc
1012 ; SI-NEXT: v_add_i32_e32 v3, vcc, v6, v4
1013 ; SI-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc
1014 ; SI-NEXT: v_xor_b32_e32 v1, v1, v0
1015 ; SI-NEXT: v_xor_b32_e32 v0, v2, v3
1016 ; SI-NEXT: s_setpc_b64 s[30:31]
1018 ; GFX9-LABEL: mad_i64_i32_twice:
1020 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1021 ; GFX9-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3]
1022 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[4:5]
1023 ; GFX9-NEXT: v_xor_b32_e32 v1, v3, v1
1024 ; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0
1025 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1027 ; GFX1100-LABEL: mad_i64_i32_twice:
1029 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030 ; GFX1100-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3]
1031 ; GFX1100-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5]
1032 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1033 ; GFX1100-NEXT: v_xor_b32_e32 v0, v6, v2
1034 ; GFX1100-NEXT: v_xor_b32_e32 v1, v7, v3
1035 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1037 ; GFX1150-LABEL: mad_i64_i32_twice:
1039 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040 ; GFX1150-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[2:3]
1041 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[4:5]
1042 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1043 ; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0
1044 ; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1
1045 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1047 ; GFX12-LABEL: mad_i64_i32_twice:
1049 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1050 ; GFX12-NEXT: s_wait_expcnt 0x0
1051 ; GFX12-NEXT: s_wait_samplecnt 0x0
1052 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1053 ; GFX12-NEXT: s_wait_kmcnt 0x0
1054 ; GFX12-NEXT: v_mad_co_i64_i32 v[2:3], null, v0, v1, v[2:3]
1055 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[4:5]
1056 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1057 ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0
1058 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1
1059 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1060 %sext0 = sext i32 %arg0 to i64
1061 %sext1 = sext i32 %arg1 to i64
1062 %mul = mul i64 %sext0, %sext1
1063 %mad1 = add i64 %mul, %arg2
1064 %mad2 = add i64 %mul, %arg3
1065 %out = xor i64 %mad1, %mad2
; The product sext(%arg0) * sext(%arg1) has three users: it is added to
; %arg2, %arg3 and %arg4, and the three sums are xor'ed together. With three
; adds the checks expect CI, GFX11 and GFX12 to compute the product once
; (signed MAD with a 0 addend) and then use three separate add / add-with-carry
; pairs, whereas GFX9 (gfx90a) is expected to emit three v_mad_i64_i32. SI
; computes the product with v_mul_lo_u32 / v_mul_hi_i32 followed by three
; 64-bit adds with carry.
1069 define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %arg4) #0 {
1070 ; CI-LABEL: mad_i64_i32_thrice:
1072 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, 0
1074 ; CI-NEXT: v_add_i32_e32 v2, vcc, v0, v2
1075 ; CI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc
1076 ; CI-NEXT: v_add_i32_e32 v4, vcc, v0, v4
1077 ; CI-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc
1078 ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v6
1079 ; CI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
1080 ; CI-NEXT: v_xor_b32_e32 v3, v3, v5
1081 ; CI-NEXT: v_xor_b32_e32 v2, v2, v4
1082 ; CI-NEXT: v_xor_b32_e32 v1, v3, v1
1083 ; CI-NEXT: v_xor_b32_e32 v0, v2, v0
1084 ; CI-NEXT: s_setpc_b64 s[30:31]
1086 ; SI-LABEL: mad_i64_i32_thrice:
1088 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1089 ; SI-NEXT: v_mul_lo_u32 v8, v0, v1
1090 ; SI-NEXT: v_mul_hi_i32 v0, v0, v1
1091 ; SI-NEXT: v_add_i32_e32 v1, vcc, v8, v2
1092 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v0, v3, vcc
1093 ; SI-NEXT: v_add_i32_e32 v3, vcc, v8, v4
1094 ; SI-NEXT: v_addc_u32_e32 v4, vcc, v0, v5, vcc
1095 ; SI-NEXT: v_add_i32_e32 v5, vcc, v8, v6
1096 ; SI-NEXT: v_addc_u32_e32 v0, vcc, v0, v7, vcc
1097 ; SI-NEXT: v_xor_b32_e32 v2, v2, v4
1098 ; SI-NEXT: v_xor_b32_e32 v3, v1, v3
1099 ; SI-NEXT: v_xor_b32_e32 v1, v2, v0
1100 ; SI-NEXT: v_xor_b32_e32 v0, v3, v5
1101 ; SI-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX9-LABEL: mad_i64_i32_thrice:
1105 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX9-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3]
1107 ; GFX9-NEXT: v_mad_i64_i32 v[4:5], s[4:5], v0, v1, v[4:5]
1108 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[6:7]
1109 ; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5
1110 ; GFX9-NEXT: v_xor_b32_e32 v2, v2, v4
1111 ; GFX9-NEXT: v_xor_b32_e32 v1, v3, v1
1112 ; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0
1113 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX1100-LABEL: mad_i64_i32_thrice:
1117 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX1100-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0
1119 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1120 ; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2
1121 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo
1122 ; GFX1100-NEXT: v_add_co_u32 v2, vcc_lo, v8, v4
1123 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
1124 ; GFX1100-NEXT: v_add_co_u32 v4, vcc_lo, v8, v6
1125 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v9, v7, vcc_lo
1126 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1127 ; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v2
1128 ; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v3
1129 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1130 ; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4
1131 ; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5
1132 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1134 ; GFX1150-LABEL: mad_i64_i32_thrice:
1136 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0
1138 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1139 ; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
1140 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
1141 ; GFX1150-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4
1142 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
1143 ; GFX1150-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6
1144 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo
1145 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1146 ; GFX1150-NEXT: v_xor_b32_e32 v2, v2, v4
1147 ; GFX1150-NEXT: v_xor_b32_e32 v3, v3, v5
1148 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1149 ; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0
1150 ; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1
1151 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1153 ; GFX12-LABEL: mad_i64_i32_thrice:
1155 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1156 ; GFX12-NEXT: s_wait_expcnt 0x0
1157 ; GFX12-NEXT: s_wait_samplecnt 0x0
1158 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1159 ; GFX12-NEXT: s_wait_kmcnt 0x0
1160 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, 0
1161 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1162 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
1163 ; GFX12-NEXT: s_wait_alu 0xfffd
1164 ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
1165 ; GFX12-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4
1166 ; GFX12-NEXT: s_wait_alu 0xfffd
1167 ; GFX12-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
1168 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6
1169 ; GFX12-NEXT: s_wait_alu 0xfffd
1170 ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo
1171 ; GFX12-NEXT: v_xor_b32_e32 v2, v2, v4
1172 ; GFX12-NEXT: v_xor_b32_e32 v3, v3, v5
1173 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1174 ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0
1175 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1
1176 ; GFX12-NEXT: s_wait_alu 0xfffd
1177 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1178 %sext0 = sext i32 %arg0 to i64
1179 %sext1 = sext i32 %arg1 to i64
1180 %mul = mul i64 %sext0, %sext1
1181 %mad1 = add i64 %mul, %arg2
1182 %mad2 = add i64 %mul, %arg3
1183 %mad3 = add i64 %mul, %arg4
1184 %out.p = xor i64 %mad1, %mad2
1185 %out = xor i64 %out.p, %mad3
; The product sext(%arg0) * sext(%arg1) is used both by the add with %arg2 and
; directly by the final xor, so the plain product must stay available. The
; checks expect CI, GFX11 and GFX12 to materialize the product with a signed
; MAD that has a 0 addend and then add %arg2 with an add / add-with-carry
; pair; GFX9 (gfx90a) is expected to emit two v_mad_i64_i32 (one with a 0
; addend, one with the real accumulator); SI uses v_mul_lo_u32 / v_mul_hi_i32
; plus a 64-bit add with carry.
1189 define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
1190 ; CI-LABEL: mad_i64_i32_secondary_use:
1192 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1193 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, 0
1194 ; CI-NEXT: v_add_i32_e32 v2, vcc, v0, v2
1195 ; CI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc
1196 ; CI-NEXT: v_xor_b32_e32 v1, v3, v1
1197 ; CI-NEXT: v_xor_b32_e32 v0, v2, v0
1198 ; CI-NEXT: s_setpc_b64 s[30:31]
1200 ; SI-LABEL: mad_i64_i32_secondary_use:
1202 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203 ; SI-NEXT: v_mul_lo_u32 v4, v0, v1
1204 ; SI-NEXT: v_mul_hi_i32 v0, v0, v1
1205 ; SI-NEXT: v_add_i32_e32 v2, vcc, v4, v2
1206 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v0, v3, vcc
1207 ; SI-NEXT: v_xor_b32_e32 v1, v1, v0
1208 ; SI-NEXT: v_xor_b32_e32 v0, v2, v4
1209 ; SI-NEXT: s_setpc_b64 s[30:31]
1211 ; GFX9-LABEL: mad_i64_i32_secondary_use:
1213 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214 ; GFX9-NEXT: v_mad_i64_i32 v[4:5], s[4:5], v0, v1, 0
1215 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
1216 ; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5
1217 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4
1218 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1220 ; GFX1100-LABEL: mad_i64_i32_secondary_use:
1222 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223 ; GFX1100-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0
1224 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1225 ; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
1226 ; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo
1227 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1228 ; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4
1229 ; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5
1230 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1232 ; GFX1150-LABEL: mad_i64_i32_secondary_use:
1234 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0
1236 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1237 ; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
1238 ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
1239 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1240 ; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0
1241 ; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1
1242 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1244 ; GFX12-LABEL: mad_i64_i32_secondary_use:
1246 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1247 ; GFX12-NEXT: s_wait_expcnt 0x0
1248 ; GFX12-NEXT: s_wait_samplecnt 0x0
1249 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1250 ; GFX12-NEXT: s_wait_kmcnt 0x0
1251 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, 0
1252 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1253 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
1254 ; GFX12-NEXT: s_wait_alu 0xfffd
1255 ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
1256 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1257 ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0
1258 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1
1259 ; GFX12-NEXT: s_wait_alu 0xfffd
1260 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1261 %sext0 = sext i32 %arg0 to i64
1262 %sext1 = sext i32 %arg1 to i64
1263 %mul = mul i64 %sext0, %sext1
1264 %mad = add i64 %mul, %arg2
1265 %out = xor i64 %mad, %mul
; Multiply-add on the non-power-of-two type i48, i.e. %arg0 * %arg1 + %arg2.
; The checks expect (except on SI) one v_mad_u64_u32 (v_mad_co_u64_u32 on
; GFX12) that takes the v[4:5] pair as accumulator, plus two v_mul_lo_u32
; results that are added into the high result dword v1 (v_add3_u32 on GFX9
; and newer, two v_add_i32 on CI). SI expands everything into v_mul_lo_u32 /
; v_mul_hi_u32 and adds.
1269 define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 {
1270 ; CI-LABEL: mad_i48_i48:
1272 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; CI-NEXT: v_mov_b32_e32 v6, v1
1274 ; CI-NEXT: v_mov_b32_e32 v7, v0
1275 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v2, v[4:5]
1276 ; CI-NEXT: v_mul_lo_u32 v2, v6, v2
1277 ; CI-NEXT: v_mul_lo_u32 v3, v7, v3
1278 ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1279 ; CI-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1280 ; CI-NEXT: s_setpc_b64 s[30:31]
1282 ; SI-LABEL: mad_i48_i48:
1284 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; SI-NEXT: v_mul_lo_u32 v3, v0, v3
1286 ; SI-NEXT: v_mul_hi_u32 v6, v0, v2
1287 ; SI-NEXT: v_mul_lo_u32 v1, v1, v2
1288 ; SI-NEXT: v_mul_lo_u32 v0, v0, v2
1289 ; SI-NEXT: v_add_i32_e32 v3, vcc, v6, v3
1290 ; SI-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1291 ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1292 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
1293 ; SI-NEXT: s_setpc_b64 s[30:31]
1295 ; GFX9-LABEL: mad_i48_i48:
1297 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298 ; GFX9-NEXT: v_mov_b32_e32 v6, v1
1299 ; GFX9-NEXT: v_mov_b32_e32 v7, v0
1300 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v2, v[4:5]
1301 ; GFX9-NEXT: v_mul_lo_u32 v3, v7, v3
1302 ; GFX9-NEXT: v_mul_lo_u32 v2, v6, v2
1303 ; GFX9-NEXT: v_add3_u32 v1, v2, v1, v3
1304 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1306 ; GFX11-LABEL: mad_i48_i48:
1308 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309 ; GFX11-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0
1310 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1311 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5]
1312 ; GFX11-NEXT: v_mul_lo_u32 v3, v7, v3
1313 ; GFX11-NEXT: v_mul_lo_u32 v2, v6, v2
1314 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1315 ; GFX11-NEXT: v_add3_u32 v1, v2, v1, v3
1316 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1318 ; GFX12-LABEL: mad_i48_i48:
1320 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1321 ; GFX12-NEXT: s_wait_expcnt 0x0
1322 ; GFX12-NEXT: s_wait_samplecnt 0x0
1323 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1324 ; GFX12-NEXT: s_wait_kmcnt 0x0
1325 ; GFX12-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0
1326 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1327 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v7, v2, v[4:5]
1328 ; GFX12-NEXT: v_mul_lo_u32 v3, v7, v3
1329 ; GFX12-NEXT: v_mul_lo_u32 v2, v6, v2
1330 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1331 ; GFX12-NEXT: v_add3_u32 v1, v2, v1, v3
1332 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1333 %m = mul i48 %arg0, %arg1
1334 %a = add i48 %m, %arg2
; Computes (%arg0 >> 32) * K + %arg0 with K = 0xfffffffffffffc19 (-999).
; Write %arg0 as (hi << 32) + lo. Because the upper 32 bits of K are all ones,
; hi * K == hi * 0xfffffc19 - (hi << 32) modulo 2^64, so adding %arg0 leaves
; hi * 0xfffffc19 + lo. The checks therefore expect (except on SI) a single
; unsigned 32 x 32 + 64 MAD whose accumulator is %arg0 with its high dword
; replaced by zero (v1 = 0). SI expands to v_mul_lo_u32 / v_mul_hi_u32, a
; subtract and a 64-bit add with carry.
; NOTE(review): %arg1 is declared but never referenced in the body.
1338 define i64 @lshr_mad_i64_1(i64 %arg0, i64 %arg1) #0 {
1339 ; CI-LABEL: lshr_mad_i64_1:
1341 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; CI-NEXT: v_mov_b32_e32 v2, v1
1343 ; CI-NEXT: v_mov_b32_e32 v1, 0
1344 ; CI-NEXT: s_movk_i32 s4, 0xfc19
1345 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1346 ; CI-NEXT: s_setpc_b64 s[30:31]
1348 ; SI-LABEL: lshr_mad_i64_1:
1350 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351 ; SI-NEXT: s_movk_i32 s4, 0xfc19
1352 ; SI-NEXT: v_mul_hi_u32 v2, v1, s4
1353 ; SI-NEXT: v_mul_lo_u32 v3, v1, s4
1354 ; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1
1355 ; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1356 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1357 ; SI-NEXT: s_setpc_b64 s[30:31]
1359 ; GFX9-LABEL: lshr_mad_i64_1:
1361 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1362 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
1363 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1364 ; GFX9-NEXT: s_movk_i32 s4, 0xfc19
1365 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1366 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1368 ; GFX1100-LABEL: lshr_mad_i64_1:
1370 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0
1372 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1373 ; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc19, v4, v[0:1]
1374 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
1375 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1377 ; GFX1150-LABEL: lshr_mad_i64_1:
1379 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1381 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1382 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc19, v2, v[0:1]
1383 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1385 ; GFX12-LABEL: lshr_mad_i64_1:
1387 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1388 ; GFX12-NEXT: s_wait_expcnt 0x0
1389 ; GFX12-NEXT: s_wait_samplecnt 0x0
1390 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1391 ; GFX12-NEXT: s_wait_kmcnt 0x0
1392 ; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1393 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1394 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc19, v2, v[0:1]
1395 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1396 %lsh = lshr i64 %arg0, 32
1397 %mul = mul i64 %lsh, s0xfffffffffffffc19
1398 %mad = add i64 %mul, %arg0
; Computes (%arg0 >> 32) * K + %arg0 with K = 0xffffffff000000d1: the upper 32
; bits of K are all ones and the lower 32 bits are the small positive value
; 0xd1. Writing %arg0 as (hi << 32) + lo, the all-ones upper half contributes
; -(hi << 32) modulo 2^64, which cancels the high half of the addend, leaving
; hi * 0xd1 + lo. The checks expect (except on SI) a single unsigned MAD by
; 0xd1 whose accumulator has its high dword zeroed (v1 = 0). SI expands to
; v_mul_lo_u32 / v_mul_hi_u32, a subtract and a 64-bit add with carry.
1403 define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
1404 ; CI-LABEL: lshr_mad_i64_2:
1406 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407 ; CI-NEXT: v_mov_b32_e32 v2, v1
1408 ; CI-NEXT: v_mov_b32_e32 v1, 0
1409 ; CI-NEXT: s_movk_i32 s4, 0xd1
1410 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1411 ; CI-NEXT: s_setpc_b64 s[30:31]
1413 ; SI-LABEL: lshr_mad_i64_2:
1415 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1416 ; SI-NEXT: s_movk_i32 s4, 0xd1
1417 ; SI-NEXT: v_mul_hi_u32 v2, v1, s4
1418 ; SI-NEXT: v_mul_lo_u32 v3, v1, s4
1419 ; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1
1420 ; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1421 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1422 ; SI-NEXT: s_setpc_b64 s[30:31]
1424 ; GFX9-LABEL: lshr_mad_i64_2:
1426 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1427 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
1428 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1429 ; GFX9-NEXT: s_movk_i32 s4, 0xd1
1430 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1431 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1433 ; GFX1100-LABEL: lshr_mad_i64_2:
1435 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1436 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0
1437 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1438 ; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xd1, v4, v[0:1]
1439 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
1440 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1442 ; GFX1150-LABEL: lshr_mad_i64_2:
1444 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1446 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1447 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xd1, v2, v[0:1]
1448 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1450 ; GFX12-LABEL: lshr_mad_i64_2:
1452 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1453 ; GFX12-NEXT: s_wait_expcnt 0x0
1454 ; GFX12-NEXT: s_wait_samplecnt 0x0
1455 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1456 ; GFX12-NEXT: s_wait_kmcnt 0x0
1457 ; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1458 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1459 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xd1, v2, v[0:1]
1460 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1461 %lsh = lshr i64 %arg0, 32
1462 %mul = mul i64 %lsh, s0xffffffff000000d1
1463 %mad = add i64 %mul, %arg0
; Computes K * (%arg0 >> 32) + %arg0 with K = 0xfffffffffffffc88 (-888), with
; the constant written as the FIRST operand of the mul (commuted form).
; Writing %arg0 as (hi << 32) + lo, the all-ones upper half of K contributes
; -(hi << 32) modulo 2^64, which cancels the high half of the addend, leaving
; hi * 0xfffffc88 + lo. The checks expect (except on SI) a single unsigned MAD
; by the 32-bit constant whose accumulator has its high dword zeroed (v1 = 0).
; SI expands to v_mul_lo_u32 / v_mul_hi_u32, a subtract and a 64-bit add with
; carry.
1468 define i64 @lshr_mad_i64_3(i64 %arg0) #0 {
1469 ; CI-LABEL: lshr_mad_i64_3:
1471 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; CI-NEXT: v_mov_b32_e32 v2, v1
1473 ; CI-NEXT: v_mov_b32_e32 v1, 0
1474 ; CI-NEXT: s_movk_i32 s4, 0xfc88
1475 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1476 ; CI-NEXT: s_setpc_b64 s[30:31]
1478 ; SI-LABEL: lshr_mad_i64_3:
1480 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1481 ; SI-NEXT: s_movk_i32 s4, 0xfc88
1482 ; SI-NEXT: v_mul_hi_u32 v2, v1, s4
1483 ; SI-NEXT: v_mul_lo_u32 v3, v1, s4
1484 ; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1
1485 ; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1486 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1487 ; SI-NEXT: s_setpc_b64 s[30:31]
1489 ; GFX9-LABEL: lshr_mad_i64_3:
1491 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1492 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
1493 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1494 ; GFX9-NEXT: s_movk_i32 s4, 0xfc88
1495 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1496 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1498 ; GFX1100-LABEL: lshr_mad_i64_3:
1500 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1501 ; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0
1502 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1503 ; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc88, v4, v[0:1]
1504 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
1505 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1507 ; GFX1150-LABEL: lshr_mad_i64_3:
1509 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1511 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1512 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v2, v[0:1]
1513 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1515 ; GFX12-LABEL: lshr_mad_i64_3:
1517 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1518 ; GFX12-NEXT: s_wait_expcnt 0x0
1519 ; GFX12-NEXT: s_wait_samplecnt 0x0
1520 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1521 ; GFX12-NEXT: s_wait_kmcnt 0x0
1522 ; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0
1523 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1524 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v2, v[0:1]
1525 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1526 %lsh = lshr i64 %arg0, 32
1527 %mul = mul i64 s0xfffffffffffffc88, %lsh
1528 %mad = add i64 %mul, %arg0
; Same (x >> 32) * K + x shape with K = 0xfffffffffffffc88 (-888), but here x
; is itself a product, %mul1 = %arg1 * zext(%arg0), rather than a function
; argument. The checks expect (except on SI) the final step to be an unsigned
; MAD by the 32-bit constant 0xfffffc88 whose accumulator is the low dword of
; %mul1 paired with a zeroed high dword; the instructions that compute %mul1
; differ per target (v_mul_lo_u32 + v_mad_u64_u32 + add on CI, two chained
; v_mad_u64_u32 on GFX9 and newer). SI expands everything into v_mul_lo_u32 /
; v_mul_hi_u32, a subtract and adds.
1533 define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 {
1534 ; CI-LABEL: lshr_mad_i64_4:
1536 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; CI-NEXT: v_mul_lo_u32 v2, v2, v0
1538 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, 0
1539 ; CI-NEXT: s_movk_i32 s4, 0xfc88
1540 ; CI-NEXT: v_add_i32_e32 v2, vcc, v1, v2
1541 ; CI-NEXT: v_mov_b32_e32 v1, 0
1542 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1]
1543 ; CI-NEXT: s_setpc_b64 s[30:31]
1545 ; SI-LABEL: lshr_mad_i64_4:
1547 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548 ; SI-NEXT: v_mul_lo_u32 v2, v2, v0
1549 ; SI-NEXT: v_mul_hi_u32 v3, v1, v0
1550 ; SI-NEXT: s_movk_i32 s4, 0xfc88
1551 ; SI-NEXT: v_mul_lo_u32 v0, v1, v0
1552 ; SI-NEXT: v_add_i32_e32 v2, vcc, v3, v2
1553 ; SI-NEXT: v_mul_hi_u32 v3, v2, s4
1554 ; SI-NEXT: v_mul_lo_u32 v1, v2, s4
1555 ; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v2
1556 ; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1557 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v2, vcc
1558 ; SI-NEXT: s_setpc_b64 s[30:31]
1560 ; GFX9-LABEL: lshr_mad_i64_4:
1562 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v0, 0
1564 ; GFX9-NEXT: v_mov_b32_e32 v6, v5
1565 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[6:7]
1566 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
1567 ; GFX9-NEXT: s_movk_i32 s4, 0xfc88
1568 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[4:5]
1569 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1571 ; GFX1100-LABEL: lshr_mad_i64_4:
1573 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574 ; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
1575 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1576 ; GFX1100-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
1577 ; GFX1100-NEXT: v_mad_u64_u32 v[5:6], null, v2, v0, v[1:2]
1578 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1579 ; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v5, v[3:4]
1580 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1582 ; GFX1150-LABEL: lshr_mad_i64_4:
1584 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
1586 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1587 ; GFX1150-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
1588 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, v[1:2]
1589 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1590 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
1591 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1593 ; GFX12-LABEL: lshr_mad_i64_4:
1595 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1596 ; GFX12-NEXT: s_wait_expcnt 0x0
1597 ; GFX12-NEXT: s_wait_samplecnt 0x0
1598 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1599 ; GFX12-NEXT: s_wait_kmcnt 0x0
1600 ; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, 0
1601 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1602 ; GFX12-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
1603 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, v0, v[1:2]
1604 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1605 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
1606 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1607 %ext = zext i32 %arg0 to i64
1608 %mul1 = mul i64 %arg1, %ext
1609 %lsh = lshr i64 %mul1, 32
1610 %mul2 = mul i64 %lsh, s0xfffffffffffffc88
1611 %mad = add i64 %mul2, %mul1
; lshr_mad_i64_negative_1: computes %arg0 + (%arg0 >> 36) * s0xfffffffffffffc19.
; The logical shift amount is 36, so the checks first shift the high dword
; (v1) right by 4 and then feed it to a *signed* 64-bit mad
; (v_mad_i64_i32 / v_mad_co_i64_i32) with the 32-bit constant 0xfffffc19 and
; the full 64-bit %arg0 as addend. The SI checks expect the expanded form:
; v_mul_lo_u32 + v_mul_hi_i32 followed by add/addc. The GFX1100 checks write
; the mad result to v[2:3] and copy it back to v[0:1]; GFX1150 and GFX12
; write v[0:1] directly.
; NOTE(review): presumably "negative" because the shift is not exactly 32 --
; confirm against the combine this test guards.
1615 define i64 @lshr_mad_i64_negative_1(i64 %arg0) #0 {
1616 ; CI-LABEL: lshr_mad_i64_negative_1:
1618 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619 ; CI-NEXT: v_lshrrev_b32_e32 v2, 4, v1
1620 ; CI-NEXT: s_movk_i32 s4, 0xfc19
1621 ; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v2, s4, v[0:1]
1622 ; CI-NEXT: s_setpc_b64 s[30:31]
1624 ; SI-LABEL: lshr_mad_i64_negative_1:
1626 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1627 ; SI-NEXT: v_lshrrev_b32_e32 v2, 4, v1
1628 ; SI-NEXT: s_movk_i32 s4, 0xfc19
1629 ; SI-NEXT: v_mul_lo_u32 v3, v2, s4
1630 ; SI-NEXT: v_mul_hi_i32 v2, v2, s4
1631 ; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1632 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1633 ; SI-NEXT: s_setpc_b64 s[30:31]
1635 ; GFX9-LABEL: lshr_mad_i64_negative_1:
1637 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 4, v1
1639 ; GFX9-NEXT: s_movk_i32 s4, 0xfc19
1640 ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v2, s4, v[0:1]
1641 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1643 ; GFX1100-LABEL: lshr_mad_i64_negative_1:
1645 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646 ; GFX1100-NEXT: v_lshrrev_b32_e32 v4, 4, v1
1647 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1648 ; GFX1100-NEXT: v_mad_i64_i32 v[2:3], null, 0xfffffc19, v4, v[0:1]
1649 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
1650 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1652 ; GFX1150-LABEL: lshr_mad_i64_negative_1:
1654 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655 ; GFX1150-NEXT: v_lshrrev_b32_e32 v2, 4, v1
1656 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1657 ; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, 0xfffffc19, v2, v[0:1]
1658 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1660 ; GFX12-LABEL: lshr_mad_i64_negative_1:
1662 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1663 ; GFX12-NEXT: s_wait_expcnt 0x0
1664 ; GFX12-NEXT: s_wait_samplecnt 0x0
1665 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1666 ; GFX12-NEXT: s_wait_kmcnt 0x0
1667 ; GFX12-NEXT: v_lshrrev_b32_e32 v2, 4, v1
1668 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1669 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, 0xfffffc19, v2, v[0:1]
1670 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1671 %lsh = lshr i64 %arg0, 36
1672 %mul = mul i64 %lsh, s0xfffffffffffffc19
1673 %mad = add i64 %mul, %arg0
; lshr_mad_i64_negative_2: computes %arg0 + (%arg0 >> 32) * s0xffffff00000000d1.
; The multiplier's upper 32 bits are 0xffffff00 rather than all ones. The
; checks for CI, GFX9, GFX11 and GFX12 therefore expect two steps: an
; unsigned mad of the high dword (v1) by the low constant 0xd1 with the full
; 64-bit %arg0 as addend, and then a correction that subtracts (v1 << 8) from
; the high dword of the result. SI expects the same arithmetic expanded into
; mul_hi/mul_lo, the shift/sub correction, and add/addc.
; NOTE(review): presumably "negative" because the constant's high half is not
; 0xffffffff -- confirm against the combine this test guards.
1678 define i64 @lshr_mad_i64_negative_2(i64 %arg0) #0 {
1679 ; CI-LABEL: lshr_mad_i64_negative_2:
1681 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682 ; CI-NEXT: s_movk_i32 s4, 0xd1
1683 ; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
1684 ; CI-NEXT: v_lshlrev_b32_e32 v0, 8, v1
1685 ; CI-NEXT: v_sub_i32_e32 v1, vcc, v3, v0
1686 ; CI-NEXT: v_mov_b32_e32 v0, v2
1687 ; CI-NEXT: s_setpc_b64 s[30:31]
1689 ; SI-LABEL: lshr_mad_i64_negative_2:
1691 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1692 ; SI-NEXT: s_movk_i32 s4, 0xd1
1693 ; SI-NEXT: v_mul_hi_u32 v2, v1, s4
1694 ; SI-NEXT: v_mul_lo_u32 v4, v1, s4
1695 ; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v1
1696 ; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
1697 ; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v0
1698 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1699 ; SI-NEXT: s_setpc_b64 s[30:31]
1701 ; GFX9-LABEL: lshr_mad_i64_negative_2:
1703 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704 ; GFX9-NEXT: s_movk_i32 s4, 0xd1
1705 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
1706 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 8, v1
1707 ; GFX9-NEXT: v_sub_u32_e32 v1, v3, v0
1708 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
1709 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1711 ; GFX11-LABEL: lshr_mad_i64_negative_2:
1713 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
1715 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v1
1716 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1717 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, v3, v0
1718 ; GFX11-NEXT: v_mov_b32_e32 v0, v2
1719 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1721 ; GFX12-LABEL: lshr_mad_i64_negative_2:
1723 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1724 ; GFX12-NEXT: s_wait_expcnt 0x0
1725 ; GFX12-NEXT: s_wait_samplecnt 0x0
1726 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1727 ; GFX12-NEXT: s_wait_kmcnt 0x0
1728 ; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
1729 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 8, v1
1730 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
1731 ; GFX12-NEXT: v_sub_nc_u32_e32 v1, v3, v0
1732 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
1733 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1734 %lsh = lshr i64 %arg0, 32
1735 %mul = mul i64 %lsh, s0xffffff00000000d1
1736 %mad = add i64 %mul, %arg0
; lshr_mad_i64_negative_3: computes (%arg0 + 1) + (%arg0 >> 32) * s0xfffffffffffffc00.
; No mad instruction is expected on any target. Every check prefix shows the
; same shape: a 64-bit right shift of %arg0 by 22, a mask of the low dword
; with 0xfffffc00, a 64-bit subtract of that value from %arg0 (sub + subb),
; and finally a 64-bit add of 1 (add + addc). GFX12 additionally expects
; s_wait_alu 0xfffd after each carry-producing or carry-consuming VALU op.
; NOTE(review): presumably "negative" because the addend is %arg0 + 1 rather
; than %arg0 itself, and/or because the multiplier is a negated power of two
; that is lowered to shift+sub -- confirm against the combine under test.
1741 define i64 @lshr_mad_i64_negative_3(i64 %arg0) #0 {
1742 ; CI-LABEL: lshr_mad_i64_negative_3:
1744 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; CI-NEXT: v_lshr_b64 v[2:3], v[0:1], 22
1746 ; CI-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
1747 ; CI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
1748 ; CI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
1749 ; CI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1750 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1751 ; CI-NEXT: s_setpc_b64 s[30:31]
1753 ; SI-LABEL: lshr_mad_i64_negative_3:
1755 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1756 ; SI-NEXT: v_lshr_b64 v[2:3], v[0:1], 22
1757 ; SI-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
1758 ; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
1759 ; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
1760 ; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1761 ; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1762 ; SI-NEXT: s_setpc_b64 s[30:31]
1764 ; GFX9-LABEL: lshr_mad_i64_negative_3:
1766 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1767 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
1768 ; GFX9-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
1769 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
1770 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
1771 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
1772 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1773 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1775 ; GFX11-LABEL: lshr_mad_i64_negative_3:
1777 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1778 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
1779 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1780 ; GFX11-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
1781 ; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
1782 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1783 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1784 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
1785 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1786 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
1787 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1789 ; GFX12-LABEL: lshr_mad_i64_negative_3:
1791 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1792 ; GFX12-NEXT: s_wait_expcnt 0x0
1793 ; GFX12-NEXT: s_wait_samplecnt 0x0
1794 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1795 ; GFX12-NEXT: s_wait_kmcnt 0x0
1796 ; GFX12-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
1797 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1798 ; GFX12-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
1799 ; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
1800 ; GFX12-NEXT: s_wait_alu 0xfffd
1801 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1802 ; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1803 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
1804 ; GFX12-NEXT: s_wait_alu 0xfffd
1805 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
1806 ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
1807 ; GFX12-NEXT: s_wait_alu 0xfffd
1808 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1809 %op = add i64 %arg0, 1
1810 %lsh = lshr i64 %arg0, 32
1811 %mul = mul i64 %lsh, s0xfffffffffffffc00
1812 %mad = add i64 %mul, %op
; lshr_mad_i64_negative_4: computes %arg0 + (%arg0 >> 32) * %arg0.
; Here the multiplier is the variable %arg0 itself, not a constant. The
; checks expect a full 64-bit multiply-add built from 32-bit pieces:
;   * CI: one v_mad_u64_u32 (v1 * v0 + v[0:1]) plus v_mul_lo_u32 v1*v1 added
;     into the high dword.
;   * SI: mul_hi/mul_lo expansion followed by add/addc.
;   * GFX9, GFX1100, GFX1150, GFX12: two chained v_mad_u64_u32
;     (v_mad_co_u64_u32 on GFX12), the second one accumulating v1*v1 on top
;     of the first result's high dword.
; NOTE(review): presumably "negative" because the multiplier is not a
; constant -- confirm against the combine this test guards.
1817 define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
1818 ; CI-LABEL: lshr_mad_i64_negative_4:
1820 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1821 ; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v0, v[0:1]
1822 ; CI-NEXT: v_mul_lo_u32 v0, v1, v1
1823 ; CI-NEXT: v_add_i32_e32 v1, vcc, v0, v3
1824 ; CI-NEXT: v_mov_b32_e32 v0, v2
1825 ; CI-NEXT: s_setpc_b64 s[30:31]
1827 ; SI-LABEL: lshr_mad_i64_negative_4:
1829 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1830 ; SI-NEXT: v_mul_hi_u32 v2, v1, v0
1831 ; SI-NEXT: v_mul_lo_u32 v3, v1, v1
1832 ; SI-NEXT: v_mul_lo_u32 v4, v1, v0
1833 ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1834 ; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v0
1835 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1836 ; SI-NEXT: s_setpc_b64 s[30:31]
1838 ; GFX9-LABEL: lshr_mad_i64_negative_4:
1840 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v0, v[0:1]
1842 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
1843 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v1, v[0:1]
1844 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
1845 ; GFX9-NEXT: v_mov_b32_e32 v1, v4
1846 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1848 ; GFX1100-LABEL: lshr_mad_i64_negative_4:
1850 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851 ; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
1852 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1853 ; GFX1100-NEXT: v_mov_b32_e32 v0, v3
1854 ; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v1, v[0:1]
1855 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
1856 ; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
1857 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
1859 ; GFX1150-LABEL: lshr_mad_i64_negative_4:
1861 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862 ; GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, v[0:1]
1863 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1864 ; GFX1150-NEXT: v_mov_b32_e32 v0, v4
1865 ; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v1, v1, v[0:1]
1866 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_3)
1867 ; GFX1150-NEXT: v_mov_b32_e32 v0, v3
1868 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1870 ; GFX12-LABEL: lshr_mad_i64_negative_4:
1872 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1873 ; GFX12-NEXT: s_wait_expcnt 0x0
1874 ; GFX12-NEXT: s_wait_samplecnt 0x0
1875 ; GFX12-NEXT: s_wait_bvhcnt 0x0
1876 ; GFX12-NEXT: s_wait_kmcnt 0x0
1877 ; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, v[0:1]
1878 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1879 ; GFX12-NEXT: v_mov_b32_e32 v0, v4
1880 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v1, v1, v[0:1]
1881 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
1882 ; GFX12-NEXT: v_mov_b32_e32 v0, v3
1883 ; GFX12-NEXT: s_setpc_b64 s[30:31]
1884 %lsh = lshr i64 %arg0, 32
1885 %mul = mul i64 %lsh, %arg0
1886 %mad = add i64 %mul, %arg0
; lshr_mad_i64_sgpr: computes %arg0 + (%arg0 >> 32) * s0xffffffffffff1c18 in an
; amdgpu_ps function whose i64 argument is marked inreg; the checks read it
; from s0/s1 and return the result in s0/s1. Expected lowering per target:
;   * CI: a VALU v_mad_u64_u32 of s1 by 0xffff1c18 with addend {s0, 0},
;     followed by v_readfirstlane_b32 to move the result back to s0/s1.
;   * SI: v_mul_hi_u32 + s_mul_i32, then s_sub_i32 of s1 from the high
;     product and s_add_u32/s_addc_u32 with the original s0/s1.
;   * GFX9, GFX11: fully scalar -- s_mul_hi_u32, s_mul_i32, s_sub_i32, then
;     s_add_u32/s_addc_u32.
;   * GFX12: a 64-bit scalar multiply (s_mul_u64 with the constant built in
;     s[4:5] as {0xffff1c18, -1}) followed by s_add_nc_u64.
1891 define amdgpu_ps i64 @lshr_mad_i64_sgpr(i64 inreg %arg0) #0 {
1892 ; CI-LABEL: lshr_mad_i64_sgpr:
1894 ; CI-NEXT: v_mov_b32_e32 v0, s0
1895 ; CI-NEXT: v_mov_b32_e32 v1, 0
1896 ; CI-NEXT: v_mov_b32_e32 v2, 0xffff1c18
1897 ; CI-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s1, v2, v[0:1]
1898 ; CI-NEXT: v_readfirstlane_b32 s0, v0
1899 ; CI-NEXT: v_readfirstlane_b32 s1, v1
1900 ; CI-NEXT: ; return to shader part epilog
1902 ; SI-LABEL: lshr_mad_i64_sgpr:
1904 ; SI-NEXT: v_mov_b32_e32 v0, 0xffff1c18
1905 ; SI-NEXT: v_mul_hi_u32 v0, s1, v0
1906 ; SI-NEXT: s_mul_i32 s2, s1, 0xffff1c18
1907 ; SI-NEXT: v_readfirstlane_b32 s3, v0
1908 ; SI-NEXT: s_sub_i32 s3, s3, s1
1909 ; SI-NEXT: s_add_u32 s0, s2, s0
1910 ; SI-NEXT: s_addc_u32 s1, s3, s1
1911 ; SI-NEXT: ; return to shader part epilog
1913 ; GFX9-LABEL: lshr_mad_i64_sgpr:
1915 ; GFX9-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
1916 ; GFX9-NEXT: s_sub_i32 s2, s2, s1
1917 ; GFX9-NEXT: s_mul_i32 s3, s1, 0xffff1c18
1918 ; GFX9-NEXT: s_add_u32 s0, s3, s0
1919 ; GFX9-NEXT: s_addc_u32 s1, s2, s1
1920 ; GFX9-NEXT: ; return to shader part epilog
1922 ; GFX11-LABEL: lshr_mad_i64_sgpr:
1924 ; GFX11-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
1925 ; GFX11-NEXT: s_mul_i32 s3, s1, 0xffff1c18
1926 ; GFX11-NEXT: s_sub_i32 s2, s2, s1
1927 ; GFX11-NEXT: s_add_u32 s0, s3, s0
1928 ; GFX11-NEXT: s_addc_u32 s1, s2, s1
1929 ; GFX11-NEXT: ; return to shader part epilog
1931 ; GFX12-LABEL: lshr_mad_i64_sgpr:
1933 ; GFX12-NEXT: s_mov_b32 s4, 0xffff1c18
1934 ; GFX12-NEXT: s_mov_b32 s3, 0
1935 ; GFX12-NEXT: s_mov_b32 s2, s1
1936 ; GFX12-NEXT: s_mov_b32 s5, -1
1937 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1938 ; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5]
1939 ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
1940 ; GFX12-NEXT: ; return to shader part epilog
1941 %lsh = lshr i64 %arg0, 32
1942 %mul = mul i64 %lsh, s0xffffffffffff1c18
1943 %mad = add i64 %mul, %arg0
; lshr_mad_i64_vec: <2 x i64> variant. Each lane computes
;   %arg0[i] + (%arg0[i] >> 32) * C[i]
; with C = <s0xffffffffffff1c18, s0xffffffffffff1118>.
; On CI, GFX9, GFX11 and GFX12 the checks expect one unsigned 64-bit mad per
; lane: the lane's high dword times the low 32 bits of its constant
; (0xffff1c18 / 0xffff1118), with an addend whose low dword is the lane's low
; dword and whose high dword has been zeroed (v1 = 0, v3 = v1) beforehand.
; SI expects the expanded form per lane: mul_lo/mul_hi, a subtract of the
; high dword from the high product, then add/addc.
; The GFX1100 checks write both mad results to fresh register pairs and copy
; them back; GFX1150 and GFX12 write v[0:1] and v[2:3] in place.
1948 define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 {
1949 ; CI-LABEL: lshr_mad_i64_vec:
1951 ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952 ; CI-NEXT: v_mov_b32_e32 v6, v3
1953 ; CI-NEXT: v_mov_b32_e32 v3, v1
1954 ; CI-NEXT: v_mov_b32_e32 v1, 0
1955 ; CI-NEXT: s_mov_b32 s4, 0xffff1c18
1956 ; CI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
1957 ; CI-NEXT: v_mov_b32_e32 v3, v1
1958 ; CI-NEXT: s_mov_b32 s4, 0xffff1118
1959 ; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
1960 ; CI-NEXT: v_mov_b32_e32 v0, v4
1961 ; CI-NEXT: v_mov_b32_e32 v1, v5
1962 ; CI-NEXT: s_setpc_b64 s[30:31]
1964 ; SI-LABEL: lshr_mad_i64_vec:
1966 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1967 ; SI-NEXT: s_mov_b32 s4, 0xffff1118
1968 ; SI-NEXT: v_mul_lo_u32 v4, v3, s4
1969 ; SI-NEXT: v_mul_hi_u32 v5, v3, s4
1970 ; SI-NEXT: s_mov_b32 s4, 0xffff1c18
1971 ; SI-NEXT: v_mul_hi_u32 v6, v1, s4
1972 ; SI-NEXT: v_mul_lo_u32 v7, v1, s4
1973 ; SI-NEXT: v_sub_i32_e32 v5, vcc, v5, v3
1974 ; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v1
1975 ; SI-NEXT: v_add_i32_e32 v0, vcc, v7, v0
1976 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc
1977 ; SI-NEXT: v_add_i32_e32 v2, vcc, v4, v2
1978 ; SI-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc
1979 ; SI-NEXT: s_setpc_b64 s[30:31]
1981 ; GFX9-LABEL: lshr_mad_i64_vec:
1983 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1984 ; GFX9-NEXT: v_mov_b32_e32 v6, v3
1985 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1986 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1987 ; GFX9-NEXT: s_mov_b32 s4, 0xffff1c18
1988 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
1989 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1990 ; GFX9-NEXT: s_mov_b32 s4, 0xffff1118
1991 ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
1992 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
1993 ; GFX9-NEXT: v_mov_b32_e32 v1, v5
1994 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1996 ; GFX1100-LABEL: lshr_mad_i64_vec:
1998 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1999 ; GFX1100-NEXT: v_mov_b32_e32 v8, v3
2000 ; GFX1100-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v1, 0
2001 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2002 ; GFX1100-NEXT: v_mad_u64_u32 v[4:5], null, 0xffff1c18, v6, v[0:1]
2003 ; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, v4
2004 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2005 ; GFX1100-NEXT: v_mad_u64_u32 v[6:7], null, 0xffff1118, v8, v[2:3]
2006 ; GFX1100-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_mov_b32 v2, v6
2007 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
2008 ; GFX1100-NEXT: v_mov_b32_e32 v3, v7
2009 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2011 ; GFX1150-LABEL: lshr_mad_i64_vec:
2013 ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2014 ; GFX1150-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
2015 ; GFX1150-NEXT: v_mov_b32_e32 v1, 0
2016 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2017 ; GFX1150-NEXT: v_mov_b32_e32 v3, v1
2018 ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
2019 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
2020 ; GFX1150-NEXT: v_mad_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
2021 ; GFX1150-NEXT: s_setpc_b64 s[30:31]
2023 ; GFX12-LABEL: lshr_mad_i64_vec:
2025 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2026 ; GFX12-NEXT: s_wait_expcnt 0x0
2027 ; GFX12-NEXT: s_wait_samplecnt 0x0
2028 ; GFX12-NEXT: s_wait_bvhcnt 0x0
2029 ; GFX12-NEXT: s_wait_kmcnt 0x0
2030 ; GFX12-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
2031 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
2032 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2033 ; GFX12-NEXT: v_mov_b32_e32 v3, v1
2034 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
2035 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
2036 ; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
2037 ; GFX12-NEXT: s_setpc_b64 s[30:31]
2038 %lsh = lshr <2 x i64> %arg0, <i64 32, i64 32>
2039 %mul = mul <2 x i64> %lsh, <i64 s0xffffffffffff1c18, i64 s0xffffffffffff1118>
2040 %mad = add <2 x i64> %mul, %arg0
; Attribute groups referenced by the function definitions in this file.
; #0 (nounwind) is the group attached to the test functions shown here.
; NOTE(review): no use of #1 is visible in this part of the file -- confirm
; it is still referenced elsewhere before relying on or removing it.
2045 attributes #0 = { nounwind }
2046 attributes #1 = { nounwind readnone speculatable }