1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7 ; Test gfx9+ s_lshl[1-4]_add_u32 pattern matching
; Scalar (%src0 << 1) + %src1. Both arguments are `inreg` and the checks show
; them in s0/s1. The GFX9 and GFX10 checks expect a single s_lshl1_add_u32,
; while the GFX8 checks expect a separate s_lshl_b32 and s_add_i32.
9 define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
10 ; GFX9-LABEL: s_shl1_add_u32:
12 ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1
13 ; GFX9-NEXT: ; return to shader part epilog
15 ; GFX8-LABEL: s_shl1_add_u32:
17 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
18 ; GFX8-NEXT: s_add_i32 s0, s0, s1
19 ; GFX8-NEXT: ; return to shader part epilog
21 ; GFX10-LABEL: s_shl1_add_u32:
23 ; GFX10-NEXT: s_lshl1_add_u32 s0, s0, s1
24 ; GFX10-NEXT: ; return to shader part epilog
25 %shl = shl i32 %src0, 1
26 %add = add i32 %shl, %src1
; Scalar (%src0 << 2) + %src1. Both arguments are `inreg` and the checks show
; them in s0/s1. The GFX9 and GFX10 checks expect a single s_lshl2_add_u32,
; while the GFX8 checks expect a separate s_lshl_b32 and s_add_i32.
30 define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
31 ; GFX9-LABEL: s_shl2_add_u32:
33 ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1
34 ; GFX9-NEXT: ; return to shader part epilog
36 ; GFX8-LABEL: s_shl2_add_u32:
38 ; GFX8-NEXT: s_lshl_b32 s0, s0, 2
39 ; GFX8-NEXT: s_add_i32 s0, s0, s1
40 ; GFX8-NEXT: ; return to shader part epilog
42 ; GFX10-LABEL: s_shl2_add_u32:
44 ; GFX10-NEXT: s_lshl2_add_u32 s0, s0, s1
45 ; GFX10-NEXT: ; return to shader part epilog
46 %shl = shl i32 %src0, 2
47 %add = add i32 %shl, %src1
; Scalar (%src0 << 3) + %src1. Both arguments are `inreg` and the checks show
; them in s0/s1. The GFX9 and GFX10 checks expect a single s_lshl3_add_u32,
; while the GFX8 checks expect a separate s_lshl_b32 and s_add_i32.
51 define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
52 ; GFX9-LABEL: s_shl3_add_u32:
54 ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1
55 ; GFX9-NEXT: ; return to shader part epilog
57 ; GFX8-LABEL: s_shl3_add_u32:
59 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
60 ; GFX8-NEXT: s_add_i32 s0, s0, s1
61 ; GFX8-NEXT: ; return to shader part epilog
63 ; GFX10-LABEL: s_shl3_add_u32:
65 ; GFX10-NEXT: s_lshl3_add_u32 s0, s0, s1
66 ; GFX10-NEXT: ; return to shader part epilog
67 %shl = shl i32 %src0, 3
68 %add = add i32 %shl, %src1
; Scalar (%src0 << 4) + %src1. Both arguments are `inreg` and the checks show
; them in s0/s1. The GFX9 and GFX10 checks expect a single s_lshl4_add_u32,
; while the GFX8 checks expect a separate s_lshl_b32 and s_add_i32.
72 define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
73 ; GFX9-LABEL: s_shl4_add_u32:
75 ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1
76 ; GFX9-NEXT: ; return to shader part epilog
78 ; GFX8-LABEL: s_shl4_add_u32:
80 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
81 ; GFX8-NEXT: s_add_i32 s0, s0, s1
82 ; GFX8-NEXT: ; return to shader part epilog
84 ; GFX10-LABEL: s_shl4_add_u32:
86 ; GFX10-NEXT: s_lshl4_add_u32 s0, s0, s1
87 ; GFX10-NEXT: ; return to shader part epilog
88 %shl = shl i32 %src0, 4
89 %add = add i32 %shl, %src1
; Scalar (%src0 << 5) + %src1 with both arguments `inreg`. The checks use the
; GCN prefix, which every RUN line enables, and expect a separate s_lshl_b32
; and s_add_i32: a shift amount of 5 is not folded into a fused scalar
; shift-add on any tested target.
93 define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
94 ; GCN-LABEL: s_shl5_add_u32:
96 ; GCN-NEXT: s_lshl_b32 s0, s0, 5
97 ; GCN-NEXT: s_add_i32 s0, s0, s1
98 ; GCN-NEXT: ; return to shader part epilog
99 %shl = shl i32 %src0, 5
100 %add = add i32 %shl, %src1
; Vector (%src0 << 1) + %src1. Neither argument is `inreg` and the checks show
; them in v0/v1. The GFX9 and GFX10 checks expect a single v_lshl_add_u32 with
; the shift amount as an immediate operand, while the GFX8 checks expect a
; separate v_lshlrev_b32 and v_add_u32.
104 define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
105 ; GFX9-LABEL: v_shl1_add_u32:
107 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 1, v1
109 ; GFX9-NEXT: s_setpc_b64 s[30:31]
111 ; GFX8-LABEL: v_shl1_add_u32:
113 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
115 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
116 ; GFX8-NEXT: s_setpc_b64 s[30:31]
118 ; GFX10-LABEL: v_shl1_add_u32:
120 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 1, v1
122 ; GFX10-NEXT: s_setpc_b64 s[30:31]
123 %shl = shl i32 %src0, 1
124 %add = add i32 %shl, %src1
; Vector (%src0 << 2) + %src1. Neither argument is `inreg` and the checks show
; them in v0/v1. The GFX9 and GFX10 checks expect a single v_lshl_add_u32 with
; the shift amount as an immediate operand, while the GFX8 checks expect a
; separate v_lshlrev_b32 and v_add_u32.
128 define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
129 ; GFX9-LABEL: v_shl2_add_u32:
131 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
133 ; GFX9-NEXT: s_setpc_b64 s[30:31]
135 ; GFX8-LABEL: v_shl2_add_u32:
137 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
139 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
140 ; GFX8-NEXT: s_setpc_b64 s[30:31]
142 ; GFX10-LABEL: v_shl2_add_u32:
144 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
146 ; GFX10-NEXT: s_setpc_b64 s[30:31]
147 %shl = shl i32 %src0, 2
148 %add = add i32 %shl, %src1
; Vector (%src0 << 3) + %src1. Neither argument is `inreg` and the checks show
; them in v0/v1. The GFX9 and GFX10 checks expect a single v_lshl_add_u32 with
; the shift amount as an immediate operand, while the GFX8 checks expect a
; separate v_lshlrev_b32 and v_add_u32.
152 define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
153 ; GFX9-LABEL: v_shl3_add_u32:
155 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 3, v1
157 ; GFX9-NEXT: s_setpc_b64 s[30:31]
159 ; GFX8-LABEL: v_shl3_add_u32:
161 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
163 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
164 ; GFX8-NEXT: s_setpc_b64 s[30:31]
166 ; GFX10-LABEL: v_shl3_add_u32:
168 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v1
170 ; GFX10-NEXT: s_setpc_b64 s[30:31]
171 %shl = shl i32 %src0, 3
172 %add = add i32 %shl, %src1
; Vector (%src0 << 4) + %src1. Neither argument is `inreg` and the checks show
; them in v0/v1. The GFX9 and GFX10 checks expect a single v_lshl_add_u32 with
; the shift amount as an immediate operand, while the GFX8 checks expect a
; separate v_lshlrev_b32 and v_add_u32.
176 define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
177 ; GFX9-LABEL: v_shl4_add_u32:
179 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 4, v1
181 ; GFX9-NEXT: s_setpc_b64 s[30:31]
183 ; GFX8-LABEL: v_shl4_add_u32:
185 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
187 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
188 ; GFX8-NEXT: s_setpc_b64 s[30:31]
190 ; GFX10-LABEL: v_shl4_add_u32:
192 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 4, v1
194 ; GFX10-NEXT: s_setpc_b64 s[30:31]
195 %shl = shl i32 %src0, 4
196 %add = add i32 %shl, %src1
; Vector (%src0 << 5) + %src1. Neither argument is `inreg` and the checks show
; them in v0/v1. The GFX9 and GFX10 checks still expect a single
; v_lshl_add_u32 for a shift amount of 5, while the GFX8 checks expect a
; separate v_lshlrev_b32 and v_add_u32.
200 define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
201 ; GFX9-LABEL: v_shl5_add_u32:
203 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 5, v1
205 ; GFX9-NEXT: s_setpc_b64 s[30:31]
207 ; GFX8-LABEL: v_shl5_add_u32:
209 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0
211 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
212 ; GFX8-NEXT: s_setpc_b64 s[30:31]
214 ; GFX10-LABEL: v_shl5_add_u32:
216 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 5, v1
218 ; GFX10-NEXT: s_setpc_b64 s[30:31]
219 %shl = shl i32 %src0, 5
220 %add = add i32 %shl, %src1
224 ; FIXME: Use v_lshl_add_u32 on gfx9+.
225 ; The shift is scalar, but the add is vector, so they are currently selected separately.
; Mixed operands for (%src0 << 1) + %src1: %src0 is `inreg` (s0 in the checks)
; and %src1 is not (v0 in the checks). On every target the checks expect a
; scalar s_lshl_b32 followed by a vector add. The sum is bitcast to float to
; match the function's float return type.
226 define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
227 ; GFX9-LABEL: shl1_add_u32_vgpr1:
229 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
230 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
231 ; GFX9-NEXT: ; return to shader part epilog
233 ; GFX8-LABEL: shl1_add_u32_vgpr1:
235 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
236 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
237 ; GFX8-NEXT: ; return to shader part epilog
239 ; GFX10-LABEL: shl1_add_u32_vgpr1:
241 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
242 ; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
243 ; GFX10-NEXT: ; return to shader part epilog
244 %shl = shl i32 %src0, 1
245 %add = add i32 %shl, %src1
246 %cast = bitcast i32 %add to float
; Mixed operands for (%src0 << 2) + %src1: %src0 is `inreg` (s0 in the checks)
; and %src1 is not (v0 in the checks). On every target the checks expect a
; scalar s_lshl_b32 followed by a vector add. The sum is bitcast to float to
; match the function's float return type.
250 define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
251 ; GFX9-LABEL: shl2_add_u32_vgpr1:
253 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2
254 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
255 ; GFX9-NEXT: ; return to shader part epilog
257 ; GFX8-LABEL: shl2_add_u32_vgpr1:
259 ; GFX8-NEXT: s_lshl_b32 s0, s0, 2
260 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
261 ; GFX8-NEXT: ; return to shader part epilog
263 ; GFX10-LABEL: shl2_add_u32_vgpr1:
265 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2
266 ; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
267 ; GFX10-NEXT: ; return to shader part epilog
268 %shl = shl i32 %src0, 2
269 %add = add i32 %shl, %src1
270 %cast = bitcast i32 %add to float
; Mixed operands for (%src0 << 3) + %src1: %src0 is `inreg` (s0 in the checks)
; and %src1 is not (v0 in the checks). On every target the checks expect a
; scalar s_lshl_b32 followed by a vector add. The sum is bitcast to float to
; match the function's float return type.
274 define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
275 ; GFX9-LABEL: shl3_add_u32_vgpr1:
277 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
278 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
279 ; GFX9-NEXT: ; return to shader part epilog
281 ; GFX8-LABEL: shl3_add_u32_vgpr1:
283 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
284 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
285 ; GFX8-NEXT: ; return to shader part epilog
287 ; GFX10-LABEL: shl3_add_u32_vgpr1:
289 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
290 ; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
291 ; GFX10-NEXT: ; return to shader part epilog
292 %shl = shl i32 %src0, 3
293 %add = add i32 %shl, %src1
294 %cast = bitcast i32 %add to float
; Mixed operands for (%src0 << 4) + %src1: %src0 is `inreg` (s0 in the checks)
; and %src1 is not (v0 in the checks). On every target the checks expect a
; scalar s_lshl_b32 followed by a vector add. The sum is bitcast to float to
; match the function's float return type.
298 define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
299 ; GFX9-LABEL: shl4_add_u32_vgpr1:
301 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
302 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
303 ; GFX9-NEXT: ; return to shader part epilog
305 ; GFX8-LABEL: shl4_add_u32_vgpr1:
307 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
308 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
309 ; GFX8-NEXT: ; return to shader part epilog
311 ; GFX10-LABEL: shl4_add_u32_vgpr1:
313 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
314 ; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
315 ; GFX10-NEXT: ; return to shader part epilog
316 %shl = shl i32 %src0, 4
317 %add = add i32 %shl, %src1
318 %cast = bitcast i32 %add to float
; Mixed operands for (%src0 << 5) + %src1: %src0 is `inreg` (s0 in the checks)
; and %src1 is not (v0 in the checks). On every target the checks expect a
; scalar s_lshl_b32 followed by a vector add. The sum is bitcast to float to
; match the function's float return type.
322 define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
323 ; GFX9-LABEL: shl5_add_u32_vgpr1:
325 ; GFX9-NEXT: s_lshl_b32 s0, s0, 5
326 ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0
327 ; GFX9-NEXT: ; return to shader part epilog
329 ; GFX8-LABEL: shl5_add_u32_vgpr1:
331 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
332 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
333 ; GFX8-NEXT: ; return to shader part epilog
335 ; GFX10-LABEL: shl5_add_u32_vgpr1:
337 ; GFX10-NEXT: s_lshl_b32 s0, s0, 5
338 ; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
339 ; GFX10-NEXT: ; return to shader part epilog
340 %shl = shl i32 %src0, 5
341 %add = add i32 %shl, %src1
342 %cast = bitcast i32 %add to float
; <2 x i32> form of (%src0 << 1) + %src1 with both vectors `inreg`. The checks
; show one instruction sequence per element (s0 with s2, s1 with s3). The GFX9
; and GFX10 checks expect an s_lshl1_add_u32 for each element, while the GFX8
; checks expect separate s_lshl_b32 and s_add_i32 instructions.
346 define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
347 ; GFX9-LABEL: s_shl1_add_u32_v2:
349 ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2
350 ; GFX9-NEXT: s_lshl1_add_u32 s1, s1, s3
351 ; GFX9-NEXT: ; return to shader part epilog
353 ; GFX8-LABEL: s_shl1_add_u32_v2:
355 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
356 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
357 ; GFX8-NEXT: s_add_i32 s0, s0, s2
358 ; GFX8-NEXT: s_add_i32 s1, s1, s3
359 ; GFX8-NEXT: ; return to shader part epilog
361 ; GFX10-LABEL: s_shl1_add_u32_v2:
363 ; GFX10-NEXT: s_lshl1_add_u32 s0, s0, s2
364 ; GFX10-NEXT: s_lshl1_add_u32 s1, s1, s3
365 ; GFX10-NEXT: ; return to shader part epilog
366 %shl = shl <2 x i32> %src0, <i32 1, i32 1>
367 %add = add <2 x i32> %shl, %src1
; <2 x i32> form of (%src0 << 2) + %src1 with both vectors `inreg`. The checks
; show one instruction sequence per element (s0 with s2, s1 with s3). The GFX9
; and GFX10 checks expect an s_lshl2_add_u32 for each element, while the GFX8
; checks expect separate s_lshl_b32 and s_add_i32 instructions.
371 define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
372 ; GFX9-LABEL: s_shl2_add_u32_v2:
374 ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
375 ; GFX9-NEXT: s_lshl2_add_u32 s1, s1, s3
376 ; GFX9-NEXT: ; return to shader part epilog
378 ; GFX8-LABEL: s_shl2_add_u32_v2:
380 ; GFX8-NEXT: s_lshl_b32 s0, s0, 2
381 ; GFX8-NEXT: s_lshl_b32 s1, s1, 2
382 ; GFX8-NEXT: s_add_i32 s0, s0, s2
383 ; GFX8-NEXT: s_add_i32 s1, s1, s3
384 ; GFX8-NEXT: ; return to shader part epilog
386 ; GFX10-LABEL: s_shl2_add_u32_v2:
388 ; GFX10-NEXT: s_lshl2_add_u32 s0, s0, s2
389 ; GFX10-NEXT: s_lshl2_add_u32 s1, s1, s3
390 ; GFX10-NEXT: ; return to shader part epilog
391 %shl = shl <2 x i32> %src0, <i32 2, i32 2>
392 %add = add <2 x i32> %shl, %src1
; <2 x i32> form of (%src0 << 3) + %src1 with both vectors `inreg`. The checks
; show one instruction sequence per element (s0 with s2, s1 with s3). The GFX9
; and GFX10 checks expect an s_lshl3_add_u32 for each element, while the GFX8
; checks expect separate s_lshl_b32 and s_add_i32 instructions.
396 define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
397 ; GFX9-LABEL: s_shl3_add_u32_v2:
399 ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2
400 ; GFX9-NEXT: s_lshl3_add_u32 s1, s1, s3
401 ; GFX9-NEXT: ; return to shader part epilog
403 ; GFX8-LABEL: s_shl3_add_u32_v2:
405 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
406 ; GFX8-NEXT: s_lshl_b32 s1, s1, 3
407 ; GFX8-NEXT: s_add_i32 s0, s0, s2
408 ; GFX8-NEXT: s_add_i32 s1, s1, s3
409 ; GFX8-NEXT: ; return to shader part epilog
411 ; GFX10-LABEL: s_shl3_add_u32_v2:
413 ; GFX10-NEXT: s_lshl3_add_u32 s0, s0, s2
414 ; GFX10-NEXT: s_lshl3_add_u32 s1, s1, s3
415 ; GFX10-NEXT: ; return to shader part epilog
416 %shl = shl <2 x i32> %src0, <i32 3, i32 3>
417 %add = add <2 x i32> %shl, %src1
; <2 x i32> form of (%src0 << 4) + %src1 with both vectors `inreg`. The checks
; show one instruction sequence per element (s0 with s2, s1 with s3). The GFX9
; and GFX10 checks expect an s_lshl4_add_u32 for each element, while the GFX8
; checks expect separate s_lshl_b32 and s_add_i32 instructions.
421 define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
422 ; GFX9-LABEL: s_shl4_add_u32_v2:
424 ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2
425 ; GFX9-NEXT: s_lshl4_add_u32 s1, s1, s3
426 ; GFX9-NEXT: ; return to shader part epilog
428 ; GFX8-LABEL: s_shl4_add_u32_v2:
430 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
431 ; GFX8-NEXT: s_lshl_b32 s1, s1, 4
432 ; GFX8-NEXT: s_add_i32 s0, s0, s2
433 ; GFX8-NEXT: s_add_i32 s1, s1, s3
434 ; GFX8-NEXT: ; return to shader part epilog
436 ; GFX10-LABEL: s_shl4_add_u32_v2:
438 ; GFX10-NEXT: s_lshl4_add_u32 s0, s0, s2
439 ; GFX10-NEXT: s_lshl4_add_u32 s1, s1, s3
440 ; GFX10-NEXT: ; return to shader part epilog
441 %shl = shl <2 x i32> %src0, <i32 4, i32 4>
442 %add = add <2 x i32> %shl, %src1
; <2 x i32> shift-add with a different shift amount per element (2 for element
; 0, 4 for element 1) and both vectors `inreg`. The GFX9 and GFX10 checks
; expect s_lshl2_add_u32 for element 0 and s_lshl4_add_u32 for element 1,
; while the GFX8 checks expect separate s_lshl_b32 and s_add_i32 instructions.
446 define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
447 ; GFX9-LABEL: s_shl_2_4_add_u32_v2:
449 ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
450 ; GFX9-NEXT: s_lshl4_add_u32 s1, s1, s3
451 ; GFX9-NEXT: ; return to shader part epilog
453 ; GFX8-LABEL: s_shl_2_4_add_u32_v2:
455 ; GFX8-NEXT: s_lshl_b32 s0, s0, 2
456 ; GFX8-NEXT: s_lshl_b32 s1, s1, 4
457 ; GFX8-NEXT: s_add_i32 s0, s0, s2
458 ; GFX8-NEXT: s_add_i32 s1, s1, s3
459 ; GFX8-NEXT: ; return to shader part epilog
461 ; GFX10-LABEL: s_shl_2_4_add_u32_v2:
463 ; GFX10-NEXT: s_lshl2_add_u32 s0, s0, s2
464 ; GFX10-NEXT: s_lshl4_add_u32 s1, s1, s3
465 ; GFX10-NEXT: ; return to shader part epilog
466 %shl = shl <2 x i32> %src0, <i32 2, i32 4>
467 %add = add <2 x i32> %shl, %src1
; Multi-use case for a shift of 4: %shl feeds the add and is also returned as
; field 0 of the result struct. The checks use the GCN prefix, which every RUN
; line enables, and expect a separate s_lshl_b32 and s_add_i32 on all targets.
; The aggregate is seeded with poison rather than undef; both fields are
; overwritten before the return, so the seed value is never observed.
471 define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
472 ; GCN-LABEL: s_shl4_add_u32_multi_use:
474 ; GCN-NEXT: s_lshl_b32 s0, s0, 4
475 ; GCN-NEXT: s_add_i32 s1, s0, s1
476 ; GCN-NEXT: ; return to shader part epilog
477 %shl = shl i32 %src0, 4
478 %add = add i32 %shl, %src1
479 %insert0 = insertvalue { i32, i32 } poison, i32 %shl, 0
480 %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
481 ret { i32, i32 } %insert1
; Multi-use case for a shift of 3: %shl feeds the add and is also returned as
; field 0 of the result struct. The checks use the GCN prefix, which every RUN
; line enables, and expect a separate s_lshl_b32 and s_add_i32 on all targets.
; The aggregate is seeded with poison rather than undef; both fields are
; overwritten before the return, so the seed value is never observed.
484 define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
485 ; GCN-LABEL: s_shl3_add_u32_multi_use:
487 ; GCN-NEXT: s_lshl_b32 s0, s0, 3
488 ; GCN-NEXT: s_add_i32 s1, s0, s1
489 ; GCN-NEXT: ; return to shader part epilog
490 %shl = shl i32 %src0, 3
491 %add = add i32 %shl, %src1
492 %insert0 = insertvalue { i32, i32 } poison, i32 %shl, 0
493 %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
494 ret { i32, i32 } %insert1
; Multi-use case for a shift of 2: %shl feeds the add and is also returned as
; field 0 of the result struct. The checks use the GCN prefix, which every RUN
; line enables, and expect a separate s_lshl_b32 and s_add_i32 on all targets.
; The aggregate is seeded with poison rather than undef; both fields are
; overwritten before the return, so the seed value is never observed.
497 define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
498 ; GCN-LABEL: s_shl2_add_u32_multi_use:
500 ; GCN-NEXT: s_lshl_b32 s0, s0, 2
501 ; GCN-NEXT: s_add_i32 s1, s0, s1
502 ; GCN-NEXT: ; return to shader part epilog
503 %shl = shl i32 %src0, 2
504 %add = add i32 %shl, %src1
505 %insert0 = insertvalue { i32, i32 } poison, i32 %shl, 0
506 %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
507 ret { i32, i32 } %insert1
; Multi-use case for a shift of 1: %shl feeds the add and is also returned as
; field 0 of the result struct. The checks use the GCN prefix, which every RUN
; line enables, and expect a separate s_lshl_b32 and s_add_i32 on all targets.
; The aggregate is seeded with poison rather than undef; both fields are
; overwritten before the return, so the seed value is never observed.
511 define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
512 ; GCN-LABEL: s_shl1_add_u32_multi_use:
514 ; GCN-NEXT: s_lshl_b32 s0, s0, 1
515 ; GCN-NEXT: s_add_i32 s1, s0, s1
516 ; GCN-NEXT: ; return to shader part epilog
517 %shl = shl i32 %src0, 1
518 %add = add i32 %shl, %src1
519 %insert0 = insertvalue { i32, i32 } poison, i32 %shl, 0
520 %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
521 ret { i32, i32 } %insert1