1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
; Variable shift-left of an i8 value by an i8 amount.
; The GFX6 and GFX10 checks expect the amount to be masked with 0xff before
; the shift; the GFX8 and GFX9 checks expect a single SDWA 16-bit shift.
7 define i8 @v_shl_i8(i8 %value, i8 %amount) {
8 ; GFX6-LABEL: v_shl_i8:
10 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
12 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
13 ; GFX6-NEXT: s_setpc_b64 s[30:31]
15 ; GFX8-LABEL: v_shl_i8:
17 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
19 ; GFX8-NEXT: s_setpc_b64 s[30:31]
21 ; GFX9-LABEL: v_shl_i8:
23 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
25 ; GFX9-NEXT: s_setpc_b64 s[30:31]
27 ; GFX10-LABEL: v_shl_i8:
29 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
31 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
32 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
33 ; GFX10-NEXT: s_setpc_b64 s[30:31]
34 %result = shl i8 %value, %amount
; Shift-left of an i8 value by the constant 7.
; The GFX6 checks expect a 32-bit shift by the immediate; the GFX8, GFX9 and
; GFX10 checks expect a 16-bit shift by the immediate.
38 define i8 @v_shl_i8_7(i8 %value) {
39 ; GFX6-LABEL: v_shl_i8_7:
41 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 7, v0
43 ; GFX6-NEXT: s_setpc_b64 s[30:31]
45 ; GFX8-LABEL: v_shl_i8_7:
47 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 7, v0
49 ; GFX8-NEXT: s_setpc_b64 s[30:31]
51 ; GFX9-LABEL: v_shl_i8_7:
53 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 7, v0
55 ; GFX9-NEXT: s_setpc_b64 s[30:31]
57 ; GFX10-LABEL: v_shl_i8_7:
59 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
61 ; GFX10-NEXT: v_lshlrev_b16 v0, 7, v0
62 ; GFX10-NEXT: s_setpc_b64 s[30:31]
63 %result = shl i8 %value, 7
; amdgpu_ps variant with both i8 operands marked inreg; every target is
; expected to use the scalar s_lshl_b32.
; The GFX8, GFX9 and GFX10 checks additionally mask s0 with 0xff first;
; the GFX6 checks expect the shift alone.
67 define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) {
68 ; GFX6-LABEL: s_shl_i8:
70 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
71 ; GFX6-NEXT: ; return to shader part epilog
73 ; GFX8-LABEL: s_shl_i8:
75 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
76 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
77 ; GFX8-NEXT: ; return to shader part epilog
79 ; GFX9-LABEL: s_shl_i8:
81 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
82 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
83 ; GFX9-NEXT: ; return to shader part epilog
85 ; GFX10-LABEL: s_shl_i8:
87 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
88 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
89 ; GFX10-NEXT: ; return to shader part epilog
90 %result = shl i8 %value, %amount
; amdgpu_ps variant shifting an inreg i8 value by the constant 7.
; The GFX6 checks expect s_lshl_b32 with the immediate 7. The GFX8, GFX9 and
; GFX10 checks expect the amount to be produced into s1 by
; s_bfe_u32 s1, 7, 0x100000 and the shift to use s1.
94 define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
95 ; GFX6-LABEL: s_shl_i8_7:
97 ; GFX6-NEXT: s_lshl_b32 s0, s0, 7
98 ; GFX6-NEXT: ; return to shader part epilog
100 ; GFX8-LABEL: s_shl_i8_7:
102 ; GFX8-NEXT: s_bfe_u32 s1, 7, 0x100000
103 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
104 ; GFX8-NEXT: ; return to shader part epilog
106 ; GFX9-LABEL: s_shl_i8_7:
108 ; GFX9-NEXT: s_bfe_u32 s1, 7, 0x100000
109 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
110 ; GFX9-NEXT: ; return to shader part epilog
112 ; GFX10-LABEL: s_shl_i8_7:
114 ; GFX10-NEXT: s_bfe_u32 s1, 7, 0x100000
115 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
116 ; GFX10-NEXT: ; return to shader part epilog
117 %result = shl i8 %value, 7
; Variable shift-left of an i24 value by an i24 amount.
; All targets are expected to mask the amount with 0xffffff and then issue a
; 32-bit shift; GFX10 differs only by the extra s_waitcnt_vscnt.
122 define i24 @v_shl_i24(i24 %value, i24 %amount) {
123 ; GCN-LABEL: v_shl_i24:
125 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1
127 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
128 ; GCN-NEXT: s_setpc_b64 s[30:31]
130 ; GFX10-LABEL: v_shl_i24:
132 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
134 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
135 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v1, v0
136 ; GFX10-NEXT: s_setpc_b64 s[30:31]
137 %result = shl i24 %value, %amount
; Shift-left of an i24 value by the constant 7.
; All targets are expected to emit one 32-bit shift by the immediate 7.
141 define i24 @v_shl_i24_7(i24 %value) {
142 ; GCN-LABEL: v_shl_i24_7:
144 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
146 ; GCN-NEXT: s_setpc_b64 s[30:31]
148 ; GFX10-LABEL: v_shl_i24_7:
150 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
152 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
153 ; GFX10-NEXT: s_setpc_b64 s[30:31]
154 %result = shl i24 %value, 7
; amdgpu_ps variant with both i24 operands marked inreg.
; All targets are expected to emit a single s_lshl_b32 with no masking.
158 define amdgpu_ps i24 @s_shl_i24(i24 inreg %value, i24 inreg %amount) {
159 ; GCN-LABEL: s_shl_i24:
161 ; GCN-NEXT: s_lshl_b32 s0, s0, s1
162 ; GCN-NEXT: ; return to shader part epilog
164 ; GFX10-LABEL: s_shl_i24:
166 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
167 ; GFX10-NEXT: ; return to shader part epilog
168 %result = shl i24 %value, %amount
; amdgpu_ps variant shifting an inreg i24 value by the constant 7.
; All targets are expected to emit s_lshl_b32 with the immediate 7.
172 define amdgpu_ps i24 @s_shl_i24_7(i24 inreg %value) {
173 ; GCN-LABEL: s_shl_i24_7:
175 ; GCN-NEXT: s_lshl_b32 s0, s0, 7
176 ; GCN-NEXT: ; return to shader part epilog
178 ; GFX10-LABEL: s_shl_i24_7:
180 ; GFX10-NEXT: s_lshl_b32 s0, s0, 7
181 ; GFX10-NEXT: ; return to shader part epilog
182 %result = shl i24 %value, 7
; Variable shift-left of an i32 value by an i32 amount.
; All targets are expected to emit exactly one v_lshlrev_b32_e32.
186 define i32 @v_shl_i32(i32 %value, i32 %amount) {
187 ; GCN-LABEL: v_shl_i32:
189 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
191 ; GCN-NEXT: s_setpc_b64 s[30:31]
193 ; GFX10-LABEL: v_shl_i32:
195 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
197 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v1, v0
198 ; GFX10-NEXT: s_setpc_b64 s[30:31]
199 %result = shl i32 %value, %amount
; Shift-left of an i32 value by the constant 31.
; All targets are expected to emit one v_lshlrev_b32_e32 with the immediate 31.
203 define i32 @v_shl_i32_31(i32 %value) {
204 ; GCN-LABEL: v_shl_i32_31:
206 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
208 ; GCN-NEXT: s_setpc_b64 s[30:31]
210 ; GFX10-LABEL: v_shl_i32_31:
212 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
214 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v0
215 ; GFX10-NEXT: s_setpc_b64 s[30:31]
216 %result = shl i32 %value, 31
; amdgpu_ps variant with both i32 operands marked inreg.
; All targets are expected to emit a single s_lshl_b32 s0, s0, s1.
220 define amdgpu_ps i32 @s_shl_i32(i32 inreg %value, i32 inreg %amount) {
221 ; GCN-LABEL: s_shl_i32:
223 ; GCN-NEXT: s_lshl_b32 s0, s0, s1
224 ; GCN-NEXT: ; return to shader part epilog
226 ; GFX10-LABEL: s_shl_i32:
228 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
229 ; GFX10-NEXT: ; return to shader part epilog
230 %result = shl i32 %value, %amount
; amdgpu_ps variant shifting an inreg i32 value by the constant 31.
; All targets are expected to emit s_lshl_b32 with the immediate 31.
234 define amdgpu_ps i32 @s_shl_i32_31(i32 inreg %value) {
235 ; GCN-LABEL: s_shl_i32_31:
237 ; GCN-NEXT: s_lshl_b32 s0, s0, 31
238 ; GCN-NEXT: ; return to shader part epilog
240 ; GFX10-LABEL: s_shl_i32_31:
242 ; GFX10-NEXT: s_lshl_b32 s0, s0, 31
243 ; GFX10-NEXT: ; return to shader part epilog
244 %result = shl i32 %value, 31
; Mixed-operand case: the i32 value is inreg, the amount is not; the shifted
; result is bitcast to float.
; The GFX6 checks expect the non-reversed v_lshl_b32_e32 with s0 as the first
; source; the other targets expect v_lshlrev_b32_e64 with s0 as the last source.
248 define amdgpu_ps float @shl_i32_sv(i32 inreg %value, i32 %amount) {
249 ; GFX6-LABEL: shl_i32_sv:
251 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
252 ; GFX6-NEXT: ; return to shader part epilog
254 ; GFX8-LABEL: shl_i32_sv:
256 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
257 ; GFX8-NEXT: ; return to shader part epilog
259 ; GFX9-LABEL: shl_i32_sv:
261 ; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
262 ; GFX9-NEXT: ; return to shader part epilog
264 ; GFX10-LABEL: shl_i32_sv:
266 ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s0
267 ; GFX10-NEXT: ; return to shader part epilog
268 %result = shl i32 %value, %amount
269 %cast = bitcast i32 %result to float
; Mixed-operand case: the i32 value is not inreg, the amount is inreg; the
; shifted result is bitcast to float.
; All targets are expected to emit v_lshlrev_b32_e32 with s0 as the amount.
273 define amdgpu_ps float @shl_i32_vs(i32 %value, i32 inreg %amount) {
274 ; GCN-LABEL: shl_i32_vs:
276 ; GCN-NEXT: v_lshlrev_b32_e32 v0, s0, v0
277 ; GCN-NEXT: ; return to shader part epilog
279 ; GFX10-LABEL: shl_i32_vs:
281 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, s0, v0
282 ; GFX10-NEXT: ; return to shader part epilog
283 %result = shl i32 %value, %amount
284 %cast = bitcast i32 %result to float
; Element-wise variable shift-left of <2 x i32>.
; The checks expect one v_lshlrev_b32_e32 per element (v0 by v2, v1 by v3).
288 define <2 x i32> @v_shl_v2i32(<2 x i32> %value, <2 x i32> %amount) {
289 ; GCN-LABEL: v_shl_v2i32:
291 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v2, v0
293 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v3, v1
294 ; GCN-NEXT: s_setpc_b64 s[30:31]
296 ; GFX10-LABEL: v_shl_v2i32:
298 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
300 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v2, v0
301 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v3, v1
302 ; GFX10-NEXT: s_setpc_b64 s[30:31]
303 %result = shl <2 x i32> %value, %amount
304 ret <2 x i32> %result
; Shift-left of <2 x i32> by the constant splat <31, 31>.
; The checks expect one v_lshlrev_b32_e32 with the immediate 31 per element.
307 define <2 x i32> @v_shl_v2i32_31(<2 x i32> %value) {
308 ; GCN-LABEL: v_shl_v2i32_31:
310 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
312 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v1
313 ; GCN-NEXT: s_setpc_b64 s[30:31]
315 ; GFX10-LABEL: v_shl_v2i32_31:
317 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
319 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v0
320 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v1
321 ; GFX10-NEXT: s_setpc_b64 s[30:31]
322 %result = shl <2 x i32> %value, <i32 31, i32 31>
323 ret <2 x i32> %result
; amdgpu_ps variant of the <2 x i32> shift with both operands inreg.
; The checks expect one s_lshl_b32 per element (s0 by s2, s1 by s3).
326 define amdgpu_ps <2 x i32> @s_shl_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
327 ; GCN-LABEL: s_shl_v2i32:
329 ; GCN-NEXT: s_lshl_b32 s0, s0, s2
330 ; GCN-NEXT: s_lshl_b32 s1, s1, s3
331 ; GCN-NEXT: ; return to shader part epilog
333 ; GFX10-LABEL: s_shl_v2i32:
335 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
336 ; GFX10-NEXT: s_lshl_b32 s1, s1, s3
337 ; GFX10-NEXT: ; return to shader part epilog
338 %result = shl <2 x i32> %value, %amount
339 ret <2 x i32> %result
; Element-wise variable shift-left of <3 x i32>.
; The checks expect three v_lshlrev_b32_e32 instructions (v0..v2 shifted by
; v3..v5 respectively).
342 define <3 x i32> @v_shl_v3i32(<3 x i32> %value, <3 x i32> %amount) {
343 ; GCN-LABEL: v_shl_v3i32:
345 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v3, v0
347 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v4, v1
348 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v5, v2
349 ; GCN-NEXT: s_setpc_b64 s[30:31]
351 ; GFX10-LABEL: v_shl_v3i32:
353 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
355 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v3, v0
356 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v4, v1
357 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, v5, v2
358 ; GFX10-NEXT: s_setpc_b64 s[30:31]
359 %result = shl <3 x i32> %value, %amount
360 ret <3 x i32> %result
; amdgpu_ps variant of the <3 x i32> shift with both operands inreg.
; The checks expect three s_lshl_b32 instructions (s0..s2 shifted by s3..s5).
363 define amdgpu_ps <3 x i32> @s_shl_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
364 ; GCN-LABEL: s_shl_v3i32:
366 ; GCN-NEXT: s_lshl_b32 s0, s0, s3
367 ; GCN-NEXT: s_lshl_b32 s1, s1, s4
368 ; GCN-NEXT: s_lshl_b32 s2, s2, s5
369 ; GCN-NEXT: ; return to shader part epilog
371 ; GFX10-LABEL: s_shl_v3i32:
373 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
374 ; GFX10-NEXT: s_lshl_b32 s1, s1, s4
375 ; GFX10-NEXT: s_lshl_b32 s2, s2, s5
376 ; GFX10-NEXT: ; return to shader part epilog
377 %result = shl <3 x i32> %value, %amount
378 ret <3 x i32> %result
; Element-wise variable shift-left of <4 x i32>.
; The checks expect four v_lshlrev_b32_e32 instructions (v0..v3 shifted by
; v4..v7 respectively).
381 define <4 x i32> @v_shl_v4i32(<4 x i32> %value, <4 x i32> %amount) {
382 ; GCN-LABEL: v_shl_v4i32:
384 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v4, v0
386 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v5, v1
387 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v6, v2
388 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v7, v3
389 ; GCN-NEXT: s_setpc_b64 s[30:31]
391 ; GFX10-LABEL: v_shl_v4i32:
393 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
395 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v4, v0
396 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v5, v1
397 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, v6, v2
398 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, v7, v3
399 ; GFX10-NEXT: s_setpc_b64 s[30:31]
400 %result = shl <4 x i32> %value, %amount
401 ret <4 x i32> %result
; amdgpu_ps variant of the <4 x i32> shift with both operands inreg.
; The checks expect four s_lshl_b32 instructions (s0..s3 shifted by s4..s7).
404 define amdgpu_ps <4 x i32> @s_shl_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
405 ; GCN-LABEL: s_shl_v4i32:
407 ; GCN-NEXT: s_lshl_b32 s0, s0, s4
408 ; GCN-NEXT: s_lshl_b32 s1, s1, s5
409 ; GCN-NEXT: s_lshl_b32 s2, s2, s6
410 ; GCN-NEXT: s_lshl_b32 s3, s3, s7
411 ; GCN-NEXT: ; return to shader part epilog
413 ; GFX10-LABEL: s_shl_v4i32:
415 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
416 ; GFX10-NEXT: s_lshl_b32 s1, s1, s5
417 ; GFX10-NEXT: s_lshl_b32 s2, s2, s6
418 ; GFX10-NEXT: s_lshl_b32 s3, s3, s7
419 ; GFX10-NEXT: ; return to shader part epilog
420 %result = shl <4 x i32> %value, %amount
421 ret <4 x i32> %result
; Element-wise variable shift-left of <5 x i32>.
; The checks expect five v_lshlrev_b32_e32 instructions (v0..v4 shifted by
; v5..v9 respectively).
424 define <5 x i32> @v_shl_v5i32(<5 x i32> %value, <5 x i32> %amount) {
425 ; GCN-LABEL: v_shl_v5i32:
427 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v5, v0
429 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v6, v1
430 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v7, v2
431 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v8, v3
432 ; GCN-NEXT: v_lshlrev_b32_e32 v4, v9, v4
433 ; GCN-NEXT: s_setpc_b64 s[30:31]
435 ; GFX10-LABEL: v_shl_v5i32:
437 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
439 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v5, v0
440 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v6, v1
441 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, v7, v2
442 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, v8, v3
443 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, v9, v4
444 ; GFX10-NEXT: s_setpc_b64 s[30:31]
445 %result = shl <5 x i32> %value, %amount
446 ret <5 x i32> %result
; amdgpu_ps variant of the <5 x i32> shift with both operands inreg.
; The checks expect five s_lshl_b32 instructions (s0..s4 shifted by s5..s9).
449 define amdgpu_ps <5 x i32> @s_shl_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
450 ; GCN-LABEL: s_shl_v5i32:
452 ; GCN-NEXT: s_lshl_b32 s0, s0, s5
453 ; GCN-NEXT: s_lshl_b32 s1, s1, s6
454 ; GCN-NEXT: s_lshl_b32 s2, s2, s7
455 ; GCN-NEXT: s_lshl_b32 s3, s3, s8
456 ; GCN-NEXT: s_lshl_b32 s4, s4, s9
457 ; GCN-NEXT: ; return to shader part epilog
459 ; GFX10-LABEL: s_shl_v5i32:
461 ; GFX10-NEXT: s_lshl_b32 s0, s0, s5
462 ; GFX10-NEXT: s_lshl_b32 s1, s1, s6
463 ; GFX10-NEXT: s_lshl_b32 s2, s2, s7
464 ; GFX10-NEXT: s_lshl_b32 s3, s3, s8
465 ; GFX10-NEXT: s_lshl_b32 s4, s4, s9
466 ; GFX10-NEXT: ; return to shader part epilog
467 %result = shl <5 x i32> %value, %amount
468 ret <5 x i32> %result
; Element-wise variable shift-left of <16 x i32>.
; Fifteen of the amounts are read from v16..v30; the checks expect the last one
; to be fetched with buffer_load_dword (off s[0:3] at s32) and waited on with
; s_waitcnt vmcnt(0) before the final shift. The GCN-prefixed targets reload
; into v16 after its first use, while GFX10 loads into v31 up front.
; NOTE(review): presumably the load fetches the one argument dword that did not
; fit in registers -- confirm against the calling convention.
471 define <16 x i32> @v_shl_v16i32(<16 x i32> %value, <16 x i32> %amount) {
472 ; GCN-LABEL: v_shl_v16i32:
474 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v16, v0
476 ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
477 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v17, v1
478 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v18, v2
479 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v19, v3
480 ; GCN-NEXT: v_lshlrev_b32_e32 v4, v20, v4
481 ; GCN-NEXT: v_lshlrev_b32_e32 v5, v21, v5
482 ; GCN-NEXT: v_lshlrev_b32_e32 v6, v22, v6
483 ; GCN-NEXT: v_lshlrev_b32_e32 v7, v23, v7
484 ; GCN-NEXT: v_lshlrev_b32_e32 v8, v24, v8
485 ; GCN-NEXT: v_lshlrev_b32_e32 v9, v25, v9
486 ; GCN-NEXT: v_lshlrev_b32_e32 v10, v26, v10
487 ; GCN-NEXT: v_lshlrev_b32_e32 v11, v27, v11
488 ; GCN-NEXT: v_lshlrev_b32_e32 v12, v28, v12
489 ; GCN-NEXT: v_lshlrev_b32_e32 v13, v29, v13
490 ; GCN-NEXT: v_lshlrev_b32_e32 v14, v30, v14
491 ; GCN-NEXT: s_waitcnt vmcnt(0)
492 ; GCN-NEXT: v_lshlrev_b32_e32 v15, v16, v15
493 ; GCN-NEXT: s_setpc_b64 s[30:31]
495 ; GFX10-LABEL: v_shl_v16i32:
497 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
499 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
500 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v16, v0
501 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v17, v1
502 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, v18, v2
503 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, v19, v3
504 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, v20, v4
505 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, v21, v5
506 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, v22, v6
507 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, v23, v7
508 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, v24, v8
509 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, v25, v9
510 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, v26, v10
511 ; GFX10-NEXT: v_lshlrev_b32_e32 v11, v27, v11
512 ; GFX10-NEXT: v_lshlrev_b32_e32 v12, v28, v12
513 ; GFX10-NEXT: v_lshlrev_b32_e32 v13, v29, v13
514 ; GFX10-NEXT: v_lshlrev_b32_e32 v14, v30, v14
515 ; GFX10-NEXT: s_waitcnt vmcnt(0)
516 ; GFX10-NEXT: v_lshlrev_b32_e32 v15, v31, v15
517 ; GFX10-NEXT: s_setpc_b64 s[30:31]
518 %result = shl <16 x i32> %value, %amount
519 ret <16 x i32> %result
; amdgpu_ps variant of the <16 x i32> shift with both operands inreg.
; The checks expect sixteen s_lshl_b32 instructions (s0..s15 shifted by
; s16..s31) and no memory access.
522 define amdgpu_ps <16 x i32> @s_shl_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
523 ; GCN-LABEL: s_shl_v16i32:
525 ; GCN-NEXT: s_lshl_b32 s0, s0, s16
526 ; GCN-NEXT: s_lshl_b32 s1, s1, s17
527 ; GCN-NEXT: s_lshl_b32 s2, s2, s18
528 ; GCN-NEXT: s_lshl_b32 s3, s3, s19
529 ; GCN-NEXT: s_lshl_b32 s4, s4, s20
530 ; GCN-NEXT: s_lshl_b32 s5, s5, s21
531 ; GCN-NEXT: s_lshl_b32 s6, s6, s22
532 ; GCN-NEXT: s_lshl_b32 s7, s7, s23
533 ; GCN-NEXT: s_lshl_b32 s8, s8, s24
534 ; GCN-NEXT: s_lshl_b32 s9, s9, s25
535 ; GCN-NEXT: s_lshl_b32 s10, s10, s26
536 ; GCN-NEXT: s_lshl_b32 s11, s11, s27
537 ; GCN-NEXT: s_lshl_b32 s12, s12, s28
538 ; GCN-NEXT: s_lshl_b32 s13, s13, s29
539 ; GCN-NEXT: s_lshl_b32 s14, s14, s30
540 ; GCN-NEXT: s_lshl_b32 s15, s15, s31
541 ; GCN-NEXT: ; return to shader part epilog
543 ; GFX10-LABEL: s_shl_v16i32:
545 ; GFX10-NEXT: s_lshl_b32 s0, s0, s16
546 ; GFX10-NEXT: s_lshl_b32 s1, s1, s17
547 ; GFX10-NEXT: s_lshl_b32 s2, s2, s18
548 ; GFX10-NEXT: s_lshl_b32 s3, s3, s19
549 ; GFX10-NEXT: s_lshl_b32 s4, s4, s20
550 ; GFX10-NEXT: s_lshl_b32 s5, s5, s21
551 ; GFX10-NEXT: s_lshl_b32 s6, s6, s22
552 ; GFX10-NEXT: s_lshl_b32 s7, s7, s23
553 ; GFX10-NEXT: s_lshl_b32 s8, s8, s24
554 ; GFX10-NEXT: s_lshl_b32 s9, s9, s25
555 ; GFX10-NEXT: s_lshl_b32 s10, s10, s26
556 ; GFX10-NEXT: s_lshl_b32 s11, s11, s27
557 ; GFX10-NEXT: s_lshl_b32 s12, s12, s28
558 ; GFX10-NEXT: s_lshl_b32 s13, s13, s29
559 ; GFX10-NEXT: s_lshl_b32 s14, s14, s30
560 ; GFX10-NEXT: s_lshl_b32 s15, s15, s31
561 ; GFX10-NEXT: ; return to shader part epilog
562 %result = shl <16 x i32> %value, %amount
563 ret <16 x i32> %result
; Variable shift-left of an i16 value by an i16 amount.
; The GFX6 checks expect the amount to be masked with 0xffff followed by a
; 32-bit shift; the GFX8, GFX9 and GFX10 checks expect a single 16-bit shift.
566 define i16 @v_shl_i16(i16 %value, i16 %amount) {
567 ; GFX6-LABEL: v_shl_i16:
569 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
571 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
572 ; GFX6-NEXT: s_setpc_b64 s[30:31]
574 ; GFX8-LABEL: v_shl_i16:
576 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v1, v0
578 ; GFX8-NEXT: s_setpc_b64 s[30:31]
580 ; GFX9-LABEL: v_shl_i16:
582 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v1, v0
584 ; GFX9-NEXT: s_setpc_b64 s[30:31]
586 ; GFX10-LABEL: v_shl_i16:
588 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
590 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
591 ; GFX10-NEXT: s_setpc_b64 s[30:31]
592 %result = shl i16 %value, %amount
; Shift-left of an i16 value by the constant 31.
; The checks expect no shift instruction at all on any target, only the
; wait(s) and the return.
; NOTE(review): 31 is not smaller than the 16-bit width, so per the LangRef
; definition of shl the result is poison; that would explain the empty body.
; The scalar sibling test shifts by 15 (s_shl_i16_15), so 31 here may be a
; copy of the i32 case -- confirm whether 15 was intended; changing it requires
; regenerating the checks.
596 define i16 @v_shl_i16_31(i16 %value) {
597 ; GCN-LABEL: v_shl_i16_31:
599 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600 ; GCN-NEXT: s_setpc_b64 s[30:31]
602 ; GFX10-LABEL: v_shl_i16_31:
604 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
606 ; GFX10-NEXT: s_setpc_b64 s[30:31]
607 %result = shl i16 %value, 31
; amdgpu_ps variant with both i16 operands marked inreg; every target is
; expected to use the scalar s_lshl_b32.
; The GFX8, GFX9 and GFX10 checks additionally mask s0 with 0xffff first;
; the GFX6 checks expect the shift alone.
611 define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) {
612 ; GFX6-LABEL: s_shl_i16:
614 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
615 ; GFX6-NEXT: ; return to shader part epilog
617 ; GFX8-LABEL: s_shl_i16:
619 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
620 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
621 ; GFX8-NEXT: ; return to shader part epilog
623 ; GFX9-LABEL: s_shl_i16:
625 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
626 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
627 ; GFX9-NEXT: ; return to shader part epilog
629 ; GFX10-LABEL: s_shl_i16:
631 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
632 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
633 ; GFX10-NEXT: ; return to shader part epilog
634 %result = shl i16 %value, %amount
; amdgpu_ps variant shifting an inreg i16 value by the constant 15.
; The GFX6 checks expect s_lshl_b32 with the immediate 15. The GFX8, GFX9 and
; GFX10 checks expect the amount to be produced into s1 by
; s_bfe_u32 s1, 15, 0x100000 and the shift to use s1.
638 define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
639 ; GFX6-LABEL: s_shl_i16_15:
641 ; GFX6-NEXT: s_lshl_b32 s0, s0, 15
642 ; GFX6-NEXT: ; return to shader part epilog
644 ; GFX8-LABEL: s_shl_i16_15:
646 ; GFX8-NEXT: s_bfe_u32 s1, 15, 0x100000
647 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
648 ; GFX8-NEXT: ; return to shader part epilog
650 ; GFX9-LABEL: s_shl_i16_15:
652 ; GFX9-NEXT: s_bfe_u32 s1, 15, 0x100000
653 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
654 ; GFX9-NEXT: ; return to shader part epilog
656 ; GFX10-LABEL: s_shl_i16_15:
658 ; GFX10-NEXT: s_bfe_u32 s1, 15, 0x100000
659 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
660 ; GFX10-NEXT: ; return to shader part epilog
661 %result = shl i16 %value, 15
; Mixed-operand case: the i16 value is inreg, the amount is not; the shifted
; result is bitcast to half.
; The GFX6 checks expect the amount (v0) to be masked with 0xffff and then the
; non-reversed v_lshl_b32_e32; the other targets expect one 16-bit shift with
; s0 as the last source.
665 define amdgpu_ps half @shl_i16_sv(i16 inreg %value, i16 %amount) {
666 ; GFX6-LABEL: shl_i16_sv:
668 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
669 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
670 ; GFX6-NEXT: ; return to shader part epilog
672 ; GFX8-LABEL: shl_i16_sv:
674 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
675 ; GFX8-NEXT: ; return to shader part epilog
677 ; GFX9-LABEL: shl_i16_sv:
679 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
680 ; GFX9-NEXT: ; return to shader part epilog
682 ; GFX10-LABEL: shl_i16_sv:
684 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
685 ; GFX10-NEXT: ; return to shader part epilog
686 %result = shl i16 %value, %amount
687 %cast = bitcast i16 %result to half
; Mixed-operand case: the i16 value is not inreg, the amount is inreg; the
; shifted result is bitcast to half.
; The GFX6 checks expect s0 to be masked with 0xffff before a 32-bit shift;
; the other targets expect one 16-bit shift with s0 as the amount.
691 define amdgpu_ps half @shl_i16_vs(i16 %value, i16 inreg %amount) {
692 ; GFX6-LABEL: shl_i16_vs:
694 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
695 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
696 ; GFX6-NEXT: ; return to shader part epilog
698 ; GFX8-LABEL: shl_i16_vs:
700 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
701 ; GFX8-NEXT: ; return to shader part epilog
703 ; GFX9-LABEL: shl_i16_vs:
705 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s0, v0
706 ; GFX9-NEXT: ; return to shader part epilog
708 ; GFX10-LABEL: shl_i16_vs:
710 ; GFX10-NEXT: v_lshlrev_b16 v0, s0, v0
711 ; GFX10-NEXT: ; return to shader part epilog
712 %result = shl i16 %value, %amount
713 %cast = bitcast i16 %result to half
; Element-wise variable shift-left of <2 x i16>.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 masks each amount with 0xffff and issues two 32-bit shifts on v0/v1.
;  - GFX8 issues a plain 16-bit shift plus an SDWA shift selecting WORD_1, then
;    combines the two with v_or_b32.
;  - GFX9 and GFX10 issue a single packed v_pk_lshlrev_b16.
717 define <2 x i16> @v_shl_v2i16(<2 x i16> %value, <2 x i16> %amount) {
718 ; GFX6-LABEL: v_shl_v2i16:
720 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
722 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
723 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
724 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
725 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
726 ; GFX6-NEXT: s_setpc_b64 s[30:31]
728 ; GFX8-LABEL: v_shl_v2i16:
730 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v1, v0
732 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
733 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
734 ; GFX8-NEXT: s_setpc_b64 s[30:31]
736 ; GFX9-LABEL: v_shl_v2i16:
738 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v1, v0
740 ; GFX9-NEXT: s_setpc_b64 s[30:31]
742 ; GFX10-LABEL: v_shl_v2i16:
744 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
746 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v1, v0
747 ; GFX10-NEXT: s_setpc_b64 s[30:31]
748 %result = shl <2 x i16> %value, %amount
749 ret <2 x i16> %result
; Shift-left of <2 x i16> by the constant splat <15, 15>.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 issues two 32-bit shifts by the immediate 15 on v0 and v1.
;  - GFX8 moves 15 into v2 for the SDWA shift writing WORD_1, uses the
;    immediate for the plain 16-bit shift, and ORs the two results.
;  - GFX9 and GFX10 issue one v_pk_lshlrev_b16 with op_sel_hi:[0,1].
752 define <2 x i16> @v_shl_v2i16_15(<2 x i16> %value) {
753 ; GFX6-LABEL: v_shl_v2i16_15:
755 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
757 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 15, v1
758 ; GFX6-NEXT: s_setpc_b64 s[30:31]
760 ; GFX8-LABEL: v_shl_v2i16_15:
762 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
764 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
765 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
766 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
767 ; GFX8-NEXT: s_setpc_b64 s[30:31]
769 ; GFX9-LABEL: v_shl_v2i16_15:
771 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
773 ; GFX9-NEXT: s_setpc_b64 s[30:31]
775 ; GFX10-LABEL: v_shl_v2i16_15:
777 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
779 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
780 ; GFX10-NEXT: s_setpc_b64 s[30:31]
781 %result = shl <2 x i16> %value, <i16 15, i16 15>
782 ret <2 x i16> %result
; amdgpu_ps variant of the <2 x i16> shift with both operands inreg; the
; result is bitcast to i32.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 shifts s0/s1 by s2/s3, masks both results with 0xffff, then merges
;    them with a shift by 16 and s_or_b32.
;  - GFX8 first splits s0 and s1 with s_lshr_b32 by 16, shifts the pieces, and
;    merges them with a shift by 16, a 0xffff mask and s_or_b32.
;  - GFX9 and GFX10 split the same way but merge with s_pack_ll_b32_b16.
785 define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
786 ; GFX6-LABEL: s_shl_v2i16:
788 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
789 ; GFX6-NEXT: s_lshl_b32 s1, s1, s3
790 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
791 ; GFX6-NEXT: s_and_b32 s1, s1, s4
792 ; GFX6-NEXT: s_and_b32 s0, s0, s4
793 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
794 ; GFX6-NEXT: s_or_b32 s0, s0, s1
795 ; GFX6-NEXT: ; return to shader part epilog
797 ; GFX8-LABEL: s_shl_v2i16:
799 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
800 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
801 ; GFX8-NEXT: s_and_b32 s0, s0, s3
802 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
803 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
804 ; GFX8-NEXT: s_lshl_b32 s1, s2, s4
805 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
806 ; GFX8-NEXT: s_and_b32 s0, s0, s3
807 ; GFX8-NEXT: s_or_b32 s0, s1, s0
808 ; GFX8-NEXT: ; return to shader part epilog
810 ; GFX9-LABEL: s_shl_v2i16:
812 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
813 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
814 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
815 ; GFX9-NEXT: s_lshl_b32 s1, s2, s3
816 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
817 ; GFX9-NEXT: ; return to shader part epilog
819 ; GFX10-LABEL: s_shl_v2i16:
821 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
822 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
823 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
824 ; GFX10-NEXT: s_lshl_b32 s1, s2, s3
825 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
826 ; GFX10-NEXT: ; return to shader part epilog
827 %result = shl <2 x i16> %value, %amount
828 %cast = bitcast <2 x i16> %result to i32
; Mixed-operand <2 x i16> case: the value is inreg, the amount is not; the
; result is bitcast to float.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 masks both amounts with 0xffff, shifts s0/s1 with the non-reversed
;    v_lshl_b32_e32, masks the results and merges them with a shift by 16 and OR.
;  - GFX8 extracts the upper 16 bits of s0 into s1, copies that to v2, and
;    combines a plain 16-bit shift with an SDWA shift via v_or_b32.
;  - GFX9 and GFX10 issue a single v_pk_lshlrev_b16 with s0 as the last source.
832 define amdgpu_ps float @shl_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
833 ; GFX6-LABEL: shl_v2i16_sv:
835 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
836 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
837 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
838 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
839 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
840 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
841 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
842 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
843 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
844 ; GFX6-NEXT: ; return to shader part epilog
846 ; GFX8-LABEL: shl_v2i16_sv:
848 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
849 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
850 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v0, s0
851 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
852 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
853 ; GFX8-NEXT: ; return to shader part epilog
855 ; GFX9-LABEL: shl_v2i16_sv:
857 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0
858 ; GFX9-NEXT: ; return to shader part epilog
860 ; GFX10-LABEL: shl_v2i16_sv:
862 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v0, s0
863 ; GFX10-NEXT: ; return to shader part epilog
864 %result = shl <2 x i16> %value, %amount
865 %cast = bitcast <2 x i16> %result to float
; Mixed-operand <2 x i16> case: the value is not inreg, the amount is inreg;
; the result is bitcast to float.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 masks s0 and s1 with 0xffff, shifts v0/v1 by them, masks the results
;    and merges them with a shift by 16 and OR.
;  - GFX8 extracts the upper 16 bits of s0 into s1, copies that to v2, and
;    combines a plain 16-bit shift with an SDWA shift via v_or_b32.
;  - GFX9 and GFX10 issue a single v_pk_lshlrev_b16 with s0 as the amount.
869 define amdgpu_ps float @shl_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
870 ; GFX6-LABEL: shl_v2i16_vs:
872 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
873 ; GFX6-NEXT: s_and_b32 s0, s0, s2
874 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
875 ; GFX6-NEXT: s_and_b32 s0, s1, s2
876 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
877 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
878 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
879 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
880 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
881 ; GFX6-NEXT: ; return to shader part epilog
883 ; GFX8-LABEL: shl_v2i16_vs:
885 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
886 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
887 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s0, v0
888 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
889 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
890 ; GFX8-NEXT: ; return to shader part epilog
892 ; GFX9-LABEL: shl_v2i16_vs:
894 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s0, v0
895 ; GFX9-NEXT: ; return to shader part epilog
897 ; GFX10-LABEL: shl_v2i16_vs:
899 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, s0, v0
900 ; GFX10-NEXT: ; return to shader part epilog
901 %result = shl <2 x i16> %value, %amount
902 %cast = bitcast <2 x i16> %result to float
907 ; define <3 x i16> @v_shl_v3i16(<3 x i16> %value, <3 x i16> %amount) {
908 ; %result = shl <3 x i16> %value, %amount
909 ; ret <3 x i16> %result
912 ; define amdgpu_ps <3 x i16> @s_shl_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
913 ; %result = shl <3 x i16> %value, %amount
914 ; ret <3 x i16> %result
; Element-wise variable shift-left of <4 x i16>; the result is bitcast to
; <2 x float>.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 masks each of the four amounts with 0xffff, shifts v0..v3 with 32-bit
;    shifts, then masks and merges the results pairwise (shift by 16 and OR).
;  - GFX8 uses, per 32-bit register, one plain 16-bit shift and one SDWA shift
;    on WORD_1, combined with v_or_b32.
;  - GFX9 and GFX10 issue two v_pk_lshlrev_b16 instructions.
917 define <2 x float> @v_shl_v4i16(<4 x i16> %value, <4 x i16> %amount) {
918 ; GFX6-LABEL: v_shl_v4i16:
920 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
922 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v4
923 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
924 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v5
925 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
926 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v6
927 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
928 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
929 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
930 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
931 ; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
932 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
933 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
934 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
935 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
936 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
937 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
938 ; GFX6-NEXT: s_setpc_b64 s[30:31]
940 ; GFX8-LABEL: v_shl_v4i16:
942 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v2, v0
944 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
945 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v3, v1
946 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
947 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
948 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
949 ; GFX8-NEXT: s_setpc_b64 s[30:31]
951 ; GFX9-LABEL: v_shl_v4i16:
953 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
955 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v3, v1
956 ; GFX9-NEXT: s_setpc_b64 s[30:31]
958 ; GFX10-LABEL: v_shl_v4i16:
960 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
962 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v2, v0
963 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v3, v1
964 ; GFX10-NEXT: s_setpc_b64 s[30:31]
965 %result = shl <4 x i16> %value, %amount
966 %cast = bitcast <4 x i16> %result to <2 x float>
967 ret <2 x float> %cast
; amdgpu_ps variant of the <4 x i16> shift with both operands inreg; the
; result is bitcast to <2 x i32>.
; Expected lowering per target, as shown by the checks below:
;  - GFX6 shifts s0..s3 by s4..s7, masks each result with 0xffff and merges
;    them pairwise with a shift by 16 and s_or_b32.
;  - GFX8 splits the inputs with s_lshr_b32 by 16, shifts the four pieces, and
;    merges pairwise with a shift by 16, a 0xffff mask and s_or_b32.
;  - GFX9 and GFX10 split the same way and merge with s_pack_ll_b32_b16; the
;    two targets differ only in instruction order and temporary registers.
970 define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
971 ; GFX6-LABEL: s_shl_v4i16:
973 ; GFX6-NEXT: s_mov_b32 s8, 0xffff
974 ; GFX6-NEXT: s_lshl_b32 s1, s1, s5
975 ; GFX6-NEXT: s_lshl_b32 s0, s0, s4
976 ; GFX6-NEXT: s_and_b32 s1, s1, s8
977 ; GFX6-NEXT: s_lshl_b32 s2, s2, s6
978 ; GFX6-NEXT: s_lshl_b32 s3, s3, s7
979 ; GFX6-NEXT: s_and_b32 s0, s0, s8
980 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
981 ; GFX6-NEXT: s_or_b32 s0, s0, s1
982 ; GFX6-NEXT: s_and_b32 s1, s2, s8
983 ; GFX6-NEXT: s_and_b32 s2, s3, s8
984 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
985 ; GFX6-NEXT: s_or_b32 s1, s1, s2
986 ; GFX6-NEXT: ; return to shader part epilog
988 ; GFX8-LABEL: s_shl_v4i16:
990 ; GFX8-NEXT: s_mov_b32 s6, 0xffff
991 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
992 ; GFX8-NEXT: s_and_b32 s0, s0, s6
993 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
994 ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
995 ; GFX8-NEXT: s_and_b32 s1, s1, s6
996 ; GFX8-NEXT: s_lshr_b32 s8, s3, 16
997 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
998 ; GFX8-NEXT: s_lshl_b32 s2, s4, s7
999 ; GFX8-NEXT: s_lshl_b32 s1, s1, s3
1000 ; GFX8-NEXT: s_lshl_b32 s3, s5, s8
1001 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1002 ; GFX8-NEXT: s_and_b32 s0, s0, s6
1003 ; GFX8-NEXT: s_or_b32 s0, s2, s0
1004 ; GFX8-NEXT: s_lshl_b32 s2, s3, 16
1005 ; GFX8-NEXT: s_and_b32 s1, s1, s6
1006 ; GFX8-NEXT: s_or_b32 s1, s2, s1
1007 ; GFX8-NEXT: ; return to shader part epilog
1009 ; GFX9-LABEL: s_shl_v4i16:
1011 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1012 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
1013 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
1014 ; GFX9-NEXT: s_lshl_b32 s2, s4, s5
1015 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
1016 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
1017 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1018 ; GFX9-NEXT: s_lshl_b32 s1, s1, s3
1019 ; GFX9-NEXT: s_lshl_b32 s2, s2, s4
1020 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
1021 ; GFX9-NEXT: ; return to shader part epilog
1023 ; GFX10-LABEL: s_shl_v4i16:
1025 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
1026 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
1027 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
1028 ; GFX10-NEXT: s_lshl_b32 s2, s4, s5
1029 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
1030 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
1031 ; GFX10-NEXT: s_lshl_b32 s1, s1, s3
1032 ; GFX10-NEXT: s_lshl_b32 s3, s4, s5
1033 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
1034 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
1035 ; GFX10-NEXT: ; return to shader part epilog
1036 %result = shl <4 x i16> %value, %amount
1037 %cast = bitcast <4 x i16> %result to <2 x i32>
1042 ; define <5 x i16> @v_shl_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1043 ; %result = shl <5 x i16> %value, %amount
1044 ; ret <5 x i16> %result
1047 ; define amdgpu_ps <5 x i16> @s_shl_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1048 ; %result = shl <5 x i16> %value, %amount
1049 ; ret <5 x i16> %result
1052 ; define <3 x float> @v_shl_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1053 ; %result = shl <6 x i16> %value, %amount
1054 ; %cast = bitcast <6 x i16> %result to <3 x float>
1055 ; ret <3 x float> %cast
1058 ; define amdgpu_ps <3 x i32> @s_shl_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1059 ; %result = shl <6 x i16> %value, %amount
1060 ; %cast = bitcast <6 x i16> %result to <3 x i32>
1061 ; ret <3 x i32> %cast
; Variable shift of <8 x i16> in VGPRs; the result is bitcast to <4 x float>.
; Expected code per target, as recorded in the assertions below:
;   tahiti  - every shift amount is masked with 0xffff, each element is shifted
;             with a 32-bit v_lshlrev_b32, and pairs are masked and packed with
;             v_lshlrev_b32 16 / v_or_b32.
;   fiji    - low halves use v_lshlrev_b16, high halves use the SDWA form with
;             WORD_1 selects, and the two are combined with v_or_b32.
;   gfx900 / gfx1010 - one v_pk_lshlrev_b16 per packed dword.
1064 define <4 x float> @v_shl_v8i16(<8 x i16> %value, <8 x i16> %amount) {
1065 ; GFX6-LABEL: v_shl_v8i16:
1067 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
1069 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v8
1070 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0
1071 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v9
1072 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v8, v1
1073 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v10
1074 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
1075 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v11
1076 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v8, v3
1077 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v12
1078 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4
1079 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v13
1080 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
1081 ; GFX6-NEXT: v_mov_b32_e32 v16, 0xffff
1082 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, v8, v5
1083 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v14
1084 ; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
1085 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1086 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, v8, v6
1087 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v15
1088 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1089 ; GFX6-NEXT: v_and_b32_e32 v1, v2, v16
1090 ; GFX6-NEXT: v_and_b32_e32 v2, v3, v16
1091 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, v8, v7
1092 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1093 ; GFX6-NEXT: v_and_b32_e32 v3, v5, v16
1094 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1095 ; GFX6-NEXT: v_and_b32_e32 v2, v4, v16
1096 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1097 ; GFX6-NEXT: v_and_b32_e32 v4, v7, v16
1098 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1099 ; GFX6-NEXT: v_and_b32_e32 v3, v6, v16
1100 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
1101 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1102 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1104 ; GFX8-LABEL: v_shl_v8i16:
1106 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v4, v0
1108 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1109 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v5, v1
1110 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1111 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1112 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v6, v2
1113 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1114 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1115 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v7, v3
1116 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1117 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
1118 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1119 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1121 ; GFX9-LABEL: v_shl_v8i16:
1123 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
1125 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v5, v1
1126 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, v6, v2
1127 ; GFX9-NEXT: v_pk_lshlrev_b16 v3, v7, v3
1128 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1130 ; GFX10-LABEL: v_shl_v8i16:
1132 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1134 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0
1135 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1
1136 ; GFX10-NEXT: v_pk_lshlrev_b16 v2, v6, v2
1137 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, v7, v3
1138 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1139 %result = shl <8 x i16> %value, %amount
1140 %cast = bitcast <8 x i16> %result to <4 x float>
1141 ret <4 x float> %cast
; Uniform (inreg) variable shift of <8 x i16>; the result is bitcast to <4 x i32>.
; Expected code per target, as recorded in the assertions below:
;   tahiti  - each element is shifted as its own 32-bit SGPR (values in s0-s7,
;             amounts in s8-s15), masked with 0xffff and repacked in pairs.
;   fiji    - packed halves are split with s_lshr_b32 / s_and_b32, shifted with
;             s_lshl_b32, then masked and repacked with s_or_b32.
;   gfx900 / gfx1010 - halves are split with s_lshr_b32, shifted, and repacked
;             with s_pack_ll_b32_b16.
1144 define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
1145 ; GFX6-LABEL: s_shl_v8i16:
1147 ; GFX6-NEXT: s_mov_b32 s16, 0xffff
1148 ; GFX6-NEXT: s_lshl_b32 s1, s1, s9
1149 ; GFX6-NEXT: s_lshl_b32 s0, s0, s8
1150 ; GFX6-NEXT: s_and_b32 s1, s1, s16
1151 ; GFX6-NEXT: s_lshl_b32 s2, s2, s10
1152 ; GFX6-NEXT: s_lshl_b32 s3, s3, s11
1153 ; GFX6-NEXT: s_and_b32 s0, s0, s16
1154 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1155 ; GFX6-NEXT: s_lshl_b32 s5, s5, s13
1156 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1157 ; GFX6-NEXT: s_and_b32 s1, s2, s16
1158 ; GFX6-NEXT: s_and_b32 s2, s3, s16
1159 ; GFX6-NEXT: s_lshl_b32 s4, s4, s12
1160 ; GFX6-NEXT: s_lshl_b32 s7, s7, s15
1161 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1162 ; GFX6-NEXT: s_and_b32 s3, s5, s16
1163 ; GFX6-NEXT: s_lshl_b32 s6, s6, s14
1164 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1165 ; GFX6-NEXT: s_and_b32 s2, s4, s16
1166 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1167 ; GFX6-NEXT: s_and_b32 s4, s7, s16
1168 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1169 ; GFX6-NEXT: s_and_b32 s3, s6, s16
1170 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1171 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1172 ; GFX6-NEXT: ; return to shader part epilog
1174 ; GFX8-LABEL: s_shl_v8i16:
1176 ; GFX8-NEXT: s_mov_b32 s12, 0xffff
1177 ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1178 ; GFX8-NEXT: s_and_b32 s0, s0, s12
1179 ; GFX8-NEXT: s_lshr_b32 s13, s4, 16
1180 ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1181 ; GFX8-NEXT: s_and_b32 s1, s1, s12
1182 ; GFX8-NEXT: s_lshr_b32 s14, s5, 16
1183 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4
1184 ; GFX8-NEXT: s_lshl_b32 s4, s8, s13
1185 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1186 ; GFX8-NEXT: s_and_b32 s2, s2, s12
1187 ; GFX8-NEXT: s_lshr_b32 s15, s6, 16
1188 ; GFX8-NEXT: s_lshl_b32 s1, s1, s5
1189 ; GFX8-NEXT: s_lshl_b32 s5, s9, s14
1190 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1191 ; GFX8-NEXT: s_and_b32 s0, s0, s12
1192 ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1193 ; GFX8-NEXT: s_and_b32 s3, s3, s12
1194 ; GFX8-NEXT: s_lshr_b32 s16, s7, 16
1195 ; GFX8-NEXT: s_lshl_b32 s2, s2, s6
1196 ; GFX8-NEXT: s_lshl_b32 s6, s10, s15
1197 ; GFX8-NEXT: s_or_b32 s0, s4, s0
1198 ; GFX8-NEXT: s_lshl_b32 s4, s5, 16
1199 ; GFX8-NEXT: s_and_b32 s1, s1, s12
1200 ; GFX8-NEXT: s_lshl_b32 s3, s3, s7
1201 ; GFX8-NEXT: s_lshl_b32 s7, s11, s16
1202 ; GFX8-NEXT: s_or_b32 s1, s4, s1
1203 ; GFX8-NEXT: s_lshl_b32 s4, s6, 16
1204 ; GFX8-NEXT: s_and_b32 s2, s2, s12
1205 ; GFX8-NEXT: s_or_b32 s2, s4, s2
1206 ; GFX8-NEXT: s_lshl_b32 s4, s7, 16
1207 ; GFX8-NEXT: s_and_b32 s3, s3, s12
1208 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1209 ; GFX8-NEXT: ; return to shader part epilog
1211 ; GFX9-LABEL: s_shl_v8i16:
1213 ; GFX9-NEXT: s_lshr_b32 s8, s0, 16
1214 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
1215 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
1216 ; GFX9-NEXT: s_lshl_b32 s4, s8, s9
1217 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1218 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
1219 ; GFX9-NEXT: s_lshr_b32 s8, s5, 16
1220 ; GFX9-NEXT: s_lshl_b32 s1, s1, s5
1221 ; GFX9-NEXT: s_lshl_b32 s4, s4, s8
1222 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
1223 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
1224 ; GFX9-NEXT: s_lshr_b32 s5, s6, 16
1225 ; GFX9-NEXT: s_lshl_b32 s2, s2, s6
1226 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5
1227 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1228 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1229 ; GFX9-NEXT: s_lshr_b32 s5, s7, 16
1230 ; GFX9-NEXT: s_lshl_b32 s3, s3, s7
1231 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5
1232 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
1233 ; GFX9-NEXT: ; return to shader part epilog
1235 ; GFX10-LABEL: s_shl_v8i16:
1237 ; GFX10-NEXT: s_lshr_b32 s8, s0, 16
1238 ; GFX10-NEXT: s_lshr_b32 s9, s4, 16
1239 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
1240 ; GFX10-NEXT: s_lshl_b32 s4, s8, s9
1241 ; GFX10-NEXT: s_lshr_b32 s8, s1, 16
1242 ; GFX10-NEXT: s_lshr_b32 s9, s5, 16
1243 ; GFX10-NEXT: s_lshl_b32 s1, s1, s5
1244 ; GFX10-NEXT: s_lshl_b32 s5, s8, s9
1245 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1246 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
1247 ; GFX10-NEXT: s_lshr_b32 s4, s2, 16
1248 ; GFX10-NEXT: s_lshr_b32 s5, s6, 16
1249 ; GFX10-NEXT: s_lshl_b32 s2, s2, s6
1250 ; GFX10-NEXT: s_lshl_b32 s4, s4, s5
1251 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
1252 ; GFX10-NEXT: s_lshr_b32 s6, s7, 16
1253 ; GFX10-NEXT: s_lshl_b32 s3, s3, s7
1254 ; GFX10-NEXT: s_lshl_b32 s5, s5, s6
1255 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1256 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
1257 ; GFX10-NEXT: ; return to shader part epilog
1258 %result = shl <8 x i16> %value, %amount
1259 %cast = bitcast <8 x i16> %result to <4 x i32>
; Variable i64 shift in VGPRs. Every target is expected to emit one 64-bit
; shift instruction (v_lshl_b64 on tahiti, v_lshlrev_b64 on the others) that
; takes only v2, the low dword of %amount, as the shift amount.
1263 define i64 @v_shl_i64(i64 %value, i64 %amount) {
1264 ; GFX6-LABEL: v_shl_i64:
1266 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
1268 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX8-LABEL: v_shl_i64:
1272 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1274 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1276 ; GFX9-LABEL: v_shl_i64:
1278 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1280 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1282 ; GFX10-LABEL: v_shl_i64:
1284 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1286 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1287 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1288 %result = shl i64 %value, %amount
; i64 shift by the constant 63 in VGPRs. The expected code narrows it to a
; 32-bit shift: the high result dword is the low input dword shifted left by
; 31 (63 - 32), and the low result dword is set to 0.
1292 define i64 @v_shl_i64_63(i64 %value) {
1293 ; GCN-LABEL: v_shl_i64_63:
1295 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v0
1297 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1298 ; GCN-NEXT: s_setpc_b64 s[30:31]
1300 ; GFX10-LABEL: v_shl_i64_63:
1302 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1303 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1304 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v0
1305 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1306 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1307 %result = shl i64 %value, 63
; i64 shift by the constant 33 in VGPRs. The expected code narrows it to a
; 32-bit shift: the high result dword is the low input dword shifted left by
; 1 (33 - 32), and the low result dword is set to 0.
1311 define i64 @v_shl_i64_33(i64 %value) {
1312 ; GCN-LABEL: v_shl_i64_33:
1314 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1315 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1316 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1317 ; GCN-NEXT: s_setpc_b64 s[30:31]
1319 ; GFX10-LABEL: v_shl_i64_33:
1321 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1323 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1324 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1325 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1326 %result = shl i64 %value, 33
; i64 shift by exactly 32 in VGPRs. No shift instruction is expected: the low
; input dword is copied into the high result dword and the low result dword
; is set to 0.
1330 define i64 @v_shl_i64_32(i64 %value) {
1331 ; GCN-LABEL: v_shl_i64_32:
1333 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334 ; GCN-NEXT: v_mov_b32_e32 v1, v0
1335 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1336 ; GCN-NEXT: s_setpc_b64 s[30:31]
1338 ; GFX10-LABEL: v_shl_i64_32:
1340 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1342 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
1343 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1344 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1345 %result = shl i64 %value, 32
; i64 shift by the constant 31 in VGPRs. Unlike the 32/33/63 cases, the
; expected code keeps a full 64-bit shift with 31 as an inline immediate
; (v_lshl_b64 on tahiti, v_lshlrev_b64 on the others).
1349 define i64 @v_shl_i64_31(i64 %value) {
1350 ; GFX6-LABEL: v_shl_i64_31:
1352 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1354 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1356 ; GFX8-LABEL: v_shl_i64_31:
1358 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1360 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1362 ; GFX9-LABEL: v_shl_i64_31:
1364 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1366 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1368 ; GFX10-LABEL: v_shl_i64_31:
1370 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1372 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1373 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1374 %result = shl i64 %value, 31
; Uniform (inreg) variable i64 shift. All targets are expected to emit a single
; s_lshl_b64 that takes s2, the low dword of %amount, as the shift amount.
1378 define amdgpu_ps i64 @s_shl_i64(i64 inreg %value, i64 inreg %amount) {
1379 ; GCN-LABEL: s_shl_i64:
1381 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
1382 ; GCN-NEXT: ; return to shader part epilog
1384 ; GFX10-LABEL: s_shl_i64:
1386 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
1387 ; GFX10-NEXT: ; return to shader part epilog
1388 %result = shl i64 %value, %amount
; Uniform (inreg) i64 shift by the constant 63. The expected code narrows it to
; a 32-bit scalar shift: s1 = s0 << 31 (63 - 32) and s0 = 0.
1392 define amdgpu_ps i64 @s_shl_i64_63(i64 inreg %value) {
1393 ; GCN-LABEL: s_shl_i64_63:
1395 ; GCN-NEXT: s_lshl_b32 s1, s0, 31
1396 ; GCN-NEXT: s_mov_b32 s0, 0
1397 ; GCN-NEXT: ; return to shader part epilog
1399 ; GFX10-LABEL: s_shl_i64_63:
1401 ; GFX10-NEXT: s_lshl_b32 s1, s0, 31
1402 ; GFX10-NEXT: s_mov_b32 s0, 0
1403 ; GFX10-NEXT: ; return to shader part epilog
1404 %result = shl i64 %value, 63
; Uniform (inreg) i64 shift by the constant 33. The expected code narrows it to
; a 32-bit scalar shift: s1 = s0 << 1 (33 - 32) and s0 = 0.
1408 define amdgpu_ps i64 @s_shl_i64_33(i64 inreg %value) {
1409 ; GCN-LABEL: s_shl_i64_33:
1411 ; GCN-NEXT: s_lshl_b32 s1, s0, 1
1412 ; GCN-NEXT: s_mov_b32 s0, 0
1413 ; GCN-NEXT: ; return to shader part epilog
1415 ; GFX10-LABEL: s_shl_i64_33:
1417 ; GFX10-NEXT: s_lshl_b32 s1, s0, 1
1418 ; GFX10-NEXT: s_mov_b32 s0, 0
1419 ; GFX10-NEXT: ; return to shader part epilog
1420 %result = shl i64 %value, 33
; Uniform (inreg) i64 shift by exactly 32. No shift instruction is expected:
; s0 is copied into s1 and s0 is then set to 0.
1424 define amdgpu_ps i64 @s_shl_i64_32(i64 inreg %value) {
1425 ; GCN-LABEL: s_shl_i64_32:
1427 ; GCN-NEXT: s_mov_b32 s1, s0
1428 ; GCN-NEXT: s_mov_b32 s0, 0
1429 ; GCN-NEXT: ; return to shader part epilog
1431 ; GFX10-LABEL: s_shl_i64_32:
1433 ; GFX10-NEXT: s_mov_b32 s1, s0
1434 ; GFX10-NEXT: s_mov_b32 s0, 0
1435 ; GFX10-NEXT: ; return to shader part epilog
1436 %result = shl i64 %value, 32
; Uniform (inreg) i64 shift by the constant 31. The expected code keeps a full
; 64-bit s_lshl_b64 with 31 as an immediate operand.
1440 define amdgpu_ps i64 @s_shl_i64_31(i64 inreg %value) {
1441 ; GCN-LABEL: s_shl_i64_31:
1443 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 31
1444 ; GCN-NEXT: ; return to shader part epilog
1446 ; GFX10-LABEL: s_shl_i64_31:
1448 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 31
1449 ; GFX10-NEXT: ; return to shader part epilog
1450 %result = shl i64 %value, 31
; Mixed operands: %value is uniform (inreg), %amount is in a VGPR. The result
; is bitcast to <2 x float>. The expected code is a single vector 64-bit shift
; that reads the value directly from s[0:1] and the amount from v0.
1454 define amdgpu_ps <2 x float> @shl_i64_sv(i64 inreg %value, i64 %amount) {
1455 ; GFX6-LABEL: shl_i64_sv:
1457 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0
1458 ; GFX6-NEXT: ; return to shader part epilog
1460 ; GFX8-LABEL: shl_i64_sv:
1462 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1463 ; GFX8-NEXT: ; return to shader part epilog
1465 ; GFX9-LABEL: shl_i64_sv:
1467 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1468 ; GFX9-NEXT: ; return to shader part epilog
1470 ; GFX10-LABEL: shl_i64_sv:
1472 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1473 ; GFX10-NEXT: ; return to shader part epilog
1474 %result = shl i64 %value, %amount
1475 %cast = bitcast i64 %result to <2 x float>
1476 ret <2 x float> %cast
; Mixed operands: %value is in VGPRs, %amount is uniform (inreg). The result
; is bitcast to <2 x float>. The expected code is a single vector 64-bit shift
; that reads the value from v[0:1] and the amount directly from s0.
1479 define amdgpu_ps <2 x float> @shl_i64_vs(i64 %value, i64 inreg %amount) {
1480 ; GFX6-LABEL: shl_i64_vs:
1482 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s0
1483 ; GFX6-NEXT: ; return to shader part epilog
1485 ; GFX8-LABEL: shl_i64_vs:
1487 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1488 ; GFX8-NEXT: ; return to shader part epilog
1490 ; GFX9-LABEL: shl_i64_vs:
1492 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1493 ; GFX9-NEXT: ; return to shader part epilog
1495 ; GFX10-LABEL: shl_i64_vs:
1497 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1498 ; GFX10-NEXT: ; return to shader part epilog
1499 %result = shl i64 %value, %amount
1500 %cast = bitcast i64 %result to <2 x float>
1501 ret <2 x float> %cast
; Variable shift of <2 x i64> in VGPRs. The expected code is two independent
; 64-bit shifts, one per element, taking their amounts from v4 and v6 (the low
; dwords of the two %amount elements).
1504 define <2 x i64> @v_shl_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1505 ; GFX6-LABEL: v_shl_v2i64:
1507 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
1509 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v6
1510 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1512 ; GFX8-LABEL: v_shl_v2i64:
1514 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1515 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1516 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1517 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1519 ; GFX9-LABEL: v_shl_v2i64:
1521 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1523 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1524 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1526 ; GFX10-LABEL: v_shl_v2i64:
1528 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1529 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1530 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1531 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1532 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1533 %result = shl <2 x i64> %value, %amount
1534 ret <2 x i64> %result
; Shift of <2 x i64> in VGPRs by the splat constant 31. The expected code is
; two 64-bit shifts, one per element, each with 31 as an inline immediate.
1537 define <2 x i64> @v_shl_v2i64_31(<2 x i64> %value) {
1538 ; GFX6-LABEL: v_shl_v2i64_31:
1540 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1542 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 31
1543 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1545 ; GFX8-LABEL: v_shl_v2i64_31:
1547 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1549 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1550 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX9-LABEL: v_shl_v2i64_31:
1554 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1556 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1557 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1559 ; GFX10-LABEL: v_shl_v2i64_31:
1561 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1562 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1563 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1564 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1565 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1566 %result = shl <2 x i64> %value, <i64 31, i64 31>
1567 ret <2 x i64> %result
; Uniform (inreg) variable shift of <2 x i64>. The expected code is two
; s_lshl_b64 instructions, one per element, taking their amounts from s4 and
; s6 (the low dwords of the two %amount elements).
1570 define amdgpu_ps <2 x i64> @s_shl_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1571 ; GCN-LABEL: s_shl_v2i64:
1573 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
1574 ; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
1575 ; GCN-NEXT: ; return to shader part epilog
1577 ; GFX10-LABEL: s_shl_v2i64:
1579 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
1580 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
1581 ; GFX10-NEXT: ; return to shader part epilog
1582 %result = shl <2 x i64> %value, %amount
1583 ret <2 x i64> %result
; Variable shift of the non-power-of-two type i65 in VGPRs. The expected code
; expands it into 64-bit pieces: it computes both (64 - amount) and
; (amount - 64) from v3, issues 64-bit left/right shifts of the low and high
; parts, and picks the final dwords with v_cndmask_b32 based on two compares
; of v3 (amount < 64 unsigned, and amount == 0).
1586 define i65 @v_shl_i65(i65 %value, i65 %amount) {
1587 ; GFX6-LABEL: v_shl_i65:
1589 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1590 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v3
1591 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v4
1592 ; GFX6-NEXT: v_lshl_b64 v[5:6], v[2:3], v3
1593 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v3
1594 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], v3
1595 ; GFX6-NEXT: v_or_b32_e32 v9, v4, v5
1596 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], v8
1597 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1598 ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1599 ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1600 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1601 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1602 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1603 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1605 ; GFX8-LABEL: v_shl_i65:
1607 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v3
1609 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
1610 ; GFX8-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
1611 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v3
1612 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1613 ; GFX8-NEXT: v_or_b32_e32 v9, v4, v5
1614 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
1615 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1616 ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1617 ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1618 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1619 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1620 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1621 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1623 ; GFX9-LABEL: v_shl_i65:
1625 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX9-NEXT: v_sub_u32_e32 v4, 64, v3
1627 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
1628 ; GFX9-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
1629 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v3
1630 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1631 ; GFX9-NEXT: v_or_b32_e32 v9, v4, v5
1632 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
1633 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1634 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1635 ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1636 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1637 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1638 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1639 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1641 ; GFX10-LABEL: v_shl_i65:
1643 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1645 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
1646 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
1647 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
1648 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1649 ; GFX10-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
1650 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1651 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1]
1652 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v4
1653 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
1654 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v1, vcc_lo
1655 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
1656 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
1657 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
1658 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1659 %result = shl i65 %value, %amount
; Shift of i65 in VGPRs by the constant 33. The expected code needs no
; compares or selects: the low result dword is 0, the second dword is the low
; input dword shifted left by 1, and the upper part v[2:3] comes from a 64-bit
; right shift of v[0:1] by 31.
1663 define i65 @v_shl_i65_33(i65 %value) {
1664 ; GFX6-LABEL: v_shl_i65_33:
1666 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1668 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 31
1669 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
1670 ; GFX6-NEXT: v_mov_b32_e32 v1, v4
1671 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1673 ; GFX8-LABEL: v_shl_i65_33:
1675 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1677 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1678 ; GFX8-NEXT: v_mov_b32_e32 v0, 0
1679 ; GFX8-NEXT: v_mov_b32_e32 v1, v4
1680 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1682 ; GFX9-LABEL: v_shl_i65_33:
1684 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1685 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1686 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1687 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1688 ; GFX9-NEXT: v_mov_b32_e32 v1, v4
1689 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1691 ; GFX10-LABEL: v_shl_i65_33:
1693 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1695 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1696 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1697 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1698 ; GFX10-NEXT: v_mov_b32_e32 v1, v4
1699 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1700 %result = shl i65 %value, 33
; Uniform (inreg) variable shift of i65. The expected code is the scalar
; counterpart of the VGPR expansion: it computes (amount - 64) and
; (64 - amount) from s3, materializes the results of "amount < 64" and
; "amount == 0" as 0/1 values with s_cselect_b32, performs the 64-bit scalar
; shifts, and selects the final register pairs with s_cselect_b64.
1704 define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
1705 ; GCN-LABEL: s_shl_i65:
1707 ; GCN-NEXT: s_sub_i32 s10, s3, 64
1708 ; GCN-NEXT: s_sub_i32 s6, 64, s3
1709 ; GCN-NEXT: s_cmp_lt_u32 s3, 64
1710 ; GCN-NEXT: s_cselect_b32 s11, 1, 0
1711 ; GCN-NEXT: s_cmp_eq_u32 s3, 0
1712 ; GCN-NEXT: s_cselect_b32 s12, 1, 0
1713 ; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
1714 ; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s3
1715 ; GCN-NEXT: s_lshl_b64 s[4:5], s[0:1], s3
1716 ; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1717 ; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
1718 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1719 ; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
1720 ; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
1721 ; GCN-NEXT: s_cmp_lg_u32 s12, 0
1722 ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1723 ; GCN-NEXT: ; return to shader part epilog
1725 ; GFX10-LABEL: s_shl_i65:
1727 ; GFX10-NEXT: s_sub_i32 s10, s3, 64
1728 ; GFX10-NEXT: s_sub_i32 s4, 64, s3
1729 ; GFX10-NEXT: s_cmp_lt_u32 s3, 64
1730 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0
1731 ; GFX10-NEXT: s_cmp_eq_u32 s3, 0
1732 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
1733 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
1734 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s3
1735 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s3
1736 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
1737 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
1738 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
1739 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
1740 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
1741 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
1742 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1743 ; GFX10-NEXT: ; return to shader part epilog
1744 %result = shl i65 %value, %amount
; Uniform (inreg) shift of i65 by the constant 33. In the expected code the low
; result dword s0 is 0 and s1 is s0 << 1; the upper part s[2:3] is the OR of a
; pair holding {0, s2 << 1} with the 64-bit right shift of s[0:1] by 31.
; The tahiti/fiji/gfx900 and gfx1010 sequences differ only in scheduling and
; temporary register choice.
1748 define amdgpu_ps i65 @s_shl_i65_33(i65 inreg %value) {
1749 ; GCN-LABEL: s_shl_i65_33:
1751 ; GCN-NEXT: s_lshl_b32 s4, s0, 1
1752 ; GCN-NEXT: s_mov_b32 s6, 0
1753 ; GCN-NEXT: s_lshl_b32 s7, s2, 1
1754 ; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], 31
1755 ; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[0:1]
1756 ; GCN-NEXT: s_mov_b32 s0, 0
1757 ; GCN-NEXT: s_mov_b32 s1, s4
1758 ; GCN-NEXT: ; return to shader part epilog
1760 ; GFX10-LABEL: s_shl_i65_33:
1762 ; GFX10-NEXT: s_mov_b32 s4, 0
1763 ; GFX10-NEXT: s_lshl_b32 s5, s2, 1
1764 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[0:1], 31
1765 ; GFX10-NEXT: s_lshl_b32 s1, s0, 1
1766 ; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
1767 ; GFX10-NEXT: s_mov_b32 s0, 0
1768 ; GFX10-NEXT: ; return to shader part epilog
1769 %result = shl i65 %value, 33
1773 ; FIXME: Argument lowering asserts for the <2 x i65> tests below, so they are commented out.
1774 ; define <2 x i65> @v_shl_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1775 ; %result = shl <2 x i65> %value, %amount
1776 ; ret <2 x i65> %result
1779 ; define amdgpu_ps <2 x i65> @s_shl_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1780 ; %result = shl <2 x i65> %value, %amount
1781 ; ret <2 x i65> %result