1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; i8 shl by a variable amount; neither argument is marked inreg and the expected code shifts v0 by v1.
; GFX6 and GFX10PLUS are expected to mask the amount with 0xff first; GFX8/GFX9 expect a single SDWA 16-bit shift.
8 define i8 @v_shl_i8(i8 %value, i8 %amount) {
9 ; GFX6-LABEL: v_shl_i8:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
13 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
14 ; GFX6-NEXT: s_setpc_b64 s[30:31]
16 ; GFX8-LABEL: v_shl_i8:
18 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
20 ; GFX8-NEXT: s_setpc_b64 s[30:31]
22 ; GFX9-LABEL: v_shl_i8:
24 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
26 ; GFX9-NEXT: s_setpc_b64 s[30:31]
28 ; GFX10PLUS-LABEL: v_shl_i8:
30 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
32 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
33 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
34 %result = shl i8 %value, %amount
; i8 shl by the constant 7; every target is expected to emit one shift of v0 with the immediate 7
; (32-bit on GFX6, 16-bit on GFX8 and later).
38 define i8 @v_shl_i8_7(i8 %value) {
39 ; GFX6-LABEL: v_shl_i8_7:
41 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 7, v0
43 ; GFX6-NEXT: s_setpc_b64 s[30:31]
45 ; GFX8-LABEL: v_shl_i8_7:
47 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 7, v0
49 ; GFX8-NEXT: s_setpc_b64 s[30:31]
51 ; GFX9-LABEL: v_shl_i8_7:
53 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 7, v0
55 ; GFX9-NEXT: s_setpc_b64 s[30:31]
57 ; GFX10PLUS-LABEL: v_shl_i8_7:
59 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 7, v0
61 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
62 %result = shl i8 %value, 7
; i8 shl by a variable amount in an amdgpu_ps function with both arguments inreg; expected code shifts s0 by s1.
; GFX8 and later are expected to mask the value with 0xff before the shift; GFX6 expects only the shift.
66 define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) {
67 ; GFX6-LABEL: s_shl_i8:
69 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
70 ; GFX6-NEXT: ; return to shader part epilog
72 ; GFX8-LABEL: s_shl_i8:
74 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
75 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
76 ; GFX8-NEXT: ; return to shader part epilog
78 ; GFX9-LABEL: s_shl_i8:
80 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
81 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
82 ; GFX9-NEXT: ; return to shader part epilog
84 ; GFX10PLUS-LABEL: s_shl_i8:
86 ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff
87 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
88 ; GFX10PLUS-NEXT: ; return to shader part epilog
89 %result = shl i8 %value, %amount
; i8 shl by the constant 7 in an amdgpu_ps function with an inreg value; all targets expect a single s_lshl_b32 of s0 by 7.
93 define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
94 ; GCN-LABEL: s_shl_i8_7:
96 ; GCN-NEXT: s_lshl_b32 s0, s0, 7
97 ; GCN-NEXT: ; return to shader part epilog
99 ; GFX10PLUS-LABEL: s_shl_i8_7:
100 ; GFX10PLUS: ; %bb.0:
101 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
102 ; GFX10PLUS-NEXT: ; return to shader part epilog
103 %result = shl i8 %value, 7
; i24 shl by a variable amount, no inreg arguments; all targets are expected to mask the amount (v1) with 0xffffff
; and then perform a 32-bit shift of v0.
108 define i24 @v_shl_i24(i24 %value, i24 %amount) {
109 ; GCN-LABEL: v_shl_i24:
111 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1
113 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
114 ; GCN-NEXT: s_setpc_b64 s[30:31]
116 ; GFX10PLUS-LABEL: v_shl_i24:
117 ; GFX10PLUS: ; %bb.0:
118 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
120 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
121 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
122 %result = shl i24 %value, %amount
; i24 shl by the constant 7; all targets expect a single 32-bit shift of v0 by the immediate 7.
126 define i24 @v_shl_i24_7(i24 %value) {
127 ; GCN-LABEL: v_shl_i24_7:
129 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
131 ; GCN-NEXT: s_setpc_b64 s[30:31]
133 ; GFX10PLUS-LABEL: v_shl_i24_7:
134 ; GFX10PLUS: ; %bb.0:
135 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 7, v0
137 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
138 %result = shl i24 %value, 7
; i24 shl by a variable amount in an amdgpu_ps function with both arguments inreg; all targets expect one
; s_lshl_b32 of s0 by s1 with no masking instruction.
142 define amdgpu_ps i24 @s_shl_i24(i24 inreg %value, i24 inreg %amount) {
143 ; GCN-LABEL: s_shl_i24:
145 ; GCN-NEXT: s_lshl_b32 s0, s0, s1
146 ; GCN-NEXT: ; return to shader part epilog
148 ; GFX10PLUS-LABEL: s_shl_i24:
149 ; GFX10PLUS: ; %bb.0:
150 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
151 ; GFX10PLUS-NEXT: ; return to shader part epilog
152 %result = shl i24 %value, %amount
; i24 shl by the constant 7 in an amdgpu_ps function with an inreg value; all targets expect one s_lshl_b32 of s0 by 7.
156 define amdgpu_ps i24 @s_shl_i24_7(i24 inreg %value) {
157 ; GCN-LABEL: s_shl_i24_7:
159 ; GCN-NEXT: s_lshl_b32 s0, s0, 7
160 ; GCN-NEXT: ; return to shader part epilog
162 ; GFX10PLUS-LABEL: s_shl_i24_7:
163 ; GFX10PLUS: ; %bb.0:
164 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
165 ; GFX10PLUS-NEXT: ; return to shader part epilog
166 %result = shl i24 %value, 7
; i32 shl by a variable amount, no inreg arguments; all targets expect a single v_lshlrev_b32_e32 of v0 by v1.
170 define i32 @v_shl_i32(i32 %value, i32 %amount) {
171 ; GCN-LABEL: v_shl_i32:
173 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
175 ; GCN-NEXT: s_setpc_b64 s[30:31]
177 ; GFX10PLUS-LABEL: v_shl_i32:
178 ; GFX10PLUS: ; %bb.0:
179 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
181 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182 %result = shl i32 %value, %amount
; i32 shl by the constant 31; all targets expect a single v_lshlrev_b32_e32 of v0 by the immediate 31.
186 define i32 @v_shl_i32_31(i32 %value) {
187 ; GCN-LABEL: v_shl_i32_31:
189 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
191 ; GCN-NEXT: s_setpc_b64 s[30:31]
193 ; GFX10PLUS-LABEL: v_shl_i32_31:
194 ; GFX10PLUS: ; %bb.0:
195 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
197 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
198 %result = shl i32 %value, 31
; i32 shl by a variable amount in an amdgpu_ps function with both arguments inreg; all targets expect one
; s_lshl_b32 of s0 by s1.
202 define amdgpu_ps i32 @s_shl_i32(i32 inreg %value, i32 inreg %amount) {
203 ; GCN-LABEL: s_shl_i32:
205 ; GCN-NEXT: s_lshl_b32 s0, s0, s1
206 ; GCN-NEXT: ; return to shader part epilog
208 ; GFX10PLUS-LABEL: s_shl_i32:
209 ; GFX10PLUS: ; %bb.0:
210 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
211 ; GFX10PLUS-NEXT: ; return to shader part epilog
212 %result = shl i32 %value, %amount
; i32 shl by the constant 31 in an amdgpu_ps function with an inreg value; all targets expect one s_lshl_b32 of s0 by 31.
216 define amdgpu_ps i32 @s_shl_i32_31(i32 inreg %value) {
217 ; GCN-LABEL: s_shl_i32_31:
219 ; GCN-NEXT: s_lshl_b32 s0, s0, 31
220 ; GCN-NEXT: ; return to shader part epilog
222 ; GFX10PLUS-LABEL: s_shl_i32_31:
223 ; GFX10PLUS: ; %bb.0:
224 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 31
225 ; GFX10PLUS-NEXT: ; return to shader part epilog
226 %result = shl i32 %value, 31
; i32 shl with an inreg value and a non-inreg amount; the result is bitcast to float.
; GFX6 expects v_lshl_b32_e32 with s0 as the first source; the other targets expect v_lshlrev_b32_e64 with s0 as the last source.
230 define amdgpu_ps float @shl_i32_sv(i32 inreg %value, i32 %amount) {
231 ; GFX6-LABEL: shl_i32_sv:
233 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
234 ; GFX6-NEXT: ; return to shader part epilog
236 ; GFX8-LABEL: shl_i32_sv:
238 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
239 ; GFX8-NEXT: ; return to shader part epilog
241 ; GFX9-LABEL: shl_i32_sv:
243 ; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
244 ; GFX9-NEXT: ; return to shader part epilog
246 ; GFX10PLUS-LABEL: shl_i32_sv:
247 ; GFX10PLUS: ; %bb.0:
248 ; GFX10PLUS-NEXT: v_lshlrev_b32_e64 v0, v0, s0
249 ; GFX10PLUS-NEXT: ; return to shader part epilog
250 %result = shl i32 %value, %amount
251 %cast = bitcast i32 %result to float
; i32 shl with a non-inreg value and an inreg amount; the result is bitcast to float.
; All targets expect a single v_lshlrev_b32_e32 of v0 by s0.
255 define amdgpu_ps float @shl_i32_vs(i32 %value, i32 inreg %amount) {
256 ; GCN-LABEL: shl_i32_vs:
258 ; GCN-NEXT: v_lshlrev_b32_e32 v0, s0, v0
259 ; GCN-NEXT: ; return to shader part epilog
261 ; GFX10PLUS-LABEL: shl_i32_vs:
262 ; GFX10PLUS: ; %bb.0:
263 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, s0, v0
264 ; GFX10PLUS-NEXT: ; return to shader part epilog
265 %result = shl i32 %value, %amount
266 %cast = bitcast i32 %result to float
; <2 x i32> shl by a variable vector amount, no inreg arguments; all targets expect one v_lshlrev_b32_e32 per
; element (v0 by v2, v1 by v3).
270 define <2 x i32> @v_shl_v2i32(<2 x i32> %value, <2 x i32> %amount) {
271 ; GCN-LABEL: v_shl_v2i32:
273 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v2, v0
275 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v3, v1
276 ; GCN-NEXT: s_setpc_b64 s[30:31]
278 ; GFX10PLUS-LABEL: v_shl_v2i32:
279 ; GFX10PLUS: ; %bb.0:
280 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v2, v0
282 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v3, v1
283 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
284 %result = shl <2 x i32> %value, %amount
285 ret <2 x i32> %result
; <2 x i32> shl by the constant splat <31, 31>; all targets expect one v_lshlrev_b32_e32 by the immediate 31 per element.
288 define <2 x i32> @v_shl_v2i32_31(<2 x i32> %value) {
289 ; GCN-LABEL: v_shl_v2i32_31:
291 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
293 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v1
294 ; GCN-NEXT: s_setpc_b64 s[30:31]
296 ; GFX10PLUS-LABEL: v_shl_v2i32_31:
297 ; GFX10PLUS: ; %bb.0:
298 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
300 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 31, v1
301 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
302 %result = shl <2 x i32> %value, <i32 31, i32 31>
303 ret <2 x i32> %result
; <2 x i32> shl in an amdgpu_ps function with both vectors inreg; all targets expect one s_lshl_b32 per element
; (s0 by s2, s1 by s3).
306 define amdgpu_ps <2 x i32> @s_shl_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
307 ; GCN-LABEL: s_shl_v2i32:
309 ; GCN-NEXT: s_lshl_b32 s0, s0, s2
310 ; GCN-NEXT: s_lshl_b32 s1, s1, s3
311 ; GCN-NEXT: ; return to shader part epilog
313 ; GFX10PLUS-LABEL: s_shl_v2i32:
314 ; GFX10PLUS: ; %bb.0:
315 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
316 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s3
317 ; GFX10PLUS-NEXT: ; return to shader part epilog
318 %result = shl <2 x i32> %value, %amount
319 ret <2 x i32> %result
; <3 x i32> shl by a variable vector amount, no inreg arguments; all targets expect three v_lshlrev_b32_e32
; instructions (v0..v2 shifted by v3..v5).
322 define <3 x i32> @v_shl_v3i32(<3 x i32> %value, <3 x i32> %amount) {
323 ; GCN-LABEL: v_shl_v3i32:
325 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v3, v0
327 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v4, v1
328 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v5, v2
329 ; GCN-NEXT: s_setpc_b64 s[30:31]
331 ; GFX10PLUS-LABEL: v_shl_v3i32:
332 ; GFX10PLUS: ; %bb.0:
333 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v3, v0
335 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v4, v1
336 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v5, v2
337 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
338 %result = shl <3 x i32> %value, %amount
339 ret <3 x i32> %result
; <3 x i32> shl in an amdgpu_ps function with both vectors inreg; all targets expect three s_lshl_b32
; instructions (s0..s2 shifted by s3..s5).
342 define amdgpu_ps <3 x i32> @s_shl_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
343 ; GCN-LABEL: s_shl_v3i32:
345 ; GCN-NEXT: s_lshl_b32 s0, s0, s3
346 ; GCN-NEXT: s_lshl_b32 s1, s1, s4
347 ; GCN-NEXT: s_lshl_b32 s2, s2, s5
348 ; GCN-NEXT: ; return to shader part epilog
350 ; GFX10PLUS-LABEL: s_shl_v3i32:
351 ; GFX10PLUS: ; %bb.0:
352 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s3
353 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s4
354 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s5
355 ; GFX10PLUS-NEXT: ; return to shader part epilog
356 %result = shl <3 x i32> %value, %amount
357 ret <3 x i32> %result
; <4 x i32> shl by a variable vector amount, no inreg arguments; all targets expect four v_lshlrev_b32_e32
; instructions (v0..v3 shifted by v4..v7).
360 define <4 x i32> @v_shl_v4i32(<4 x i32> %value, <4 x i32> %amount) {
361 ; GCN-LABEL: v_shl_v4i32:
363 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v4, v0
365 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v5, v1
366 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v6, v2
367 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v7, v3
368 ; GCN-NEXT: s_setpc_b64 s[30:31]
370 ; GFX10PLUS-LABEL: v_shl_v4i32:
371 ; GFX10PLUS: ; %bb.0:
372 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v4, v0
374 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v5, v1
375 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v6, v2
376 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v3, v7, v3
377 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
378 %result = shl <4 x i32> %value, %amount
379 ret <4 x i32> %result
; <4 x i32> shl in an amdgpu_ps function with both vectors inreg; all targets expect four s_lshl_b32
; instructions (s0..s3 shifted by s4..s7).
382 define amdgpu_ps <4 x i32> @s_shl_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
383 ; GCN-LABEL: s_shl_v4i32:
385 ; GCN-NEXT: s_lshl_b32 s0, s0, s4
386 ; GCN-NEXT: s_lshl_b32 s1, s1, s5
387 ; GCN-NEXT: s_lshl_b32 s2, s2, s6
388 ; GCN-NEXT: s_lshl_b32 s3, s3, s7
389 ; GCN-NEXT: ; return to shader part epilog
391 ; GFX10PLUS-LABEL: s_shl_v4i32:
392 ; GFX10PLUS: ; %bb.0:
393 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s4
394 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s5
395 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s6
396 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s7
397 ; GFX10PLUS-NEXT: ; return to shader part epilog
398 %result = shl <4 x i32> %value, %amount
399 ret <4 x i32> %result
; <5 x i32> shl by a variable vector amount, no inreg arguments; all targets expect five v_lshlrev_b32_e32
; instructions (v0..v4 shifted by v5..v9).
402 define <5 x i32> @v_shl_v5i32(<5 x i32> %value, <5 x i32> %amount) {
403 ; GCN-LABEL: v_shl_v5i32:
405 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v5, v0
407 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v6, v1
408 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v7, v2
409 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v8, v3
410 ; GCN-NEXT: v_lshlrev_b32_e32 v4, v9, v4
411 ; GCN-NEXT: s_setpc_b64 s[30:31]
413 ; GFX10PLUS-LABEL: v_shl_v5i32:
414 ; GFX10PLUS: ; %bb.0:
415 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v5, v0
417 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v6, v1
418 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v7, v2
419 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v3, v8, v3
420 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v4, v9, v4
421 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
422 %result = shl <5 x i32> %value, %amount
423 ret <5 x i32> %result
; <5 x i32> shl in an amdgpu_ps function with both vectors inreg; all targets expect five s_lshl_b32
; instructions (s0..s4 shifted by s5..s9).
426 define amdgpu_ps <5 x i32> @s_shl_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
427 ; GCN-LABEL: s_shl_v5i32:
429 ; GCN-NEXT: s_lshl_b32 s0, s0, s5
430 ; GCN-NEXT: s_lshl_b32 s1, s1, s6
431 ; GCN-NEXT: s_lshl_b32 s2, s2, s7
432 ; GCN-NEXT: s_lshl_b32 s3, s3, s8
433 ; GCN-NEXT: s_lshl_b32 s4, s4, s9
434 ; GCN-NEXT: ; return to shader part epilog
436 ; GFX10PLUS-LABEL: s_shl_v5i32:
437 ; GFX10PLUS: ; %bb.0:
438 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s5
439 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s6
440 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s7
441 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s8
442 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s9
443 ; GFX10PLUS-NEXT: ; return to shader part epilog
444 %result = shl <5 x i32> %value, %amount
445 ret <5 x i32> %result
; <16 x i32> shl by a variable vector amount, no inreg arguments. Elements 0..14 are shifted by v16..v30.
; The amount for element 15 is expected to be loaded from memory addressed via s32 (buffer_load_dword on
; GCN/GFX10, scratch_load_b32 on GFX11) and waited on with s_waitcnt vmcnt(0) before the final shift.
; GCN reuses v16 as the load destination after shifting v0; GFX10/GFX11 load into v31.
448 define <16 x i32> @v_shl_v16i32(<16 x i32> %value, <16 x i32> %amount) {
449 ; GCN-LABEL: v_shl_v16i32:
451 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452 ; GCN-NEXT: v_lshlrev_b32_e32 v0, v16, v0
453 ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
454 ; GCN-NEXT: v_lshlrev_b32_e32 v1, v17, v1
455 ; GCN-NEXT: v_lshlrev_b32_e32 v2, v18, v2
456 ; GCN-NEXT: v_lshlrev_b32_e32 v3, v19, v3
457 ; GCN-NEXT: v_lshlrev_b32_e32 v4, v20, v4
458 ; GCN-NEXT: v_lshlrev_b32_e32 v5, v21, v5
459 ; GCN-NEXT: v_lshlrev_b32_e32 v6, v22, v6
460 ; GCN-NEXT: v_lshlrev_b32_e32 v7, v23, v7
461 ; GCN-NEXT: v_lshlrev_b32_e32 v8, v24, v8
462 ; GCN-NEXT: v_lshlrev_b32_e32 v9, v25, v9
463 ; GCN-NEXT: v_lshlrev_b32_e32 v10, v26, v10
464 ; GCN-NEXT: v_lshlrev_b32_e32 v11, v27, v11
465 ; GCN-NEXT: v_lshlrev_b32_e32 v12, v28, v12
466 ; GCN-NEXT: v_lshlrev_b32_e32 v13, v29, v13
467 ; GCN-NEXT: v_lshlrev_b32_e32 v14, v30, v14
468 ; GCN-NEXT: s_waitcnt vmcnt(0)
469 ; GCN-NEXT: v_lshlrev_b32_e32 v15, v16, v15
470 ; GCN-NEXT: s_setpc_b64 s[30:31]
472 ; GFX10-LABEL: v_shl_v16i32:
474 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
476 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v16, v0
477 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v17, v1
478 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, v18, v2
479 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, v19, v3
480 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, v20, v4
481 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, v21, v5
482 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, v22, v6
483 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, v23, v7
484 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, v24, v8
485 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, v25, v9
486 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, v26, v10
487 ; GFX10-NEXT: v_lshlrev_b32_e32 v11, v27, v11
488 ; GFX10-NEXT: v_lshlrev_b32_e32 v12, v28, v12
489 ; GFX10-NEXT: v_lshlrev_b32_e32 v13, v29, v13
490 ; GFX10-NEXT: v_lshlrev_b32_e32 v14, v30, v14
491 ; GFX10-NEXT: s_waitcnt vmcnt(0)
492 ; GFX10-NEXT: v_lshlrev_b32_e32 v15, v31, v15
493 ; GFX10-NEXT: s_setpc_b64 s[30:31]
495 ; GFX11-LABEL: v_shl_v16i32:
497 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
499 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, v16, v0
500 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, v17, v1
501 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, v18, v2
502 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, v19, v3
503 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, v20, v4
504 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, v21, v5
505 ; GFX11-NEXT: v_lshlrev_b32_e32 v6, v22, v6
506 ; GFX11-NEXT: v_lshlrev_b32_e32 v7, v23, v7
507 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, v24, v8
508 ; GFX11-NEXT: v_lshlrev_b32_e32 v9, v25, v9
509 ; GFX11-NEXT: v_lshlrev_b32_e32 v10, v26, v10
510 ; GFX11-NEXT: v_lshlrev_b32_e32 v11, v27, v11
511 ; GFX11-NEXT: v_lshlrev_b32_e32 v12, v28, v12
512 ; GFX11-NEXT: v_lshlrev_b32_e32 v13, v29, v13
513 ; GFX11-NEXT: v_lshlrev_b32_e32 v14, v30, v14
514 ; GFX11-NEXT: s_waitcnt vmcnt(0)
515 ; GFX11-NEXT: v_lshlrev_b32_e32 v15, v31, v15
516 ; GFX11-NEXT: s_setpc_b64 s[30:31]
517 %result = shl <16 x i32> %value, %amount
518 ret <16 x i32> %result
; <16 x i32> shl in an amdgpu_ps function with both vectors inreg; all targets expect sixteen s_lshl_b32
; instructions (s0..s15 shifted by s16..s31) and no memory access.
521 define amdgpu_ps <16 x i32> @s_shl_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
522 ; GCN-LABEL: s_shl_v16i32:
524 ; GCN-NEXT: s_lshl_b32 s0, s0, s16
525 ; GCN-NEXT: s_lshl_b32 s1, s1, s17
526 ; GCN-NEXT: s_lshl_b32 s2, s2, s18
527 ; GCN-NEXT: s_lshl_b32 s3, s3, s19
528 ; GCN-NEXT: s_lshl_b32 s4, s4, s20
529 ; GCN-NEXT: s_lshl_b32 s5, s5, s21
530 ; GCN-NEXT: s_lshl_b32 s6, s6, s22
531 ; GCN-NEXT: s_lshl_b32 s7, s7, s23
532 ; GCN-NEXT: s_lshl_b32 s8, s8, s24
533 ; GCN-NEXT: s_lshl_b32 s9, s9, s25
534 ; GCN-NEXT: s_lshl_b32 s10, s10, s26
535 ; GCN-NEXT: s_lshl_b32 s11, s11, s27
536 ; GCN-NEXT: s_lshl_b32 s12, s12, s28
537 ; GCN-NEXT: s_lshl_b32 s13, s13, s29
538 ; GCN-NEXT: s_lshl_b32 s14, s14, s30
539 ; GCN-NEXT: s_lshl_b32 s15, s15, s31
540 ; GCN-NEXT: ; return to shader part epilog
542 ; GFX10PLUS-LABEL: s_shl_v16i32:
543 ; GFX10PLUS: ; %bb.0:
544 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s16
545 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s17
546 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s18
547 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s19
548 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s20
549 ; GFX10PLUS-NEXT: s_lshl_b32 s5, s5, s21
550 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, s22
551 ; GFX10PLUS-NEXT: s_lshl_b32 s7, s7, s23
552 ; GFX10PLUS-NEXT: s_lshl_b32 s8, s8, s24
553 ; GFX10PLUS-NEXT: s_lshl_b32 s9, s9, s25
554 ; GFX10PLUS-NEXT: s_lshl_b32 s10, s10, s26
555 ; GFX10PLUS-NEXT: s_lshl_b32 s11, s11, s27
556 ; GFX10PLUS-NEXT: s_lshl_b32 s12, s12, s28
557 ; GFX10PLUS-NEXT: s_lshl_b32 s13, s13, s29
558 ; GFX10PLUS-NEXT: s_lshl_b32 s14, s14, s30
559 ; GFX10PLUS-NEXT: s_lshl_b32 s15, s15, s31
560 ; GFX10PLUS-NEXT: ; return to shader part epilog
561 %result = shl <16 x i32> %value, %amount
562 ret <16 x i32> %result
; i16 shl by a variable amount, no inreg arguments. GFX6 is expected to mask the amount with 0xffff and use a
; 32-bit shift; GFX8 and later expect a single 16-bit shift of v0 by v1.
565 define i16 @v_shl_i16(i16 %value, i16 %amount) {
566 ; GFX6-LABEL: v_shl_i16:
568 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
570 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
571 ; GFX6-NEXT: s_setpc_b64 s[30:31]
573 ; GFX8-LABEL: v_shl_i16:
575 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v1, v0
577 ; GFX8-NEXT: s_setpc_b64 s[30:31]
579 ; GFX9-LABEL: v_shl_i16:
581 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v1, v0
583 ; GFX9-NEXT: s_setpc_b64 s[30:31]
585 ; GFX10PLUS-LABEL: v_shl_i16:
586 ; GFX10PLUS: ; %bb.0:
587 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
589 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
590 %result = shl i16 %value, %amount
; i16 shl by the constant 15; every target expects one shift of v0 by the immediate 15
; (32-bit on GFX6, 16-bit on GFX8 and later).
594 define i16 @v_shl_i16_15(i16 %value) {
595 ; GFX6-LABEL: v_shl_i16_15:
597 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
599 ; GFX6-NEXT: s_setpc_b64 s[30:31]
601 ; GFX8-LABEL: v_shl_i16_15:
603 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0
605 ; GFX8-NEXT: s_setpc_b64 s[30:31]
607 ; GFX9-LABEL: v_shl_i16_15:
609 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 15, v0
611 ; GFX9-NEXT: s_setpc_b64 s[30:31]
613 ; GFX10PLUS-LABEL: v_shl_i16_15:
614 ; GFX10PLUS: ; %bb.0:
615 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 15, v0
617 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
618 %result = shl i16 %value, 15
; i16 shl by a variable amount in an amdgpu_ps function with both arguments inreg; expected code shifts s0 by s1.
; GFX8 and later are expected to mask the value with 0xffff before the shift; GFX6 expects only the shift.
622 define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) {
623 ; GFX6-LABEL: s_shl_i16:
625 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
626 ; GFX6-NEXT: ; return to shader part epilog
628 ; GFX8-LABEL: s_shl_i16:
630 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
631 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
632 ; GFX8-NEXT: ; return to shader part epilog
634 ; GFX9-LABEL: s_shl_i16:
636 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
637 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
638 ; GFX9-NEXT: ; return to shader part epilog
640 ; GFX10PLUS-LABEL: s_shl_i16:
641 ; GFX10PLUS: ; %bb.0:
642 ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
643 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
644 ; GFX10PLUS-NEXT: ; return to shader part epilog
645 %result = shl i16 %value, %amount
; i16 shl by the constant 15 in an amdgpu_ps function with an inreg value; all targets expect one s_lshl_b32 of s0 by 15.
649 define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
650 ; GCN-LABEL: s_shl_i16_15:
652 ; GCN-NEXT: s_lshl_b32 s0, s0, 15
653 ; GCN-NEXT: ; return to shader part epilog
655 ; GFX10PLUS-LABEL: s_shl_i16_15:
656 ; GFX10PLUS: ; %bb.0:
657 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
658 ; GFX10PLUS-NEXT: ; return to shader part epilog
659 %result = shl i16 %value, 15
; i16 shl with an inreg value and a non-inreg amount; the result is bitcast to half.
; GFX6 is expected to mask the amount (v0) with 0xffff and use v_lshl_b32_e32 with s0 as the first source;
; GFX8 and later expect a single 16-bit shift with s0 as the last source.
663 define amdgpu_ps half @shl_i16_sv(i16 inreg %value, i16 %amount) {
664 ; GFX6-LABEL: shl_i16_sv:
666 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
667 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
668 ; GFX6-NEXT: ; return to shader part epilog
670 ; GFX8-LABEL: shl_i16_sv:
672 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
673 ; GFX8-NEXT: ; return to shader part epilog
675 ; GFX9-LABEL: shl_i16_sv:
677 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
678 ; GFX9-NEXT: ; return to shader part epilog
680 ; GFX10PLUS-LABEL: shl_i16_sv:
681 ; GFX10PLUS: ; %bb.0:
682 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v0, s0
683 ; GFX10PLUS-NEXT: ; return to shader part epilog
684 %result = shl i16 %value, %amount
685 %cast = bitcast i16 %result to half
; i16 shl with a non-inreg value and an inreg amount; the result is bitcast to half.
; GFX6 is expected to mask the amount (s0) with 0xffff and use a 32-bit shift; GFX8 and later expect a single
; 16-bit shift of v0 by s0.
689 define amdgpu_ps half @shl_i16_vs(i16 %value, i16 inreg %amount) {
690 ; GFX6-LABEL: shl_i16_vs:
692 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
693 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
694 ; GFX6-NEXT: ; return to shader part epilog
696 ; GFX8-LABEL: shl_i16_vs:
698 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
699 ; GFX8-NEXT: ; return to shader part epilog
701 ; GFX9-LABEL: shl_i16_vs:
703 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s0, v0
704 ; GFX9-NEXT: ; return to shader part epilog
706 ; GFX10PLUS-LABEL: shl_i16_vs:
707 ; GFX10PLUS: ; %bb.0:
708 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, s0, v0
709 ; GFX10PLUS-NEXT: ; return to shader part epilog
710 %result = shl i16 %value, %amount
711 %cast = bitcast i16 %result to half
; <2 x i16> shl by a variable vector amount, no inreg arguments.
; GFX6 expects each element in its own register (v0/v1 shifted by masked v2/v3). GFX8 expects a 16-bit shift of
; the low half plus an SDWA shift of the WORD_1 half, combined with v_or_b32. GFX9 and later expect a single
; v_pk_lshlrev_b16.
715 define <2 x i16> @v_shl_v2i16(<2 x i16> %value, <2 x i16> %amount) {
716 ; GFX6-LABEL: v_shl_v2i16:
718 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
719 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
720 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
721 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
722 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
723 ; GFX6-NEXT: s_setpc_b64 s[30:31]
725 ; GFX8-LABEL: v_shl_v2i16:
727 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v1, v0
729 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
730 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
731 ; GFX8-NEXT: s_setpc_b64 s[30:31]
733 ; GFX9-LABEL: v_shl_v2i16:
735 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v1, v0
737 ; GFX9-NEXT: s_setpc_b64 s[30:31]
739 ; GFX10PLUS-LABEL: v_shl_v2i16:
740 ; GFX10PLUS: ; %bb.0:
741 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v1, v0
743 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
744 %result = shl <2 x i16> %value, %amount
745 ret <2 x i16> %result
; <2 x i16> shl by the constant splat <15, 15>.
; GFX6 expects two 32-bit shifts by 15 (v0 and v1). GFX8 expects the constant 15 materialized in v2 for the SDWA
; shift of the WORD_1 half, then v_or_b32. GFX9 and later expect one v_pk_lshlrev_b16 with op_sel_hi:[0,1].
748 define <2 x i16> @v_shl_v2i16_15(<2 x i16> %value) {
749 ; GFX6-LABEL: v_shl_v2i16_15:
751 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
753 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 15, v1
754 ; GFX6-NEXT: s_setpc_b64 s[30:31]
756 ; GFX8-LABEL: v_shl_v2i16_15:
758 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
760 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
761 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
762 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
763 ; GFX8-NEXT: s_setpc_b64 s[30:31]
765 ; GFX9-LABEL: v_shl_v2i16_15:
767 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
769 ; GFX9-NEXT: s_setpc_b64 s[30:31]
771 ; GFX10PLUS-LABEL: v_shl_v2i16_15:
772 ; GFX10PLUS: ; %bb.0:
773 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
774 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
775 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
776 %result = shl <2 x i16> %value, <i16 15, i16 15>
777 ret <2 x i16> %result
; <2 x i16> shl in an amdgpu_ps function with both vectors inreg; the result is bitcast to i32.
; GFX6 expects the elements in separate registers (s0/s1 shifted by s2/s3), then masked with 0xffff and packed
; with a shift by 16 and s_or_b32. GFX8 and later expect the high halves extracted with s_lshr_b32 ..., 16 and
; two scalar shifts; GFX8 repacks with shift/and/or, GFX9 and later with s_pack_ll_b32_b16.
780 define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
781 ; GFX6-LABEL: s_shl_v2i16:
783 ; GFX6-NEXT: s_lshl_b32 s1, s1, s3
784 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
785 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
786 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
787 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
788 ; GFX6-NEXT: s_or_b32 s0, s0, s1
789 ; GFX6-NEXT: ; return to shader part epilog
791 ; GFX8-LABEL: s_shl_v2i16:
793 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
794 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
795 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
796 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
797 ; GFX8-NEXT: s_lshl_b32 s1, s2, s3
798 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
799 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
800 ; GFX8-NEXT: s_or_b32 s0, s1, s0
801 ; GFX8-NEXT: ; return to shader part epilog
803 ; GFX9-LABEL: s_shl_v2i16:
805 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
806 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
807 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
808 ; GFX9-NEXT: s_lshl_b32 s1, s2, s3
809 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
810 ; GFX9-NEXT: ; return to shader part epilog
812 ; GFX10PLUS-LABEL: s_shl_v2i16:
813 ; GFX10PLUS: ; %bb.0:
814 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
815 ; GFX10PLUS-NEXT: s_lshr_b32 s3, s1, 16
816 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
817 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, s3
818 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1
819 ; GFX10PLUS-NEXT: ; return to shader part epilog
820 %result = shl <2 x i16> %value, %amount
821 %cast = bitcast <2 x i16> %result to i32
; <2 x i16> shl with an inreg value vector and a non-inreg amount vector; the result is bitcast to float.
; GFX6 expects per-element 32-bit shifts (s0/s1 by masked v0/v1) followed by mask, shift by 16 and v_or_b32.
; GFX8 expects the high half of s0 extracted with s_lshr_b32 and copied to v2 for the SDWA shift.
; GFX9 and later expect one v_pk_lshlrev_b16 of s0 by v0.
825 define amdgpu_ps float @shl_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
826 ; GFX6-LABEL: shl_v2i16_sv:
828 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
829 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
830 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
831 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
832 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
833 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
834 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
835 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
836 ; GFX6-NEXT: ; return to shader part epilog
838 ; GFX8-LABEL: shl_v2i16_sv:
840 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
841 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
842 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v0, s0
843 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
844 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
845 ; GFX8-NEXT: ; return to shader part epilog
847 ; GFX9-LABEL: shl_v2i16_sv:
849 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0
850 ; GFX9-NEXT: ; return to shader part epilog
852 ; GFX10PLUS-LABEL: shl_v2i16_sv:
853 ; GFX10PLUS: ; %bb.0:
854 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v0, s0
855 ; GFX10PLUS-NEXT: ; return to shader part epilog
856 %result = shl <2 x i16> %value, %amount
857 %cast = bitcast <2 x i16> %result to float
; <2 x i16> shl with a non-inreg value vector and an inreg amount vector; the result is bitcast to float.
; GFX6 expects per-element 32-bit shifts (v0/v1 by masked s0/s1) followed by mask, shift by 16 and v_or_b32.
; GFX8 expects the high half of the amount extracted with s_lshr_b32 and copied to v2 for the SDWA shift.
; GFX9 and later expect one v_pk_lshlrev_b16 of v0 by s0.
861 define amdgpu_ps float @shl_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
862 ; GFX6-LABEL: shl_v2i16_vs:
864 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
865 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
866 ; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
867 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
868 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
869 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
870 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
871 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
872 ; GFX6-NEXT: ; return to shader part epilog
874 ; GFX8-LABEL: shl_v2i16_vs:
876 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
877 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
878 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s0, v0
879 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
880 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
881 ; GFX8-NEXT: ; return to shader part epilog
883 ; GFX9-LABEL: shl_v2i16_vs:
885 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s0, v0
886 ; GFX9-NEXT: ; return to shader part epilog
888 ; GFX10PLUS-LABEL: shl_v2i16_vs:
889 ; GFX10PLUS: ; %bb.0:
890 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, s0, v0
891 ; GFX10PLUS-NEXT: ; return to shader part epilog
892 %result = shl <2 x i16> %value, %amount
893 %cast = bitcast <2 x i16> %result to float
898 ; define <3 x i16> @v_shl_v3i16(<3 x i16> %value, <3 x i16> %amount) {
899 ; %result = shl <3 x i16> %value, %amount
900 ; ret <3 x i16> %result
903 ; define amdgpu_ps <3 x i16> @s_shl_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
904 ; %result = shl <3 x i16> %value, %amount
905 ; ret <3 x i16> %result
; <4 x i16> shl by a variable vector amount, no inreg arguments; the result is bitcast to <2 x float>.
; GFX6 expects four 32-bit shifts (v0..v3 by masked v4..v7), then the results masked with 0xffff and packed
; pairwise into v0 and v1. GFX8 expects a 16-bit shift plus an SDWA WORD_1 shift per 32-bit register, joined
; with v_or_b32. GFX9 and later expect two v_pk_lshlrev_b16 (v0 by v2, v1 by v3).
908 define <2 x float> @v_shl_v4i16(<4 x i16> %value, <4 x i16> %amount) {
909 ; GFX6-LABEL: v_shl_v4i16:
911 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
913 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
914 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
915 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
916 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
917 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
918 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
919 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
920 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
921 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
922 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
923 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
924 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
925 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
926 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
927 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
928 ; GFX6-NEXT: s_setpc_b64 s[30:31]
930 ; GFX8-LABEL: v_shl_v4i16:
932 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v2, v0
934 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
935 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v3, v1
936 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
937 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
938 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
939 ; GFX8-NEXT: s_setpc_b64 s[30:31]
941 ; GFX9-LABEL: v_shl_v4i16:
943 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
945 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v3, v1
946 ; GFX9-NEXT: s_setpc_b64 s[30:31]
948 ; GFX10PLUS-LABEL: v_shl_v4i16:
949 ; GFX10PLUS: ; %bb.0:
950 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
951 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v2, v0
952 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v3, v1
953 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
954 %result = shl <4 x i16> %value, %amount
955 %cast = bitcast <4 x i16> %result to <2 x float>
956 ret <2 x float> %cast
; Variable shl of <4 x i16> in an amdgpu_ps function with inreg operands, so
; value and amount are in SGPRs. The result is bitcast to <2 x i32>.
; What the checks below expect
; - GFX6 has one lane per SGPR (value s0-s3, amount s4-s7), shifts each with
;   s_lshl_b32 and repacks lane pairs with s_and/s_lshl/s_or.
; - GFX8 splits each packed dword with s_lshr_b32 and s_and_b32, shifts the
;   halves separately and rebuilds the dword with shift/and/or.
; - GFX9 and GFX10PLUS extract the high halves with s_lshr_b32 and recombine
;   with s_pack_ll_b32_b16 (the two differ only in instruction order and
;   scratch registers).
959 define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
960 ; GFX6-LABEL: s_shl_v4i16:
962 ; GFX6-NEXT: s_lshl_b32 s1, s1, s5
963 ; GFX6-NEXT: s_lshl_b32 s0, s0, s4
964 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
965 ; GFX6-NEXT: s_lshl_b32 s2, s2, s6
966 ; GFX6-NEXT: s_lshl_b32 s3, s3, s7
967 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
968 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
969 ; GFX6-NEXT: s_or_b32 s0, s0, s1
970 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
971 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
972 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
973 ; GFX6-NEXT: s_or_b32 s1, s1, s2
974 ; GFX6-NEXT: ; return to shader part epilog
976 ; GFX8-LABEL: s_shl_v4i16:
978 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
979 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
980 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
981 ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
982 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
983 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
984 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
985 ; GFX8-NEXT: s_lshl_b32 s2, s4, s6
986 ; GFX8-NEXT: s_lshl_b32 s1, s1, s3
987 ; GFX8-NEXT: s_lshl_b32 s3, s5, s7
988 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
989 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
990 ; GFX8-NEXT: s_or_b32 s0, s2, s0
991 ; GFX8-NEXT: s_lshl_b32 s2, s3, 16
992 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
993 ; GFX8-NEXT: s_or_b32 s1, s2, s1
994 ; GFX8-NEXT: ; return to shader part epilog
996 ; GFX9-LABEL: s_shl_v4i16:
998 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
999 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
1000 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
1001 ; GFX9-NEXT: s_lshl_b32 s2, s4, s5
1002 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
1003 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
1004 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1005 ; GFX9-NEXT: s_lshl_b32 s1, s1, s3
1006 ; GFX9-NEXT: s_lshl_b32 s2, s2, s4
1007 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
1008 ; GFX9-NEXT: ; return to shader part epilog
1010 ; GFX10PLUS-LABEL: s_shl_v4i16:
1011 ; GFX10PLUS: ; %bb.0:
1012 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16
1013 ; GFX10PLUS-NEXT: s_lshr_b32 s5, s2, 16
1014 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
1015 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s4, s5
1016 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 16
1017 ; GFX10PLUS-NEXT: s_lshr_b32 s5, s3, 16
1018 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s3
1019 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s4, s5
1020 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2
1021 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s3
1022 ; GFX10PLUS-NEXT: ; return to shader part epilog
1023 %result = shl <4 x i16> %value, %amount
1024 %cast = bitcast <4 x i16> %result to <2 x i32>
1029 ; define <5 x i16> @v_shl_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1030 ; %result = shl <5 x i16> %value, %amount
1031 ; ret <5 x i16> %result
1034 ; define amdgpu_ps <5 x i16> @s_shl_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1035 ; %result = shl <5 x i16> %value, %amount
1036 ; ret <5 x i16> %result
1039 ; define <3 x float> @v_shl_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1040 ; %result = shl <6 x i16> %value, %amount
1041 ; %cast = bitcast <6 x i16> %result to <3 x float>
1042 ; ret <3 x float> %cast
1045 ; define amdgpu_ps <3 x i32> @s_shl_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1046 ; %result = shl <6 x i16> %value, %amount
1047 ; %cast = bitcast <6 x i16> %result to <3 x i32>
1048 ; ret <3 x i32> %cast
; Variable shl of <8 x i16> using the default calling convention (operands in
; VGPRs). The result is bitcast to <4 x float> for the return.
; What the checks below expect
; - GFX6 (tahiti) keeps one lane per VGPR (value v0-v7, amount v8-v15), masks
;   every amount with 0xffff, shifts with v_lshlrev_b32 and repacks lane
;   pairs into v0-v3 with and/shift/or.
; - GFX8 (fiji) pairs a plain v_lshlrev_b16 with its SDWA form for each dword
;   and joins the halves with v_or_b32.
; - GFX9 and GFX10PLUS use four v_pk_lshlrev_b16, one per dword.
1051 define <4 x float> @v_shl_v8i16(<8 x i16> %value, <8 x i16> %amount) {
1052 ; GFX6-LABEL: v_shl_v8i16:
1054 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
1056 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0
1057 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v9
1058 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v8, v1
1059 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v10
1060 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
1061 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v11
1062 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v8, v3
1063 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v12
1064 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4
1065 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v13
1066 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1067 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, v8, v5
1068 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v14
1069 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1070 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1071 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, v8, v6
1072 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v15
1073 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1074 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
1075 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
1076 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, v8, v7
1077 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1078 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5
1079 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1080 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
1081 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1082 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
1083 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1084 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6
1085 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
1086 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1087 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1089 ; GFX8-LABEL: v_shl_v8i16:
1091 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v4, v0
1093 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1094 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v5, v1
1095 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1096 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1097 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v6, v2
1098 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1099 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1100 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v7, v3
1101 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1102 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
1103 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1104 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1106 ; GFX9-LABEL: v_shl_v8i16:
1108 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
1110 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v5, v1
1111 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, v6, v2
1112 ; GFX9-NEXT: v_pk_lshlrev_b16 v3, v7, v3
1113 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX10PLUS-LABEL: v_shl_v8i16:
1116 ; GFX10PLUS: ; %bb.0:
1117 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v4, v0
1119 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v5, v1
1120 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, v6, v2
1121 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, v7, v3
1122 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1123 %result = shl <8 x i16> %value, %amount
1124 %cast = bitcast <8 x i16> %result to <4 x float>
1125 ret <4 x float> %cast
; Variable shl of <8 x i16> in an amdgpu_ps function with inreg operands, so
; value and amount are in SGPRs. The result is bitcast to <4 x i32>.
; What the checks below expect
; - GFX6 has one lane per SGPR (value s0-s7, amount s8-s15), shifts each with
;   s_lshl_b32 and repacks lane pairs into s0-s3 with s_and/s_lshl/s_or.
; - GFX8 splits every packed dword with s_lshr_b32 and s_and_b32, shifts the
;   halves separately and rebuilds each dword with shift/and/or.
; - GFX9 and GFX10PLUS extract the high halves with s_lshr_b32 and recombine
;   with s_pack_ll_b32_b16 (the two differ only in instruction order and
;   scratch registers).
1128 define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
1129 ; GFX6-LABEL: s_shl_v8i16:
1131 ; GFX6-NEXT: s_lshl_b32 s1, s1, s9
1132 ; GFX6-NEXT: s_lshl_b32 s0, s0, s8
1133 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
1134 ; GFX6-NEXT: s_lshl_b32 s2, s2, s10
1135 ; GFX6-NEXT: s_lshl_b32 s3, s3, s11
1136 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
1137 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1138 ; GFX6-NEXT: s_lshl_b32 s5, s5, s13
1139 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1140 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1141 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
1142 ; GFX6-NEXT: s_lshl_b32 s4, s4, s12
1143 ; GFX6-NEXT: s_lshl_b32 s7, s7, s15
1144 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1145 ; GFX6-NEXT: s_and_b32 s3, s5, 0xffff
1146 ; GFX6-NEXT: s_lshl_b32 s6, s6, s14
1147 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1148 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1149 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1150 ; GFX6-NEXT: s_and_b32 s4, s7, 0xffff
1151 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1152 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1153 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1154 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1155 ; GFX6-NEXT: ; return to shader part epilog
1157 ; GFX8-LABEL: s_shl_v8i16:
1159 ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1160 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
1161 ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1162 ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1163 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
1164 ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1165 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4
1166 ; GFX8-NEXT: s_lshl_b32 s4, s8, s12
1167 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1168 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1169 ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1170 ; GFX8-NEXT: s_lshl_b32 s1, s1, s5
1171 ; GFX8-NEXT: s_lshl_b32 s5, s9, s13
1172 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1173 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
1174 ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1175 ; GFX8-NEXT: s_and_b32 s3, s3, 0xffff
1176 ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1177 ; GFX8-NEXT: s_lshl_b32 s2, s2, s6
1178 ; GFX8-NEXT: s_lshl_b32 s6, s10, s14
1179 ; GFX8-NEXT: s_or_b32 s0, s4, s0
1180 ; GFX8-NEXT: s_lshl_b32 s4, s5, 16
1181 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
1182 ; GFX8-NEXT: s_lshl_b32 s3, s3, s7
1183 ; GFX8-NEXT: s_lshl_b32 s7, s11, s15
1184 ; GFX8-NEXT: s_or_b32 s1, s4, s1
1185 ; GFX8-NEXT: s_lshl_b32 s4, s6, 16
1186 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1187 ; GFX8-NEXT: s_or_b32 s2, s4, s2
1188 ; GFX8-NEXT: s_lshl_b32 s4, s7, 16
1189 ; GFX8-NEXT: s_and_b32 s3, s3, 0xffff
1190 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1191 ; GFX8-NEXT: ; return to shader part epilog
1193 ; GFX9-LABEL: s_shl_v8i16:
1195 ; GFX9-NEXT: s_lshr_b32 s8, s0, 16
1196 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
1197 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
1198 ; GFX9-NEXT: s_lshl_b32 s4, s8, s9
1199 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1200 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
1201 ; GFX9-NEXT: s_lshr_b32 s8, s5, 16
1202 ; GFX9-NEXT: s_lshl_b32 s1, s1, s5
1203 ; GFX9-NEXT: s_lshl_b32 s4, s4, s8
1204 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
1205 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
1206 ; GFX9-NEXT: s_lshr_b32 s5, s6, 16
1207 ; GFX9-NEXT: s_lshl_b32 s2, s2, s6
1208 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5
1209 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1210 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1211 ; GFX9-NEXT: s_lshr_b32 s5, s7, 16
1212 ; GFX9-NEXT: s_lshl_b32 s3, s3, s7
1213 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5
1214 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
1215 ; GFX9-NEXT: ; return to shader part epilog
1217 ; GFX10PLUS-LABEL: s_shl_v8i16:
1218 ; GFX10PLUS: ; %bb.0:
1219 ; GFX10PLUS-NEXT: s_lshr_b32 s8, s0, 16
1220 ; GFX10PLUS-NEXT: s_lshr_b32 s9, s4, 16
1221 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s4
1222 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s8, s9
1223 ; GFX10PLUS-NEXT: s_lshr_b32 s8, s1, 16
1224 ; GFX10PLUS-NEXT: s_lshr_b32 s9, s5, 16
1225 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s5
1226 ; GFX10PLUS-NEXT: s_lshl_b32 s5, s8, s9
1227 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1228 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s5
1229 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s2, 16
1230 ; GFX10PLUS-NEXT: s_lshr_b32 s5, s6, 16
1231 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s6
1232 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s5
1233 ; GFX10PLUS-NEXT: s_lshr_b32 s5, s3, 16
1234 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s7, 16
1235 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s7
1236 ; GFX10PLUS-NEXT: s_lshl_b32 s5, s5, s6
1237 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1238 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s5
1239 ; GFX10PLUS-NEXT: ; return to shader part epilog
1240 %result = shl <8 x i16> %value, %amount
1241 %cast = bitcast <8 x i16> %result to <4 x i32>
; Variable shl of an i64 held in v[0:1]; only v2, the low dword of the i64
; amount, is read as the shift count. GFX6 is expected to use v_lshl_b64
; (value operand first), while GFX8, GFX9 and GFX10PLUS use the reversed
; operand form v_lshlrev_b64 (amount first).
1245 define i64 @v_shl_i64(i64 %value, i64 %amount) {
1246 ; GFX6-LABEL: v_shl_i64:
1248 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
1250 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX8-LABEL: v_shl_i64:
1254 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1256 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1258 ; GFX9-LABEL: v_shl_i64:
1260 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1262 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1264 ; GFX10PLUS-LABEL: v_shl_i64:
1265 ; GFX10PLUS: ; %bb.0:
1266 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
1268 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1269 %result = shl i64 %value, %amount
; shl of an i64 in VGPRs by the constant 63. The checks expect the 64-bit
; shift to be narrowed to 32 bits, with the low input dword v0 shifted left
; by 31 into the high result dword v1 and the low result dword set to 0.
; GFX11 is checked separately because it fuses the move and the shift into
; one v_dual instruction.
1273 define i64 @v_shl_i64_63(i64 %value) {
1274 ; GCN-LABEL: v_shl_i64_63:
1276 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v0
1278 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1279 ; GCN-NEXT: s_setpc_b64 s[30:31]
1281 ; GFX10-LABEL: v_shl_i64_63:
1283 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1284 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v0
1285 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1286 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1288 ; GFX11-LABEL: v_shl_i64_63:
1290 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 31, v0
1292 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1293 %result = shl i64 %value, 63
; shl of an i64 in VGPRs by the constant 33. The checks expect a 32-bit shift
; of the low input dword v0 by 1 into the high result dword v1, with the low
; result dword set to 0. GFX11 is checked separately because it fuses both
; operations into one v_dual instruction.
1297 define i64 @v_shl_i64_33(i64 %value) {
1298 ; GCN-LABEL: v_shl_i64_33:
1300 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1302 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1303 ; GCN-NEXT: s_setpc_b64 s[30:31]
1305 ; GFX10-LABEL: v_shl_i64_33:
1307 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1309 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1310 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1312 ; GFX11-LABEL: v_shl_i64_33:
1314 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1315 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 1, v0
1316 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1317 %result = shl i64 %value, 33
; shl of an i64 in VGPRs by exactly 32. The checks expect no shift
; instruction at all, only a copy of the low dword v0 into the high dword v1
; followed by zeroing v0. GFX11 is checked separately because it fuses the
; two moves into one v_dual instruction.
1321 define i64 @v_shl_i64_32(i64 %value) {
1322 ; GCN-LABEL: v_shl_i64_32:
1324 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GCN-NEXT: v_mov_b32_e32 v1, v0
1326 ; GCN-NEXT: v_mov_b32_e32 v0, 0
1327 ; GCN-NEXT: s_setpc_b64 s[30:31]
1329 ; GFX10-LABEL: v_shl_i64_32:
1331 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
1333 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1334 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1336 ; GFX11-LABEL: v_shl_i64_32:
1338 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
1340 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1341 %result = shl i64 %value, 32
; shl of an i64 in VGPRs by the constant 31. Unlike the constant shifts of 32
; or more in this file, the checks expect a full 64-bit shift with 31 as an
; immediate operand, v_lshl_b64 on GFX6 and v_lshlrev_b64 on GFX8, GFX9 and
; GFX10PLUS.
1345 define i64 @v_shl_i64_31(i64 %value) {
1346 ; GFX6-LABEL: v_shl_i64_31:
1348 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1349 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1350 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1352 ; GFX8-LABEL: v_shl_i64_31:
1354 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1356 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1358 ; GFX9-LABEL: v_shl_i64_31:
1360 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1362 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1364 ; GFX10PLUS-LABEL: v_shl_i64_31:
1365 ; GFX10PLUS: ; %bb.0:
1366 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1368 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1369 %result = shl i64 %value, 31
; Variable shl of an i64 in an amdgpu_ps function with inreg operands. The
; value is in s[0:1] and only s2, the low dword of the amount, is read. Every
; target is expected to emit a single s_lshl_b64.
1373 define amdgpu_ps i64 @s_shl_i64(i64 inreg %value, i64 inreg %amount) {
1374 ; GCN-LABEL: s_shl_i64:
1376 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
1377 ; GCN-NEXT: ; return to shader part epilog
1379 ; GFX10PLUS-LABEL: s_shl_i64:
1380 ; GFX10PLUS: ; %bb.0:
1381 ; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
1382 ; GFX10PLUS-NEXT: ; return to shader part epilog
1383 %result = shl i64 %value, %amount
; shl of an SGPR i64 (amdgpu_ps, inreg) by the constant 63. The checks expect
; a 32-bit s_lshl_b32 of the low dword s0 by 31 into the high dword s1, then
; s0 set to 0, on every target.
1387 define amdgpu_ps i64 @s_shl_i64_63(i64 inreg %value) {
1388 ; GCN-LABEL: s_shl_i64_63:
1390 ; GCN-NEXT: s_lshl_b32 s1, s0, 31
1391 ; GCN-NEXT: s_mov_b32 s0, 0
1392 ; GCN-NEXT: ; return to shader part epilog
1394 ; GFX10PLUS-LABEL: s_shl_i64_63:
1395 ; GFX10PLUS: ; %bb.0:
1396 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 31
1397 ; GFX10PLUS-NEXT: s_mov_b32 s0, 0
1398 ; GFX10PLUS-NEXT: ; return to shader part epilog
1399 %result = shl i64 %value, 63
; shl of an SGPR i64 (amdgpu_ps, inreg) by the constant 33. The checks expect
; a 32-bit s_lshl_b32 of the low dword s0 by 1 into the high dword s1, then
; s0 set to 0, on every target.
1403 define amdgpu_ps i64 @s_shl_i64_33(i64 inreg %value) {
1404 ; GCN-LABEL: s_shl_i64_33:
1406 ; GCN-NEXT: s_lshl_b32 s1, s0, 1
1407 ; GCN-NEXT: s_mov_b32 s0, 0
1408 ; GCN-NEXT: ; return to shader part epilog
1410 ; GFX10PLUS-LABEL: s_shl_i64_33:
1411 ; GFX10PLUS: ; %bb.0:
1412 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 1
1413 ; GFX10PLUS-NEXT: s_mov_b32 s0, 0
1414 ; GFX10PLUS-NEXT: ; return to shader part epilog
1415 %result = shl i64 %value, 33
; shl of an SGPR i64 (amdgpu_ps, inreg) by exactly 32. The checks expect no
; shift instruction, only s0 copied into s1 and then s0 set to 0, on every
; target.
1419 define amdgpu_ps i64 @s_shl_i64_32(i64 inreg %value) {
1420 ; GCN-LABEL: s_shl_i64_32:
1422 ; GCN-NEXT: s_mov_b32 s1, s0
1423 ; GCN-NEXT: s_mov_b32 s0, 0
1424 ; GCN-NEXT: ; return to shader part epilog
1426 ; GFX10PLUS-LABEL: s_shl_i64_32:
1427 ; GFX10PLUS: ; %bb.0:
1428 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
1429 ; GFX10PLUS-NEXT: s_mov_b32 s0, 0
1430 ; GFX10PLUS-NEXT: ; return to shader part epilog
1431 %result = shl i64 %value, 32
; shl of an SGPR i64 (amdgpu_ps, inreg) by the constant 31. The checks expect
; a full 64-bit s_lshl_b64 with 31 as an immediate operand on every target.
1435 define amdgpu_ps i64 @s_shl_i64_31(i64 inreg %value) {
1436 ; GCN-LABEL: s_shl_i64_31:
1438 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 31
1439 ; GCN-NEXT: ; return to shader part epilog
1441 ; GFX10PLUS-LABEL: s_shl_i64_31:
1442 ; GFX10PLUS: ; %bb.0:
1443 ; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], 31
1444 ; GFX10PLUS-NEXT: ; return to shader part epilog
1445 %result = shl i64 %value, 31
; Mixed register banks, with the i64 value inreg (SGPR pair s[0:1]) and the
; amount not inreg (VGPR, low dword v0). The checks expect one VALU 64-bit
; shift that reads the SGPR pair directly and writes v[0:1], with no separate
; copy of the value into VGPRs. The result is bitcast to <2 x float>.
1449 define amdgpu_ps <2 x float> @shl_i64_sv(i64 inreg %value, i64 %amount) {
1450 ; GFX6-LABEL: shl_i64_sv:
1452 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0
1453 ; GFX6-NEXT: ; return to shader part epilog
1455 ; GFX8-LABEL: shl_i64_sv:
1457 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1458 ; GFX8-NEXT: ; return to shader part epilog
1460 ; GFX9-LABEL: shl_i64_sv:
1462 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1463 ; GFX9-NEXT: ; return to shader part epilog
1465 ; GFX10PLUS-LABEL: shl_i64_sv:
1466 ; GFX10PLUS: ; %bb.0:
1467 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
1468 ; GFX10PLUS-NEXT: ; return to shader part epilog
1469 %result = shl i64 %value, %amount
1470 %cast = bitcast i64 %result to <2 x float>
1471 ret <2 x float> %cast
; Mixed register banks, with the i64 value not inreg (VGPR pair v[0:1]) and
; the amount inreg (SGPR, low dword s0). The checks expect one VALU 64-bit
; shift that takes the SGPR amount directly as an operand. The result is
; bitcast to <2 x float>.
1474 define amdgpu_ps <2 x float> @shl_i64_vs(i64 %value, i64 inreg %amount) {
1475 ; GFX6-LABEL: shl_i64_vs:
1477 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s0
1478 ; GFX6-NEXT: ; return to shader part epilog
1480 ; GFX8-LABEL: shl_i64_vs:
1482 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1483 ; GFX8-NEXT: ; return to shader part epilog
1485 ; GFX9-LABEL: shl_i64_vs:
1487 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1488 ; GFX9-NEXT: ; return to shader part epilog
1490 ; GFX10PLUS-LABEL: shl_i64_vs:
1491 ; GFX10PLUS: ; %bb.0:
1492 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1]
1493 ; GFX10PLUS-NEXT: ; return to shader part epilog
1494 %result = shl i64 %value, %amount
1495 %cast = bitcast i64 %result to <2 x float>
1496 ret <2 x float> %cast
; Variable shl of <2 x i64> in VGPRs. The checks expect two independent
; 64-bit shifts, element 0 in v[0:1] shifted by v4 and element 1 in v[2:3]
; shifted by v6 (the low dwords of the two i64 amounts). GFX6 uses
; v_lshl_b64, the later targets use v_lshlrev_b64.
1499 define <2 x i64> @v_shl_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1500 ; GFX6-LABEL: v_shl_v2i64:
1502 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
1504 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v6
1505 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1507 ; GFX8-LABEL: v_shl_v2i64:
1509 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1511 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1512 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1514 ; GFX9-LABEL: v_shl_v2i64:
1516 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1518 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1519 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1521 ; GFX10PLUS-LABEL: v_shl_v2i64:
1522 ; GFX10PLUS: ; %bb.0:
1523 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1524 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
1525 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
1526 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1527 %result = shl <2 x i64> %value, %amount
1528 ret <2 x i64> %result
; shl of <2 x i64> in VGPRs by a splat constant of 31. The checks expect two
; full 64-bit shifts, one per element, each with 31 as an immediate operand.
1531 define <2 x i64> @v_shl_v2i64_31(<2 x i64> %value) {
1532 ; GFX6-LABEL: v_shl_v2i64_31:
1534 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
1536 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 31
1537 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1539 ; GFX8-LABEL: v_shl_v2i64_31:
1541 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1542 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1543 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1544 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1546 ; GFX9-LABEL: v_shl_v2i64_31:
1548 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1549 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1550 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1551 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1553 ; GFX10PLUS-LABEL: v_shl_v2i64_31:
1554 ; GFX10PLUS: ; %bb.0:
1555 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
1557 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
1558 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1559 %result = shl <2 x i64> %value, <i64 31, i64 31>
1560 ret <2 x i64> %result
; Variable shl of <2 x i64> in SGPRs (amdgpu_ps, inreg operands). The checks
; expect two s_lshl_b64 on every target, element 0 in s[0:1] shifted by s4
; and element 1 in s[2:3] shifted by s6 (the low dwords of the amounts).
1563 define amdgpu_ps <2 x i64> @s_shl_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1564 ; GCN-LABEL: s_shl_v2i64:
1566 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
1567 ; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
1568 ; GCN-NEXT: ; return to shader part epilog
1570 ; GFX10PLUS-LABEL: s_shl_v2i64:
1571 ; GFX10PLUS: ; %bb.0:
1572 ; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
1573 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
1574 ; GFX10PLUS-NEXT: ; return to shader part epilog
1575 %result = shl <2 x i64> %value, %amount
1576 ret <2 x i64> %result
; Variable shl of an i65 in VGPRs, a width that needs more than one 64-bit
; shift. In the checks the value occupies v0-v2 and v3 is the only amount
; register read. The expected expansion computes 64 - amt and amt - 64,
; issues 64-bit shifts by amt, 64 - amt and amt - 64, and then selects
; between the candidates with v_cndmask_b32 on the conditions amt < 64
; (v_cmp_gt_u32 64, v3) and amt == 0. Results are written to v0, v1 and v2.
; GFX10 and GFX11 have separate checks because GFX11 fuses two of the
; selects into one v_dual instruction.
1579 define i65 @v_shl_i65(i65 %value, i65 %amount) {
1580 ; GFX6-LABEL: v_shl_i65:
1582 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v3
1584 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v4
1585 ; GFX6-NEXT: v_lshl_b64 v[5:6], v[2:3], v3
1586 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v3
1587 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], v3
1588 ; GFX6-NEXT: v_or_b32_e32 v9, v4, v5
1589 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], v8
1590 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1591 ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1592 ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1593 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1594 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1595 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1596 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1598 ; GFX8-LABEL: v_shl_i65:
1600 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v3
1602 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
1603 ; GFX8-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
1604 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v3
1605 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1606 ; GFX8-NEXT: v_or_b32_e32 v9, v4, v5
1607 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
1608 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1609 ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1610 ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1611 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1612 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1613 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1614 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1616 ; GFX9-LABEL: v_shl_i65:
1618 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619 ; GFX9-NEXT: v_sub_u32_e32 v4, 64, v3
1620 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
1621 ; GFX9-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
1622 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v3
1623 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1624 ; GFX9-NEXT: v_or_b32_e32 v9, v4, v5
1625 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
1626 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1627 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
1628 ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1629 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
1630 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1631 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1632 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1634 ; GFX10-LABEL: v_shl_i65:
1636 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
1638 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
1639 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
1640 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1641 ; GFX10-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
1642 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1643 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1]
1644 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v4
1645 ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
1646 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v1, vcc_lo
1647 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
1648 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
1649 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
1650 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1652 ; GFX11-LABEL: v_shl_i65:
1654 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655 ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3
1656 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
1657 ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
1658 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1659 ; GFX11-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
1660 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
1661 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1]
1662 ; GFX11-NEXT: v_or_b32_e32 v1, v5, v4
1663 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
1664 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v1 :: v_dual_cndmask_b32 v1, 0, v7
1665 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
1666 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
1667 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1668 %result = shl i65 %value, %amount
; shl of an i65 in VGPRs by the constant 33. The checks expect the low input
; dword v0 shifted left by 1 to become result dword v1 (staged through v4),
; result dword v0 set to 0, and v[2:3] produced by a 64-bit logical right
; shift of the original v[0:1] by 31. GFX11 has its own checks because it
; fuses the two final moves into one v_dual instruction.
1672 define i65 @v_shl_i65_33(i65 %value) {
1673 ; GFX6-LABEL: v_shl_i65_33:
1675 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1677 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 31
1678 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
1679 ; GFX6-NEXT: v_mov_b32_e32 v1, v4
1680 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1682 ; GFX8-LABEL: v_shl_i65_33:
1684 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1685 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1686 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1687 ; GFX8-NEXT: v_mov_b32_e32 v0, 0
1688 ; GFX8-NEXT: v_mov_b32_e32 v1, v4
1689 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1691 ; GFX9-LABEL: v_shl_i65_33:
1693 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1695 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1696 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1697 ; GFX9-NEXT: v_mov_b32_e32 v1, v4
1698 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1700 ; GFX10-LABEL: v_shl_i65_33:
1702 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1703 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1704 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1705 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1706 ; GFX10-NEXT: v_mov_b32_e32 v1, v4
1707 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1709 ; GFX11-LABEL: v_shl_i65_33:
1711 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 1, v0
1713 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
1714 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v4
1715 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1716 %result = shl i65 %value, 33
; Variable shl of an i65 in SGPRs (amdgpu_ps, inreg operands). In the checks
; the value is read from s[0:1] and s2, and s3 is the only amount register
; read. The expected expansion computes amt - 64 and 64 - amt, records
; amt < 64 and amt == 0 as 0/1 flags via s_cmp/s_cselect_b32, issues 64-bit
; shifts by amt, 64 - amt and amt - 64, and picks the results with
; s_cselect_b64. The two check groups differ only in scratch register
; choice.
1720 define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
1721 ; GCN-LABEL: s_shl_i65:
1723 ; GCN-NEXT: s_sub_i32 s10, s3, 64
1724 ; GCN-NEXT: s_sub_i32 s6, 64, s3
1725 ; GCN-NEXT: s_cmp_lt_u32 s3, 64
1726 ; GCN-NEXT: s_cselect_b32 s11, 1, 0
1727 ; GCN-NEXT: s_cmp_eq_u32 s3, 0
1728 ; GCN-NEXT: s_cselect_b32 s12, 1, 0
1729 ; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
1730 ; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s3
1731 ; GCN-NEXT: s_lshl_b64 s[4:5], s[0:1], s3
1732 ; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1733 ; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
1734 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1735 ; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
1736 ; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
1737 ; GCN-NEXT: s_cmp_lg_u32 s12, 0
1738 ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1739 ; GCN-NEXT: ; return to shader part epilog
1741 ; GFX10PLUS-LABEL: s_shl_i65:
1742 ; GFX10PLUS: ; %bb.0:
1743 ; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1744 ; GFX10PLUS-NEXT: s_sub_i32 s4, 64, s3
1745 ; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
1746 ; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
1747 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
1748 ; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1749 ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
1750 ; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[2:3], s3
1751 ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[0:1], s3
1752 ; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
1753 ; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
1754 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1755 ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
1756 ; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
1757 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1758 ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1759 ; GFX10PLUS-NEXT: ; return to shader part epilog
1760 %result = shl i65 %value, %amount
; shl of an i65 in SGPRs (amdgpu_ps, inreg) by the constant 33. The checks
; expect result dword s1 to be s0 shifted left by 1, result dword s0 to be 0,
; and s[2:3] to be the 64-bit OR of the pair {0, s2 << 1} with the original
; s[0:1] logically shifted right by 31. The GCN sequence stages s0 << 1 in s4
; and copies it to s1 at the end, while the GFX10PLUS sequence writes s1
; directly and so needs one instruction fewer.
1764 define amdgpu_ps i65 @s_shl_i65_33(i65 inreg %value) {
1765 ; GCN-LABEL: s_shl_i65_33:
1767 ; GCN-NEXT: s_lshl_b32 s4, s0, 1
1768 ; GCN-NEXT: s_mov_b32 s6, 0
1769 ; GCN-NEXT: s_lshl_b32 s7, s2, 1
1770 ; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], 31
1771 ; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[0:1]
1772 ; GCN-NEXT: s_mov_b32 s0, 0
1773 ; GCN-NEXT: s_mov_b32 s1, s4
1774 ; GCN-NEXT: ; return to shader part epilog
1776 ; GFX10PLUS-LABEL: s_shl_i65_33:
1777 ; GFX10PLUS: ; %bb.0:
1778 ; GFX10PLUS-NEXT: s_mov_b32 s4, 0
1779 ; GFX10PLUS-NEXT: s_lshl_b32 s5, s2, 1
1780 ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], 31
1781 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 1
1782 ; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
1783 ; GFX10PLUS-NEXT: s_mov_b32 s0, 0
1784 ; GFX10PLUS-NEXT: ; return to shader part epilog
1785 %result = shl i65 %value, 33
1789 ; FIXME: Argument lowering asserts on <2 x i65>, so the two tests below stay commented out.
1790 ; define <2 x i65> @v_shl_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1791 ; %result = shl <2 x i65> %value, %amount
1792 ; ret <2 x i65> %result
1795 ; define amdgpu_ps <2 x i65> @s_shl_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1796 ; %result = shl <2 x i65> %value, %amount
1797 ; ret <2 x i65> %result