1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
; Variable arithmetic shift right of an i8; the checks use v0 for the value and
; v1 for the amount. GFX6 and GFX10 mask the amount with 0xff and sign-extend
; the value with v_bfe_i32 before shifting (32-bit shift on GFX6, 16-bit shift
; on GFX10). GFX8 and GFX9 expect an SDWA 16-bit shift with sext(v0).
7 define i8 @v_ashr_i8(i8 %value, i8 %amount) {
8 ; GFX6-LABEL: v_ashr_i8:
10 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
12 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
13 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0
14 ; GFX6-NEXT: s_setpc_b64 s[30:31]
16 ; GFX8-LABEL: v_ashr_i8:
18 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
20 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1
21 ; GFX8-NEXT: s_setpc_b64 s[30:31]
23 ; GFX9-LABEL: v_ashr_i8:
25 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
27 ; GFX9-NEXT: s_setpc_b64 s[30:31]
29 ; GFX10-LABEL: v_ashr_i8:
31 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
33 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
34 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8
35 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
36 ; GFX10-NEXT: s_setpc_b64 s[30:31]
37 %result = ashr i8 %value, %amount
; Arithmetic shift right of an i8 in v0 by the constant 7. GFX6 and GFX10
; sign-extend with v_bfe_i32 and shift by 7. GFX8 expects a left shift by 8
; followed by a 16-bit arithmetic shift by 15. GFX9 materializes 7 in v1 and
; uses an SDWA shift with sext(v0).
41 define i8 @v_ashr_i8_7(i8 %value) {
42 ; GFX6-LABEL: v_ashr_i8_7:
44 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
46 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 7, v0
47 ; GFX6-NEXT: s_setpc_b64 s[30:31]
49 ; GFX8-LABEL: v_ashr_i8_7:
51 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
53 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
54 ; GFX8-NEXT: s_setpc_b64 s[30:31]
56 ; GFX9-LABEL: v_ashr_i8_7:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_mov_b32_e32 v1, 7
60 ; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
61 ; GFX9-NEXT: s_setpc_b64 s[30:31]
63 ; GFX10-LABEL: v_ashr_i8_7:
65 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
67 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8
68 ; GFX10-NEXT: v_ashrrev_i16 v0, 7, v0
69 ; GFX10-NEXT: s_setpc_b64 s[30:31]
70 %result = ashr i8 %value, 7
; amdgpu_ps variant with both i8 operands marked inreg; the checks use s0 for
; the value and s1 for the amount. GFX6 sign-extends only s0 before
; s_ashr_i32, while GFX8, GFX9 and GFX10 also sign-extend s1.
75 define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
76 ; GFX6-LABEL: s_ashr_i8:
78 ; GFX6-NEXT: s_sext_i32_i8 s0, s0
79 ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
80 ; GFX6-NEXT: ; return to shader part epilog
82 ; GFX8-LABEL: s_ashr_i8:
84 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
85 ; GFX8-NEXT: s_sext_i32_i8 s1, s1
86 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
87 ; GFX8-NEXT: ; return to shader part epilog
89 ; GFX9-LABEL: s_ashr_i8:
91 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
92 ; GFX9-NEXT: s_sext_i32_i8 s1, s1
93 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
94 ; GFX9-NEXT: ; return to shader part epilog
96 ; GFX10-LABEL: s_ashr_i8:
98 ; GFX10-NEXT: s_sext_i32_i8 s0, s0
99 ; GFX10-NEXT: s_sext_i32_i8 s1, s1
100 ; GFX10-NEXT: s_ashr_i32 s0, s0, s1
101 ; GFX10-NEXT: ; return to shader part epilog
102 %result = ashr i8 %value, %amount
; amdgpu_ps variant: inreg i8 in s0 shifted right arithmetically by the
; constant 7. Every checked target sign-extends s0 with s_sext_i32_i8 and then
; issues s_ashr_i32 with the immediate 7.
106 define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) {
107 ; GCN-LABEL: s_ashr_i8_7:
109 ; GCN-NEXT: s_sext_i32_i8 s0, s0
110 ; GCN-NEXT: s_ashr_i32 s0, s0, 7
111 ; GCN-NEXT: ; return to shader part epilog
113 ; GFX10-LABEL: s_ashr_i8_7:
115 ; GFX10-NEXT: s_sext_i32_i8 s0, s0
116 ; GFX10-NEXT: s_ashr_i32 s0, s0, 7
117 ; GFX10-NEXT: ; return to shader part epilog
118 %result = ashr i8 %value, 7
; Variable arithmetic shift right of an i24 (value in v0, amount in v1). All
; targets mask the amount with 0xffffff, sign-extend the low 24 bits of the
; value with v_bfe_i32, and perform a 32-bit shift.
122 define i24 @v_ashr_i24(i24 %value, i24 %amount) {
123 ; GCN-LABEL: v_ashr_i24:
125 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1
127 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
128 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0
129 ; GCN-NEXT: s_setpc_b64 s[30:31]
131 ; GFX10-LABEL: v_ashr_i24:
133 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
135 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
136 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 24
137 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v1, v0
138 ; GFX10-NEXT: s_setpc_b64 s[30:31]
139 %result = ashr i24 %value, %amount
; Arithmetic shift right of an i24 in v0 by the constant 7: sign-extend the
; low 24 bits with v_bfe_i32, then a 32-bit shift by the immediate 7.
143 define i24 @v_ashr_i24_7(i24 %value) {
144 ; GCN-LABEL: v_ashr_i24_7:
146 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
148 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 7, v0
149 ; GCN-NEXT: s_setpc_b64 s[30:31]
151 ; GFX10-LABEL: v_ashr_i24_7:
153 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
155 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 24
156 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 7, v0
157 ; GFX10-NEXT: s_setpc_b64 s[30:31]
158 %result = ashr i24 %value, 7
; amdgpu_ps variant with inreg i24 operands (value in s0, amount in s1). The
; checks expect s_bfe_i32 on s0 with the operand 0x180000 (presumably encoding
; width 24 at offset 0 - confirm against the ISA), then s_ashr_i32 by s1.
162 define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) {
163 ; GCN-LABEL: s_ashr_i24:
165 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
166 ; GCN-NEXT: s_ashr_i32 s0, s0, s1
167 ; GCN-NEXT: ; return to shader part epilog
169 ; GFX10-LABEL: s_ashr_i24:
171 ; GFX10-NEXT: s_bfe_i32 s0, s0, 0x180000
172 ; GFX10-NEXT: s_ashr_i32 s0, s0, s1
173 ; GFX10-NEXT: ; return to shader part epilog
174 %result = ashr i24 %value, %amount
; amdgpu_ps variant: inreg i24 in s0 shifted right arithmetically by the
; constant 7. The checks expect s_bfe_i32 with operand 0x180000 followed by
; s_ashr_i32 with the immediate 7.
178 define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) {
179 ; GCN-LABEL: s_ashr_i24_7:
181 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
182 ; GCN-NEXT: s_ashr_i32 s0, s0, 7
183 ; GCN-NEXT: ; return to shader part epilog
185 ; GFX10-LABEL: s_ashr_i24_7:
187 ; GFX10-NEXT: s_bfe_i32 s0, s0, 0x180000
188 ; GFX10-NEXT: s_ashr_i32 s0, s0, 7
189 ; GFX10-NEXT: ; return to shader part epilog
190 %result = ashr i24 %value, 7
; Variable arithmetic shift right of an i32 (value in v0, amount in v1). Every
; target expects a single v_ashrrev_i32_e32 with no extension or masking.
194 define i32 @v_ashr_i32(i32 %value, i32 %amount) {
195 ; GCN-LABEL: v_ashr_i32:
197 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0
199 ; GCN-NEXT: s_setpc_b64 s[30:31]
201 ; GFX10-LABEL: v_ashr_i32:
203 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
205 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v1, v0
206 ; GFX10-NEXT: s_setpc_b64 s[30:31]
207 %result = ashr i32 %value, %amount
; Arithmetic shift right of an i32 in v0 by the constant 31; the checks expect
; one v_ashrrev_i32_e32 with the immediate 31.
211 define i32 @v_ashr_i32_31(i32 %value) {
212 ; GCN-LABEL: v_ashr_i32_31:
214 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
216 ; GCN-NEXT: s_setpc_b64 s[30:31]
218 ; GFX10-LABEL: v_ashr_i32_31:
220 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
222 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v0
223 ; GFX10-NEXT: s_setpc_b64 s[30:31]
224 %result = ashr i32 %value, 31
; amdgpu_ps variant with inreg i32 operands; the checks expect a single
; s_ashr_i32 of s0 by s1.
228 define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) {
229 ; GCN-LABEL: s_ashr_i32:
231 ; GCN-NEXT: s_ashr_i32 s0, s0, s1
232 ; GCN-NEXT: ; return to shader part epilog
234 ; GFX10-LABEL: s_ashr_i32:
236 ; GFX10-NEXT: s_ashr_i32 s0, s0, s1
237 ; GFX10-NEXT: ; return to shader part epilog
238 %result = ashr i32 %value, %amount
; amdgpu_ps variant: inreg i32 in s0 shifted right arithmetically by the
; constant 31 with a single s_ashr_i32.
242 define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) {
243 ; GCN-LABEL: s_ashr_i32_31:
245 ; GCN-NEXT: s_ashr_i32 s0, s0, 31
246 ; GCN-NEXT: ; return to shader part epilog
248 ; GFX10-LABEL: s_ashr_i32_31:
250 ; GFX10-NEXT: s_ashr_i32 s0, s0, 31
251 ; GFX10-NEXT: ; return to shader part epilog
252 %result = ashr i32 %value, 31
; Mixed operands: the value is inreg (s0 in the checks) and the amount is in
; v0. GFX6 expects the non-reversed v_ashr_i32_e32 with s0 as first source;
; GFX8, GFX9 and GFX10 expect v_ashrrev_i32_e64. The i32 result is bitcast to
; float to match the declared return type.
256 define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) {
257 ; GFX6-LABEL: ashr_i32_sv:
259 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
260 ; GFX6-NEXT: ; return to shader part epilog
262 ; GFX8-LABEL: ashr_i32_sv:
264 ; GFX8-NEXT: v_ashrrev_i32_e64 v0, v0, s0
265 ; GFX8-NEXT: ; return to shader part epilog
267 ; GFX9-LABEL: ashr_i32_sv:
269 ; GFX9-NEXT: v_ashrrev_i32_e64 v0, v0, s0
270 ; GFX9-NEXT: ; return to shader part epilog
272 ; GFX10-LABEL: ashr_i32_sv:
274 ; GFX10-NEXT: v_ashrrev_i32_e64 v0, v0, s0
275 ; GFX10-NEXT: ; return to shader part epilog
276 %result = ashr i32 %value, %amount
277 %cast = bitcast i32 %result to float
; Mixed operands: the value is in v0 and the amount is inreg (s0 in the
; checks). All targets expect a single v_ashrrev_i32_e32 with s0 as the
; amount. The i32 result is bitcast to float.
281 define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) {
282 ; GCN-LABEL: ashr_i32_vs:
284 ; GCN-NEXT: v_ashrrev_i32_e32 v0, s0, v0
285 ; GCN-NEXT: ; return to shader part epilog
287 ; GFX10-LABEL: ashr_i32_vs:
289 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, s0, v0
290 ; GFX10-NEXT: ; return to shader part epilog
291 %result = ashr i32 %value, %amount
292 %cast = bitcast i32 %result to float
; Element-wise arithmetic shift right of <2 x i32>: the checks expect one
; v_ashrrev_i32_e32 per element (v0 by v2, v1 by v3).
296 define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
297 ; GCN-LABEL: v_ashr_v2i32:
299 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v2, v0
301 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v3, v1
302 ; GCN-NEXT: s_setpc_b64 s[30:31]
304 ; GFX10-LABEL: v_ashr_v2i32:
306 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
308 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v2, v0
309 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v3, v1
310 ; GFX10-NEXT: s_setpc_b64 s[30:31]
311 %result = ashr <2 x i32> %value, %amount
312 ret <2 x i32> %result
; <2 x i32> shifted right arithmetically by the splat constant 31: one
; v_ashrrev_i32_e32 with immediate 31 per element.
315 define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) {
316 ; GCN-LABEL: v_ashr_v2i32_31:
318 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
320 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v1
321 ; GCN-NEXT: s_setpc_b64 s[30:31]
323 ; GFX10-LABEL: v_ashr_v2i32_31:
325 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
327 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v0
328 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v1
329 ; GFX10-NEXT: s_setpc_b64 s[30:31]
330 %result = ashr <2 x i32> %value, <i32 31, i32 31>
331 ret <2 x i32> %result
; amdgpu_ps variant with inreg <2 x i32> operands: one s_ashr_i32 per element
; (s0 by s2, s1 by s3).
334 define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
335 ; GCN-LABEL: s_ashr_v2i32:
337 ; GCN-NEXT: s_ashr_i32 s0, s0, s2
338 ; GCN-NEXT: s_ashr_i32 s1, s1, s3
339 ; GCN-NEXT: ; return to shader part epilog
341 ; GFX10-LABEL: s_ashr_v2i32:
343 ; GFX10-NEXT: s_ashr_i32 s0, s0, s2
344 ; GFX10-NEXT: s_ashr_i32 s1, s1, s3
345 ; GFX10-NEXT: ; return to shader part epilog
346 %result = ashr <2 x i32> %value, %amount
347 ret <2 x i32> %result
; Element-wise arithmetic shift right of <3 x i32>: values in v0-v2, amounts
; in v3-v5, one v_ashrrev_i32_e32 per element.
350 define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
351 ; GCN-LABEL: v_ashr_v3i32:
353 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v3, v0
355 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v4, v1
356 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v5, v2
357 ; GCN-NEXT: s_setpc_b64 s[30:31]
359 ; GFX10-LABEL: v_ashr_v3i32:
361 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
363 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v3, v0
364 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v4, v1
365 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, v5, v2
366 ; GFX10-NEXT: s_setpc_b64 s[30:31]
367 %result = ashr <3 x i32> %value, %amount
368 ret <3 x i32> %result
; amdgpu_ps variant with inreg <3 x i32> operands: values in s0-s2, amounts in
; s3-s5, one s_ashr_i32 per element.
371 define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
372 ; GCN-LABEL: s_ashr_v3i32:
374 ; GCN-NEXT: s_ashr_i32 s0, s0, s3
375 ; GCN-NEXT: s_ashr_i32 s1, s1, s4
376 ; GCN-NEXT: s_ashr_i32 s2, s2, s5
377 ; GCN-NEXT: ; return to shader part epilog
379 ; GFX10-LABEL: s_ashr_v3i32:
381 ; GFX10-NEXT: s_ashr_i32 s0, s0, s3
382 ; GFX10-NEXT: s_ashr_i32 s1, s1, s4
383 ; GFX10-NEXT: s_ashr_i32 s2, s2, s5
384 ; GFX10-NEXT: ; return to shader part epilog
385 %result = ashr <3 x i32> %value, %amount
386 ret <3 x i32> %result
; Element-wise arithmetic shift right of <4 x i32>: values in v0-v3, amounts
; in v4-v7, one v_ashrrev_i32_e32 per element.
389 define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
390 ; GCN-LABEL: v_ashr_v4i32:
392 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v4, v0
394 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v5, v1
395 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v6, v2
396 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v7, v3
397 ; GCN-NEXT: s_setpc_b64 s[30:31]
399 ; GFX10-LABEL: v_ashr_v4i32:
401 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
403 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v4, v0
404 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v5, v1
405 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, v6, v2
406 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, v7, v3
407 ; GFX10-NEXT: s_setpc_b64 s[30:31]
408 %result = ashr <4 x i32> %value, %amount
409 ret <4 x i32> %result
; amdgpu_ps variant with inreg <4 x i32> operands: values in s0-s3, amounts in
; s4-s7, one s_ashr_i32 per element.
412 define amdgpu_ps <4 x i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
413 ; GCN-LABEL: s_ashr_v4i32:
415 ; GCN-NEXT: s_ashr_i32 s0, s0, s4
416 ; GCN-NEXT: s_ashr_i32 s1, s1, s5
417 ; GCN-NEXT: s_ashr_i32 s2, s2, s6
418 ; GCN-NEXT: s_ashr_i32 s3, s3, s7
419 ; GCN-NEXT: ; return to shader part epilog
421 ; GFX10-LABEL: s_ashr_v4i32:
423 ; GFX10-NEXT: s_ashr_i32 s0, s0, s4
424 ; GFX10-NEXT: s_ashr_i32 s1, s1, s5
425 ; GFX10-NEXT: s_ashr_i32 s2, s2, s6
426 ; GFX10-NEXT: s_ashr_i32 s3, s3, s7
427 ; GFX10-NEXT: ; return to shader part epilog
428 %result = ashr <4 x i32> %value, %amount
429 ret <4 x i32> %result
; Element-wise arithmetic shift right of the odd-sized <5 x i32>: values in
; v0-v4, amounts in v5-v9, one v_ashrrev_i32_e32 per element.
432 define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
433 ; GCN-LABEL: v_ashr_v5i32:
435 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v5, v0
437 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v6, v1
438 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v7, v2
439 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v8, v3
440 ; GCN-NEXT: v_ashrrev_i32_e32 v4, v9, v4
441 ; GCN-NEXT: s_setpc_b64 s[30:31]
443 ; GFX10-LABEL: v_ashr_v5i32:
445 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
447 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v5, v0
448 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v6, v1
449 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, v7, v2
450 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, v8, v3
451 ; GFX10-NEXT: v_ashrrev_i32_e32 v4, v9, v4
452 ; GFX10-NEXT: s_setpc_b64 s[30:31]
453 %result = ashr <5 x i32> %value, %amount
454 ret <5 x i32> %result
; amdgpu_ps variant with inreg <5 x i32> operands: values in s0-s4, amounts in
; s5-s9, one s_ashr_i32 per element.
457 define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
458 ; GCN-LABEL: s_ashr_v5i32:
460 ; GCN-NEXT: s_ashr_i32 s0, s0, s5
461 ; GCN-NEXT: s_ashr_i32 s1, s1, s6
462 ; GCN-NEXT: s_ashr_i32 s2, s2, s7
463 ; GCN-NEXT: s_ashr_i32 s3, s3, s8
464 ; GCN-NEXT: s_ashr_i32 s4, s4, s9
465 ; GCN-NEXT: ; return to shader part epilog
467 ; GFX10-LABEL: s_ashr_v5i32:
469 ; GFX10-NEXT: s_ashr_i32 s0, s0, s5
470 ; GFX10-NEXT: s_ashr_i32 s1, s1, s6
471 ; GFX10-NEXT: s_ashr_i32 s2, s2, s7
472 ; GFX10-NEXT: s_ashr_i32 s3, s3, s8
473 ; GFX10-NEXT: s_ashr_i32 s4, s4, s9
474 ; GFX10-NEXT: ; return to shader part epilog
475 %result = ashr <5 x i32> %value, %amount
476 ret <5 x i32> %result
; Element-wise arithmetic shift right of <16 x i32>. Amounts for elements 0-14
; come from v16-v30; the amount for element 15 is fetched with
; buffer_load_dword from s32, and its shift is placed after s_waitcnt vmcnt(0).
; The tahiti/fiji/gfx900 checks reuse v16 for the load after its first use,
; while the GFX10 checks issue the load first into v31.
479 define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
480 ; GCN-LABEL: v_ashr_v16i32:
482 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v16, v0
484 ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
485 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v17, v1
486 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v18, v2
487 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v19, v3
488 ; GCN-NEXT: v_ashrrev_i32_e32 v4, v20, v4
489 ; GCN-NEXT: v_ashrrev_i32_e32 v5, v21, v5
490 ; GCN-NEXT: v_ashrrev_i32_e32 v6, v22, v6
491 ; GCN-NEXT: v_ashrrev_i32_e32 v7, v23, v7
492 ; GCN-NEXT: v_ashrrev_i32_e32 v8, v24, v8
493 ; GCN-NEXT: v_ashrrev_i32_e32 v9, v25, v9
494 ; GCN-NEXT: v_ashrrev_i32_e32 v10, v26, v10
495 ; GCN-NEXT: v_ashrrev_i32_e32 v11, v27, v11
496 ; GCN-NEXT: v_ashrrev_i32_e32 v12, v28, v12
497 ; GCN-NEXT: v_ashrrev_i32_e32 v13, v29, v13
498 ; GCN-NEXT: v_ashrrev_i32_e32 v14, v30, v14
499 ; GCN-NEXT: s_waitcnt vmcnt(0)
500 ; GCN-NEXT: v_ashrrev_i32_e32 v15, v16, v15
501 ; GCN-NEXT: s_setpc_b64 s[30:31]
503 ; GFX10-LABEL: v_ashr_v16i32:
505 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
507 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
508 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v16, v0
509 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v17, v1
510 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, v18, v2
511 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, v19, v3
512 ; GFX10-NEXT: v_ashrrev_i32_e32 v4, v20, v4
513 ; GFX10-NEXT: v_ashrrev_i32_e32 v5, v21, v5
514 ; GFX10-NEXT: v_ashrrev_i32_e32 v6, v22, v6
515 ; GFX10-NEXT: v_ashrrev_i32_e32 v7, v23, v7
516 ; GFX10-NEXT: v_ashrrev_i32_e32 v8, v24, v8
517 ; GFX10-NEXT: v_ashrrev_i32_e32 v9, v25, v9
518 ; GFX10-NEXT: v_ashrrev_i32_e32 v10, v26, v10
519 ; GFX10-NEXT: v_ashrrev_i32_e32 v11, v27, v11
520 ; GFX10-NEXT: v_ashrrev_i32_e32 v12, v28, v12
521 ; GFX10-NEXT: v_ashrrev_i32_e32 v13, v29, v13
522 ; GFX10-NEXT: v_ashrrev_i32_e32 v14, v30, v14
523 ; GFX10-NEXT: s_waitcnt vmcnt(0)
524 ; GFX10-NEXT: v_ashrrev_i32_e32 v15, v31, v15
525 ; GFX10-NEXT: s_setpc_b64 s[30:31]
526 %result = ashr <16 x i32> %value, %amount
527 ret <16 x i32> %result
; amdgpu_ps variant with inreg <16 x i32> operands: values in s0-s15, amounts
; in s16-s31, one s_ashr_i32 per element and no memory access in the checks.
530 define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
531 ; GCN-LABEL: s_ashr_v16i32:
533 ; GCN-NEXT: s_ashr_i32 s0, s0, s16
534 ; GCN-NEXT: s_ashr_i32 s1, s1, s17
535 ; GCN-NEXT: s_ashr_i32 s2, s2, s18
536 ; GCN-NEXT: s_ashr_i32 s3, s3, s19
537 ; GCN-NEXT: s_ashr_i32 s4, s4, s20
538 ; GCN-NEXT: s_ashr_i32 s5, s5, s21
539 ; GCN-NEXT: s_ashr_i32 s6, s6, s22
540 ; GCN-NEXT: s_ashr_i32 s7, s7, s23
541 ; GCN-NEXT: s_ashr_i32 s8, s8, s24
542 ; GCN-NEXT: s_ashr_i32 s9, s9, s25
543 ; GCN-NEXT: s_ashr_i32 s10, s10, s26
544 ; GCN-NEXT: s_ashr_i32 s11, s11, s27
545 ; GCN-NEXT: s_ashr_i32 s12, s12, s28
546 ; GCN-NEXT: s_ashr_i32 s13, s13, s29
547 ; GCN-NEXT: s_ashr_i32 s14, s14, s30
548 ; GCN-NEXT: s_ashr_i32 s15, s15, s31
549 ; GCN-NEXT: ; return to shader part epilog
551 ; GFX10-LABEL: s_ashr_v16i32:
553 ; GFX10-NEXT: s_ashr_i32 s0, s0, s16
554 ; GFX10-NEXT: s_ashr_i32 s1, s1, s17
555 ; GFX10-NEXT: s_ashr_i32 s2, s2, s18
556 ; GFX10-NEXT: s_ashr_i32 s3, s3, s19
557 ; GFX10-NEXT: s_ashr_i32 s4, s4, s20
558 ; GFX10-NEXT: s_ashr_i32 s5, s5, s21
559 ; GFX10-NEXT: s_ashr_i32 s6, s6, s22
560 ; GFX10-NEXT: s_ashr_i32 s7, s7, s23
561 ; GFX10-NEXT: s_ashr_i32 s8, s8, s24
562 ; GFX10-NEXT: s_ashr_i32 s9, s9, s25
563 ; GFX10-NEXT: s_ashr_i32 s10, s10, s26
564 ; GFX10-NEXT: s_ashr_i32 s11, s11, s27
565 ; GFX10-NEXT: s_ashr_i32 s12, s12, s28
566 ; GFX10-NEXT: s_ashr_i32 s13, s13, s29
567 ; GFX10-NEXT: s_ashr_i32 s14, s14, s30
568 ; GFX10-NEXT: s_ashr_i32 s15, s15, s31
569 ; GFX10-NEXT: ; return to shader part epilog
570 %result = ashr <16 x i32> %value, %amount
571 ret <16 x i32> %result
; Variable arithmetic shift right of an i16 (value in v0, amount in v1). GFX6
; masks the amount with 0xffff, sign-extends the value with v_bfe_i32 and uses
; a 32-bit shift. GFX8 and GFX9 expect a single v_ashrrev_i16_e32, and GFX10 a
; single v_ashrrev_i16.
574 define i16 @v_ashr_i16(i16 %value, i16 %amount) {
575 ; GFX6-LABEL: v_ashr_i16:
577 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
579 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
580 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0
581 ; GFX6-NEXT: s_setpc_b64 s[30:31]
583 ; GFX8-LABEL: v_ashr_i16:
585 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0
587 ; GFX8-NEXT: s_setpc_b64 s[30:31]
589 ; GFX9-LABEL: v_ashr_i16:
591 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, v1, v0
593 ; GFX9-NEXT: s_setpc_b64 s[30:31]
595 ; GFX10-LABEL: v_ashr_i16:
597 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
599 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
600 ; GFX10-NEXT: s_setpc_b64 s[30:31]
601 %result = ashr i16 %value, %amount
; Arithmetic shift right of an i16 by the constant 31.
; NOTE(review): 31 is not smaller than the 16-bit width of the operand, which
; the LLVM LangRef defines as producing poison. Consistent with that, the
; checks below contain no shift instruction at all (only the waits and the
; return), so this test does not exercise ashr selection. The scalar sibling
; s_ashr_i16_15 shifts by 15; possibly 15 was intended here too - confirm
; before changing, since the checks would need regenerating.
605 define i16 @v_ashr_i16_31(i16 %value) {
606 ; GCN-LABEL: v_ashr_i16_31:
608 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GCN-NEXT: s_setpc_b64 s[30:31]
611 ; GFX10-LABEL: v_ashr_i16_31:
613 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
615 ; GFX10-NEXT: s_setpc_b64 s[30:31]
616 %result = ashr i16 %value, 31
; amdgpu_ps variant with inreg i16 operands (value in s0, amount in s1). GFX6
; sign-extends only s0 with s_sext_i32_i16 before s_ashr_i32; GFX8, GFX9 and
; GFX10 also sign-extend s1.
620 define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
621 ; GFX6-LABEL: s_ashr_i16:
623 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
624 ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
625 ; GFX6-NEXT: ; return to shader part epilog
627 ; GFX8-LABEL: s_ashr_i16:
629 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
630 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
631 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
632 ; GFX8-NEXT: ; return to shader part epilog
634 ; GFX9-LABEL: s_ashr_i16:
636 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
637 ; GFX9-NEXT: s_sext_i32_i16 s1, s1
638 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
639 ; GFX9-NEXT: ; return to shader part epilog
641 ; GFX10-LABEL: s_ashr_i16:
643 ; GFX10-NEXT: s_sext_i32_i16 s0, s0
644 ; GFX10-NEXT: s_sext_i32_i16 s1, s1
645 ; GFX10-NEXT: s_ashr_i32 s0, s0, s1
646 ; GFX10-NEXT: ; return to shader part epilog
647 %result = ashr i16 %value, %amount
; amdgpu_ps variant: inreg i16 in s0 shifted right arithmetically by the
; constant 15. Every checked target sign-extends s0 with s_sext_i32_i16 and
; then issues s_ashr_i32 with the immediate 15.
651 define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) {
652 ; GCN-LABEL: s_ashr_i16_15:
654 ; GCN-NEXT: s_sext_i32_i16 s0, s0
655 ; GCN-NEXT: s_ashr_i32 s0, s0, 15
656 ; GCN-NEXT: ; return to shader part epilog
658 ; GFX10-LABEL: s_ashr_i16_15:
660 ; GFX10-NEXT: s_sext_i32_i16 s0, s0
661 ; GFX10-NEXT: s_ashr_i32 s0, s0, 15
662 ; GFX10-NEXT: ; return to shader part epilog
663 %result = ashr i16 %value, 15
; Mixed operands for i16: the value is inreg (s0) and the amount is in v0.
; GFX6 masks v0 with 0xffff, sign-extends s0 and uses the non-reversed
; v_ashr_i32_e32; GFX8 and GFX9 expect v_ashrrev_i16_e64, GFX10 expects
; v_ashrrev_i16. The i16 result is bitcast to half.
667 define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
668 ; GFX6-LABEL: ashr_i16_sv:
670 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
671 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
672 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
673 ; GFX6-NEXT: ; return to shader part epilog
675 ; GFX8-LABEL: ashr_i16_sv:
677 ; GFX8-NEXT: v_ashrrev_i16_e64 v0, v0, s0
678 ; GFX8-NEXT: ; return to shader part epilog
680 ; GFX9-LABEL: ashr_i16_sv:
682 ; GFX9-NEXT: v_ashrrev_i16_e64 v0, v0, s0
683 ; GFX9-NEXT: ; return to shader part epilog
685 ; GFX10-LABEL: ashr_i16_sv:
687 ; GFX10-NEXT: v_ashrrev_i16 v0, v0, s0
688 ; GFX10-NEXT: ; return to shader part epilog
689 %result = ashr i16 %value, %amount
690 %cast = bitcast i16 %result to half
; Mixed operands for i16: the value is in v0 and the amount is inreg (s0).
; GFX6 masks s0 with 0xffff via s_and_b32, sign-extends v0 with v_bfe_i32 and
; uses a 32-bit shift; GFX8 and GFX9 expect v_ashrrev_i16_e32, GFX10 expects
; v_ashrrev_i16. The i16 result is bitcast to half.
694 define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
695 ; GFX6-LABEL: ashr_i16_vs:
697 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
698 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
699 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
700 ; GFX6-NEXT: ; return to shader part epilog
702 ; GFX8-LABEL: ashr_i16_vs:
704 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, s0, v0
705 ; GFX8-NEXT: ; return to shader part epilog
707 ; GFX9-LABEL: ashr_i16_vs:
709 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, s0, v0
710 ; GFX9-NEXT: ; return to shader part epilog
712 ; GFX10-LABEL: ashr_i16_vs:
714 ; GFX10-NEXT: v_ashrrev_i16 v0, s0, v0
715 ; GFX10-NEXT: ; return to shader part epilog
716 %result = ashr i16 %value, %amount
717 %cast = bitcast i16 %result to half
; Element-wise arithmetic shift right of <2 x i16>. GFX6 checks treat the two
; elements as separate registers (v0/v1 shifted by masked v2/v3) using 32-bit
; shifts. GFX8 shifts the low half with v_ashrrev_i16_e32 and the high half
; with an SDWA form selecting WORD_1, then combines them with v_or_b32. GFX9
; and GFX10 expect a single packed v_pk_ashrrev_i16.
721 define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
722 ; GFX6-LABEL: v_ashr_v2i16:
724 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
726 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
727 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
728 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v2, v0
729 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
730 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
731 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v2, v1
732 ; GFX6-NEXT: s_setpc_b64 s[30:31]
734 ; GFX8-LABEL: v_ashr_v2i16:
736 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v1, v0
738 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
739 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
740 ; GFX8-NEXT: s_setpc_b64 s[30:31]
742 ; GFX9-LABEL: v_ashr_v2i16:
744 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v1, v0
746 ; GFX9-NEXT: s_setpc_b64 s[30:31]
748 ; GFX10-LABEL: v_ashr_v2i16:
750 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
751 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
752 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, v1, v0
753 ; GFX10-NEXT: s_setpc_b64 s[30:31]
754 %result = ashr <2 x i16> %value, %amount
755 ret <2 x i16> %result
; <2 x i16> shifted right arithmetically by the splat constant 15. GFX6
; sign-extends v0 and v1 and shifts each by 15 with 32-bit shifts. GFX8
; materializes 15 in v2 for the SDWA high-half shift and ORs the halves
; together. GFX9 and GFX10 expect v_pk_ashrrev_i16 with the immediate 15 and
; op_sel_hi:[0,1].
758 define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) {
759 ; GFX6-LABEL: v_ashr_v2i16_15:
761 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
763 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
764 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0
765 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
766 ; GFX6-NEXT: s_setpc_b64 s[30:31]
768 ; GFX8-LABEL: v_ashr_v2i16_15:
770 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
772 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
773 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
774 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
775 ; GFX8-NEXT: s_setpc_b64 s[30:31]
777 ; GFX9-LABEL: v_ashr_v2i16_15:
779 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
781 ; GFX9-NEXT: s_setpc_b64 s[30:31]
783 ; GFX10-LABEL: v_ashr_v2i16_15:
785 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
787 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
788 ; GFX10-NEXT: s_setpc_b64 s[30:31]
789 %result = ashr <2 x i16> %value, <i16 15, i16 15>
790 ret <2 x i16> %result
; amdgpu_ps variant with inreg <2 x i16> operands; the result is bitcast to
; i32. GFX6 checks take the elements in s0/s1 and amounts in s2/s3, shift each
; after s_sext_i32_i16, then repack with s_and/s_lshl/s_or. GFX8 extracts the
; high halves with s_bfe_i32 using 0x100010 and repacks with s_lshl/s_and/s_or.
; GFX9 and GFX10 extract the high halves with s_ashr_i32 by 16 and repack with
; s_pack_ll_b32_b16.
793 define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
794 ; GFX6-LABEL: s_ashr_v2i16:
796 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
797 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
798 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
799 ; GFX6-NEXT: s_ashr_i32 s1, s1, s3
800 ; GFX6-NEXT: s_ashr_i32 s0, s0, s2
801 ; GFX6-NEXT: s_and_b32 s1, s1, s4
802 ; GFX6-NEXT: s_and_b32 s0, s0, s4
803 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
804 ; GFX6-NEXT: s_or_b32 s0, s0, s1
805 ; GFX6-NEXT: ; return to shader part epilog
807 ; GFX8-LABEL: s_ashr_v2i16:
809 ; GFX8-NEXT: s_mov_b32 s3, 0x100010
810 ; GFX8-NEXT: s_sext_i32_i16 s2, s0
811 ; GFX8-NEXT: s_bfe_i32 s0, s0, s3
812 ; GFX8-NEXT: s_sext_i32_i16 s4, s1
813 ; GFX8-NEXT: s_bfe_i32 s1, s1, s3
814 ; GFX8-NEXT: s_ashr_i32 s2, s2, s4
815 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
816 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
817 ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
818 ; GFX8-NEXT: s_or_b32 s0, s0, s1
819 ; GFX8-NEXT: ; return to shader part epilog
821 ; GFX9-LABEL: s_ashr_v2i16:
823 ; GFX9-NEXT: s_sext_i32_i16 s2, s0
824 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
825 ; GFX9-NEXT: s_sext_i32_i16 s3, s1
826 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
827 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
828 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
829 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0
830 ; GFX9-NEXT: ; return to shader part epilog
832 ; GFX10-LABEL: s_ashr_v2i16:
834 ; GFX10-NEXT: s_sext_i32_i16 s2, s0
835 ; GFX10-NEXT: s_ashr_i32 s0, s0, 16
836 ; GFX10-NEXT: s_sext_i32_i16 s3, s1
837 ; GFX10-NEXT: s_ashr_i32 s1, s1, 16
838 ; GFX10-NEXT: s_ashr_i32 s2, s2, s3
839 ; GFX10-NEXT: s_ashr_i32 s0, s0, s1
840 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s2, s0
841 ; GFX10-NEXT: ; return to shader part epilog
842 %result = ashr <2 x i16> %value, %amount
843 %cast = bitcast <2 x i16> %result to i32
; Mixed operands for <2 x i16>: the value is inreg and the amount is in
; vector registers; the result is bitcast to float. GFX6 shifts each element
; separately (s0/s1 by masked v0/v1) and repacks with and/lshl/or. GFX8 copies
; the high half of s0 into v2 for an SDWA shift and ORs the halves. GFX9 and
; GFX10 expect a single v_pk_ashrrev_i16 v0, v0, s0.
847 define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
848 ; GFX6-LABEL: ashr_v2i16_sv:
850 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
851 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
852 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
853 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
854 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
855 ; GFX6-NEXT: s_sext_i32_i16 s0, s1
856 ; GFX6-NEXT: v_ashr_i32_e32 v1, s0, v1
857 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
858 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
859 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
860 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
861 ; GFX6-NEXT: ; return to shader part epilog
863 ; GFX8-LABEL: ashr_v2i16_sv:
865 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
866 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
867 ; GFX8-NEXT: v_ashrrev_i16_e64 v1, v0, s0
868 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
869 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
870 ; GFX8-NEXT: ; return to shader part epilog
872 ; GFX9-LABEL: ashr_v2i16_sv:
874 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v0, s0
875 ; GFX9-NEXT: ; return to shader part epilog
877 ; GFX10-LABEL: ashr_v2i16_sv:
879 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, v0, s0
880 ; GFX10-NEXT: ; return to shader part epilog
881 %result = ashr <2 x i16> %value, %amount
882 %cast = bitcast <2 x i16> %result to float
; Mixed operands for <2 x i16>: the value is in vector registers and the
; amount is inreg; the result is bitcast to float. GFX6 shifts each element
; separately (v0/v1 by masked s0/s1) and repacks with and/lshl/or. GFX8 copies
; the high half of s0 into v2 for an SDWA shift and ORs the halves. GFX9 and
; GFX10 expect a single v_pk_ashrrev_i16 v0, s0, v0.
886 define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
887 ; GFX6-LABEL: ashr_v2i16_vs:
889 ; GFX6-NEXT: s_mov_b32 s2, 0xffff
890 ; GFX6-NEXT: s_and_b32 s0, s0, s2
891 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
892 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
893 ; GFX6-NEXT: s_and_b32 s0, s1, s2
894 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
895 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, s0, v1
896 ; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
897 ; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
898 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
899 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
900 ; GFX6-NEXT: ; return to shader part epilog
902 ; GFX8-LABEL: ashr_v2i16_vs:
904 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
905 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
906 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, s0, v0
907 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
908 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
909 ; GFX8-NEXT: ; return to shader part epilog
911 ; GFX9-LABEL: ashr_v2i16_vs:
913 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, s0, v0
914 ; GFX9-NEXT: ; return to shader part epilog
916 ; GFX10-LABEL: ashr_v2i16_vs:
918 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, s0, v0
919 ; GFX10-NEXT: ; return to shader part epilog
920 %result = ashr <2 x i16> %value, %amount
921 %cast = bitcast <2 x i16> %result to float
926 ; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
927 ; %result = ashr <3 x i16> %value, %amount
928 ; ret <3 x i16> %result
931 ; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
932 ; %result = ashr <3 x i16> %value, %amount
933 ; ret <3 x i16> %result
; Element-wise arithmetic shift right of <4 x i16>; the result is bitcast to
; <2 x float>. GFX6 checks shift four separate registers (v0-v3 by masked
; v4-v7) with 32-bit shifts and repack pairs with and/lshl/or into v0 and v1.
; GFX8 uses an e32 plus an SDWA WORD_1 shift per 32-bit half and ORs them.
; GFX9 and GFX10 expect two v_pk_ashrrev_i16 (v0 by v2, v1 by v3).
936 define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
937 ; GFX6-LABEL: v_ashr_v4i16:
939 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
941 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v4
942 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
943 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0
944 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v5
945 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
946 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1
947 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v6
948 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
949 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2
950 ; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
951 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
952 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
953 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3
954 ; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
955 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
956 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
957 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
958 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
959 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
960 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
961 ; GFX6-NEXT: s_setpc_b64 s[30:31]
963 ; GFX8-LABEL: v_ashr_v4i16:
965 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v2, v0
967 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
968 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v3, v1
969 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
970 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
971 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
972 ; GFX8-NEXT: s_setpc_b64 s[30:31]
974 ; GFX9-LABEL: v_ashr_v4i16:
976 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v2, v0
978 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, v3, v1
979 ; GFX9-NEXT: s_setpc_b64 s[30:31]
981 ; GFX10-LABEL: v_ashr_v4i16:
983 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
985 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, v2, v0
986 ; GFX10-NEXT: v_pk_ashrrev_i16 v1, v3, v1
987 ; GFX10-NEXT: s_setpc_b64 s[30:31]
988 %result = ashr <4 x i16> %value, %amount
989 %cast = bitcast <4 x i16> %result to <2 x float>
990 ret <2 x float> %cast
; Scalar variant: arithmetic shift right of <4 x i16> by a per-element
; variable amount in an amdgpu_ps function with both operands marked inreg;
; the result is bitcast to <2 x i32>.
; The expected code consists only of s_* instructions: each 16-bit element is
; widened to 32 bits, shifted with s_ashr_i32, and the halves are re-packed
; (and/lshl/or on GFX6 and GFX8, s_pack_ll_b32_b16 on GFX9 and GFX10).
993 define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
994 ; GFX6-LABEL: s_ashr_v4i16:
996 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
997 ; GFX6-NEXT: s_mov_b32 s8, 0xffff
998 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
999 ; GFX6-NEXT: s_ashr_i32 s1, s1, s5
1000 ; GFX6-NEXT: s_ashr_i32 s0, s0, s4
1001 ; GFX6-NEXT: s_sext_i32_i16 s2, s2
1002 ; GFX6-NEXT: s_sext_i32_i16 s3, s3
1003 ; GFX6-NEXT: s_and_b32 s1, s1, s8
1004 ; GFX6-NEXT: s_ashr_i32 s2, s2, s6
1005 ; GFX6-NEXT: s_ashr_i32 s3, s3, s7
1006 ; GFX6-NEXT: s_and_b32 s0, s0, s8
1007 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1008 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1009 ; GFX6-NEXT: s_and_b32 s1, s2, s8
1010 ; GFX6-NEXT: s_and_b32 s2, s3, s8
1011 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1012 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1013 ; GFX6-NEXT: ; return to shader part epilog
1015 ; GFX8-LABEL: s_ashr_v4i16:
1017 ; GFX8-NEXT: s_mov_b32 s5, 0x100010
1018 ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1019 ; GFX8-NEXT: s_bfe_i32 s0, s0, s5
1020 ; GFX8-NEXT: s_sext_i32_i16 s6, s1
1021 ; GFX8-NEXT: s_bfe_i32 s1, s1, s5
1022 ; GFX8-NEXT: s_sext_i32_i16 s7, s2
1023 ; GFX8-NEXT: s_bfe_i32 s2, s2, s5
1024 ; GFX8-NEXT: s_sext_i32_i16 s8, s3
1025 ; GFX8-NEXT: s_bfe_i32 s3, s3, s5
1026 ; GFX8-NEXT: s_ashr_i32 s4, s4, s7
1027 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1028 ; GFX8-NEXT: s_ashr_i32 s2, s6, s8
1029 ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1030 ; GFX8-NEXT: s_mov_b32 s3, 0xffff
1031 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1032 ; GFX8-NEXT: s_and_b32 s4, s4, s3
1033 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1034 ; GFX8-NEXT: s_and_b32 s2, s2, s3
1035 ; GFX8-NEXT: s_or_b32 s0, s0, s4
1036 ; GFX8-NEXT: s_or_b32 s1, s1, s2
1037 ; GFX8-NEXT: ; return to shader part epilog
1039 ; GFX9-LABEL: s_ashr_v4i16:
1041 ; GFX9-NEXT: s_sext_i32_i16 s4, s0
1042 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1043 ; GFX9-NEXT: s_sext_i32_i16 s5, s2
1044 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1045 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1046 ; GFX9-NEXT: s_ashr_i32 s0, s0, s2
1047 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1048 ; GFX9-NEXT: s_sext_i32_i16 s2, s1
1049 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1050 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1051 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1052 ; GFX9-NEXT: s_ashr_i32 s2, s2, s4
1053 ; GFX9-NEXT: s_ashr_i32 s1, s1, s3
1054 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1
1055 ; GFX9-NEXT: ; return to shader part epilog
1057 ; GFX10-LABEL: s_ashr_v4i16:
1059 ; GFX10-NEXT: s_sext_i32_i16 s4, s0
1060 ; GFX10-NEXT: s_ashr_i32 s0, s0, 16
1061 ; GFX10-NEXT: s_sext_i32_i16 s5, s2
1062 ; GFX10-NEXT: s_ashr_i32 s2, s2, 16
1063 ; GFX10-NEXT: s_ashr_i32 s4, s4, s5
1064 ; GFX10-NEXT: s_ashr_i32 s0, s0, s2
1065 ; GFX10-NEXT: s_sext_i32_i16 s2, s1
1066 ; GFX10-NEXT: s_ashr_i32 s1, s1, 16
1067 ; GFX10-NEXT: s_sext_i32_i16 s5, s3
1068 ; GFX10-NEXT: s_ashr_i32 s3, s3, 16
1069 ; GFX10-NEXT: s_ashr_i32 s2, s2, s5
1070 ; GFX10-NEXT: s_ashr_i32 s1, s1, s3
1071 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1072 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s2, s1
1073 ; GFX10-NEXT: ; return to shader part epilog
1074 %result = ashr <4 x i16> %value, %amount
1075 %cast = bitcast <4 x i16> %result to <2 x i32>
1080 ; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1081 ; %result = ashr <5 x i16> %value, %amount
1082 ; ret <5 x i16> %result
1085 ; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1086 ; %result = ashr <5 x i16> %value, %amount
1087 ; ret <5 x i16> %result
1090 ; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1091 ; %result = ashr <6 x i16> %value, %amount
1092 ; %cast = bitcast <6 x i16> %result to <3 x float>
1093 ; ret <3 x float> %cast
1096 ; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1097 ; %result = ashr <6 x i16> %value, %amount
1098 ; %cast = bitcast <6 x i16> %result to <3 x i32>
1099 ; ret <3 x i32> %cast
; Arithmetic shift right of <8 x i16> by a per-element variable amount using
; the default calling convention; the result is returned bitcast to
; <4 x float>.
; Expected code: GFX9 and GFX10 use one v_pk_ashrrev_i16 per 32-bit register,
; GFX8 pairs v_ashrrev_i16 with an SDWA form for the high half, and GFX6
; widens every element and shifts with v_ashrrev_i32 before re-packing.
1102 define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
1103 ; GFX6-LABEL: v_ashr_v8i16:
1105 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX6-NEXT: s_mov_b32 s4, 0xffff
1107 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v8
1108 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
1109 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v8, v0
1110 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v9
1111 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
1112 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v8, v1
1113 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v10
1114 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
1115 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v8, v2
1116 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v11
1117 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
1118 ; GFX6-NEXT: v_mov_b32_e32 v16, 0xffff
1119 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v8, v3
1120 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v12
1121 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
1122 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, v8, v4
1123 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v13
1124 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
1125 ; GFX6-NEXT: v_and_b32_e32 v1, v1, v16
1126 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, v8, v5
1127 ; GFX6-NEXT: v_and_b32_e32 v8, s4, v14
1128 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
1129 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v16
1130 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1131 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, v8, v6
1132 ; GFX6-NEXT: v_and_b32_e32 v8, v15, v16
1133 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
1134 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1135 ; GFX6-NEXT: v_and_b32_e32 v1, v2, v16
1136 ; GFX6-NEXT: v_and_b32_e32 v2, v3, v16
1137 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, v8, v7
1138 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1139 ; GFX6-NEXT: v_and_b32_e32 v3, v5, v16
1140 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1141 ; GFX6-NEXT: v_and_b32_e32 v2, v4, v16
1142 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1143 ; GFX6-NEXT: v_and_b32_e32 v4, v7, v16
1144 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1145 ; GFX6-NEXT: v_and_b32_e32 v3, v6, v16
1146 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
1147 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1148 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1150 ; GFX8-LABEL: v_ashr_v8i16:
1152 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153 ; GFX8-NEXT: v_ashrrev_i16_e32 v8, v4, v0
1154 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1155 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v5, v1
1156 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1157 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1158 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v6, v2
1159 ; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1160 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1161 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v7, v3
1162 ; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1163 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
1164 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1165 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1167 ; GFX9-LABEL: v_ashr_v8i16:
1169 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1170 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v4, v0
1171 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, v5, v1
1172 ; GFX9-NEXT: v_pk_ashrrev_i16 v2, v6, v2
1173 ; GFX9-NEXT: v_pk_ashrrev_i16 v3, v7, v3
1174 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1176 ; GFX10-LABEL: v_ashr_v8i16:
1178 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1180 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, v4, v0
1181 ; GFX10-NEXT: v_pk_ashrrev_i16 v1, v5, v1
1182 ; GFX10-NEXT: v_pk_ashrrev_i16 v2, v6, v2
1183 ; GFX10-NEXT: v_pk_ashrrev_i16 v3, v7, v3
1184 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1185 %result = ashr <8 x i16> %value, %amount
1186 %cast = bitcast <8 x i16> %result to <4 x float>
1187 ret <4 x float> %cast
; Scalar variant: arithmetic shift right of <8 x i16> by a per-element
; variable amount in an amdgpu_ps function with both operands marked inreg;
; the result is bitcast to <4 x i32>.
; The expected code consists only of s_* instructions: each 16-bit element is
; widened to 32 bits, shifted with s_ashr_i32, and the halves are re-packed
; (and/lshl/or on GFX6 and GFX8, s_pack_ll_b32_b16 on GFX9 and GFX10).
1190 define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
1191 ; GFX6-LABEL: s_ashr_v8i16:
1193 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
1194 ; GFX6-NEXT: s_mov_b32 s16, 0xffff
1195 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
1196 ; GFX6-NEXT: s_ashr_i32 s1, s1, s9
1197 ; GFX6-NEXT: s_ashr_i32 s0, s0, s8
1198 ; GFX6-NEXT: s_sext_i32_i16 s2, s2
1199 ; GFX6-NEXT: s_sext_i32_i16 s3, s3
1200 ; GFX6-NEXT: s_and_b32 s1, s1, s16
1201 ; GFX6-NEXT: s_ashr_i32 s2, s2, s10
1202 ; GFX6-NEXT: s_ashr_i32 s3, s3, s11
1203 ; GFX6-NEXT: s_sext_i32_i16 s5, s5
1204 ; GFX6-NEXT: s_and_b32 s0, s0, s16
1205 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1206 ; GFX6-NEXT: s_sext_i32_i16 s4, s4
1207 ; GFX6-NEXT: s_ashr_i32 s5, s5, s13
1208 ; GFX6-NEXT: s_sext_i32_i16 s7, s7
1209 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1210 ; GFX6-NEXT: s_and_b32 s1, s2, s16
1211 ; GFX6-NEXT: s_and_b32 s2, s3, s16
1212 ; GFX6-NEXT: s_ashr_i32 s4, s4, s12
1213 ; GFX6-NEXT: s_sext_i32_i16 s6, s6
1214 ; GFX6-NEXT: s_ashr_i32 s7, s7, s15
1215 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1216 ; GFX6-NEXT: s_and_b32 s3, s5, s16
1217 ; GFX6-NEXT: s_ashr_i32 s6, s6, s14
1218 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1219 ; GFX6-NEXT: s_and_b32 s2, s4, s16
1220 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1221 ; GFX6-NEXT: s_and_b32 s4, s7, s16
1222 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1223 ; GFX6-NEXT: s_and_b32 s3, s6, s16
1224 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1225 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1226 ; GFX6-NEXT: ; return to shader part epilog
1228 ; GFX8-LABEL: s_ashr_v8i16:
1230 ; GFX8-NEXT: s_mov_b32 s9, 0x100010
1231 ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1232 ; GFX8-NEXT: s_bfe_i32 s0, s0, s9
1233 ; GFX8-NEXT: s_sext_i32_i16 s10, s1
1234 ; GFX8-NEXT: s_bfe_i32 s1, s1, s9
1235 ; GFX8-NEXT: s_sext_i32_i16 s12, s3
1236 ; GFX8-NEXT: s_bfe_i32 s3, s3, s9
1237 ; GFX8-NEXT: s_sext_i32_i16 s13, s4
1238 ; GFX8-NEXT: s_bfe_i32 s4, s4, s9
1239 ; GFX8-NEXT: s_sext_i32_i16 s14, s5
1240 ; GFX8-NEXT: s_bfe_i32 s5, s5, s9
1241 ; GFX8-NEXT: s_sext_i32_i16 s16, s7
1242 ; GFX8-NEXT: s_bfe_i32 s7, s7, s9
1243 ; GFX8-NEXT: s_sext_i32_i16 s11, s2
1244 ; GFX8-NEXT: s_bfe_i32 s2, s2, s9
1245 ; GFX8-NEXT: s_sext_i32_i16 s15, s6
1246 ; GFX8-NEXT: s_bfe_i32 s6, s6, s9
1247 ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1248 ; GFX8-NEXT: s_ashr_i32 s4, s10, s14
1249 ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1250 ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1251 ; GFX8-NEXT: s_mov_b32 s7, 0xffff
1252 ; GFX8-NEXT: s_ashr_i32 s5, s11, s15
1253 ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1254 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1255 ; GFX8-NEXT: s_and_b32 s4, s4, s7
1256 ; GFX8-NEXT: s_ashr_i32 s8, s8, s13
1257 ; GFX8-NEXT: s_ashr_i32 s6, s12, s16
1258 ; GFX8-NEXT: s_or_b32 s1, s1, s4
1259 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1260 ; GFX8-NEXT: s_and_b32 s4, s5, s7
1261 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1262 ; GFX8-NEXT: s_and_b32 s8, s8, s7
1263 ; GFX8-NEXT: s_or_b32 s2, s2, s4
1264 ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1265 ; GFX8-NEXT: s_and_b32 s4, s6, s7
1266 ; GFX8-NEXT: s_or_b32 s0, s0, s8
1267 ; GFX8-NEXT: s_or_b32 s3, s3, s4
1268 ; GFX8-NEXT: ; return to shader part epilog
1270 ; GFX9-LABEL: s_ashr_v8i16:
1272 ; GFX9-NEXT: s_sext_i32_i16 s8, s0
1273 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1274 ; GFX9-NEXT: s_sext_i32_i16 s9, s4
1275 ; GFX9-NEXT: s_ashr_i32 s4, s4, 16
1276 ; GFX9-NEXT: s_ashr_i32 s8, s8, s9
1277 ; GFX9-NEXT: s_ashr_i32 s0, s0, s4
1278 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s8, s0
1279 ; GFX9-NEXT: s_sext_i32_i16 s4, s1
1280 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1281 ; GFX9-NEXT: s_sext_i32_i16 s8, s5
1282 ; GFX9-NEXT: s_ashr_i32 s5, s5, 16
1283 ; GFX9-NEXT: s_ashr_i32 s4, s4, s8
1284 ; GFX9-NEXT: s_ashr_i32 s1, s1, s5
1285 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1286 ; GFX9-NEXT: s_sext_i32_i16 s4, s2
1287 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1288 ; GFX9-NEXT: s_sext_i32_i16 s5, s6
1289 ; GFX9-NEXT: s_ashr_i32 s6, s6, 16
1290 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1291 ; GFX9-NEXT: s_ashr_i32 s2, s2, s6
1292 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1293 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1294 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1295 ; GFX9-NEXT: s_sext_i32_i16 s5, s7
1296 ; GFX9-NEXT: s_ashr_i32 s6, s7, 16
1297 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1298 ; GFX9-NEXT: s_ashr_i32 s3, s3, s6
1299 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3
1300 ; GFX9-NEXT: ; return to shader part epilog
1302 ; GFX10-LABEL: s_ashr_v8i16:
1304 ; GFX10-NEXT: s_sext_i32_i16 s8, s0
1305 ; GFX10-NEXT: s_ashr_i32 s0, s0, 16
1306 ; GFX10-NEXT: s_sext_i32_i16 s9, s4
1307 ; GFX10-NEXT: s_ashr_i32 s4, s4, 16
1308 ; GFX10-NEXT: s_ashr_i32 s8, s8, s9
1309 ; GFX10-NEXT: s_ashr_i32 s0, s0, s4
1310 ; GFX10-NEXT: s_sext_i32_i16 s4, s1
1311 ; GFX10-NEXT: s_ashr_i32 s1, s1, 16
1312 ; GFX10-NEXT: s_sext_i32_i16 s9, s5
1313 ; GFX10-NEXT: s_ashr_i32 s5, s5, 16
1314 ; GFX10-NEXT: s_ashr_i32 s4, s4, s9
1315 ; GFX10-NEXT: s_ashr_i32 s1, s1, s5
1316 ; GFX10-NEXT: s_sext_i32_i16 s5, s6
1317 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1318 ; GFX10-NEXT: s_sext_i32_i16 s4, s2
1319 ; GFX10-NEXT: s_ashr_i32 s2, s2, 16
1320 ; GFX10-NEXT: s_ashr_i32 s6, s6, 16
1321 ; GFX10-NEXT: s_ashr_i32 s4, s4, s5
1322 ; GFX10-NEXT: s_ashr_i32 s2, s2, s6
1323 ; GFX10-NEXT: s_sext_i32_i16 s5, s3
1324 ; GFX10-NEXT: s_ashr_i32 s3, s3, 16
1325 ; GFX10-NEXT: s_sext_i32_i16 s6, s7
1326 ; GFX10-NEXT: s_ashr_i32 s7, s7, 16
1327 ; GFX10-NEXT: s_ashr_i32 s5, s5, s6
1328 ; GFX10-NEXT: s_ashr_i32 s3, s3, s7
1329 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s8, s0
1330 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1331 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s5, s3
1332 ; GFX10-NEXT: ; return to shader part epilog
1333 %result = ashr <8 x i16> %value, %amount
1334 %cast = bitcast <8 x i16> %result to <4 x i32>
; Arithmetic shift right of an i64 by a variable i64 amount using the default
; calling convention.
; Expected code is a single 64-bit shift: v_ashr_i64 on GFX6 and the
; operand-reversed v_ashrrev_i64 on GFX8, GFX9 and GFX10.
1338 define i64 @v_ashr_i64(i64 %value, i64 %amount) {
1339 ; GFX6-LABEL: v_ashr_i64:
1341 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v2
1343 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1345 ; GFX8-LABEL: v_ashr_i64:
1347 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1348 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1349 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1351 ; GFX9-LABEL: v_ashr_i64:
1353 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1354 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1355 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1357 ; GFX10-LABEL: v_ashr_i64:
1359 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1361 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1362 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1363 %result = ashr i64 %value, %amount
; Arithmetic shift right of an i64 by the constant 63.
; Expected code needs no 64-bit shift: the high half (v1) is shifted right by
; 31 and that value is written to both result registers.
1367 define i64 @v_ashr_i64_63(i64 %value) {
1368 ; GCN-LABEL: v_ashr_i64_63:
1370 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v1
1372 ; GCN-NEXT: v_mov_b32_e32 v1, v0
1373 ; GCN-NEXT: s_setpc_b64 s[30:31]
1375 ; GFX10-LABEL: v_ashr_i64_63:
1377 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1378 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1379 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v1
1380 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
1381 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1382 %result = ashr i64 %value, 63
; Arithmetic shift right of an i64 by the constant 33 (more than 32).
; Expected code uses two 32-bit shifts of the high half (v1): by 1 for the
; low result register and by 31 for the high result register.
1386 define i64 @v_ashr_i64_33(i64 %value) {
1387 ; GCN-LABEL: v_ashr_i64_33:
1389 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1390 ; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1391 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 1, v1
1392 ; GCN-NEXT: v_mov_b32_e32 v1, v2
1393 ; GCN-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX10-LABEL: v_ashr_i64_33:
1397 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1399 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 1, v1
1400 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v1
1401 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1402 %result = ashr i64 %value, 33
; Arithmetic shift right of an i64 by the constant 32 (exactly one register).
; Expected code copies the high half into the low result register and fills
; the high result register with a 32-bit shift by 31.
1406 define i64 @v_ashr_i64_32(i64 %value) {
1407 ; GCN-LABEL: v_ashr_i64_32:
1409 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410 ; GCN-NEXT: v_mov_b32_e32 v0, v1
1411 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1412 ; GCN-NEXT: s_setpc_b64 s[30:31]
1414 ; GFX10-LABEL: v_ashr_i64_32:
1416 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1418 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
1419 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1420 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1421 %result = ashr i64 %value, 32
; Arithmetic shift right of an i64 by the constant 31 (less than 32).
; Unlike the 32/33/63 cases, the expected code keeps a full 64-bit shift with
; an inline constant amount (v_ashr_i64 on GFX6, v_ashrrev_i64 elsewhere).
1425 define i64 @v_ashr_i64_31(i64 %value) {
1426 ; GFX6-LABEL: v_ashr_i64_31:
1428 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
1430 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1432 ; GFX8-LABEL: v_ashr_i64_31:
1434 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1436 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1438 ; GFX9-LABEL: v_ashr_i64_31:
1440 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1441 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1442 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1444 ; GFX10-LABEL: v_ashr_i64_31:
1446 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1447 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1448 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1449 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1450 %result = ashr i64 %value, 31
; Scalar variant: arithmetic shift right of an inreg i64 by a variable inreg
; i64 amount in an amdgpu_ps function.
; Expected code is a single s_ashr_i64 on every tested target.
1454 define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
1455 ; GCN-LABEL: s_ashr_i64:
1457 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
1458 ; GCN-NEXT: ; return to shader part epilog
1460 ; GFX10-LABEL: s_ashr_i64:
1462 ; GFX10-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
1463 ; GFX10-NEXT: ; return to shader part epilog
1464 %result = ashr i64 %value, %amount
; Scalar variant: arithmetic shift right of an inreg i64 by the constant 63.
; Expected code shifts the high half (s1) right by 31 with s_ashr_i32 and
; copies that value into the other result register.
1468 define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
1469 ; GCN-LABEL: s_ashr_i64_63:
1471 ; GCN-NEXT: s_ashr_i32 s0, s1, 31
1472 ; GCN-NEXT: s_mov_b32 s1, s0
1473 ; GCN-NEXT: ; return to shader part epilog
1475 ; GFX10-LABEL: s_ashr_i64_63:
1477 ; GFX10-NEXT: s_ashr_i32 s0, s1, 31
1478 ; GFX10-NEXT: s_mov_b32 s1, s0
1479 ; GFX10-NEXT: ; return to shader part epilog
1480 %result = ashr i64 %value, 63
; Scalar variant: arithmetic shift right of an inreg i64 by the constant 33.
; Expected code uses two s_ashr_i32 of the high half (s1): by 1 for the low
; result register and by 31 for the high result register.
1484 define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
1485 ; GCN-LABEL: s_ashr_i64_33:
1487 ; GCN-NEXT: s_ashr_i32 s2, s1, 31
1488 ; GCN-NEXT: s_ashr_i32 s0, s1, 1
1489 ; GCN-NEXT: s_mov_b32 s1, s2
1490 ; GCN-NEXT: ; return to shader part epilog
1492 ; GFX10-LABEL: s_ashr_i64_33:
1494 ; GFX10-NEXT: s_ashr_i32 s0, s1, 1
1495 ; GFX10-NEXT: s_ashr_i32 s1, s1, 31
1496 ; GFX10-NEXT: ; return to shader part epilog
1497 %result = ashr i64 %value, 33
; Scalar variant: arithmetic shift right of an inreg i64 by the constant 32.
; Expected code copies the high half (s1) into s0 and then replaces s1 with
; its own value shifted right by 31.
1501 define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
1502 ; GCN-LABEL: s_ashr_i64_32:
1504 ; GCN-NEXT: s_mov_b32 s0, s1
1505 ; GCN-NEXT: s_ashr_i32 s1, s1, 31
1506 ; GCN-NEXT: ; return to shader part epilog
1508 ; GFX10-LABEL: s_ashr_i64_32:
1510 ; GFX10-NEXT: s_mov_b32 s0, s1
1511 ; GFX10-NEXT: s_ashr_i32 s1, s1, 31
1512 ; GFX10-NEXT: ; return to shader part epilog
1513 %result = ashr i64 %value, 32
; Scalar variant: arithmetic shift right of an inreg i64 by the constant 31.
; Expected code keeps a single s_ashr_i64 with an inline constant amount.
1517 define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
1518 ; GCN-LABEL: s_ashr_i64_31:
1520 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], 31
1521 ; GCN-NEXT: ; return to shader part epilog
1523 ; GFX10-LABEL: s_ashr_i64_31:
1525 ; GFX10-NEXT: s_ashr_i64 s[0:1], s[0:1], 31
1526 ; GFX10-NEXT: ; return to shader part epilog
1527 %result = ashr i64 %value, 31
; Mixed operands ("sv"): the shifted i64 value is inreg while the amount is a
; regular argument; the result is bitcast to <2 x float>.
; Expected code is one vector 64-bit shift that reads the value directly from
; s[0:1] and the amount from v0.
1531 define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
1532 ; GFX6-LABEL: ashr_i64_sv:
1534 ; GFX6-NEXT: v_ashr_i64 v[0:1], s[0:1], v0
1535 ; GFX6-NEXT: ; return to shader part epilog
1537 ; GFX8-LABEL: ashr_i64_sv:
1539 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1540 ; GFX8-NEXT: ; return to shader part epilog
1542 ; GFX9-LABEL: ashr_i64_sv:
1544 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1545 ; GFX9-NEXT: ; return to shader part epilog
1547 ; GFX10-LABEL: ashr_i64_sv:
1549 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1550 ; GFX10-NEXT: ; return to shader part epilog
1551 %result = ashr i64 %value, %amount
1552 %cast = bitcast i64 %result to <2 x float>
1553 ret <2 x float> %cast
; Mixed operands ("vs"): the shifted i64 value is a regular argument while
; the amount is inreg; the result is bitcast to <2 x float>.
; Expected code is one vector 64-bit shift that reads the value from v[0:1]
; and the amount directly from s0.
1556 define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
1557 ; GFX6-LABEL: ashr_i64_vs:
1559 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], s0
1560 ; GFX6-NEXT: ; return to shader part epilog
1562 ; GFX8-LABEL: ashr_i64_vs:
1564 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1565 ; GFX8-NEXT: ; return to shader part epilog
1567 ; GFX9-LABEL: ashr_i64_vs:
1569 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1570 ; GFX9-NEXT: ; return to shader part epilog
1572 ; GFX10-LABEL: ashr_i64_vs:
1574 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1575 ; GFX10-NEXT: ; return to shader part epilog
1576 %result = ashr i64 %value, %amount
1577 %cast = bitcast i64 %result to <2 x float>
1578 ret <2 x float> %cast
; Arithmetic shift right of <2 x i64> by a per-element variable amount using
; the default calling convention.
; Expected code is one 64-bit shift per element; only the low register of
; each amount element (v4 and v6) is used as the shift amount.
1581 define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1582 ; GFX6-LABEL: v_ashr_v2i64:
1584 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v4
1586 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], v6
1587 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1589 ; GFX8-LABEL: v_ashr_v2i64:
1591 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1593 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1594 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1596 ; GFX9-LABEL: v_ashr_v2i64:
1598 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1600 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1601 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1603 ; GFX10-LABEL: v_ashr_v2i64:
1605 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1607 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1608 ; GFX10-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1609 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1610 %result = ashr <2 x i64> %value, %amount
1611 ret <2 x i64> %result
; Arithmetic shift right of <2 x i64> by the splat constant 31.
; Expected code is one 64-bit shift per element with an inline constant
; amount (v_ashr_i64 on GFX6, v_ashrrev_i64 elsewhere).
1614 define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
1615 ; GFX6-LABEL: v_ashr_v2i64_31:
1617 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
1619 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 31
1620 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1622 ; GFX8-LABEL: v_ashr_v2i64_31:
1624 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1626 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1627 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX9-LABEL: v_ashr_v2i64_31:
1631 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1633 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1634 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1636 ; GFX10-LABEL: v_ashr_v2i64_31:
1638 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1640 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1641 ; GFX10-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1642 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1643 %result = ashr <2 x i64> %value, <i64 31, i64 31>
1644 ret <2 x i64> %result
; Scalar variant: arithmetic shift right of an inreg <2 x i64> by a
; per-element variable inreg amount in an amdgpu_ps function.
; Expected code is one s_ashr_i64 per element, taking the amounts from s4
; and s6.
1647 define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1648 ; GCN-LABEL: s_ashr_v2i64:
1650 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s4
1651 ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s6
1652 ; GCN-NEXT: ; return to shader part epilog
1654 ; GFX10-LABEL: s_ashr_v2i64:
1656 ; GFX10-NEXT: s_ashr_i64 s[0:1], s[0:1], s4
1657 ; GFX10-NEXT: s_ashr_i64 s[2:3], s[2:3], s6
1658 ; GFX10-NEXT: ; return to shader part epilog
1659 %result = ashr <2 x i64> %value, %amount
1660 ret <2 x i64> %result
; Arithmetic shift right of an i65 (wider than any native shift) by a
; variable i65 amount using the default calling convention.
; Expected code first sign-extends the single top bit held in v2
; (v_bfe_i32 ..., 0, 1), then builds the result from 64-bit shifts by the
; amount, by 64 - amount and by amount - 64, and picks between them with
; v_cndmask based on the compares "amount < 64" and "amount == 0".
1663 define i65 @v_ashr_i65(i65 %value, i65 %amount) {
1664 ; GFX6-LABEL: v_ashr_i65:
1666 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; GFX6-NEXT: v_bfe_i32 v4, v2, 0, 1
1668 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1669 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
1670 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
1671 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
1672 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
1673 ; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
1674 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
1675 ; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1676 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[4:5], v2
1677 ; GFX6-NEXT: v_or_b32_e32 v7, v7, v9
1678 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1679 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1680 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1681 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1682 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1683 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1684 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1685 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX8-LABEL: v_ashr_i65:
1689 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX8-NEXT: v_bfe_i32 v4, v2, 0, 1
1691 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1692 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
1693 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1694 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1695 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
1696 ; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
1697 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
1698 ; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1699 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
1700 ; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
1701 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1702 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1703 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1704 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1705 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1706 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1707 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1708 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX9-LABEL: v_ashr_i65:
1712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX9-NEXT: v_bfe_i32 v4, v2, 0, 1
1714 ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1715 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
1716 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1717 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1718 ; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
1719 ; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
1720 ; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
1721 ; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1722 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
1723 ; GFX9-NEXT: v_or_b32_e32 v7, v7, v9
1724 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1725 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1726 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1727 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1728 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1729 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1730 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1731 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1733 ; GFX10-LABEL: v_ashr_i65:
1735 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1737 ; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
1738 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1739 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
1740 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1741 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1742 ; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1743 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v3
1744 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
1745 ; GFX10-NEXT: v_ashrrev_i64 v[10:11], v10, v[4:5]
1746 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v8
1747 ; GFX10-NEXT: v_or_b32_e32 v8, v7, v9
1748 ; GFX10-NEXT: v_ashrrev_i64 v[6:7], v3, v[4:5]
1749 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v5
1750 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
1751 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v11, v8, vcc_lo
1752 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v0, s4
1753 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v4, v1, s4
1754 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc_lo
1755 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1756 %result = ashr i65 %value, %amount
; Arithmetic shift right of an i65 by the constant 33.
; With a known amount the expected code has no compares or selects: the top
; bit in v2 is sign-extended (v_bfe_i32 ..., 0, 1), shifted left by 31 as a
; 64-bit value and OR-ed with the original v1 shifted right by 1; the final
; result register is a 32-bit arithmetic shift by 1.
1760 define i65 @v_ashr_i65_33(i65 %value) {
1761 ; GFX6-LABEL: v_ashr_i65_33:
1763 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764 ; GFX6-NEXT: v_mov_b32_e32 v3, v1
1765 ; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1
1766 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1767 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], 31
1768 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1769 ; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
1770 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1771 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1773 ; GFX8-LABEL: v_ashr_i65_33:
1775 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776 ; GFX8-NEXT: v_mov_b32_e32 v3, v1
1777 ; GFX8-NEXT: v_bfe_i32 v1, v2, 0, 1
1778 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1779 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1780 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1781 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
1782 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1783 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1785 ; GFX9-LABEL: v_ashr_i65_33:
1787 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1789 ; GFX9-NEXT: v_bfe_i32 v1, v2, 0, 1
1790 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1791 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1792 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1793 ; GFX9-NEXT: v_or_b32_e32 v0, v3, v0
1794 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1795 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1797 ; GFX10-LABEL: v_ashr_i65_33:
1799 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1800 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1801 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
1802 ; GFX10-NEXT: v_bfe_i32 v1, v2, 0, 1
1803 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1804 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1805 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1806 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1807 ; GFX10-NEXT: v_or_b32_e32 v0, v3, v0
1808 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1809 %result = ashr i65 %value, 33
; Scalar variant: arithmetic shift right of an inreg i65 by a variable inreg
; i65 amount in an amdgpu_ps function.
; Expected code sign-extends the top bit with s_bfe_i64 (0x10000), builds the
; result from 64-bit scalar shifts by the amount (s3), by 64 - amount and by
; amount - 64, and picks between them with s_cselect_b64 based on the compares
; "amount < 64" and "amount == 0".
1813 define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
1814 ; GCN-LABEL: s_ashr_i65:
1816 ; GCN-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1817 ; GCN-NEXT: s_sub_i32 s10, s3, 64
1818 ; GCN-NEXT: s_sub_i32 s8, 64, s3
1819 ; GCN-NEXT: s_cmp_lt_u32 s3, 64
1820 ; GCN-NEXT: s_cselect_b32 s11, 1, 0
1821 ; GCN-NEXT: s_cmp_eq_u32 s3, 0
1822 ; GCN-NEXT: s_cselect_b32 s12, 1, 0
1823 ; GCN-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
1824 ; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1825 ; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1826 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1827 ; GCN-NEXT: s_ashr_i32 s8, s5, 31
1828 ; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
1829 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1830 ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1831 ; GCN-NEXT: s_cmp_lg_u32 s12, 0
1832 ; GCN-NEXT: s_mov_b32 s9, s8
1833 ; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1834 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1835 ; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
1836 ; GCN-NEXT: ; return to shader part epilog
1838 ; GFX10-LABEL: s_ashr_i65:
1840 ; GFX10-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1841 ; GFX10-NEXT: s_sub_i32 s12, s3, 64
1842 ; GFX10-NEXT: s_sub_i32 s8, 64, s3
1843 ; GFX10-NEXT: s_cmp_lt_u32 s3, 64
1844 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
1845 ; GFX10-NEXT: s_cmp_eq_u32 s3, 0
1846 ; GFX10-NEXT: s_cselect_b32 s14, 1, 0
1847 ; GFX10-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
1848 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1849 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1850 ; GFX10-NEXT: s_ashr_i32 s10, s5, 31
1851 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1852 ; GFX10-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
1853 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
1854 ; GFX10-NEXT: s_mov_b32 s11, s10
1855 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1856 ; GFX10-NEXT: s_cmp_lg_u32 s14, 0
1857 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1858 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
1859 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
1860 ; GFX10-NEXT: ; return to shader part epilog
1861 %result = ashr i65 %value, %amount
; Scalar variant: arithmetic shift right of an inreg i65 by the constant 33.
; Expected code has no compares or selects: the top bit is sign-extended with
; s_bfe_i64 (0x10000), shifted left by 31 and OR-ed with s1 shifted right by
; 1 (zero-extended to 64 bits); the final result register is s3 shifted
; arithmetically by 1.
1865 define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
1866 ; GCN-LABEL: s_ashr_i65_33:
1868 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1869 ; GCN-NEXT: s_lshr_b32 s0, s1, 1
1870 ; GCN-NEXT: s_mov_b32 s1, 0
1871 ; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1872 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1873 ; GCN-NEXT: s_ashr_i32 s2, s3, 1
1874 ; GCN-NEXT: ; return to shader part epilog
1876 ; GFX10-LABEL: s_ashr_i65_33:
1878 ; GFX10-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1879 ; GFX10-NEXT: s_lshr_b32 s0, s1, 1
1880 ; GFX10-NEXT: s_mov_b32 s1, 0
1881 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1882 ; GFX10-NEXT: s_ashr_i32 s2, s3, 1
1883 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1884 ; GFX10-NEXT: ; return to shader part epilog
1885 %result = ashr i65 %value, 33
1889 ; FIXME: Argument lowering hits an assertion for <2 x i65>, so the tests below are disabled.
1890 ; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1891 ; %result = ashr <2 x i65> %value, %amount
1892 ; ret <2 x i65> %result
1895 ; define amdgpu_ps <2 x i65> @s_ashr_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1896 ; %result = ashr <2 x i65> %value, %amount
1897 ; ret <2 x i65> %result