1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Arithmetic shift right of an i8 by a variable i8 amount. Both operands are
; plain (non-inreg) arguments; the expected code uses vector (v_*) instructions
; and differs per subtarget, so each check prefix has its own sequence.
8 define i8 @v_ashr_i8(i8 %value, i8 %amount) {
9 ; GFX6-LABEL: v_ashr_i8:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
13 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
14 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0
15 ; GFX6-NEXT: s_setpc_b64 s[30:31]
17 ; GFX8-LABEL: v_ashr_i8:
19 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
21 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1
22 ; GFX8-NEXT: s_setpc_b64 s[30:31]
24 ; GFX9-LABEL: v_ashr_i8:
26 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
28 ; GFX9-NEXT: s_setpc_b64 s[30:31]
30 ; GFX10PLUS-LABEL: v_ashr_i8:
32 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
34 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8
35 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0
36 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
37 %result = ashr i8 %value, %amount
; Arithmetic shift right of an i8 by the constant 7 (bit width minus one).
; The value is a plain (non-inreg) argument; the expected code uses vector
; (v_*) instructions with a separate sequence per check prefix.
41 define i8 @v_ashr_i8_7(i8 %value) {
42 ; GFX6-LABEL: v_ashr_i8_7:
44 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
46 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 7, v0
47 ; GFX6-NEXT: s_setpc_b64 s[30:31]
49 ; GFX8-LABEL: v_ashr_i8_7:
51 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
53 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
54 ; GFX8-NEXT: s_setpc_b64 s[30:31]
56 ; GFX9-LABEL: v_ashr_i8_7:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_mov_b32_e32 v1, 7
60 ; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
61 ; GFX9-NEXT: s_setpc_b64 s[30:31]
63 ; GFX10PLUS-LABEL: v_ashr_i8_7:
65 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8
67 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 7, v0
68 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
69 %result = ashr i8 %value, 7
; Arithmetic shift right of an i8 by a variable i8 amount in an amdgpu_ps
; function. Both operands are inreg arguments; the expected code uses scalar
; (s_*) instructions on s0/s1.
73 define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
74 ; GFX6-LABEL: s_ashr_i8:
76 ; GFX6-NEXT: s_sext_i32_i8 s0, s0
77 ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
78 ; GFX6-NEXT: ; return to shader part epilog
80 ; GFX8-LABEL: s_ashr_i8:
82 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
83 ; GFX8-NEXT: s_sext_i32_i8 s1, s1
84 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
85 ; GFX8-NEXT: ; return to shader part epilog
87 ; GFX9-LABEL: s_ashr_i8:
89 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
90 ; GFX9-NEXT: s_sext_i32_i8 s1, s1
91 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
92 ; GFX9-NEXT: ; return to shader part epilog
94 ; GFX10PLUS-LABEL: s_ashr_i8:
96 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
97 ; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
98 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
99 ; GFX10PLUS-NEXT: ; return to shader part epilog
100 %result = ashr i8 %value, %amount
; Arithmetic shift right of an inreg i8 by the constant 7 in an amdgpu_ps
; function. Both check prefixes expect the same two scalar instructions:
; sign-extend from 8 bits, then a 32-bit shift by 7.
104 define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) {
105 ; GCN-LABEL: s_ashr_i8_7:
107 ; GCN-NEXT: s_sext_i32_i8 s0, s0
108 ; GCN-NEXT: s_ashr_i32 s0, s0, 7
109 ; GCN-NEXT: ; return to shader part epilog
111 ; GFX10PLUS-LABEL: s_ashr_i8_7:
112 ; GFX10PLUS: ; %bb.0:
113 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
114 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 7
115 ; GFX10PLUS-NEXT: ; return to shader part epilog
116 %result = ashr i8 %value, 7
; Arithmetic shift right of an i24 by a variable i24 amount (non-inreg
; arguments). The expected code masks the amount to 24 bits (0xffffff),
; sign-extends the value from 24 bits with v_bfe_i32, then does a 32-bit shift.
121 define i24 @v_ashr_i24(i24 %value, i24 %amount) {
122 ; GCN-LABEL: v_ashr_i24:
124 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1
126 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
127 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0
128 ; GCN-NEXT: s_setpc_b64 s[30:31]
130 ; GFX10PLUS-LABEL: v_ashr_i24:
131 ; GFX10PLUS: ; %bb.0:
132 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
134 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
135 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0
136 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
137 %result = ashr i24 %value, %amount
; Arithmetic shift right of a non-inreg i24 by the constant 7. The expected
; code sign-extends the value from 24 bits and then shifts by the inline
; constant; no masking of the amount is expected.
141 define i24 @v_ashr_i24_7(i24 %value) {
142 ; GCN-LABEL: v_ashr_i24_7:
144 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
146 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 7, v0
147 ; GCN-NEXT: s_setpc_b64 s[30:31]
149 ; GFX10PLUS-LABEL: v_ashr_i24_7:
150 ; GFX10PLUS: ; %bb.0:
151 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
153 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 7, v0
154 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
155 %result = ashr i24 %value, 7
; Arithmetic shift right of an i24 by a variable i24 amount in an amdgpu_ps
; function with inreg operands. The expected code is an s_bfe_i32 with the
; packed operand 0x180000 followed by a 32-bit scalar shift by s1.
159 define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) {
160 ; GCN-LABEL: s_ashr_i24:
162 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
163 ; GCN-NEXT: s_ashr_i32 s0, s0, s1
164 ; GCN-NEXT: ; return to shader part epilog
166 ; GFX10PLUS-LABEL: s_ashr_i24:
167 ; GFX10PLUS: ; %bb.0:
168 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x180000
169 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
170 ; GFX10PLUS-NEXT: ; return to shader part epilog
171 %result = ashr i24 %value, %amount
; Arithmetic shift right of an inreg i24 by the constant 7 in an amdgpu_ps
; function. The expected code is an s_bfe_i32 with the packed operand
; 0x180000 followed by a 32-bit scalar shift by the inline constant 7.
175 define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) {
176 ; GCN-LABEL: s_ashr_i24_7:
178 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
179 ; GCN-NEXT: s_ashr_i32 s0, s0, 7
180 ; GCN-NEXT: ; return to shader part epilog
182 ; GFX10PLUS-LABEL: s_ashr_i24_7:
183 ; GFX10PLUS: ; %bb.0:
184 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x180000
185 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 7
186 ; GFX10PLUS-NEXT: ; return to shader part epilog
187 %result = ashr i24 %value, 7
; Arithmetic shift right of an i32 by a variable i32 amount (non-inreg
; arguments). A single v_ashrrev_i32 is expected under both check prefixes.
191 define i32 @v_ashr_i32(i32 %value, i32 %amount) {
192 ; GCN-LABEL: v_ashr_i32:
194 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0
196 ; GCN-NEXT: s_setpc_b64 s[30:31]
198 ; GFX10PLUS-LABEL: v_ashr_i32:
199 ; GFX10PLUS: ; %bb.0:
200 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0
202 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
203 %result = ashr i32 %value, %amount
; Arithmetic shift right of a non-inreg i32 by the constant 31 (bit width
; minus one). The constant is expected as an inline operand of v_ashrrev_i32.
207 define i32 @v_ashr_i32_31(i32 %value) {
208 ; GCN-LABEL: v_ashr_i32_31:
210 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
212 ; GCN-NEXT: s_setpc_b64 s[30:31]
214 ; GFX10PLUS-LABEL: v_ashr_i32_31:
215 ; GFX10PLUS: ; %bb.0:
216 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0
218 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
219 %result = ashr i32 %value, 31
; Arithmetic shift right of an i32 by a variable i32 amount in an amdgpu_ps
; function with inreg operands. A single s_ashr_i32 on s0/s1 is expected.
223 define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) {
224 ; GCN-LABEL: s_ashr_i32:
226 ; GCN-NEXT: s_ashr_i32 s0, s0, s1
227 ; GCN-NEXT: ; return to shader part epilog
229 ; GFX10PLUS-LABEL: s_ashr_i32:
230 ; GFX10PLUS: ; %bb.0:
231 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
232 ; GFX10PLUS-NEXT: ; return to shader part epilog
233 %result = ashr i32 %value, %amount
; Arithmetic shift right of an inreg i32 by the constant 31 in an amdgpu_ps
; function. A single s_ashr_i32 with the inline constant is expected.
237 define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) {
238 ; GCN-LABEL: s_ashr_i32_31:
240 ; GCN-NEXT: s_ashr_i32 s0, s0, 31
241 ; GCN-NEXT: ; return to shader part epilog
243 ; GFX10PLUS-LABEL: s_ashr_i32_31:
244 ; GFX10PLUS: ; %bb.0:
245 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 31
246 ; GFX10PLUS-NEXT: ; return to shader part epilog
247 %result = ashr i32 %value, 31
; Mixed-operand i32 ashr in an amdgpu_ps function: the value is inreg (s0 in
; the checks) and the amount is a non-inreg argument (v0). The result is
; bitcast to float. GFX6 expects the non-reversed v_ashr_i32 form; the other
; prefixes expect v_ashrrev_i32 in its e64 encoding.
251 define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) {
252 ; GFX6-LABEL: ashr_i32_sv:
254 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
255 ; GFX6-NEXT: ; return to shader part epilog
257 ; GFX8-LABEL: ashr_i32_sv:
259 ; GFX8-NEXT: v_ashrrev_i32_e64 v0, v0, s0
260 ; GFX8-NEXT: ; return to shader part epilog
262 ; GFX9-LABEL: ashr_i32_sv:
264 ; GFX9-NEXT: v_ashrrev_i32_e64 v0, v0, s0
265 ; GFX9-NEXT: ; return to shader part epilog
267 ; GFX10PLUS-LABEL: ashr_i32_sv:
268 ; GFX10PLUS: ; %bb.0:
269 ; GFX10PLUS-NEXT: v_ashrrev_i32_e64 v0, v0, s0
270 ; GFX10PLUS-NEXT: ; return to shader part epilog
271 %result = ashr i32 %value, %amount
272 %cast = bitcast i32 %result to float
; Mixed-operand i32 ashr in an amdgpu_ps function: the value is a non-inreg
; argument (v0 in the checks) and the amount is inreg (s0). The result is
; bitcast to float. A single v_ashrrev_i32_e32 is expected on all targets.
276 define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) {
277 ; GCN-LABEL: ashr_i32_vs:
279 ; GCN-NEXT: v_ashrrev_i32_e32 v0, s0, v0
280 ; GCN-NEXT: ; return to shader part epilog
282 ; GFX10PLUS-LABEL: ashr_i32_vs:
283 ; GFX10PLUS: ; %bb.0:
284 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, s0, v0
285 ; GFX10PLUS-NEXT: ; return to shader part epilog
286 %result = ashr i32 %value, %amount
287 %cast = bitcast i32 %result to float
; Element-wise ashr of <2 x i32> by a variable <2 x i32> amount (non-inreg
; arguments). One v_ashrrev_i32 per element is expected: values in v0-v1,
; amounts in v2-v3.
291 define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
292 ; GCN-LABEL: v_ashr_v2i32:
294 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v2, v0
296 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v3, v1
297 ; GCN-NEXT: s_setpc_b64 s[30:31]
299 ; GFX10PLUS-LABEL: v_ashr_v2i32:
300 ; GFX10PLUS: ; %bb.0:
301 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v2, v0
303 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v3, v1
304 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
305 %result = ashr <2 x i32> %value, %amount
306 ret <2 x i32> %result
; Element-wise ashr of a non-inreg <2 x i32> by the constant splat <31, 31>.
; One v_ashrrev_i32 with the inline constant 31 is expected per element.
309 define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) {
310 ; GCN-LABEL: v_ashr_v2i32_31:
312 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
314 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v1
315 ; GCN-NEXT: s_setpc_b64 s[30:31]
317 ; GFX10PLUS-LABEL: v_ashr_v2i32_31:
318 ; GFX10PLUS: ; %bb.0:
319 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0
321 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1
322 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
323 %result = ashr <2 x i32> %value, <i32 31, i32 31>
324 ret <2 x i32> %result
; Element-wise ashr of <2 x i32> in an amdgpu_ps function with inreg operands.
; One s_ashr_i32 per element is expected: values in s0-s1, amounts in s2-s3.
327 define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
328 ; GCN-LABEL: s_ashr_v2i32:
330 ; GCN-NEXT: s_ashr_i32 s0, s0, s2
331 ; GCN-NEXT: s_ashr_i32 s1, s1, s3
332 ; GCN-NEXT: ; return to shader part epilog
334 ; GFX10PLUS-LABEL: s_ashr_v2i32:
335 ; GFX10PLUS: ; %bb.0:
336 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s2
337 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s3
338 ; GFX10PLUS-NEXT: ; return to shader part epilog
339 %result = ashr <2 x i32> %value, %amount
340 ret <2 x i32> %result
; Element-wise ashr of <3 x i32> by a variable <3 x i32> amount (non-inreg
; arguments). One v_ashrrev_i32 per element is expected: values in v0-v2,
; amounts in v3-v5.
343 define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
344 ; GCN-LABEL: v_ashr_v3i32:
346 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v3, v0
348 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v4, v1
349 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v5, v2
350 ; GCN-NEXT: s_setpc_b64 s[30:31]
352 ; GFX10PLUS-LABEL: v_ashr_v3i32:
353 ; GFX10PLUS: ; %bb.0:
354 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v3, v0
356 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v4, v1
357 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v5, v2
358 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
359 %result = ashr <3 x i32> %value, %amount
360 ret <3 x i32> %result
; Element-wise ashr of <3 x i32> in an amdgpu_ps function with inreg operands.
; One s_ashr_i32 per element is expected: values in s0-s2, amounts in s3-s5.
363 define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
364 ; GCN-LABEL: s_ashr_v3i32:
366 ; GCN-NEXT: s_ashr_i32 s0, s0, s3
367 ; GCN-NEXT: s_ashr_i32 s1, s1, s4
368 ; GCN-NEXT: s_ashr_i32 s2, s2, s5
369 ; GCN-NEXT: ; return to shader part epilog
371 ; GFX10PLUS-LABEL: s_ashr_v3i32:
372 ; GFX10PLUS: ; %bb.0:
373 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s3
374 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s4
375 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s5
376 ; GFX10PLUS-NEXT: ; return to shader part epilog
377 %result = ashr <3 x i32> %value, %amount
378 ret <3 x i32> %result
; Element-wise ashr of <4 x i32> by a variable <4 x i32> amount (non-inreg
; arguments). One v_ashrrev_i32 per element is expected: values in v0-v3,
; amounts in v4-v7.
381 define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
382 ; GCN-LABEL: v_ashr_v4i32:
384 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v4, v0
386 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v5, v1
387 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v6, v2
388 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v7, v3
389 ; GCN-NEXT: s_setpc_b64 s[30:31]
391 ; GFX10PLUS-LABEL: v_ashr_v4i32:
392 ; GFX10PLUS: ; %bb.0:
393 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v4, v0
395 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v5, v1
396 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v6, v2
397 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, v7, v3
398 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
399 %result = ashr <4 x i32> %value, %amount
400 ret <4 x i32> %result
; Element-wise ashr of <4 x i32> in an amdgpu_ps function with inreg operands.
; One s_ashr_i32 per element is expected: values in s0-s3, amounts in s4-s7.
403 define amdgpu_ps <4 x i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
404 ; GCN-LABEL: s_ashr_v4i32:
406 ; GCN-NEXT: s_ashr_i32 s0, s0, s4
407 ; GCN-NEXT: s_ashr_i32 s1, s1, s5
408 ; GCN-NEXT: s_ashr_i32 s2, s2, s6
409 ; GCN-NEXT: s_ashr_i32 s3, s3, s7
410 ; GCN-NEXT: ; return to shader part epilog
412 ; GFX10PLUS-LABEL: s_ashr_v4i32:
413 ; GFX10PLUS: ; %bb.0:
414 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4
415 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5
416 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s6
417 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s7
418 ; GFX10PLUS-NEXT: ; return to shader part epilog
419 %result = ashr <4 x i32> %value, %amount
420 ret <4 x i32> %result
; Element-wise ashr of <5 x i32> by a variable <5 x i32> amount (non-inreg
; arguments). One v_ashrrev_i32 per element is expected: values in v0-v4,
; amounts in v5-v9.
423 define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
424 ; GCN-LABEL: v_ashr_v5i32:
426 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v5, v0
428 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v6, v1
429 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v7, v2
430 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v8, v3
431 ; GCN-NEXT: v_ashrrev_i32_e32 v4, v9, v4
432 ; GCN-NEXT: s_setpc_b64 s[30:31]
434 ; GFX10PLUS-LABEL: v_ashr_v5i32:
435 ; GFX10PLUS: ; %bb.0:
436 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v5, v0
438 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v6, v1
439 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v7, v2
440 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, v8, v3
441 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v4, v9, v4
442 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
443 %result = ashr <5 x i32> %value, %amount
444 ret <5 x i32> %result
; Element-wise ashr of <5 x i32> in an amdgpu_ps function with inreg operands.
; One s_ashr_i32 per element is expected: values in s0-s4, amounts in s5-s9.
447 define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
448 ; GCN-LABEL: s_ashr_v5i32:
450 ; GCN-NEXT: s_ashr_i32 s0, s0, s5
451 ; GCN-NEXT: s_ashr_i32 s1, s1, s6
452 ; GCN-NEXT: s_ashr_i32 s2, s2, s7
453 ; GCN-NEXT: s_ashr_i32 s3, s3, s8
454 ; GCN-NEXT: s_ashr_i32 s4, s4, s9
455 ; GCN-NEXT: ; return to shader part epilog
457 ; GFX10PLUS-LABEL: s_ashr_v5i32:
458 ; GFX10PLUS: ; %bb.0:
459 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s5
460 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s6
461 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s7
462 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s8
463 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s9
464 ; GFX10PLUS-NEXT: ; return to shader part epilog
465 %result = ashr <5 x i32> %value, %amount
466 ret <5 x i32> %result
; Element-wise ashr of <16 x i32> by a variable <16 x i32> amount (non-inreg
; arguments). Amounts for elements 0-14 are expected in v16-v30. The amount
; for element 15 is expected to be loaded through s32 (buffer_load_dword, or
; scratch_load_b32 on GFX11) and consumed after an s_waitcnt vmcnt(0).
; NOTE(review): presumably that last dword is passed on the stack because the
; arguments exceed the available argument VGPRs - confirm against the ABI.
; GFX10 and GFX11 have separate checks here because the load instruction differs.
469 define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
470 ; GCN-LABEL: v_ashr_v16i32:
472 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GCN-NEXT: v_ashrrev_i32_e32 v0, v16, v0
474 ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
475 ; GCN-NEXT: v_ashrrev_i32_e32 v1, v17, v1
476 ; GCN-NEXT: v_ashrrev_i32_e32 v2, v18, v2
477 ; GCN-NEXT: v_ashrrev_i32_e32 v3, v19, v3
478 ; GCN-NEXT: v_ashrrev_i32_e32 v4, v20, v4
479 ; GCN-NEXT: v_ashrrev_i32_e32 v5, v21, v5
480 ; GCN-NEXT: v_ashrrev_i32_e32 v6, v22, v6
481 ; GCN-NEXT: v_ashrrev_i32_e32 v7, v23, v7
482 ; GCN-NEXT: v_ashrrev_i32_e32 v8, v24, v8
483 ; GCN-NEXT: v_ashrrev_i32_e32 v9, v25, v9
484 ; GCN-NEXT: v_ashrrev_i32_e32 v10, v26, v10
485 ; GCN-NEXT: v_ashrrev_i32_e32 v11, v27, v11
486 ; GCN-NEXT: v_ashrrev_i32_e32 v12, v28, v12
487 ; GCN-NEXT: v_ashrrev_i32_e32 v13, v29, v13
488 ; GCN-NEXT: v_ashrrev_i32_e32 v14, v30, v14
489 ; GCN-NEXT: s_waitcnt vmcnt(0)
490 ; GCN-NEXT: v_ashrrev_i32_e32 v15, v16, v15
491 ; GCN-NEXT: s_setpc_b64 s[30:31]
493 ; GFX10-LABEL: v_ashr_v16i32:
495 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
497 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v16, v0
498 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v17, v1
499 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, v18, v2
500 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, v19, v3
501 ; GFX10-NEXT: v_ashrrev_i32_e32 v4, v20, v4
502 ; GFX10-NEXT: v_ashrrev_i32_e32 v5, v21, v5
503 ; GFX10-NEXT: v_ashrrev_i32_e32 v6, v22, v6
504 ; GFX10-NEXT: v_ashrrev_i32_e32 v7, v23, v7
505 ; GFX10-NEXT: v_ashrrev_i32_e32 v8, v24, v8
506 ; GFX10-NEXT: v_ashrrev_i32_e32 v9, v25, v9
507 ; GFX10-NEXT: v_ashrrev_i32_e32 v10, v26, v10
508 ; GFX10-NEXT: v_ashrrev_i32_e32 v11, v27, v11
509 ; GFX10-NEXT: v_ashrrev_i32_e32 v12, v28, v12
510 ; GFX10-NEXT: v_ashrrev_i32_e32 v13, v29, v13
511 ; GFX10-NEXT: v_ashrrev_i32_e32 v14, v30, v14
512 ; GFX10-NEXT: s_waitcnt vmcnt(0)
513 ; GFX10-NEXT: v_ashrrev_i32_e32 v15, v31, v15
514 ; GFX10-NEXT: s_setpc_b64 s[30:31]
516 ; GFX11-LABEL: v_ashr_v16i32:
518 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
520 ; GFX11-NEXT: v_ashrrev_i32_e32 v0, v16, v0
521 ; GFX11-NEXT: v_ashrrev_i32_e32 v1, v17, v1
522 ; GFX11-NEXT: v_ashrrev_i32_e32 v2, v18, v2
523 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, v19, v3
524 ; GFX11-NEXT: v_ashrrev_i32_e32 v4, v20, v4
525 ; GFX11-NEXT: v_ashrrev_i32_e32 v5, v21, v5
526 ; GFX11-NEXT: v_ashrrev_i32_e32 v6, v22, v6
527 ; GFX11-NEXT: v_ashrrev_i32_e32 v7, v23, v7
528 ; GFX11-NEXT: v_ashrrev_i32_e32 v8, v24, v8
529 ; GFX11-NEXT: v_ashrrev_i32_e32 v9, v25, v9
530 ; GFX11-NEXT: v_ashrrev_i32_e32 v10, v26, v10
531 ; GFX11-NEXT: v_ashrrev_i32_e32 v11, v27, v11
532 ; GFX11-NEXT: v_ashrrev_i32_e32 v12, v28, v12
533 ; GFX11-NEXT: v_ashrrev_i32_e32 v13, v29, v13
534 ; GFX11-NEXT: v_ashrrev_i32_e32 v14, v30, v14
535 ; GFX11-NEXT: s_waitcnt vmcnt(0)
536 ; GFX11-NEXT: v_ashrrev_i32_e32 v15, v31, v15
537 ; GFX11-NEXT: s_setpc_b64 s[30:31]
538 %result = ashr <16 x i32> %value, %amount
539 ret <16 x i32> %result
; Element-wise ashr of <16 x i32> in an amdgpu_ps function with inreg operands.
; One s_ashr_i32 per element is expected: values in s0-s15, amounts in s16-s31.
542 define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
543 ; GCN-LABEL: s_ashr_v16i32:
545 ; GCN-NEXT: s_ashr_i32 s0, s0, s16
546 ; GCN-NEXT: s_ashr_i32 s1, s1, s17
547 ; GCN-NEXT: s_ashr_i32 s2, s2, s18
548 ; GCN-NEXT: s_ashr_i32 s3, s3, s19
549 ; GCN-NEXT: s_ashr_i32 s4, s4, s20
550 ; GCN-NEXT: s_ashr_i32 s5, s5, s21
551 ; GCN-NEXT: s_ashr_i32 s6, s6, s22
552 ; GCN-NEXT: s_ashr_i32 s7, s7, s23
553 ; GCN-NEXT: s_ashr_i32 s8, s8, s24
554 ; GCN-NEXT: s_ashr_i32 s9, s9, s25
555 ; GCN-NEXT: s_ashr_i32 s10, s10, s26
556 ; GCN-NEXT: s_ashr_i32 s11, s11, s27
557 ; GCN-NEXT: s_ashr_i32 s12, s12, s28
558 ; GCN-NEXT: s_ashr_i32 s13, s13, s29
559 ; GCN-NEXT: s_ashr_i32 s14, s14, s30
560 ; GCN-NEXT: s_ashr_i32 s15, s15, s31
561 ; GCN-NEXT: ; return to shader part epilog
563 ; GFX10PLUS-LABEL: s_ashr_v16i32:
564 ; GFX10PLUS: ; %bb.0:
565 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s16
566 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s17
567 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s18
568 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s19
569 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s20
570 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s5, s21
571 ; GFX10PLUS-NEXT: s_ashr_i32 s6, s6, s22
572 ; GFX10PLUS-NEXT: s_ashr_i32 s7, s7, s23
573 ; GFX10PLUS-NEXT: s_ashr_i32 s8, s8, s24
574 ; GFX10PLUS-NEXT: s_ashr_i32 s9, s9, s25
575 ; GFX10PLUS-NEXT: s_ashr_i32 s10, s10, s26
576 ; GFX10PLUS-NEXT: s_ashr_i32 s11, s11, s27
577 ; GFX10PLUS-NEXT: s_ashr_i32 s12, s12, s28
578 ; GFX10PLUS-NEXT: s_ashr_i32 s13, s13, s29
579 ; GFX10PLUS-NEXT: s_ashr_i32 s14, s14, s30
580 ; GFX10PLUS-NEXT: s_ashr_i32 s15, s15, s31
581 ; GFX10PLUS-NEXT: ; return to shader part epilog
582 %result = ashr <16 x i32> %value, %amount
583 ret <16 x i32> %result
; Arithmetic shift right of an i16 by a variable i16 amount (non-inreg
; arguments). GFX6 is expected to mask the amount, sign-extend the value and
; use a 32-bit shift; the other targets expect a single v_ashrrev_i16.
586 define i16 @v_ashr_i16(i16 %value, i16 %amount) {
587 ; GFX6-LABEL: v_ashr_i16:
589 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
591 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
592 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0
593 ; GFX6-NEXT: s_setpc_b64 s[30:31]
595 ; GFX8-LABEL: v_ashr_i16:
597 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0
599 ; GFX8-NEXT: s_setpc_b64 s[30:31]
601 ; GFX9-LABEL: v_ashr_i16:
603 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, v1, v0
605 ; GFX9-NEXT: s_setpc_b64 s[30:31]
607 ; GFX10PLUS-LABEL: v_ashr_i16:
608 ; GFX10PLUS: ; %bb.0:
609 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0
611 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
612 %result = ashr i16 %value, %amount
; Arithmetic shift right of a non-inreg i16 by the constant 15 (bit width
; minus one). GFX6 is expected to sign-extend the value and use a 32-bit
; shift; the other targets expect a single v_ashrrev_i16 with the inline
; constant.
616 define i16 @v_ashr_i16_15(i16 %value) {
617 ; GFX6-LABEL: v_ashr_i16_15:
619 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
621 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0
622 ; GFX6-NEXT: s_setpc_b64 s[30:31]
624 ; GFX8-LABEL: v_ashr_i16_15:
626 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
628 ; GFX8-NEXT: s_setpc_b64 s[30:31]
630 ; GFX9-LABEL: v_ashr_i16_15:
632 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 15, v0
634 ; GFX9-NEXT: s_setpc_b64 s[30:31]
636 ; GFX10PLUS-LABEL: v_ashr_i16_15:
637 ; GFX10PLUS: ; %bb.0:
638 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 15, v0
640 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
641 %result = ashr i16 %value, 15
; Arithmetic shift right of an i16 by a variable i16 amount in an amdgpu_ps
; function with inreg operands. All targets expect the value to be
; sign-extended to 32 bits before a 32-bit scalar shift; every target except
; GFX6 is also expected to sign-extend the amount in s1.
645 define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
646 ; GFX6-LABEL: s_ashr_i16:
648 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
649 ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
650 ; GFX6-NEXT: ; return to shader part epilog
652 ; GFX8-LABEL: s_ashr_i16:
654 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
655 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
656 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
657 ; GFX8-NEXT: ; return to shader part epilog
659 ; GFX9-LABEL: s_ashr_i16:
661 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
662 ; GFX9-NEXT: s_sext_i32_i16 s1, s1
663 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
664 ; GFX9-NEXT: ; return to shader part epilog
666 ; GFX10PLUS-LABEL: s_ashr_i16:
667 ; GFX10PLUS: ; %bb.0:
668 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
669 ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
670 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
671 ; GFX10PLUS-NEXT: ; return to shader part epilog
672 %result = ashr i16 %value, %amount
; Arithmetic shift right of an inreg i16 by the constant 15 in an amdgpu_ps
; function. Both check prefixes expect a sign-extend from 16 bits followed by
; a 32-bit scalar shift by the inline constant 15.
676 define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) {
677 ; GCN-LABEL: s_ashr_i16_15:
679 ; GCN-NEXT: s_sext_i32_i16 s0, s0
680 ; GCN-NEXT: s_ashr_i32 s0, s0, 15
681 ; GCN-NEXT: ; return to shader part epilog
683 ; GFX10PLUS-LABEL: s_ashr_i16_15:
684 ; GFX10PLUS: ; %bb.0:
685 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
686 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15
687 ; GFX10PLUS-NEXT: ; return to shader part epilog
688 %result = ashr i16 %value, 15
; Mixed-operand i16 ashr in an amdgpu_ps function: the value is inreg (s0 in
; the checks) and the amount is a non-inreg argument (v0). The result is
; bitcast to half. GFX6 is expected to widen to a 32-bit shift (mask the
; amount, sign-extend the value); the other targets expect one 16-bit shift.
692 define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
693 ; GFX6-LABEL: ashr_i16_sv:
695 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
696 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
697 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
698 ; GFX6-NEXT: ; return to shader part epilog
700 ; GFX8-LABEL: ashr_i16_sv:
702 ; GFX8-NEXT: v_ashrrev_i16_e64 v0, v0, s0
703 ; GFX8-NEXT: ; return to shader part epilog
705 ; GFX9-LABEL: ashr_i16_sv:
707 ; GFX9-NEXT: v_ashrrev_i16_e64 v0, v0, s0
708 ; GFX9-NEXT: ; return to shader part epilog
710 ; GFX10PLUS-LABEL: ashr_i16_sv:
711 ; GFX10PLUS: ; %bb.0:
712 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v0, s0
713 ; GFX10PLUS-NEXT: ; return to shader part epilog
714 %result = ashr i16 %value, %amount
715 %cast = bitcast i16 %result to half
; Mixed-operand i16 ashr in an amdgpu_ps function: the value is a non-inreg
; argument (v0 in the checks) and the amount is inreg (s0). The result is
; bitcast to half. GFX6 is expected to widen to a 32-bit shift (mask the
; amount with s_and_b32, sign-extend the value); the other targets expect one
; 16-bit shift.
719 define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
720 ; GFX6-LABEL: ashr_i16_vs:
722 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
723 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
724 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
725 ; GFX6-NEXT: ; return to shader part epilog
727 ; GFX8-LABEL: ashr_i16_vs:
729 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, s0, v0
730 ; GFX8-NEXT: ; return to shader part epilog
732 ; GFX9-LABEL: ashr_i16_vs:
734 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, s0, v0
735 ; GFX9-NEXT: ; return to shader part epilog
737 ; GFX10PLUS-LABEL: ashr_i16_vs:
738 ; GFX10PLUS: ; %bb.0:
739 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, s0, v0
740 ; GFX10PLUS-NEXT: ; return to shader part epilog
741 %result = ashr i16 %value, %amount
742 %cast = bitcast i16 %result to half
; Element-wise ashr of <2 x i16> by a variable <2 x i16> amount (non-inreg
; arguments). Expected code per target: GFX6 handles each element in its own
; register with 32-bit shifts; GFX8 shifts the low half with a plain 16-bit
; shift and the high half with an SDWA shift, then ORs them; GFX9 and later
; use a single v_pk_ashrrev_i16.
746 define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
747 ; GFX6-LABEL: v_ashr_v2i16:
749 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
751 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
752 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v2, v0
753 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
754 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
755 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v2, v1
756 ; GFX6-NEXT: s_setpc_b64 s[30:31]
758 ; GFX8-LABEL: v_ashr_v2i16:
760 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v1, v0
762 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
763 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
764 ; GFX8-NEXT: s_setpc_b64 s[30:31]
766 ; GFX9-LABEL: v_ashr_v2i16:
768 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v1, v0
770 ; GFX9-NEXT: s_setpc_b64 s[30:31]
772 ; GFX10PLUS-LABEL: v_ashr_v2i16:
773 ; GFX10PLUS: ; %bb.0:
774 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v1, v0
776 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
777 %result = ashr <2 x i16> %value, %amount
778 ret <2 x i16> %result
; Element-wise ashr of a non-inreg <2 x i16> by the constant splat <15, 15>.
; Expected code per target: GFX6 uses two 32-bit shifts on sign-extended
; elements; GFX8 materializes 15 in v2 for the SDWA high-half shift and ORs
; the halves; GFX9 and later use one v_pk_ashrrev_i16 with op_sel_hi:[0,1].
781 define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) {
782 ; GFX6-LABEL: v_ashr_v2i16_15:
784 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
786 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
787 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0
788 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
789 ; GFX6-NEXT: s_setpc_b64 s[30:31]
791 ; GFX8-LABEL: v_ashr_v2i16_15:
793 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
795 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
796 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
797 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
798 ; GFX8-NEXT: s_setpc_b64 s[30:31]
800 ; GFX9-LABEL: v_ashr_v2i16_15:
802 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
804 ; GFX9-NEXT: s_setpc_b64 s[30:31]
806 ; GFX10PLUS-LABEL: v_ashr_v2i16_15:
807 ; GFX10PLUS: ; %bb.0:
808 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
810 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
811 %result = ashr <2 x i16> %value, <i16 15, i16 15>
812 ret <2 x i16> %result
; Element-wise ashr of <2 x i16> in an amdgpu_ps function with inreg operands;
; the vector result is bitcast to i32. Every target is expected to shift each
; element with 32-bit scalar shifts and then repack into s0: GFX6 and GFX8
; with and/lshl/or, GFX9 and later with s_pack_ll_b32_b16.
815 define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
816 ; GFX6-LABEL: s_ashr_v2i16:
818 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
819 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
820 ; GFX6-NEXT: s_ashr_i32 s1, s1, s3
821 ; GFX6-NEXT: s_ashr_i32 s0, s0, s2
822 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
823 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
824 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
825 ; GFX6-NEXT: s_or_b32 s0, s0, s1
826 ; GFX6-NEXT: ; return to shader part epilog
828 ; GFX8-LABEL: s_ashr_v2i16:
830 ; GFX8-NEXT: s_sext_i32_i16 s2, s0
831 ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
832 ; GFX8-NEXT: s_sext_i32_i16 s3, s1
833 ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
834 ; GFX8-NEXT: s_ashr_i32 s2, s2, s3
835 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
836 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
837 ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
838 ; GFX8-NEXT: s_or_b32 s0, s0, s1
839 ; GFX8-NEXT: ; return to shader part epilog
841 ; GFX9-LABEL: s_ashr_v2i16:
843 ; GFX9-NEXT: s_sext_i32_i16 s2, s0
844 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
845 ; GFX9-NEXT: s_sext_i32_i16 s3, s1
846 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
847 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
848 ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
849 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0
850 ; GFX9-NEXT: ; return to shader part epilog
852 ; GFX10PLUS-LABEL: s_ashr_v2i16:
853 ; GFX10PLUS: ; %bb.0:
854 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0
855 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
856 ; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s1
857 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
858 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s3
859 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
860 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s2, s0
861 ; GFX10PLUS-NEXT: ; return to shader part epilog
862 %result = ashr <2 x i16> %value, %amount
863 %cast = bitcast <2 x i16> %result to i32
; Mixed-operand <2 x i16> ashr in an amdgpu_ps function: the value is inreg
; (scalar registers in the checks) and the amount is a non-inreg argument
; (vector registers). The result is bitcast to float. GFX6 is expected to
; shift each element with 32-bit ops and repack; GFX8 to copy the high half of
; s0 into v2 for an SDWA shift and OR the halves; GFX9 and later to use one
; v_pk_ashrrev_i16.
867 define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
868 ; GFX6-LABEL: ashr_v2i16_sv:
870 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
871 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
872 ; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
873 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
874 ; GFX6-NEXT: s_sext_i32_i16 s0, s1
875 ; GFX6-NEXT: v_ashr_i32_e32 v1, s0, v1
876 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
877 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
878 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
879 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
880 ; GFX6-NEXT: ; return to shader part epilog
882 ; GFX8-LABEL: ashr_v2i16_sv:
884 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
885 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
886 ; GFX8-NEXT: v_ashrrev_i16_e64 v1, v0, s0
887 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
888 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
889 ; GFX8-NEXT: ; return to shader part epilog
891 ; GFX9-LABEL: ashr_v2i16_sv:
893 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v0, s0
894 ; GFX9-NEXT: ; return to shader part epilog
896 ; GFX10PLUS-LABEL: ashr_v2i16_sv:
897 ; GFX10PLUS: ; %bb.0:
898 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v0, s0
899 ; GFX10PLUS-NEXT: ; return to shader part epilog
900 %result = ashr <2 x i16> %value, %amount
901 %cast = bitcast <2 x i16> %result to float
; Mixed-operand <2 x i16> ashr in an amdgpu_ps function: the value is a
; non-inreg argument (vector registers in the checks) and the amount is inreg
; (scalar registers). The result is bitcast to float. GFX6 is expected to
; shift each element with 32-bit ops and repack; GFX8 to copy the high half of
; the amount into v2 for an SDWA shift and OR the halves; GFX9 and later to
; use one v_pk_ashrrev_i16.
905 define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
906 ; GFX6-LABEL: ashr_v2i16_vs:
908 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
909 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
910 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
911 ; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
912 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
913 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, s0, v1
914 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
915 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
916 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
917 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
918 ; GFX6-NEXT: ; return to shader part epilog
920 ; GFX8-LABEL: ashr_v2i16_vs:
922 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
923 ; GFX8-NEXT: v_mov_b32_e32 v2, s1
924 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, s0, v0
925 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
926 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
927 ; GFX8-NEXT: ; return to shader part epilog
929 ; GFX9-LABEL: ashr_v2i16_vs:
931 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, s0, v0
932 ; GFX9-NEXT: ; return to shader part epilog
934 ; GFX10PLUS-LABEL: ashr_v2i16_vs:
935 ; GFX10PLUS: ; %bb.0:
936 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, s0, v0
937 ; GFX10PLUS-NEXT: ; return to shader part epilog
938 %result = ashr <2 x i16> %value, %amount
939 %cast = bitcast <2 x i16> %result to float
944 ; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
945 ; %result = ashr <3 x i16> %value, %amount
946 ; ret <3 x i16> %result
949 ; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
950 ; %result = ashr <3 x i16> %value, %amount
951 ; ret <3 x i16> %result
; ashr of a <4 x i16> value by a <4 x i16> amount with no inreg arguments;
; the result is bitcast to <2 x float>. Per the checks below, tahiti expands
; every element to a sign-extended i32 shift and repacks pairs, fiji uses an
; e32 plus an SDWA 16-bit shift per dword, and gfx900 and later need only two
; v_pk_ashrrev_i16 instructions.
954 define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
955 ; GFX6-LABEL: v_ashr_v4i16:
957 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
959 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
960 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0
961 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
962 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
963 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1
964 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
965 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
966 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2
967 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
968 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
969 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
970 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3
971 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
972 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
973 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
974 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
975 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
976 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
977 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
978 ; GFX6-NEXT: s_setpc_b64 s[30:31]
980 ; GFX8-LABEL: v_ashr_v4i16:
982 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v2, v0
984 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
985 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v3, v1
986 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
987 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
988 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
989 ; GFX8-NEXT: s_setpc_b64 s[30:31]
991 ; GFX9-LABEL: v_ashr_v4i16:
993 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
994 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v2, v0
995 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, v3, v1
996 ; GFX9-NEXT: s_setpc_b64 s[30:31]
998 ; GFX10PLUS-LABEL: v_ashr_v4i16:
999 ; GFX10PLUS: ; %bb.0:
1000 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v2, v0
1002 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v3, v1
1003 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1004 %result = ashr <4 x i16> %value, %amount
1005 %cast = bitcast <4 x i16> %result to <2 x float>
1006 ret <2 x float> %cast
; amdgpu_ps test: ashr of <4 x i16> where both value and amount are inreg;
; the result is bitcast to <2 x i32>. The checks below use only scalar
; instructions: each 16-bit element is sign-extended or extracted to 32 bits
; (s_sext_i32_i16, s_bfe_i32 or s_ashr_i32 by 16), shifted with s_ashr_i32,
; and repacked. gfx900 and later repack with s_pack_ll_b32_b16, the older
; targets with and/shl/or.
1009 define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
1010 ; GFX6-LABEL: s_ashr_v4i16:
1012 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
1013 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
1014 ; GFX6-NEXT: s_ashr_i32 s1, s1, s5
1015 ; GFX6-NEXT: s_ashr_i32 s0, s0, s4
1016 ; GFX6-NEXT: s_sext_i32_i16 s2, s2
1017 ; GFX6-NEXT: s_sext_i32_i16 s3, s3
1018 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
1019 ; GFX6-NEXT: s_ashr_i32 s2, s2, s6
1020 ; GFX6-NEXT: s_ashr_i32 s3, s3, s7
1021 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
1022 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1023 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1024 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1025 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
1026 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1027 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1028 ; GFX6-NEXT: ; return to shader part epilog
1030 ; GFX8-LABEL: s_ashr_v4i16:
1032 ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1033 ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1034 ; GFX8-NEXT: s_sext_i32_i16 s5, s1
1035 ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1036 ; GFX8-NEXT: s_sext_i32_i16 s6, s2
1037 ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1038 ; GFX8-NEXT: s_sext_i32_i16 s7, s3
1039 ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1040 ; GFX8-NEXT: s_ashr_i32 s4, s4, s6
1041 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1042 ; GFX8-NEXT: s_ashr_i32 s2, s5, s7
1043 ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1044 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1045 ; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1046 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1047 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1048 ; GFX8-NEXT: s_or_b32 s0, s0, s3
1049 ; GFX8-NEXT: s_or_b32 s1, s1, s2
1050 ; GFX8-NEXT: ; return to shader part epilog
1052 ; GFX9-LABEL: s_ashr_v4i16:
1054 ; GFX9-NEXT: s_sext_i32_i16 s4, s0
1055 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1056 ; GFX9-NEXT: s_sext_i32_i16 s5, s2
1057 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1058 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1059 ; GFX9-NEXT: s_ashr_i32 s0, s0, s2
1060 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1061 ; GFX9-NEXT: s_sext_i32_i16 s2, s1
1062 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1063 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1064 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1065 ; GFX9-NEXT: s_ashr_i32 s2, s2, s4
1066 ; GFX9-NEXT: s_ashr_i32 s1, s1, s3
1067 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1
1068 ; GFX9-NEXT: ; return to shader part epilog
1070 ; GFX10PLUS-LABEL: s_ashr_v4i16:
1071 ; GFX10PLUS: ; %bb.0:
1072 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s0
1073 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
1074 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, s2
1075 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16
1076 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1077 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s2
1078 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s1
1079 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
1080 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, s3
1081 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16
1082 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s5
1083 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s3
1084 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1085 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s2, s1
1086 ; GFX10PLUS-NEXT: ; return to shader part epilog
1087 %result = ashr <4 x i16> %value, %amount
1088 %cast = bitcast <4 x i16> %result to <2 x i32>
1093 ; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1094 ; %result = ashr <5 x i16> %value, %amount
1095 ; ret <5 x i16> %result
1098 ; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1099 ; %result = ashr <5 x i16> %value, %amount
1100 ; ret <5 x i16> %result
1103 ; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1104 ; %result = ashr <6 x i16> %value, %amount
1105 ; %cast = bitcast <6 x i16> %result to <3 x float>
1106 ; ret <3 x float> %cast
1109 ; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1110 ; %result = ashr <6 x i16> %value, %amount
1111 ; %cast = bitcast <6 x i16> %result to <3 x i32>
1112 ; ret <3 x i32> %cast
; ashr of a <8 x i16> value by a <8 x i16> amount with no inreg arguments;
; the result is bitcast to <4 x float>. Per the checks below, tahiti expands
; all eight elements to sign-extended i32 shifts and repacks pairs, fiji uses
; an e32 plus an SDWA 16-bit shift per dword, and gfx900 and later emit four
; v_pk_ashrrev_i16 instructions.
1115 define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
1116 ; GFX6-LABEL: v_ashr_v8i16:
1118 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
1120 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
1121 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v8, v0
1122 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v9
1123 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
1124 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v8, v1
1125 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v10
1126 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
1127 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v8, v2
1128 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v11
1129 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
1130 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v8, v3
1131 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v12
1132 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
1133 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, v8, v4
1134 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v13
1135 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
1136 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1137 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, v8, v5
1138 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v14
1139 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
1140 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1141 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1142 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, v8, v6
1143 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v15
1144 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
1145 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1146 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
1147 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
1148 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, v8, v7
1149 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1150 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5
1151 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1152 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
1153 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1154 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
1155 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1156 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6
1157 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
1158 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1159 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1161 ; GFX8-LABEL: v_ashr_v8i16:
1163 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1164 ; GFX8-NEXT: v_ashrrev_i16_e32 v8, v4, v0
1165 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1166 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v5, v1
1167 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1168 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1169 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v6, v2
1170 ; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1171 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1172 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, v7, v3
1173 ; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1174 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
1175 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1176 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1178 ; GFX9-LABEL: v_ashr_v8i16:
1180 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1181 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, v4, v0
1182 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, v5, v1
1183 ; GFX9-NEXT: v_pk_ashrrev_i16 v2, v6, v2
1184 ; GFX9-NEXT: v_pk_ashrrev_i16 v3, v7, v3
1185 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1187 ; GFX10PLUS-LABEL: v_ashr_v8i16:
1188 ; GFX10PLUS: ; %bb.0:
1189 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v4, v0
1191 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v5, v1
1192 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, v6, v2
1193 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v3, v7, v3
1194 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1195 %result = ashr <8 x i16> %value, %amount
1196 %cast = bitcast <8 x i16> %result to <4 x float>
1197 ret <4 x float> %cast
; amdgpu_ps test: ashr of <8 x i16> where both value and amount are inreg;
; the result is bitcast to <4 x i32>. The checks below use only scalar
; instructions: every 16-bit element is widened to 32 bits (s_sext_i32_i16,
; s_bfe_i32 or s_ashr_i32 by 16), shifted with s_ashr_i32, and repacked.
; gfx900 and later repack with s_pack_ll_b32_b16, the older targets with
; and/shl/or.
1200 define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
1201 ; GFX6-LABEL: s_ashr_v8i16:
1203 ; GFX6-NEXT: s_sext_i32_i16 s1, s1
1204 ; GFX6-NEXT: s_sext_i32_i16 s0, s0
1205 ; GFX6-NEXT: s_ashr_i32 s1, s1, s9
1206 ; GFX6-NEXT: s_ashr_i32 s0, s0, s8
1207 ; GFX6-NEXT: s_sext_i32_i16 s2, s2
1208 ; GFX6-NEXT: s_sext_i32_i16 s3, s3
1209 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
1210 ; GFX6-NEXT: s_ashr_i32 s2, s2, s10
1211 ; GFX6-NEXT: s_ashr_i32 s3, s3, s11
1212 ; GFX6-NEXT: s_sext_i32_i16 s5, s5
1213 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
1214 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1215 ; GFX6-NEXT: s_sext_i32_i16 s4, s4
1216 ; GFX6-NEXT: s_ashr_i32 s5, s5, s13
1217 ; GFX6-NEXT: s_sext_i32_i16 s7, s7
1218 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1219 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1220 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
1221 ; GFX6-NEXT: s_ashr_i32 s4, s4, s12
1222 ; GFX6-NEXT: s_sext_i32_i16 s6, s6
1223 ; GFX6-NEXT: s_ashr_i32 s7, s7, s15
1224 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1225 ; GFX6-NEXT: s_and_b32 s3, s5, 0xffff
1226 ; GFX6-NEXT: s_ashr_i32 s6, s6, s14
1227 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1228 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1229 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1230 ; GFX6-NEXT: s_and_b32 s4, s7, 0xffff
1231 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1232 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1233 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1234 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1235 ; GFX6-NEXT: ; return to shader part epilog
1237 ; GFX8-LABEL: s_ashr_v8i16:
1239 ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1240 ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1241 ; GFX8-NEXT: s_sext_i32_i16 s9, s1
1242 ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1243 ; GFX8-NEXT: s_sext_i32_i16 s12, s4
1244 ; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1245 ; GFX8-NEXT: s_sext_i32_i16 s13, s5
1246 ; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1247 ; GFX8-NEXT: s_sext_i32_i16 s10, s2
1248 ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1249 ; GFX8-NEXT: s_sext_i32_i16 s14, s6
1250 ; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
1251 ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1252 ; GFX8-NEXT: s_ashr_i32 s4, s9, s13
1253 ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1254 ; GFX8-NEXT: s_sext_i32_i16 s11, s3
1255 ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1256 ; GFX8-NEXT: s_sext_i32_i16 s15, s7
1257 ; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1258 ; GFX8-NEXT: s_ashr_i32 s5, s10, s14
1259 ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1260 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1261 ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1262 ; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1263 ; GFX8-NEXT: s_ashr_i32 s6, s11, s15
1264 ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1265 ; GFX8-NEXT: s_or_b32 s1, s1, s4
1266 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1267 ; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1268 ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1269 ; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
1270 ; GFX8-NEXT: s_or_b32 s2, s2, s4
1271 ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1272 ; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1273 ; GFX8-NEXT: s_or_b32 s0, s0, s7
1274 ; GFX8-NEXT: s_or_b32 s3, s3, s4
1275 ; GFX8-NEXT: ; return to shader part epilog
1277 ; GFX9-LABEL: s_ashr_v8i16:
1279 ; GFX9-NEXT: s_sext_i32_i16 s8, s0
1280 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1281 ; GFX9-NEXT: s_sext_i32_i16 s9, s4
1282 ; GFX9-NEXT: s_ashr_i32 s4, s4, 16
1283 ; GFX9-NEXT: s_ashr_i32 s8, s8, s9
1284 ; GFX9-NEXT: s_ashr_i32 s0, s0, s4
1285 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s8, s0
1286 ; GFX9-NEXT: s_sext_i32_i16 s4, s1
1287 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1288 ; GFX9-NEXT: s_sext_i32_i16 s8, s5
1289 ; GFX9-NEXT: s_ashr_i32 s5, s5, 16
1290 ; GFX9-NEXT: s_ashr_i32 s4, s4, s8
1291 ; GFX9-NEXT: s_ashr_i32 s1, s1, s5
1292 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1293 ; GFX9-NEXT: s_sext_i32_i16 s4, s2
1294 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1295 ; GFX9-NEXT: s_sext_i32_i16 s5, s6
1296 ; GFX9-NEXT: s_ashr_i32 s6, s6, 16
1297 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1298 ; GFX9-NEXT: s_ashr_i32 s2, s2, s6
1299 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1300 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1301 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1302 ; GFX9-NEXT: s_sext_i32_i16 s5, s7
1303 ; GFX9-NEXT: s_ashr_i32 s6, s7, 16
1304 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1305 ; GFX9-NEXT: s_ashr_i32 s3, s3, s6
1306 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3
1307 ; GFX9-NEXT: ; return to shader part epilog
1309 ; GFX10PLUS-LABEL: s_ashr_v8i16:
1310 ; GFX10PLUS: ; %bb.0:
1311 ; GFX10PLUS-NEXT: s_sext_i32_i16 s8, s0
1312 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
1313 ; GFX10PLUS-NEXT: s_sext_i32_i16 s9, s4
1314 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, 16
1315 ; GFX10PLUS-NEXT: s_ashr_i32 s8, s8, s9
1316 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4
1317 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1
1318 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
1319 ; GFX10PLUS-NEXT: s_sext_i32_i16 s9, s5
1320 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s5, 16
1321 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s9
1322 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5
1323 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, s6
1324 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1325 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s2
1326 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16
1327 ; GFX10PLUS-NEXT: s_ashr_i32 s6, s6, 16
1328 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1329 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s6
1330 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, s3
1331 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16
1332 ; GFX10PLUS-NEXT: s_sext_i32_i16 s6, s7
1333 ; GFX10PLUS-NEXT: s_ashr_i32 s7, s7, 16
1334 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s5, s6
1335 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s7
1336 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s8, s0
1337 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1338 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s5, s3
1339 ; GFX10PLUS-NEXT: ; return to shader part epilog
1340 %result = ashr <8 x i16> %value, %amount
1341 %cast = bitcast <8 x i16> %result to <4 x i32>
; ashr of an i64 value by a variable i64 amount with no inreg arguments.
; Every checked target emits one 64-bit shift whose amount operand is v2:
; v_ashr_i64 on tahiti, and v_ashrrev_i64 (amount listed first) on fiji and
; later.
1345 define i64 @v_ashr_i64(i64 %value, i64 %amount) {
1346 ; GFX6-LABEL: v_ashr_i64:
1348 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1349 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v2
1350 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1352 ; GFX8-LABEL: v_ashr_i64:
1354 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1356 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1358 ; GFX9-LABEL: v_ashr_i64:
1360 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1362 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1364 ; GFX10PLUS-LABEL: v_ashr_i64:
1365 ; GFX10PLUS: ; %bb.0:
1366 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
1368 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1369 %result = ashr i64 %value, %amount
; ashr of an i64 by the constant 63. The checks expect no 64-bit shift: the
; high input dword (v1) is shifted right by 31 into v0, and that result is
; copied to v1, so both result halves hold the sign fill.
1373 define i64 @v_ashr_i64_63(i64 %value) {
1374 ; GCN-LABEL: v_ashr_i64_63:
1376 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v1
1378 ; GCN-NEXT: v_mov_b32_e32 v1, v0
1379 ; GCN-NEXT: s_setpc_b64 s[30:31]
1381 ; GFX10PLUS-LABEL: v_ashr_i64_63:
1382 ; GFX10PLUS: ; %bb.0:
1383 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1384 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v1
1385 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
1386 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1387 %result = ashr i64 %value, 63
; ashr of an i64 by the constant 33. The checks expect two 32-bit shifts of
; the high input dword (v1): by 1 for the low result and by 31 for the high
; result. The pre-gfx10 sequence goes through v2 and a move, while the
; gfx1010/gfx1100 sequence writes v1 in place.
1391 define i64 @v_ashr_i64_33(i64 %value) {
1392 ; GCN-LABEL: v_ashr_i64_33:
1394 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1396 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 1, v1
1397 ; GCN-NEXT: v_mov_b32_e32 v1, v2
1398 ; GCN-NEXT: s_setpc_b64 s[30:31]
1400 ; GFX10PLUS-LABEL: v_ashr_i64_33:
1401 ; GFX10PLUS: ; %bb.0:
1402 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 1, v1
1404 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1
1405 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1406 %result = ashr i64 %value, 33
; ashr of an i64 by the constant 32. The checks expect the high input dword
; (v1) to be moved into the low result (v0) and the high result to be that
; dword shifted right by 31; no 64-bit shift is emitted.
1410 define i64 @v_ashr_i64_32(i64 %value) {
1411 ; GCN-LABEL: v_ashr_i64_32:
1413 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1414 ; GCN-NEXT: v_mov_b32_e32 v0, v1
1415 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1416 ; GCN-NEXT: s_setpc_b64 s[30:31]
1418 ; GFX10PLUS-LABEL: v_ashr_i64_32:
1419 ; GFX10PLUS: ; %bb.0:
1420 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1
1422 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1423 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1424 %result = ashr i64 %value, 32
; ashr of an i64 by the constant 31. Unlike the 32/33/63 constant cases, the
; checks here expect a full 64-bit shift with the immediate 31: v_ashr_i64 on
; tahiti and v_ashrrev_i64 on fiji and later.
1428 define i64 @v_ashr_i64_31(i64 %value) {
1429 ; GFX6-LABEL: v_ashr_i64_31:
1431 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
1433 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1435 ; GFX8-LABEL: v_ashr_i64_31:
1437 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1439 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1441 ; GFX9-LABEL: v_ashr_i64_31:
1443 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1445 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1447 ; GFX10PLUS-LABEL: v_ashr_i64_31:
1448 ; GFX10PLUS: ; %bb.0:
1449 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1451 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1452 %result = ashr i64 %value, 31
; amdgpu_ps test: ashr of an inreg i64 by an inreg i64 amount. All checked
; configurations expect a single s_ashr_i64 whose amount operand is s2.
1456 define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
1457 ; GCN-LABEL: s_ashr_i64:
1459 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
1460 ; GCN-NEXT: ; return to shader part epilog
1462 ; GFX10PLUS-LABEL: s_ashr_i64:
1463 ; GFX10PLUS: ; %bb.0:
1464 ; GFX10PLUS-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
1465 ; GFX10PLUS-NEXT: ; return to shader part epilog
1466 %result = ashr i64 %value, %amount
; amdgpu_ps test: ashr of an inreg i64 by the constant 63. The checks expect
; the high input dword (s1) shifted right by 31 into s0 and then copied to
; s1; no 64-bit shift is emitted.
1470 define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
1471 ; GCN-LABEL: s_ashr_i64_63:
1473 ; GCN-NEXT: s_ashr_i32 s0, s1, 31
1474 ; GCN-NEXT: s_mov_b32 s1, s0
1475 ; GCN-NEXT: ; return to shader part epilog
1477 ; GFX10PLUS-LABEL: s_ashr_i64_63:
1478 ; GFX10PLUS: ; %bb.0:
1479 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s1, 31
1480 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
1481 ; GFX10PLUS-NEXT: ; return to shader part epilog
1482 %result = ashr i64 %value, 63
; amdgpu_ps test: ashr of an inreg i64 by the constant 33. The checks expect
; two 32-bit shifts of the high input dword (s1): by 1 for the low result and
; by 31 for the high result. The pre-gfx10 sequence goes through s2 and a
; move, while the gfx1010/gfx1100 sequence writes s1 in place.
1486 define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
1487 ; GCN-LABEL: s_ashr_i64_33:
1489 ; GCN-NEXT: s_ashr_i32 s2, s1, 31
1490 ; GCN-NEXT: s_ashr_i32 s0, s1, 1
1491 ; GCN-NEXT: s_mov_b32 s1, s2
1492 ; GCN-NEXT: ; return to shader part epilog
1494 ; GFX10PLUS-LABEL: s_ashr_i64_33:
1495 ; GFX10PLUS: ; %bb.0:
1496 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s1, 1
1497 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 31
1498 ; GFX10PLUS-NEXT: ; return to shader part epilog
1499 %result = ashr i64 %value, 33
; amdgpu_ps test: ashr of an inreg i64 by the constant 32. The checks expect
; the high input dword (s1) to be copied to s0 and then s1 shifted right by
; 31 in place; no 64-bit shift is emitted.
1503 define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
1504 ; GCN-LABEL: s_ashr_i64_32:
1506 ; GCN-NEXT: s_mov_b32 s0, s1
1507 ; GCN-NEXT: s_ashr_i32 s1, s1, 31
1508 ; GCN-NEXT: ; return to shader part epilog
1510 ; GFX10PLUS-LABEL: s_ashr_i64_32:
1511 ; GFX10PLUS: ; %bb.0:
1512 ; GFX10PLUS-NEXT: s_mov_b32 s0, s1
1513 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 31
1514 ; GFX10PLUS-NEXT: ; return to shader part epilog
1515 %result = ashr i64 %value, 32
; amdgpu_ps test: ashr of an inreg i64 by the constant 31. All checked
; configurations expect a single s_ashr_i64 with the immediate 31.
1519 define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
1520 ; GCN-LABEL: s_ashr_i64_31:
1522 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], 31
1523 ; GCN-NEXT: ; return to shader part epilog
1525 ; GFX10PLUS-LABEL: s_ashr_i64_31:
1526 ; GFX10PLUS: ; %bb.0:
1527 ; GFX10PLUS-NEXT: s_ashr_i64 s[0:1], s[0:1], 31
1528 ; GFX10PLUS-NEXT: ; return to shader part epilog
1529 %result = ashr i64 %value, 31
; amdgpu_ps test: ashr of an inreg i64 value by a non-inreg i64 amount, with
; the result bitcast to <2 x float>. The checks expect one VALU 64-bit shift
; that reads the value from s[0:1] and the amount from v0.
1533 define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
1534 ; GFX6-LABEL: ashr_i64_sv:
1536 ; GFX6-NEXT: v_ashr_i64 v[0:1], s[0:1], v0
1537 ; GFX6-NEXT: ; return to shader part epilog
1539 ; GFX8-LABEL: ashr_i64_sv:
1541 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1542 ; GFX8-NEXT: ; return to shader part epilog
1544 ; GFX9-LABEL: ashr_i64_sv:
1546 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1547 ; GFX9-NEXT: ; return to shader part epilog
1549 ; GFX10PLUS-LABEL: ashr_i64_sv:
1550 ; GFX10PLUS: ; %bb.0:
1551 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
1552 ; GFX10PLUS-NEXT: ; return to shader part epilog
1553 %result = ashr i64 %value, %amount
1554 %cast = bitcast i64 %result to <2 x float>
1555 ret <2 x float> %cast
; amdgpu_ps test: ashr of a non-inreg i64 value by an inreg i64 amount, with
; the result bitcast to <2 x float>. The checks expect one VALU 64-bit shift
; that reads the value from v[0:1] and the amount from s0.
1558 define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
1559 ; GFX6-LABEL: ashr_i64_vs:
1561 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], s0
1562 ; GFX6-NEXT: ; return to shader part epilog
1564 ; GFX8-LABEL: ashr_i64_vs:
1566 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1567 ; GFX8-NEXT: ; return to shader part epilog
1569 ; GFX9-LABEL: ashr_i64_vs:
1571 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1572 ; GFX9-NEXT: ; return to shader part epilog
1574 ; GFX10PLUS-LABEL: ashr_i64_vs:
1575 ; GFX10PLUS: ; %bb.0:
1576 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
1577 ; GFX10PLUS-NEXT: ; return to shader part epilog
1578 %result = ashr i64 %value, %amount
1579 %cast = bitcast i64 %result to <2 x float>
1580 ret <2 x float> %cast
; ashr of a <2 x i64> value by a <2 x i64> amount with no inreg arguments.
; The checks expect two independent 64-bit shifts, on v[0:1] and v[2:3],
; whose amount operands are v4 and v6 respectively.
1583 define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1584 ; GFX6-LABEL: v_ashr_v2i64:
1586 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1587 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v4
1588 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], v6
1589 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1591 ; GFX8-LABEL: v_ashr_v2i64:
1593 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1595 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1596 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1598 ; GFX9-LABEL: v_ashr_v2i64:
1600 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1602 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1603 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1605 ; GFX10PLUS-LABEL: v_ashr_v2i64:
1606 ; GFX10PLUS: ; %bb.0:
1607 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
1609 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
1610 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1611 %result = ashr <2 x i64> %value, %amount
1612 ret <2 x i64> %result
; ashr of a <2 x i64> value by the constant splat <31, 31>. The checks expect
; two 64-bit shifts with the immediate 31, one per element (v[0:1] and
; v[2:3]).
1615 define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
1616 ; GFX6-LABEL: v_ashr_v2i64_31:
1618 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619 ; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
1620 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 31
1621 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1623 ; GFX8-LABEL: v_ashr_v2i64_31:
1625 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1627 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1628 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1630 ; GFX9-LABEL: v_ashr_v2i64_31:
1632 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633 ; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1634 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1635 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1637 ; GFX10PLUS-LABEL: v_ashr_v2i64_31:
1638 ; GFX10PLUS: ; %bb.0:
1639 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
1641 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
1642 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1643 %result = ashr <2 x i64> %value, <i64 31, i64 31>
1644 ret <2 x i64> %result
; amdgpu_ps test: ashr of <2 x i64> where both value and amount are inreg.
; All checked configurations expect two s_ashr_i64 instructions, on s[0:1]
; and s[2:3], with amount operands s4 and s6 respectively.
1647 define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1648 ; GCN-LABEL: s_ashr_v2i64:
1650 ; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s4
1651 ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s6
1652 ; GCN-NEXT: ; return to shader part epilog
1654 ; GFX10PLUS-LABEL: s_ashr_v2i64:
1655 ; GFX10PLUS: ; %bb.0:
1656 ; GFX10PLUS-NEXT: s_ashr_i64 s[0:1], s[0:1], s4
1657 ; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], s6
1658 ; GFX10PLUS-NEXT: ; return to shader part epilog
1659 %result = ashr <2 x i64> %value, %amount
1660 ret <2 x i64> %result
; ashr of an i65 value by a variable i65 amount with no inreg arguments. The
; checks show the top bit (bit 0 of v2) sign-extended with v_bfe_i32 offset 0
; width 1, 64-bit shifts computed for both amount and amount-64, and
; v_cndmask selects driven by two compares on v3: unsigned 64 > amount, and
; amount == 0 (which selects the original v0/v1). gfx1010 and gfx1100 have
; separate expectations here; in the lines below they differ only in the
; scalar register that holds the amount == 0 mask (s4 versus s0).
1663 define i65 @v_ashr_i65(i65 %value, i65 %amount) {
1664 ; GFX6-LABEL: v_ashr_i65:
1666 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667 ; GFX6-NEXT: v_bfe_i32 v4, v2, 0, 1
1668 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1669 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
1670 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
1671 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
1672 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
1673 ; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
1674 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
1675 ; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1676 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[4:5], v2
1677 ; GFX6-NEXT: v_or_b32_e32 v7, v7, v9
1678 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1679 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1680 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1681 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1682 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1683 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1684 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1685 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX8-LABEL: v_ashr_i65:
1689 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX8-NEXT: v_bfe_i32 v4, v2, 0, 1
1691 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1692 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
1693 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1694 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1695 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
1696 ; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
1697 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
1698 ; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1699 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
1700 ; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
1701 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1702 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1703 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1704 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1705 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1706 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1707 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1708 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX9-LABEL: v_ashr_i65:
1712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX9-NEXT: v_bfe_i32 v4, v2, 0, 1
1714 ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1715 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
1716 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1717 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
1718 ; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
1719 ; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
1720 ; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
1721 ; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
1722 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
1723 ; GFX9-NEXT: v_or_b32_e32 v7, v7, v9
1724 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
1725 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1726 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
1727 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1728 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1729 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1730 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v10, vcc
1731 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1733 ; GFX10-LABEL: v_ashr_i65:
1735 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
1737 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1738 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
1739 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1740 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1741 ; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1742 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v3
1743 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
1744 ; GFX10-NEXT: v_ashrrev_i64 v[10:11], v10, v[4:5]
1745 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v8
1746 ; GFX10-NEXT: v_or_b32_e32 v8, v7, v9
1747 ; GFX10-NEXT: v_ashrrev_i64 v[6:7], v3, v[4:5]
1748 ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v5
1749 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
1750 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v11, v8, vcc_lo
1751 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v0, s4
1752 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v4, v1, s4
1753 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc_lo
1754 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1756 ; GFX11-LABEL: v_ashr_i65:
1758 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
1760 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
1761 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
1762 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
1763 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
1764 ; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
1765 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v3
1766 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
1767 ; GFX11-NEXT: v_ashrrev_i64 v[10:11], v10, v[4:5]
1768 ; GFX11-NEXT: v_or_b32_e32 v2, v6, v8
1769 ; GFX11-NEXT: v_or_b32_e32 v8, v7, v9
1770 ; GFX11-NEXT: v_ashrrev_i64 v[6:7], v3, v[4:5]
1771 ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v5
1772 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
1773 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v11, v8, vcc_lo
1774 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
1775 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v4, v1, s0
1776 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc_lo
1777 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1778 %result = ashr i65 %value, %amount
; ashr of an i65 value by the constant 33. The checks show the top bit (bit 0
; of v2) sign-extended with v_bfe_i32 offset 0 width 1, that extension
; shifted left by 31 as a 64-bit value into v[0:1], and the original dword 1
; (saved in v3) shifted right by 1 and OR-ed into v0. No compares or selects
; are expected for the constant amount.
1782 define i65 @v_ashr_i65_33(i65 %value) {
1783 ; GFX6-LABEL: v_ashr_i65_33:
1785 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786 ; GFX6-NEXT: v_mov_b32_e32 v3, v1
1787 ; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1
1788 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1789 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], 31
1790 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1791 ; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
1792 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1793 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1795 ; GFX8-LABEL: v_ashr_i65_33:
1797 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798 ; GFX8-NEXT: v_mov_b32_e32 v3, v1
1799 ; GFX8-NEXT: v_bfe_i32 v1, v2, 0, 1
1800 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1801 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1802 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1803 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
1804 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1805 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1807 ; GFX9-LABEL: v_ashr_i65_33:
1809 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1810 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1811 ; GFX9-NEXT: v_bfe_i32 v1, v2, 0, 1
1812 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1813 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1814 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1815 ; GFX9-NEXT: v_or_b32_e32 v0, v3, v0
1816 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1817 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1819 ; GFX10PLUS-LABEL: v_ashr_i65_33:
1820 ; GFX10PLUS: ; %bb.0:
1821 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1822 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
1823 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1
1824 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1825 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1826 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1827 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1828 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v3, v0
1829 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1830 %result = ashr i65 %value, 33
; Test: arithmetic shift right of an i65 value by a variable i65 amount in an
; amdgpu_ps function with both arguments marked inreg (the checks operate on
; SGPRs and end with the "return to shader part epilog" marker).
; The expected code sign-extends from a single bit (s_bfe_i64 ..., 0x10000),
; compares the amount in s3 against 64 and against 0, and picks the final
; values with s_cselect_b64 based on those two comparisons.
; Check layout: one sequence under the GCN prefix (used by the tahiti, fiji
; and gfx900 RUN lines) and one under GFX10PLUS (used by the gfx1010 and
; gfx1100 RUN lines). Both expect the same instructions; they differ in
; register assignment and instruction order.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing by hand.
1834 define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
1835 ; GCN-LABEL: s_ashr_i65:
1837 ; GCN-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1838 ; GCN-NEXT: s_sub_i32 s10, s3, 64
1839 ; GCN-NEXT: s_sub_i32 s8, 64, s3
1840 ; GCN-NEXT: s_cmp_lt_u32 s3, 64
1841 ; GCN-NEXT: s_cselect_b32 s11, 1, 0
1842 ; GCN-NEXT: s_cmp_eq_u32 s3, 0
1843 ; GCN-NEXT: s_cselect_b32 s12, 1, 0
1844 ; GCN-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
1845 ; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1846 ; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1847 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1848 ; GCN-NEXT: s_ashr_i32 s8, s5, 31
1849 ; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
1850 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1851 ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1852 ; GCN-NEXT: s_cmp_lg_u32 s12, 0
1853 ; GCN-NEXT: s_mov_b32 s9, s8
1854 ; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1855 ; GCN-NEXT: s_cmp_lg_u32 s11, 0
1856 ; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
1857 ; GCN-NEXT: ; return to shader part epilog
1859 ; GFX10PLUS-LABEL: s_ashr_i65:
1860 ; GFX10PLUS: ; %bb.0:
1861 ; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1862 ; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
1863 ; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
1864 ; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
1865 ; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
1866 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
1867 ; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
1868 ; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
1869 ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1870 ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1871 ; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
1872 ; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1873 ; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
1874 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
1875 ; GFX10PLUS-NEXT: s_mov_b32 s11, s10
1876 ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1877 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
1878 ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1879 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
1880 ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
1881 ; GFX10PLUS-NEXT: ; return to shader part epilog
1882 %result = ashr i65 %value, %amount
; Test: arithmetic shift right of an i65 value by the constant 33 in an
; amdgpu_ps function with the argument marked inreg (the checks operate on
; SGPRs and end with the "return to shader part epilog" marker).
; Check layout: one sequence under the GCN prefix (used by the tahiti, fiji
; and gfx900 RUN lines) and one under GFX10PLUS (used by the gfx1010 and
; gfx1100 RUN lines). The two differ only in the order of the final
; s_or_b64 and s_ashr_i32.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing by hand.
1886 define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
1887 ; GCN-LABEL: s_ashr_i65_33:
1889 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1890 ; GCN-NEXT: s_lshr_b32 s0, s1, 1
1891 ; GCN-NEXT: s_mov_b32 s1, 0
1892 ; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1893 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1894 ; GCN-NEXT: s_ashr_i32 s2, s3, 1
1895 ; GCN-NEXT: ; return to shader part epilog
1897 ; GFX10PLUS-LABEL: s_ashr_i65_33:
1898 ; GFX10PLUS: ; %bb.0:
1899 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1900 ; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1901 ; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1902 ; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1903 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s3, 1
1904 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1905 ; GFX10PLUS-NEXT: ; return to shader part epilog
1906 %result = ashr i65 %value, 33
1910 ; FIXME: Argument lowering asserts for the <2 x i65> tests below, so they are left commented out.
1911 ; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1912 ; %result = ashr <2 x i65> %value, %amount
1913 ; ret <2 x i65> %result
1916 ; define amdgpu_ps <2 x i65> @s_ashr_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1917 ; %result = ashr <2 x i65> %value, %amount
1918 ; ret <2 x i65> %result