1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Test: i8 argument shifted left by 4 and then arithmetically shifted right by 4
; (sign-extension of the low 4 bits). Both check prefixes expect one v_bfe_i32
; whose last operand is 4.
8 define i8 @v_sext_inreg_i8_4(i8 %value) {
9 ; GCN-LABEL: v_sext_inreg_i8_4:
11 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 ; GFX10PLUS-LABEL: v_sext_inreg_i8_4:
17 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4
19 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
20 %shl = shl i8 %value, 4
21 %ashr = ashr i8 %shl, 4
; Test: i8 argument shifted left by 7 and then arithmetically shifted right by 7
; (sign-extension of the lowest bit). Both check prefixes expect one v_bfe_i32
; whose last operand is 1.
25 define i8 @v_sext_inreg_i8_7(i8 %value) {
26 ; GCN-LABEL: v_sext_inreg_i8_7:
28 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
30 ; GCN-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10PLUS-LABEL: v_sext_inreg_i8_7:
34 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
36 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
37 %shl = shl i8 %value, 7
38 %ashr = ashr i8 %shl, 7
; Test: amdgpu_ps function with an inreg i8 argument, shifted left by 3 and then
; arithmetically shifted right by 3. The GFX6 checks expect a single s_bfe_i32;
; the GFX8, GFX9 and GFX10PLUS checks expect the three-instruction sequence
; s_lshl_b32 / s_sext_i32_i8 / s_ashr_i32.
42 define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) {
43 ; GFX6-LABEL: s_sext_inreg_i8:
45 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000
46 ; GFX6-NEXT: ; return to shader part epilog
48 ; GFX8-LABEL: s_sext_inreg_i8:
50 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
51 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
52 ; GFX8-NEXT: s_ashr_i32 s0, s0, 3
53 ; GFX8-NEXT: ; return to shader part epilog
55 ; GFX9-LABEL: s_sext_inreg_i8:
57 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
58 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
59 ; GFX9-NEXT: s_ashr_i32 s0, s0, 3
60 ; GFX9-NEXT: ; return to shader part epilog
62 ; GFX10PLUS-LABEL: s_sext_inreg_i8:
64 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3
65 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
66 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3
67 ; GFX10PLUS-NEXT: ; return to shader part epilog
68 %shl = shl i8 %value, 3
69 %ashr = ashr i8 %shl, 3
; Test: amdgpu_ps function with an inreg i8 argument, shifted left by 6 and then
; arithmetically shifted right by 6. The GFX6 checks expect a single s_bfe_i32;
; the GFX8, GFX9 and GFX10PLUS checks expect s_lshl_b32 / s_sext_i32_i8 /
; s_ashr_i32.
73 define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) {
74 ; GFX6-LABEL: s_sext_inreg_i8_6:
76 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
77 ; GFX6-NEXT: ; return to shader part epilog
79 ; GFX8-LABEL: s_sext_inreg_i8_6:
81 ; GFX8-NEXT: s_lshl_b32 s0, s0, 6
82 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
83 ; GFX8-NEXT: s_ashr_i32 s0, s0, 6
84 ; GFX8-NEXT: ; return to shader part epilog
86 ; GFX9-LABEL: s_sext_inreg_i8_6:
88 ; GFX9-NEXT: s_lshl_b32 s0, s0, 6
89 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
90 ; GFX9-NEXT: s_ashr_i32 s0, s0, 6
91 ; GFX9-NEXT: ; return to shader part epilog
93 ; GFX10PLUS-LABEL: s_sext_inreg_i8_6:
95 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6
96 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
97 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6
98 ; GFX10PLUS-NEXT: ; return to shader part epilog
99 %shl = shl i8 %value, 6
100 %ashr = ashr i8 %shl, 6
; Test: i24 argument shifted left by 12 and then arithmetically shifted right by
; 12 (sign-extension of the low 12 bits).
; The ashr previously consumed %value instead of %shl, which left %shl dead and
; made this a plain-ashr test rather than a sext_inreg test.
; NOTE(review): the assertions below were adjusted by hand to the single
; v_bfe_i32 form that the other v_* tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm the exact output.
104 define i24 @v_sext_inreg_i24_12(i24 %value) {
105 ; GCN-LABEL: v_sext_inreg_i24_12:
107 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 12
110 ; GCN-NEXT: s_setpc_b64 s[30:31]
112 ; GFX10PLUS-LABEL: v_sext_inreg_i24_12:
113 ; GFX10PLUS: ; %bb.0:
114 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12
117 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
118 %shl = shl i24 %value, 12
119 %ashr = ashr i24 %shl, 12
; Test: i24 argument shifted left by 7 and then arithmetically shifted right by
; 7 (sign-extension of the low 17 bits). Both check prefixes expect one
; v_bfe_i32 whose last operand is 17.
123 define i24 @v_sext_inreg_i24_7(i24 %value) {
124 ; GCN-LABEL: v_sext_inreg_i24_7:
126 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 17
128 ; GCN-NEXT: s_setpc_b64 s[30:31]
130 ; GFX10PLUS-LABEL: v_sext_inreg_i24_7:
131 ; GFX10PLUS: ; %bb.0:
132 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 17
134 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
135 %shl = shl i24 %value, 7
136 %ashr = ashr i24 %shl, 7
; Test: amdgpu_ps function with an inreg i24 argument, shifted left by 8 and
; then arithmetically shifted right by 8 (sign-extension of the low 16 bits).
; Both check prefixes expect a single s_sext_i32_i16.
140 define amdgpu_ps i24 @s_sext_inreg_i24_8(i24 inreg %value) {
141 ; GCN-LABEL: s_sext_inreg_i24_8:
143 ; GCN-NEXT: s_sext_i32_i16 s0, s0
144 ; GCN-NEXT: ; return to shader part epilog
146 ; GFX10PLUS-LABEL: s_sext_inreg_i24_8:
147 ; GFX10PLUS: ; %bb.0:
148 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
149 ; GFX10PLUS-NEXT: ; return to shader part epilog
150 %shl = shl i24 %value, 8
151 %ashr = ashr i24 %shl, 8
; Test: amdgpu_ps function with an inreg i24 argument, shifted left by 7 and
; then arithmetically shifted right by 7. Both check prefixes expect a single
; s_bfe_i32 with immediate 0x110000.
155 define amdgpu_ps i24 @s_sext_inreg_i24_7(i24 inreg %value) {
156 ; GCN-LABEL: s_sext_inreg_i24_7:
158 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x110000
159 ; GCN-NEXT: ; return to shader part epilog
161 ; GFX10PLUS-LABEL: s_sext_inreg_i24_7:
162 ; GFX10PLUS: ; %bb.0:
163 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x110000
164 ; GFX10PLUS-NEXT: ; return to shader part epilog
165 %shl = shl i24 %value, 7
166 %ashr = ashr i24 %shl, 7
; Test: i32 argument shifted left by 3 and then arithmetically shifted right by
; 3 (sign-extension of the low 29 bits). Both check prefixes expect one
; v_bfe_i32 whose last operand is 29.
170 define i32 @v_sext_inreg_i32_3(i32 %value) {
171 ; GCN-LABEL: v_sext_inreg_i32_3:
173 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 29
175 ; GCN-NEXT: s_setpc_b64 s[30:31]
177 ; GFX10PLUS-LABEL: v_sext_inreg_i32_3:
178 ; GFX10PLUS: ; %bb.0:
179 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 29
181 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182 %shl = shl i32 %value, 3
183 %ashr = ashr i32 %shl, 3
; Test: i32 argument shifted left by 31 and then arithmetically shifted right by
; 31 (sign-extension of the lowest bit).
; The ashr previously consumed %value instead of %shl, which left %shl dead and
; made this a plain-ashr test rather than a sext_inreg test.
; NOTE(review): the assertions below were adjusted by hand to the v_bfe_i32 form
; that v_sext_inreg_v2i32_31 produces per element; re-run
; utils/update_llc_test_checks.py to confirm the exact output.
187 define i32 @v_sext_inreg_i32_31(i32 %value) {
188 ; GCN-LABEL: v_sext_inreg_i32_31:
190 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
192 ; GCN-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10PLUS-LABEL: v_sext_inreg_i32_31:
195 ; GFX10PLUS: ; %bb.0:
196 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
198 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
199 %shl = shl i32 %value, 31
200 %ashr = ashr i32 %shl, 31
; Test: amdgpu_ps function with an inreg i32 argument, shifted left by 2 and
; then arithmetically shifted right by 2. Both check prefixes expect a single
; s_bfe_i32 with immediate 0x1e0000.
204 define amdgpu_ps i32 @s_sext_inreg_i32_2(i32 inreg %value) {
205 ; GCN-LABEL: s_sext_inreg_i32_2:
207 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1e0000
208 ; GCN-NEXT: ; return to shader part epilog
210 ; GFX10PLUS-LABEL: s_sext_inreg_i32_2:
211 ; GFX10PLUS: ; %bb.0:
212 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1e0000
213 ; GFX10PLUS-NEXT: ; return to shader part epilog
214 %shl = shl i32 %value, 2
215 %ashr = ashr i32 %shl, 2
; Test: amdgpu_ps function with an inreg i32 argument, shifted left by 31 and
; then arithmetically shifted right by 31. Both check prefixes expect a single
; s_bfe_i32 with immediate 0x10000.
219 define amdgpu_ps i32 @s_sext_inreg_i32_31(i32 inreg %value) {
220 ; GCN-LABEL: s_sext_inreg_i32_31:
222 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x10000
223 ; GCN-NEXT: ; return to shader part epilog
225 ; GFX10PLUS-LABEL: s_sext_inreg_i32_31:
226 ; GFX10PLUS: ; %bb.0:
227 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x10000
228 ; GFX10PLUS-NEXT: ; return to shader part epilog
229 %shl = shl i32 %value, 31
230 %ashr = ashr i32 %shl, 31
; Test: <2 x i32> argument, every lane shifted left by 14 and then
; arithmetically shifted right by 14. Both check prefixes expect one v_bfe_i32
; per lane whose last operand is 18.
234 define <2 x i32> @v_sext_inreg_v2i32_14(<2 x i32> %value) {
235 ; GCN-LABEL: v_sext_inreg_v2i32_14:
237 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 18
239 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 18
240 ; GCN-NEXT: s_setpc_b64 s[30:31]
242 ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14:
243 ; GFX10PLUS: ; %bb.0:
244 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18
246 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18
247 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
248 %shl = shl <2 x i32> %value, <i32 14, i32 14>
249 %ashr = ashr <2 x i32> %shl, <i32 14, i32 14>
; Test: <2 x i32> argument, every lane shifted left by 31 and then
; arithmetically shifted right by 31. Both check prefixes expect one v_bfe_i32
; per lane whose last operand is 1.
; Note: the shift-right result is named %shr here, unlike the %ashr used by the
; neighbouring tests.
253 define <2 x i32> @v_sext_inreg_v2i32_31(<2 x i32> %value) {
254 ; GCN-LABEL: v_sext_inreg_v2i32_31:
256 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
258 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 1
259 ; GCN-NEXT: s_setpc_b64 s[30:31]
261 ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31:
262 ; GFX10PLUS: ; %bb.0:
263 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
265 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1
266 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
267 %shl = shl <2 x i32> %value, <i32 31, i32 31>
268 %shr = ashr <2 x i32> %shl, <i32 31, i32 31>
; Test: amdgpu_ps function with an inreg <2 x i32> argument, every lane shifted
; left by 22 and then arithmetically shifted right by 22. Both check prefixes
; expect one s_bfe_i32 per lane with immediate 0xa0000.
272 define amdgpu_ps <2 x i32> @s_sext_inreg_v2i32_22(<2 x i32> inreg %value) {
273 ; GCN-LABEL: s_sext_inreg_v2i32_22:
275 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000
276 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000
277 ; GCN-NEXT: ; return to shader part epilog
279 ; GFX10PLUS-LABEL: s_sext_inreg_v2i32_22:
280 ; GFX10PLUS: ; %bb.0:
281 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000
282 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000
283 ; GFX10PLUS-NEXT: ; return to shader part epilog
284 %shl = shl <2 x i32> %value, <i32 22, i32 22>
285 %ashr = ashr <2 x i32> %shl, <i32 22, i32 22>
; Test: <3 x i32> argument, every lane shifted left by 16 and then
; arithmetically shifted right by 16. Both check prefixes expect one v_bfe_i32
; per lane whose last operand is 16.
; NOTE(review): the second parameter %amount is not referenced by the shl/ashr
; below; it looks like a leftover - confirm before removing.
289 define <3 x i32> @v_sext_inreg_v3i32_16(<3 x i32> %value, <3 x i32> %amount) {
290 ; GCN-LABEL: v_sext_inreg_v3i32_16:
292 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
294 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 16
295 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16
296 ; GCN-NEXT: s_setpc_b64 s[30:31]
298 ; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16:
299 ; GFX10PLUS: ; %bb.0:
300 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
302 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16
303 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16
304 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
305 %shl = shl <3 x i32> %value, <i32 16, i32 16, i32 16>
306 %ashr = ashr <3 x i32> %shl, <i32 16, i32 16, i32 16>
; Test: amdgpu_ps function with an inreg <3 x i32> argument, every lane shifted
; left by 22 and then arithmetically shifted right by 22. Both check prefixes
; expect one s_bfe_i32 per lane with immediate 0xa0000.
310 define amdgpu_ps <3 x i32> @s_sext_inreg_v3i32_22(<3 x i32> inreg %value) {
311 ; GCN-LABEL: s_sext_inreg_v3i32_22:
313 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000
314 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000
315 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xa0000
316 ; GCN-NEXT: ; return to shader part epilog
318 ; GFX10PLUS-LABEL: s_sext_inreg_v3i32_22:
319 ; GFX10PLUS: ; %bb.0:
320 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000
321 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000
322 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xa0000
323 ; GFX10PLUS-NEXT: ; return to shader part epilog
324 %shl = shl <3 x i32> %value, <i32 22, i32 22, i32 22>
325 %ashr = ashr <3 x i32> %shl, <i32 22, i32 22, i32 22>
; Test: <4 x i32> argument, every lane shifted left by 6 and then arithmetically
; shifted right by 6. Both check prefixes expect one v_bfe_i32 per lane whose
; last operand is 26.
329 define <4 x i32> @v_sext_inreg_v4i32_6(<4 x i32> %value) {
330 ; GCN-LABEL: v_sext_inreg_v4i32_6:
332 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 26
334 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 26
335 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 26
336 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 26
337 ; GCN-NEXT: s_setpc_b64 s[30:31]
339 ; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6:
340 ; GFX10PLUS: ; %bb.0:
341 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26
343 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26
344 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26
345 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 26
346 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
347 %shl = shl <4 x i32> %value, <i32 6, i32 6, i32 6, i32 6>
348 %ashr = ashr <4 x i32> %shl, <i32 6, i32 6, i32 6, i32 6>
; Test: amdgpu_ps function with an inreg <4 x i32> argument, every lane shifted
; left by 13 and then arithmetically shifted right by 13. Both check prefixes
; expect one s_bfe_i32 per lane with immediate 0x130000.
352 define amdgpu_ps <4 x i32> @s_sext_inreg_v4i32_13(<4 x i32> inreg %value) {
353 ; GCN-LABEL: s_sext_inreg_v4i32_13:
355 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x130000
356 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x130000
357 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x130000
358 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x130000
359 ; GCN-NEXT: ; return to shader part epilog
361 ; GFX10PLUS-LABEL: s_sext_inreg_v4i32_13:
362 ; GFX10PLUS: ; %bb.0:
363 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x130000
364 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x130000
365 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x130000
366 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x130000
367 ; GFX10PLUS-NEXT: ; return to shader part epilog
368 %shl = shl <4 x i32> %value, <i32 13, i32 13, i32 13, i32 13>
369 %ashr = ashr <4 x i32> %shl, <i32 13, i32 13, i32 13, i32 13>
; Test: <5 x i32> argument, every lane shifted left by 30 and then
; arithmetically shifted right by 30. Both check prefixes expect one v_bfe_i32
; per lane whose last operand is 2.
373 define <5 x i32> @v_sext_inreg_v5i32_30(<5 x i32> %value) {
374 ; GCN-LABEL: v_sext_inreg_v5i32_30:
376 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 2
378 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 2
379 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 2
380 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 2
381 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 2
382 ; GCN-NEXT: s_setpc_b64 s[30:31]
384 ; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30:
385 ; GFX10PLUS: ; %bb.0:
386 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
388 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
389 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
390 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2
391 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 2
392 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
393 %shl = shl <5 x i32> %value, <i32 30, i32 30, i32 30, i32 30, i32 30>
394 %ashr = ashr <5 x i32> %shl, <i32 30, i32 30, i32 30, i32 30, i32 30>
; Test: amdgpu_ps function with an inreg <5 x i32> argument, every lane shifted
; left by 19 and then arithmetically shifted right by 19 (sign-extension of the
; low 13 bits of each lane).
; The ashr previously consumed %value instead of %shl, which left %shl dead and
; made this a plain-ashr test rather than a sext_inreg test.
; NOTE(review): the assertions below were adjusted by hand, following the
; s_bfe_i32 immediates the other s_* i32 tests in this file show (e.g. shift 13
; gives 0x130000, shift 22 gives 0xa0000); re-run
; utils/update_llc_test_checks.py to confirm the exact output.
398 define amdgpu_ps <5 x i32> @s_sext_inreg_v5i32_19(<5 x i32> inreg %value) {
399 ; GCN-LABEL: s_sext_inreg_v5i32_19:
401 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xd0000
402 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xd0000
403 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xd0000
404 ; GCN-NEXT: s_bfe_i32 s3, s3, 0xd0000
405 ; GCN-NEXT: s_bfe_i32 s4, s4, 0xd0000
406 ; GCN-NEXT: ; return to shader part epilog
408 ; GFX10PLUS-LABEL: s_sext_inreg_v5i32_19:
409 ; GFX10PLUS: ; %bb.0:
410 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xd0000
411 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xd0000
412 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xd0000
413 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0xd0000
414 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0xd0000
415 ; GFX10PLUS-NEXT: ; return to shader part epilog
416 %shl = shl <5 x i32> %value, <i32 19, i32 19, i32 19, i32 19, i32 19>
417 %ashr = ashr <5 x i32> %shl, <i32 19, i32 19, i32 19, i32 19, i32 19>
; Test: <16 x i32> argument, every lane shifted left by 27 and then
; arithmetically shifted right by 27. Both check prefixes expect one v_bfe_i32
; per lane (v0 through v15) whose last operand is 5.
421 define <16 x i32> @v_sext_inreg_v16i32_27(<16 x i32> %value) {
422 ; GCN-LABEL: v_sext_inreg_v16i32_27:
424 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 5
426 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 5
427 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 5
428 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 5
429 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 5
430 ; GCN-NEXT: v_bfe_i32 v5, v5, 0, 5
431 ; GCN-NEXT: v_bfe_i32 v6, v6, 0, 5
432 ; GCN-NEXT: v_bfe_i32 v7, v7, 0, 5
433 ; GCN-NEXT: v_bfe_i32 v8, v8, 0, 5
434 ; GCN-NEXT: v_bfe_i32 v9, v9, 0, 5
435 ; GCN-NEXT: v_bfe_i32 v10, v10, 0, 5
436 ; GCN-NEXT: v_bfe_i32 v11, v11, 0, 5
437 ; GCN-NEXT: v_bfe_i32 v12, v12, 0, 5
438 ; GCN-NEXT: v_bfe_i32 v13, v13, 0, 5
439 ; GCN-NEXT: v_bfe_i32 v14, v14, 0, 5
440 ; GCN-NEXT: v_bfe_i32 v15, v15, 0, 5
441 ; GCN-NEXT: s_setpc_b64 s[30:31]
443 ; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27:
444 ; GFX10PLUS: ; %bb.0:
445 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 5
447 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 5
448 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 5
449 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 5
450 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 5
451 ; GFX10PLUS-NEXT: v_bfe_i32 v5, v5, 0, 5
452 ; GFX10PLUS-NEXT: v_bfe_i32 v6, v6, 0, 5
453 ; GFX10PLUS-NEXT: v_bfe_i32 v7, v7, 0, 5
454 ; GFX10PLUS-NEXT: v_bfe_i32 v8, v8, 0, 5
455 ; GFX10PLUS-NEXT: v_bfe_i32 v9, v9, 0, 5
456 ; GFX10PLUS-NEXT: v_bfe_i32 v10, v10, 0, 5
457 ; GFX10PLUS-NEXT: v_bfe_i32 v11, v11, 0, 5
458 ; GFX10PLUS-NEXT: v_bfe_i32 v12, v12, 0, 5
459 ; GFX10PLUS-NEXT: v_bfe_i32 v13, v13, 0, 5
460 ; GFX10PLUS-NEXT: v_bfe_i32 v14, v14, 0, 5
461 ; GFX10PLUS-NEXT: v_bfe_i32 v15, v15, 0, 5
462 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
463 %shl = shl <16 x i32> %value, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
464 %ashr = ashr <16 x i32> %shl, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
; Test: amdgpu_ps function with an inreg <16 x i32> argument, every lane shifted
; left by 3 and then arithmetically shifted right by 3. Both check prefixes
; expect one s_bfe_i32 per lane (s0 through s15) with immediate 0x1d0000.
468 define amdgpu_ps <16 x i32> @s_sext_inreg_v16i32_3(<16 x i32> inreg %value) {
469 ; GCN-LABEL: s_sext_inreg_v16i32_3:
471 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1d0000
472 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x1d0000
473 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x1d0000
474 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x1d0000
475 ; GCN-NEXT: s_bfe_i32 s4, s4, 0x1d0000
476 ; GCN-NEXT: s_bfe_i32 s5, s5, 0x1d0000
477 ; GCN-NEXT: s_bfe_i32 s6, s6, 0x1d0000
478 ; GCN-NEXT: s_bfe_i32 s7, s7, 0x1d0000
479 ; GCN-NEXT: s_bfe_i32 s8, s8, 0x1d0000
480 ; GCN-NEXT: s_bfe_i32 s9, s9, 0x1d0000
481 ; GCN-NEXT: s_bfe_i32 s10, s10, 0x1d0000
482 ; GCN-NEXT: s_bfe_i32 s11, s11, 0x1d0000
483 ; GCN-NEXT: s_bfe_i32 s12, s12, 0x1d0000
484 ; GCN-NEXT: s_bfe_i32 s13, s13, 0x1d0000
485 ; GCN-NEXT: s_bfe_i32 s14, s14, 0x1d0000
486 ; GCN-NEXT: s_bfe_i32 s15, s15, 0x1d0000
487 ; GCN-NEXT: ; return to shader part epilog
489 ; GFX10PLUS-LABEL: s_sext_inreg_v16i32_3:
490 ; GFX10PLUS: ; %bb.0:
491 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1d0000
492 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x1d0000
493 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x1d0000
494 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x1d0000
495 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0x1d0000
496 ; GFX10PLUS-NEXT: s_bfe_i32 s5, s5, 0x1d0000
497 ; GFX10PLUS-NEXT: s_bfe_i32 s6, s6, 0x1d0000
498 ; GFX10PLUS-NEXT: s_bfe_i32 s7, s7, 0x1d0000
499 ; GFX10PLUS-NEXT: s_bfe_i32 s8, s8, 0x1d0000
500 ; GFX10PLUS-NEXT: s_bfe_i32 s9, s9, 0x1d0000
501 ; GFX10PLUS-NEXT: s_bfe_i32 s10, s10, 0x1d0000
502 ; GFX10PLUS-NEXT: s_bfe_i32 s11, s11, 0x1d0000
503 ; GFX10PLUS-NEXT: s_bfe_i32 s12, s12, 0x1d0000
504 ; GFX10PLUS-NEXT: s_bfe_i32 s13, s13, 0x1d0000
505 ; GFX10PLUS-NEXT: s_bfe_i32 s14, s14, 0x1d0000
506 ; GFX10PLUS-NEXT: s_bfe_i32 s15, s15, 0x1d0000
507 ; GFX10PLUS-NEXT: ; return to shader part epilog
508 %shl = shl <16 x i32> %value, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
509 %ashr = ashr <16 x i32> %shl, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Test: i16 argument shifted left by 4 and then arithmetically shifted right by
; 4 (sign-extension of the low 12 bits). The GFX6, GFX9 and GFX10PLUS checks
; expect one v_bfe_i32 whose last operand is 12; the GFX8 checks expect a
; v_lshlrev_b16 / v_ashrrev_i16 pair instead.
513 define i16 @v_sext_inreg_i16_4(i16 %value) {
514 ; GFX6-LABEL: v_sext_inreg_i16_4:
516 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 12
518 ; GFX6-NEXT: s_setpc_b64 s[30:31]
520 ; GFX8-LABEL: v_sext_inreg_i16_4:
522 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
524 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 4, v0
525 ; GFX8-NEXT: s_setpc_b64 s[30:31]
527 ; GFX9-LABEL: v_sext_inreg_i16_4:
529 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 12
531 ; GFX9-NEXT: s_setpc_b64 s[30:31]
533 ; GFX10PLUS-LABEL: v_sext_inreg_i16_4:
534 ; GFX10PLUS: ; %bb.0:
535 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12
537 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
538 %shl = shl i16 %value, 4
539 %ashr = ashr i16 %shl, 4
; Test: i16 argument shifted left by 15 and then arithmetically shifted right by
; 15 (sign-extension of the lowest bit). The GFX6, GFX9 and GFX10PLUS checks
; expect one v_bfe_i32 whose last operand is 1; the GFX8 checks expect a
; v_lshlrev_b16 / v_ashrrev_i16 pair instead.
543 define i16 @v_sext_inreg_i16_15(i16 %value) {
544 ; GFX6-LABEL: v_sext_inreg_i16_15:
546 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
548 ; GFX6-NEXT: s_setpc_b64 s[30:31]
550 ; GFX8-LABEL: v_sext_inreg_i16_15:
552 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0
554 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
555 ; GFX8-NEXT: s_setpc_b64 s[30:31]
557 ; GFX9-LABEL: v_sext_inreg_i16_15:
559 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
561 ; GFX9-NEXT: s_setpc_b64 s[30:31]
563 ; GFX10PLUS-LABEL: v_sext_inreg_i16_15:
564 ; GFX10PLUS: ; %bb.0:
565 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
567 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
568 %shl = shl i16 %value, 15
569 %ashr = ashr i16 %shl, 15
; Test: amdgpu_ps function with an inreg i16 argument, shifted left by 9 and
; then arithmetically shifted right by 9. The GFX6 checks expect a single
; s_bfe_i32; the GFX8, GFX9 and GFX10PLUS checks expect s_lshl_b32 /
; s_sext_i32_i16 / s_ashr_i32.
573 define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) {
574 ; GFX6-LABEL: s_sext_inreg_i16_9:
576 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x70000
577 ; GFX6-NEXT: ; return to shader part epilog
579 ; GFX8-LABEL: s_sext_inreg_i16_9:
581 ; GFX8-NEXT: s_lshl_b32 s0, s0, 9
582 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
583 ; GFX8-NEXT: s_ashr_i32 s0, s0, 9
584 ; GFX8-NEXT: ; return to shader part epilog
586 ; GFX9-LABEL: s_sext_inreg_i16_9:
588 ; GFX9-NEXT: s_lshl_b32 s0, s0, 9
589 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
590 ; GFX9-NEXT: s_ashr_i32 s0, s0, 9
591 ; GFX9-NEXT: ; return to shader part epilog
593 ; GFX10PLUS-LABEL: s_sext_inreg_i16_9:
594 ; GFX10PLUS: ; %bb.0:
595 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
596 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
597 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9
598 ; GFX10PLUS-NEXT: ; return to shader part epilog
599 %shl = shl i16 %value, 9
600 %ashr = ashr i16 %shl, 9
; Test: amdgpu_ps function with an inreg i16 argument, shifted left by 15 and
; then arithmetically shifted right by 15. The GFX6 checks expect a single
; s_bfe_i32; the GFX8, GFX9 and GFX10PLUS checks expect s_lshl_b32 /
; s_sext_i32_i16 / s_ashr_i32.
604 define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) {
605 ; GFX6-LABEL: s_sext_inreg_i16_15:
607 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
608 ; GFX6-NEXT: ; return to shader part epilog
610 ; GFX8-LABEL: s_sext_inreg_i16_15:
612 ; GFX8-NEXT: s_lshl_b32 s0, s0, 15
613 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
614 ; GFX8-NEXT: s_ashr_i32 s0, s0, 15
615 ; GFX8-NEXT: ; return to shader part epilog
617 ; GFX9-LABEL: s_sext_inreg_i16_15:
619 ; GFX9-NEXT: s_lshl_b32 s0, s0, 15
620 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
621 ; GFX9-NEXT: s_ashr_i32 s0, s0, 15
622 ; GFX9-NEXT: ; return to shader part epilog
624 ; GFX10PLUS-LABEL: s_sext_inreg_i16_15:
625 ; GFX10PLUS: ; %bb.0:
626 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
627 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
628 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15
629 ; GFX10PLUS-NEXT: ; return to shader part epilog
630 %shl = shl i16 %value, 15
631 %ashr = ashr i16 %shl, 15
; Test: <2 x i16> argument, every lane shifted left by 8 and then arithmetically
; shifted right by 8. The GFX6 checks expect one v_bfe_i32 per lane (v0 and v1);
; the GFX8 checks expect 16-bit shifts combined with an SDWA v_or_b32; the GFX9
; and GFX10PLUS checks expect a v_pk_lshlrev_b16 / v_pk_ashrrev_i16 pair.
635 define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) {
636 ; GFX6-LABEL: v_sext_inreg_v2i16_8:
638 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
640 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
641 ; GFX6-NEXT: s_setpc_b64 s[30:31]
643 ; GFX8-LABEL: v_sext_inreg_v2i16_8:
645 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
647 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
648 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1
649 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
650 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
651 ; GFX8-NEXT: v_or_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
652 ; GFX8-NEXT: s_setpc_b64 s[30:31]
654 ; GFX9-LABEL: v_sext_inreg_v2i16_8:
656 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
658 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
659 ; GFX9-NEXT: s_setpc_b64 s[30:31]
661 ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8:
662 ; GFX10PLUS: ; %bb.0:
663 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
665 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
666 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
667 %shl = shl <2 x i16> %value, <i16 8, i16 8>
668 %ashr = ashr <2 x i16> %shl, <i16 8, i16 8>
; Test: <2 x i16> argument, every lane shifted left by 15 and then
; arithmetically shifted right by 15. The GFX6 checks expect one v_bfe_i32 per
; lane (v0 and v1); the GFX8 checks expect plain and SDWA 16-bit shifts merged
; with v_or_b32; the GFX9 and GFX10PLUS checks expect a v_pk_lshlrev_b16 /
; v_pk_ashrrev_i16 pair.
672 define <2 x i16> @v_sext_inreg_v2i16_15(<2 x i16> %value) {
673 ; GFX6-LABEL: v_sext_inreg_v2i16_15:
675 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
677 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 1
678 ; GFX6-NEXT: s_setpc_b64 s[30:31]
680 ; GFX8-LABEL: v_sext_inreg_v2i16_15:
682 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
684 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
685 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
686 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v1
687 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
688 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
689 ; GFX8-NEXT: s_setpc_b64 s[30:31]
691 ; GFX9-LABEL: v_sext_inreg_v2i16_15:
693 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
695 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
696 ; GFX9-NEXT: s_setpc_b64 s[30:31]
698 ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15:
699 ; GFX10PLUS: ; %bb.0:
700 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
702 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
703 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
704 %shl = shl <2 x i16> %value, <i16 15, i16 15>
705 %ashr = ashr <2 x i16> %shl, <i16 15, i16 15>
; Test: amdgpu_ps function with an inreg <2 x i16> argument, every lane shifted
; left by 11 and then arithmetically shifted right by 11; the result is bitcast
; to i32. The GFX6 checks expect one s_bfe_i32 per lane followed by mask, shift
; and or to repack; the GFX8 checks expect per-half s_lshl_b32 / s_sext_i32_i16
; / s_ashr_i32 and a manual repack; the GFX9 and GFX10PLUS checks expect the
; halves to be recombined with s_pack_ll_b32_b16.
709 define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) {
710 ; GFX6-LABEL: s_sext_inreg_v2i16_11:
712 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x50000
713 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000
714 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
715 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
716 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
717 ; GFX6-NEXT: s_or_b32 s0, s0, s1
718 ; GFX6-NEXT: ; return to shader part epilog
720 ; GFX8-LABEL: s_sext_inreg_v2i16_11:
722 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
723 ; GFX8-NEXT: s_lshl_b32 s0, s0, 11
724 ; GFX8-NEXT: s_lshl_b32 s1, s1, 11
725 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
726 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
727 ; GFX8-NEXT: s_ashr_i32 s0, s0, 11
728 ; GFX8-NEXT: s_ashr_i32 s1, s1, 11
729 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
730 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
731 ; GFX8-NEXT: s_or_b32 s0, s1, s0
732 ; GFX8-NEXT: ; return to shader part epilog
734 ; GFX9-LABEL: s_sext_inreg_v2i16_11:
736 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
737 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xb000b
738 ; GFX9-NEXT: s_lshl_b32 s1, s1, 11
739 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
740 ; GFX9-NEXT: s_sext_i32_i16 s1, s0
741 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
742 ; GFX9-NEXT: s_sext_i32_i16 s2, 0xb000b
743 ; GFX9-NEXT: s_ashr_i32 s1, s1, s2
744 ; GFX9-NEXT: s_ashr_i32 s0, s0, 11
745 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0
746 ; GFX9-NEXT: ; return to shader part epilog
748 ; GFX10PLUS-LABEL: s_sext_inreg_v2i16_11:
749 ; GFX10PLUS: ; %bb.0:
750 ; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16
751 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xb000b
752 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 11
753 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1
754 ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, 0xb000b
755 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0
756 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
757 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s2, s1
758 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 11
759 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s1, s0
760 ; GFX10PLUS-NEXT: ; return to shader part epilog
761 %shl = shl <2 x i16> %value, <i16 11, i16 11>
762 %ashr = ashr <2 x i16> %shl, <i16 11, i16 11>
763 %cast = bitcast <2 x i16> %ashr to i32
768 ; define <3 x i16> @v_sext_inreg_v3i16_4(<3 x i16> %value) {
769 ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4>
770 ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4>
771 ; ret <3 x i16> %ashr
774 ; define amdgpu_ps <3 x i16> @s_sext_inreg_v3i16_4(<3 x i16> inreg %value) {
775 ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4>
776 ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4>
777 ; ret <3 x i16> %ashr
; Test: <4 x i16> argument, every lane shifted left by 3 and then arithmetically
; shifted right by 3; the result is bitcast to <2 x float> and returned. The
; GFX6 checks expect one v_bfe_i32 per lane followed by mask, shift and or to
; repack pairs; the GFX8 checks expect plain and SDWA 16-bit shifts merged with
; v_or_b32; the GFX9 and GFX10PLUS checks expect v_pk_lshlrev_b16 /
; v_pk_ashrrev_i16 on each of v0 and v1.
780 define <2 x float> @v_sext_inreg_v4i16_3(<4 x i16> %value) {
781 ; GFX6-LABEL: v_sext_inreg_v4i16_3:
783 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 13
785 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 13
786 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
787 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 13
788 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 13
789 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
790 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
791 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
792 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
793 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
794 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
795 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
796 ; GFX6-NEXT: s_setpc_b64 s[30:31]
798 ; GFX8-LABEL: v_sext_inreg_v4i16_3:
800 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX8-NEXT: v_mov_b32_e32 v3, 3
802 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 3, v0
803 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
804 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 3, v1
805 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
806 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v2
807 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
808 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
809 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v4
810 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
811 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
812 ; GFX8-NEXT: s_setpc_b64 s[30:31]
814 ; GFX9-LABEL: v_sext_inreg_v4i16_3:
816 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1]
818 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1]
819 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1]
820 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1]
821 ; GFX9-NEXT: s_setpc_b64 s[30:31]
823 ; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3:
824 ; GFX10PLUS: ; %bb.0:
825 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1]
827 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1]
828 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1]
829 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1]
830 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
831 %shl = shl <4 x i16> %value, <i16 3, i16 3, i16 3, i16 3>
832 %ashr = ashr <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3>
833 %cast = bitcast <4 x i16> %ashr to <2 x float>
834 ret <2 x float> %cast
; Test: amdgpu_ps function with an inreg <4 x i16> argument, every lane shifted
; left by 14 and then arithmetically shifted right by 14; the result is bitcast
; to <2 x i32>. The GFX6 checks expect one s_bfe_i32 per lane followed by mask,
; shift and or to repack pairs; the GFX8 checks expect per-half s_lshl_b32 /
; s_sext_i32_i16 / s_ashr_i32 and a manual repack; the GFX9 and GFX10PLUS checks
; expect the halves to be recombined with s_pack_ll_b32_b16.
837 define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) {
838 ; GFX6-LABEL: s_sext_inreg_v4i16_14:
840 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x20000
841 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
842 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
843 ; GFX6-NEXT: s_bfe_i32 s2, s2, 0x20000
844 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
845 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
846 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
847 ; GFX6-NEXT: s_or_b32 s0, s0, s1
848 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
849 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
850 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
851 ; GFX6-NEXT: s_or_b32 s1, s1, s2
852 ; GFX6-NEXT: ; return to shader part epilog
854 ; GFX8-LABEL: s_sext_inreg_v4i16_14:
856 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
857 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
858 ; GFX8-NEXT: s_lshl_b32 s0, s0, 14
859 ; GFX8-NEXT: s_lshl_b32 s2, s2, 14
860 ; GFX8-NEXT: s_lshl_b32 s1, s1, 14
861 ; GFX8-NEXT: s_lshl_b32 s3, s3, 14
862 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
863 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
864 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
865 ; GFX8-NEXT: s_sext_i32_i16 s3, s3
866 ; GFX8-NEXT: s_ashr_i32 s0, s0, 14
867 ; GFX8-NEXT: s_ashr_i32 s2, s2, 14
868 ; GFX8-NEXT: s_ashr_i32 s1, s1, 14
869 ; GFX8-NEXT: s_ashr_i32 s3, s3, 14
870 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
871 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
872 ; GFX8-NEXT: s_or_b32 s0, s2, s0
873 ; GFX8-NEXT: s_lshl_b32 s2, s3, 16
874 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
875 ; GFX8-NEXT: s_or_b32 s1, s2, s1
876 ; GFX8-NEXT: ; return to shader part epilog
878 ; GFX9-LABEL: s_sext_inreg_v4i16_14:
880 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
881 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xe000e
882 ; GFX9-NEXT: s_lshl_b32 s2, s2, 14
883 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
884 ; GFX9-NEXT: s_sext_i32_i16 s2, s0
885 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
886 ; GFX9-NEXT: s_sext_i32_i16 s3, 0xe000e
887 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
888 ; GFX9-NEXT: s_ashr_i32 s0, s0, 14
889 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0
890 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
891 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0xe000e
892 ; GFX9-NEXT: s_lshl_b32 s2, s2, 14
893 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
894 ; GFX9-NEXT: s_sext_i32_i16 s2, s1
895 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
896 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
897 ; GFX9-NEXT: s_ashr_i32 s1, s1, 14
898 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1
899 ; GFX9-NEXT: ; return to shader part epilog
901 ; GFX10PLUS-LABEL: s_sext_inreg_v4i16_14:
902 ; GFX10PLUS: ; %bb.0:
903 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
904 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 16
905 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xe000e
906 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 14
907 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0xe000e
908 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 14
909 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2
910 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s4
911 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, 0xe000e
912 ; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s0
913 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
914 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1
915 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
916 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s2
917 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 14
918 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s4, s2
919 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 14
920 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s3, s0
921 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s2, s1
922 ; GFX10PLUS-NEXT: ; return to shader part epilog
923 %shl = shl <4 x i16> %value, <i16 14, i16 14, i16 14, i16 14>
924 %ashr = ashr <4 x i16> %shl, <i16 14, i16 14, i16 14, i16 14>
925 %cast = bitcast <4 x i16> %ashr to <2 x i32>
930 ; define <5 x i16> @v_sext_inreg_v5i16(<5 x i16> %value) {
931 ; %shl = shl <5 x i16> %value, %amount
932 ; ret <5 x i16> %result
935 ; define amdgpu_ps <5 x i16> @s_sext_inreg_v5i16(<5 x i16> inreg %value) {
936 ; %shl = shl <5 x i16> %value, %amount
937 ; ret <5 x i16> %result
940 ; define <3 x float> @v_sext_inreg_v6i16(<6 x i16> %value) {
941 ; %shl = shl <6 x i16> %value, %amount
942 ; %cast = bitcast <6 x i16> %result to <3 x float>
943 ; ret <3 x float> %cast
946 ; define amdgpu_ps <3 x i32> @s_sext_inreg_v6i16(<6 x i16> inreg %value) {
947 ; %shl = shl <6 x i16> %value, %amount
948 ; %cast = bitcast <6 x i16> %result to <3 x i32>
949 ; ret <3 x i32> %cast
; Every lane of an <8 x i16> argument is shifted left and then arithmetically
; shifted right by 11, i.e. the low 5 bits of each 16-bit lane are sign-extended
; in place. The result is returned bitcast to <4 x float>.
952 define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) {
953 ; GFX6-LABEL: v_sext_inreg_v8i16_11:
955 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 5
957 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 5
958 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
959 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 5
960 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 5
961 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
962 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
963 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 5
964 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
965 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
966 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
967 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 5
968 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 5
969 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
970 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5
971 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 5
972 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
973 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
974 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
975 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
976 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
977 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6
978 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
979 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
980 ; GFX6-NEXT: s_setpc_b64 s[30:31]
982 ; GFX8-LABEL: v_sext_inreg_v8i16_11:
984 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX8-NEXT: v_mov_b32_e32 v5, 11
986 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 11, v0
987 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
988 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 11, v1
989 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
990 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v4
991 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
992 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, 11, v2
993 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
994 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
995 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v6
996 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
997 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, 11, v3
998 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
999 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1000 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v7
1001 ; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1002 ; GFX8-NEXT: v_mov_b32_e32 v5, 11
1003 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1004 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v8
1005 ; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1006 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1007 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1009 ; GFX9-LABEL: v_sext_inreg_v8i16_11:
1011 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1]
1013 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1]
1014 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1]
1015 ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1]
1016 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1]
1017 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1]
1018 ; GFX9-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1]
1019 ; GFX9-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1]
1020 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1022 ; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11:
1023 ; GFX10PLUS: ; %bb.0:
1024 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1]
1026 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1]
1027 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1]
1028 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1]
1029 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1]
1030 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1]
1031 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1]
1032 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1]
1033 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1034 %shl = shl <8 x i16> %value, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
1035 %ashr = ashr <8 x i16> %shl, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
1036 %cast = bitcast <8 x i16> %ashr to <4 x float>
1037 ret <4 x float> %cast
; amdgpu_ps variant with an inreg <8 x i16> argument: every lane is shifted left
; and then arithmetically shifted right by 5, i.e. the low 11 bits of each
; 16-bit lane are sign-extended in place. The shifted vector is bitcast to
; <4 x i32>.
1040 define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) {
1041 ; GFX6-LABEL: s_sext_inreg_v8i16_5:
1043 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0xb0000
1044 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0xb0000
1045 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
1046 ; GFX6-NEXT: s_bfe_i32 s2, s2, 0xb0000
1047 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0xb0000
1048 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
1049 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1050 ; GFX6-NEXT: s_bfe_i32 s5, s5, 0xb0000
1051 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1052 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1053 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
1054 ; GFX6-NEXT: s_bfe_i32 s4, s4, 0xb0000
1055 ; GFX6-NEXT: s_bfe_i32 s7, s7, 0xb0000
1056 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1057 ; GFX6-NEXT: s_and_b32 s3, s5, 0xffff
1058 ; GFX6-NEXT: s_bfe_i32 s6, s6, 0xb0000
1059 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1060 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1061 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1062 ; GFX6-NEXT: s_and_b32 s4, s7, 0xffff
1063 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1064 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1065 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1066 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1067 ; GFX6-NEXT: ; return to shader part epilog
1069 ; GFX8-LABEL: s_sext_inreg_v8i16_5:
1071 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1072 ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1073 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
1074 ; GFX8-NEXT: s_lshl_b32 s4, s4, 5
1075 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1076 ; GFX8-NEXT: s_lshl_b32 s1, s1, 5
1077 ; GFX8-NEXT: s_lshl_b32 s5, s5, 5
1078 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1079 ; GFX8-NEXT: s_sext_i32_i16 s4, s4
1080 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1081 ; GFX8-NEXT: s_lshl_b32 s2, s2, 5
1082 ; GFX8-NEXT: s_lshl_b32 s6, s6, 5
1083 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1084 ; GFX8-NEXT: s_sext_i32_i16 s5, s5
1085 ; GFX8-NEXT: s_ashr_i32 s0, s0, 5
1086 ; GFX8-NEXT: s_ashr_i32 s4, s4, 5
1087 ; GFX8-NEXT: s_lshl_b32 s3, s3, 5
1088 ; GFX8-NEXT: s_lshl_b32 s7, s7, 5
1089 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1090 ; GFX8-NEXT: s_sext_i32_i16 s6, s6
1091 ; GFX8-NEXT: s_ashr_i32 s1, s1, 5
1092 ; GFX8-NEXT: s_ashr_i32 s5, s5, 5
1093 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1094 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
1095 ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1096 ; GFX8-NEXT: s_sext_i32_i16 s7, s7
1097 ; GFX8-NEXT: s_ashr_i32 s2, s2, 5
1098 ; GFX8-NEXT: s_ashr_i32 s6, s6, 5
1099 ; GFX8-NEXT: s_or_b32 s0, s4, s0
1100 ; GFX8-NEXT: s_lshl_b32 s4, s5, 16
1101 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
1102 ; GFX8-NEXT: s_ashr_i32 s3, s3, 5
1103 ; GFX8-NEXT: s_ashr_i32 s7, s7, 5
1104 ; GFX8-NEXT: s_or_b32 s1, s4, s1
1105 ; GFX8-NEXT: s_lshl_b32 s4, s6, 16
1106 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1107 ; GFX8-NEXT: s_or_b32 s2, s4, s2
1108 ; GFX8-NEXT: s_lshl_b32 s4, s7, 16
1109 ; GFX8-NEXT: s_and_b32 s3, s3, 0xffff
1110 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1111 ; GFX8-NEXT: ; return to shader part epilog
1113 ; GFX9-LABEL: s_sext_inreg_v8i16_5:
1115 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1116 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x50005
1117 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1118 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1119 ; GFX9-NEXT: s_sext_i32_i16 s4, s0
1120 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1121 ; GFX9-NEXT: s_sext_i32_i16 s5, 0x50005
1122 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1123 ; GFX9-NEXT: s_ashr_i32 s0, s0, 5
1124 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1125 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
1126 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x50005
1127 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1128 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
1129 ; GFX9-NEXT: s_sext_i32_i16 s4, s1
1130 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1131 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1132 ; GFX9-NEXT: s_ashr_i32 s1, s1, 5
1133 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1134 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
1135 ; GFX9-NEXT: s_lshl_b32 s2, s2, 0x50005
1136 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1137 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1138 ; GFX9-NEXT: s_sext_i32_i16 s4, s2
1139 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1140 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1141 ; GFX9-NEXT: s_ashr_i32 s2, s2, 5
1142 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1143 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1144 ; GFX9-NEXT: s_lshl_b32 s3, s3, 0x50005
1145 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1146 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
1147 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1148 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1149 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1150 ; GFX9-NEXT: s_ashr_i32 s3, s3, 5
1151 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3
1152 ; GFX9-NEXT: ; return to shader part epilog
1154 ; GFX10PLUS-LABEL: s_sext_inreg_v8i16_5:
1155 ; GFX10PLUS: ; %bb.0:
1156 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16
1157 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s1, 16
1158 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x50005
1159 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 5
1160 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0x50005
1161 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1162 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1163 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s6
1164 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s2, 16
1165 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s0
1166 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, 0x50005
1167 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
1168 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 0x50005
1169 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1170 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1171 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 5
1172 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s6
1173 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s3, 16
1174 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1175 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1
1176 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
1177 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 0x50005
1178 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1179 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1180 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 5
1181 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s6
1182 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1183 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s2
1184 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16
1185 ; GFX10PLUS-NEXT: s_sext_i32_i16 s6, s3
1186 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16
1187 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1188 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 5
1189 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s6, s5
1190 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 5
1191 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1192 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s5, s3
1193 ; GFX10PLUS-NEXT: ; return to shader part epilog
1194 %shl = shl <8 x i16> %value, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
1195 %ashr = ashr <8 x i16> %shl, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
1196 %cast = bitcast <8 x i16> %ashr to <4 x i32>
; i64 shifted left and then arithmetically shifted right by 23, i.e. the low
; 41 bits are sign-extended in place.
1200 define i64 @v_sext_inreg_i64_23(i64 %value) {
1201 ; GCN-LABEL: v_sext_inreg_i64_23:
1203 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 9
1205 ; GCN-NEXT: s_setpc_b64 s[30:31]
1207 ; GFX10PLUS-LABEL: v_sext_inreg_i64_23:
1208 ; GFX10PLUS: ; %bb.0:
1209 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1210 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9
1211 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1212 %shl = shl i64 %value, 23
1213 %ashr = ashr i64 %shl, 23
; i64 shifted left and then arithmetically shifted right by 40, i.e. the low
; 24 bits are sign-extended in place.
1217 define i64 @v_sext_inreg_i64_40(i64 %value) {
1218 ; GCN-LABEL: v_sext_inreg_i64_40:
1220 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1221 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
1222 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1223 ; GCN-NEXT: s_setpc_b64 s[30:31]
1225 ; GFX10PLUS-LABEL: v_sext_inreg_i64_40:
1226 ; GFX10PLUS: ; %bb.0:
1227 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
1229 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1230 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1231 %shl = shl i64 %value, 40
1232 %ashr = ashr i64 %shl, 40
; i64 shifted left and then arithmetically shifted right by 63, i.e. bit 0 is
; sign-extended across the whole value.
1236 define i64 @v_sext_inreg_i64_63(i64 %value) {
1237 ; GCN-LABEL: v_sext_inreg_i64_63:
1239 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1240 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
1241 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1242 ; GCN-NEXT: s_setpc_b64 s[30:31]
1244 ; GFX10PLUS-LABEL: v_sext_inreg_i64_63:
1245 ; GFX10PLUS: ; %bb.0:
1246 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
1248 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1249 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1250 %shl = shl i64 %value, 63
1251 %ashr = ashr i64 %shl, 63
; i64 shifted left and then arithmetically shifted right by 33, i.e. the low
; 31 bits are sign-extended in place.
1255 define i64 @v_sext_inreg_i64_33(i64 %value) {
1256 ; GCN-LABEL: v_sext_inreg_i64_33:
1258 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 31
1260 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1261 ; GCN-NEXT: s_setpc_b64 s[30:31]
1263 ; GFX10PLUS-LABEL: v_sext_inreg_i64_33:
1264 ; GFX10PLUS: ; %bb.0:
1265 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 31
1267 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1268 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1269 %shl = shl i64 %value, 33
1270 %ashr = ashr i64 %shl, 33
; i64 shifted left and then arithmetically shifted right by 32, i.e. the low
; 32 bits are sign-extended in place. The ashr has to consume %shl; feeding it
; %value leaves the shl dead and only tests a plain ashr by 32. The expected
; output below should be confirmed by rerunning utils/update_llc_test_checks.py.
1274 define i64 @v_sext_inreg_i64_32(i64 %value) {
1275 ; GCN-LABEL: v_sext_inreg_i64_32:
1277 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1280 ; GCN-NEXT: s_setpc_b64 s[30:31]
1282 ; GFX10PLUS-LABEL: v_sext_inreg_i64_32:
1283 ; GFX10PLUS: ; %bb.0:
1284 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1287 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1288 %shl = shl i64 %value, 32
1289 %ashr = ashr i64 %shl, 32
; i64 shifted left and then arithmetically shifted right by 31, i.e. the low
; 33 bits are sign-extended in place.
1293 define i64 @v_sext_inreg_i64_31(i64 %value) {
1294 ; GCN-LABEL: v_sext_inreg_i64_31:
1296 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1
1298 ; GCN-NEXT: s_setpc_b64 s[30:31]
1300 ; GFX10PLUS-LABEL: v_sext_inreg_i64_31:
1301 ; GFX10PLUS: ; %bb.0:
1302 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1303 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
1304 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1305 %shl = shl i64 %value, 31
1306 %ashr = ashr i64 %shl, 31
; amdgpu_ps variant with an inreg i64 argument: shl then ashr by 3, i.e. the
; low 61 bits are sign-extended in place.
1310 define amdgpu_ps i64 @s_sext_inreg_i64_3(i64 inreg %value) {
1311 ; GCN-LABEL: s_sext_inreg_i64_3:
1313 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000
1314 ; GCN-NEXT: ; return to shader part epilog
1316 ; GFX10PLUS-LABEL: s_sext_inreg_i64_3:
1317 ; GFX10PLUS: ; %bb.0:
1318 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000
1319 ; GFX10PLUS-NEXT: ; return to shader part epilog
1320 %shl = shl i64 %value, 3
1321 %ashr = ashr i64 %shl, 3
; amdgpu_ps variant with an inreg i64 argument: shl then ashr by 63, i.e. bit 0
; is sign-extended across the whole value.
1325 define amdgpu_ps i64 @s_sext_inreg_i64_63(i64 inreg %value) {
1326 ; GCN-LABEL: s_sext_inreg_i64_63:
1328 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000
1329 ; GCN-NEXT: ; return to shader part epilog
1331 ; GFX10PLUS-LABEL: s_sext_inreg_i64_63:
1332 ; GFX10PLUS: ; %bb.0:
1333 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000
1334 ; GFX10PLUS-NEXT: ; return to shader part epilog
1335 %shl = shl i64 %value, 63
1336 %ashr = ashr i64 %shl, 63
; amdgpu_ps variant with an inreg i64 argument: shl then ashr by 33, i.e. the
; low 31 bits are sign-extended in place.
1340 define amdgpu_ps i64 @s_sext_inreg_i64_33(i64 inreg %value) {
1341 ; GCN-LABEL: s_sext_inreg_i64_33:
1343 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000
1344 ; GCN-NEXT: ; return to shader part epilog
1346 ; GFX10PLUS-LABEL: s_sext_inreg_i64_33:
1347 ; GFX10PLUS: ; %bb.0:
1348 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000
1349 ; GFX10PLUS-NEXT: ; return to shader part epilog
1350 %shl = shl i64 %value, 33
1351 %ashr = ashr i64 %shl, 33
; amdgpu_ps variant with an inreg i64 argument: shl then ashr by 32, i.e. the
; low 32 bits are sign-extended in place.
1355 define amdgpu_ps i64 @s_sext_inreg_i64_32(i64 inreg %value) {
1356 ; GCN-LABEL: s_sext_inreg_i64_32:
1358 ; GCN-NEXT: s_ashr_i32 s1, s0, 31
1359 ; GCN-NEXT: ; return to shader part epilog
1361 ; GFX10PLUS-LABEL: s_sext_inreg_i64_32:
1362 ; GFX10PLUS: ; %bb.0:
1363 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s0, 31
1364 ; GFX10PLUS-NEXT: ; return to shader part epilog
1365 %shl = shl i64 %value, 32
1366 %ashr = ashr i64 %shl, 32
; amdgpu_ps variant with an inreg i64 argument: shl then ashr by 31, i.e. the
; low 33 bits are sign-extended in place.
1370 define amdgpu_ps i64 @s_sext_inreg_i64_31(i64 inreg %value) {
1371 ; GCN-LABEL: s_sext_inreg_i64_31:
1373 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000
1374 ; GCN-NEXT: ; return to shader part epilog
1376 ; GFX10PLUS-LABEL: s_sext_inreg_i64_31:
1377 ; GFX10PLUS: ; %bb.0:
1378 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000
1379 ; GFX10PLUS-NEXT: ; return to shader part epilog
1380 %shl = shl i64 %value, 31
1381 %ashr = ashr i64 %shl, 31
; Both lanes of a <2 x i64> are shifted left and then arithmetically shifted
; right by 16, i.e. the low 48 bits of each lane are sign-extended in place.
1385 define <2 x i64> @v_sext_inreg_v2i64_16(<2 x i64> %value) {
1386 ; GCN-LABEL: v_sext_inreg_v2i64_16:
1388 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 16
1390 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 16
1391 ; GCN-NEXT: s_setpc_b64 s[30:31]
1393 ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16:
1394 ; GFX10PLUS: ; %bb.0:
1395 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16
1397 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16
1398 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1399 %shl = shl <2 x i64> %value, <i64 16, i64 16>
1400 %ashr = ashr <2 x i64> %shl, <i64 16, i64 16>
; Both lanes of a <2 x i64> are shifted left and then arithmetically shifted
; right by 31, i.e. the low 33 bits of each lane are sign-extended in place.
1404 define <2 x i64> @v_sext_inreg_v2i64_31(<2 x i64> %value) {
1405 ; GCN-LABEL: v_sext_inreg_v2i64_31:
1407 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1408 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1
1409 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 1
1410 ; GCN-NEXT: s_setpc_b64 s[30:31]
1412 ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31:
1413 ; GFX10PLUS: ; %bb.0:
1414 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
1416 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1
1417 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1418 %shl = shl <2 x i64> %value, <i64 31, i64 31>
1419 %ashr = ashr <2 x i64> %shl, <i64 31, i64 31>
; amdgpu_ps variant with an inreg <2 x i64> argument: both lanes are shifted
; left and then arithmetically shifted right by 30, i.e. the low 34 bits of
; each lane are sign-extended in place.
1423 define amdgpu_ps <2 x i64> @s_sext_inreg_v2i64_30(<2 x i64> inreg %value) {
1424 ; GCN-LABEL: s_sext_inreg_v2i64_30:
1426 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000
1427 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000
1428 ; GCN-NEXT: ; return to shader part epilog
1430 ; GFX10PLUS-LABEL: s_sext_inreg_v2i64_30:
1431 ; GFX10PLUS: ; %bb.0:
1432 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000
1433 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000
1434 ; GFX10PLUS-NEXT: ; return to shader part epilog
1435 %shl = shl <2 x i64> %value, <i64 30, i64 30>
1436 %ashr = ashr <2 x i64> %shl, <i64 30, i64 30>
; Odd-width case: an i65 is shifted left and then arithmetically shifted right
; by 22, i.e. the low 43 bits are sign-extended in place.
1440 define i65 @v_sext_inreg_i65_22(i65 %value) {
1441 ; GFX6-LABEL: v_sext_inreg_i65_22:
1443 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 22
1445 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1446 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1447 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
1448 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0
1449 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1450 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10
1451 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2
1452 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 22
1453 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
1454 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1456 ; GFX8-LABEL: v_sext_inreg_i65_22:
1458 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1459 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1460 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1461 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
1462 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
1463 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1464 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1465 ; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10
1466 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2
1467 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1468 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1469 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1471 ; GFX9-LABEL: v_sext_inreg_i65_22:
1473 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1475 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1476 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
1477 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1478 ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1
1479 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1480 ; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10
1481 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 10, v1
1482 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1483 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1485 ; GFX10PLUS-LABEL: v_sext_inreg_i65_22:
1486 ; GFX10PLUS: ; %bb.0:
1487 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1488 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1489 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1490 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1491 ; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
1492 ; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10
1493 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1
1494 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1495 ; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1
1496 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1497 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1498 %shl = shl i65 %value, 22
1499 %ashr = ashr i65 %shl, 22
; Odd-width case on i65 with a shift amount of 33.
; REVIEW: the ashr below takes %value rather than %shl, so %shl is unused and
; this function exercises a plain ashr by 33 instead of the shl/ashr pair that
; its name suggests (s_sext_inreg_i65_33 shifts %shl). Confirm the intent; if
; the operand is corrected, the assertions must be regenerated with
; utils/update_llc_test_checks.py.
1503 define i65 @v_sext_inreg_i65_33(i65 %value) {
1504 ; GFX6-LABEL: v_sext_inreg_i65_33:
1506 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507 ; GFX6-NEXT: v_mov_b32_e32 v3, v1
1508 ; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1
1509 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1510 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], 31
1511 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1512 ; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
1513 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1514 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1516 ; GFX8-LABEL: v_sext_inreg_i65_33:
1518 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519 ; GFX8-NEXT: v_mov_b32_e32 v3, v1
1520 ; GFX8-NEXT: v_bfe_i32 v1, v2, 0, 1
1521 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1522 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1523 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1524 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
1525 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1526 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1528 ; GFX9-LABEL: v_sext_inreg_i65_33:
1530 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1531 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1532 ; GFX9-NEXT: v_bfe_i32 v1, v2, 0, 1
1533 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1534 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1535 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1536 ; GFX9-NEXT: v_or_b32_e32 v0, v3, v0
1537 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1538 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1540 ; GFX10PLUS-LABEL: v_sext_inreg_i65_33:
1541 ; GFX10PLUS: ; %bb.0:
1542 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1543 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
1544 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1
1545 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1546 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1547 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1548 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1549 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v3, v0
1550 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1551 %shl = shl i65 %value, 33
1552 %ashr = ashr i65 %value, 33
; amdgpu_ps variant with an inreg i65 argument: shl then ashr by 18, i.e. the
; low 47 bits are sign-extended in place.
1556 define amdgpu_ps i65 @s_sext_inreg_i65_18(i65 inreg %value) {
1557 ; GCN-LABEL: s_sext_inreg_i65_18:
1559 ; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1560 ; GCN-NEXT: s_lshr_b32 s4, s1, 14
1561 ; GCN-NEXT: s_mov_b32 s5, 0
1562 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1563 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1564 ; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1565 ; GCN-NEXT: s_lshl_b32 s7, s2, 14
1566 ; GCN-NEXT: s_mov_b32 s6, s5
1567 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
1568 ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
1569 ; GCN-NEXT: ; return to shader part epilog
1571 ; GFX10PLUS-LABEL: s_sext_inreg_i65_18:
1572 ; GFX10PLUS: ; %bb.0:
1573 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1574 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 14
1575 ; GFX10PLUS-NEXT: s_mov_b32 s5, 0
1576 ; GFX10PLUS-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1577 ; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1578 ; GFX10PLUS-NEXT: s_mov_b32 s6, s5
1579 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1580 ; GFX10PLUS-NEXT: s_lshl_b32 s7, s2, 14
1581 ; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
1582 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
1583 ; GFX10PLUS-NEXT: ; return to shader part epilog
1584 %shl = shl i65 %value, 18
1585 %ashr = ashr i65 %shl, 18
; amdgpu_ps variant with an inreg i65 argument: shl then ashr by 33, i.e. the
; low 32 bits are sign-extended in place.
1589 define amdgpu_ps i65 @s_sext_inreg_i65_33(i65 inreg %value) {
1590 ; GCN-LABEL: s_sext_inreg_i65_33:
1592 ; GCN-NEXT: s_lshl_b32 s3, s2, 1
1593 ; GCN-NEXT: s_mov_b32 s2, 0
1594 ; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], 31
1595 ; GCN-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
1596 ; GCN-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1597 ; GCN-NEXT: s_bfe_u32 s0, s0, 0x1f0000
1598 ; GCN-NEXT: s_mov_b32 s1, s2
1599 ; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 31
1600 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1601 ; GCN-NEXT: s_ashr_i32 s2, s5, 1
1602 ; GCN-NEXT: ; return to shader part epilog
1604 ; GFX10PLUS-LABEL: s_sext_inreg_i65_33:
1605 ; GFX10PLUS: ; %bb.0:
1606 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s2, 1
1607 ; GFX10PLUS-NEXT: s_mov_b32 s2, 0
1608 ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], 31
1609 ; GFX10PLUS-NEXT: s_bfe_u32 s0, s0, 0x1f0000
1610 ; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
1611 ; GFX10PLUS-NEXT: s_mov_b32 s1, s2
1612 ; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1613 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 31
1614 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1615 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1
1616 ; GFX10PLUS-NEXT: ; return to shader part epilog
1617 %shl = shl i65 %value, 33
1618 %ashr = ashr i65 %shl, 33
1622 ; FIXME: Argument lowering asserts
1623 ; define <2 x i65> @v_sext_inreg_v2i65_36(<2 x i65> %value) {
1624 ; %shl = shl <2 x i65> %value, <i65 36, i65 36>
1625 ; %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
1626 ; ret <2 x i65> %ashr
1629 ; define amdgpu_ps <2 x i65> @s_sext_inreg_v2i65_36(<2 x i65> inreg %value) {
1630 ; %shl = shl <2 x i65> %value, <i65 36, i65 36>
1631 ; %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
1632 ; ret <2 x i65> %ashr
1634 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: