1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
8 define i8 @v_sext_inreg_i8_4(i8 %value) {
9 ; GCN-LABEL: v_sext_inreg_i8_4:
11 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 ; GFX10PLUS-LABEL: v_sext_inreg_i8_4:
17 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4
19 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
20 %shl = shl i8 %value, 4
21 %ashr = ashr i8 %shl, 4
25 define i8 @v_sext_inreg_i8_7(i8 %value) {
26 ; GCN-LABEL: v_sext_inreg_i8_7:
28 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
30 ; GCN-NEXT: s_setpc_b64 s[30:31]
32 ; GFX10PLUS-LABEL: v_sext_inreg_i8_7:
34 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
36 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
37 %shl = shl i8 %value, 7
38 %ashr = ashr i8 %shl, 7
; Scalar (inreg) i8 sign-extension-in-register expressed as shl/ashr by 3.
; Unlike the sibling tests, the function name does not carry the shift amount.
; Per the assertions below, the GFX6 run expects a single s_bfe_i32 with
; immediate 0x50000, while the GFX8, GFX9 and GFX10+ runs expect the
; s_lshl_b32 / s_sext_i32_i8 / s_ashr_i32 sequence.
42 define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) {
43 ; GFX6-LABEL: s_sext_inreg_i8:
45 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000
46 ; GFX6-NEXT: ; return to shader part epilog
48 ; GFX8-LABEL: s_sext_inreg_i8:
50 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
51 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
52 ; GFX8-NEXT: s_ashr_i32 s0, s0, 3
53 ; GFX8-NEXT: ; return to shader part epilog
55 ; GFX9-LABEL: s_sext_inreg_i8:
57 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
58 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
59 ; GFX9-NEXT: s_ashr_i32 s0, s0, 3
60 ; GFX9-NEXT: ; return to shader part epilog
62 ; GFX10PLUS-LABEL: s_sext_inreg_i8:
64 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3
65 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
66 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3
67 ; GFX10PLUS-NEXT: ; return to shader part epilog
68 %shl = shl i8 %value, 3
69 %ashr = ashr i8 %shl, 3
73 define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) {
74 ; GFX6-LABEL: s_sext_inreg_i8_6:
76 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
77 ; GFX6-NEXT: ; return to shader part epilog
79 ; GFX8-LABEL: s_sext_inreg_i8_6:
81 ; GFX8-NEXT: s_lshl_b32 s0, s0, 6
82 ; GFX8-NEXT: s_sext_i32_i8 s0, s0
83 ; GFX8-NEXT: s_ashr_i32 s0, s0, 6
84 ; GFX8-NEXT: ; return to shader part epilog
86 ; GFX9-LABEL: s_sext_inreg_i8_6:
88 ; GFX9-NEXT: s_lshl_b32 s0, s0, 6
89 ; GFX9-NEXT: s_sext_i32_i8 s0, s0
90 ; GFX9-NEXT: s_ashr_i32 s0, s0, 6
91 ; GFX9-NEXT: ; return to shader part epilog
93 ; GFX10PLUS-LABEL: s_sext_inreg_i8_6:
95 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6
96 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
97 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6
98 ; GFX10PLUS-NEXT: ; return to shader part epilog
99 %shl = shl i8 %value, 6
100 %ashr = ashr i8 %shl, 6
; VGPR i24 sign-extension-in-register: shl by 12 followed by ashr by 12 of the
; shifted value, i.e. sign-extend the low 12 bits. The ashr must consume %shl;
; feeding it %value would leave %shl dead and test a plain arithmetic shift.
; Expected output below was updated by hand to the folded 12-bit v_bfe_i32
; form (compare v_sext_inreg_i24_7, which folds to width 17); rerun
; utils/update_llc_test_checks.py to confirm.
104 define i24 @v_sext_inreg_i24_12(i24 %value) {
105 ; GCN-LABEL: v_sext_inreg_i24_12:
107 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 12
110 ; GCN-NEXT: s_setpc_b64 s[30:31]
112 ; GFX10PLUS-LABEL: v_sext_inreg_i24_12:
113 ; GFX10PLUS: ; %bb.0:
114 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12
117 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
118 %shl = shl i24 %value, 12
119 %ashr = ashr i24 %shl, 12
123 define i24 @v_sext_inreg_i24_7(i24 %value) {
124 ; GCN-LABEL: v_sext_inreg_i24_7:
126 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 17
128 ; GCN-NEXT: s_setpc_b64 s[30:31]
130 ; GFX10PLUS-LABEL: v_sext_inreg_i24_7:
131 ; GFX10PLUS: ; %bb.0:
132 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 17
134 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
135 %shl = shl i24 %value, 7
136 %ashr = ashr i24 %shl, 7
140 define amdgpu_ps i24 @s_sext_inreg_i24_8(i24 inreg %value) {
141 ; GCN-LABEL: s_sext_inreg_i24_8:
143 ; GCN-NEXT: s_sext_i32_i16 s0, s0
144 ; GCN-NEXT: ; return to shader part epilog
146 ; GFX10PLUS-LABEL: s_sext_inreg_i24_8:
147 ; GFX10PLUS: ; %bb.0:
148 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
149 ; GFX10PLUS-NEXT: ; return to shader part epilog
150 %shl = shl i24 %value, 8
151 %ashr = ashr i24 %shl, 8
155 define amdgpu_ps i24 @s_sext_inreg_i24_7(i24 inreg %value) {
156 ; GCN-LABEL: s_sext_inreg_i24_7:
158 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x110000
159 ; GCN-NEXT: ; return to shader part epilog
161 ; GFX10PLUS-LABEL: s_sext_inreg_i24_7:
162 ; GFX10PLUS: ; %bb.0:
163 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x110000
164 ; GFX10PLUS-NEXT: ; return to shader part epilog
165 %shl = shl i24 %value, 7
166 %ashr = ashr i24 %shl, 7
170 define i32 @v_sext_inreg_i32_3(i32 %value) {
171 ; GCN-LABEL: v_sext_inreg_i32_3:
173 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 29
175 ; GCN-NEXT: s_setpc_b64 s[30:31]
177 ; GFX10PLUS-LABEL: v_sext_inreg_i32_3:
178 ; GFX10PLUS: ; %bb.0:
179 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 29
181 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182 %shl = shl i32 %value, 3
183 %ashr = ashr i32 %shl, 3
; VGPR i32 sign-extension-in-register: shl by 31 followed by ashr by 31 of the
; shifted value, i.e. sign-extend bit 0. The ashr must consume %shl; feeding it
; %value would leave %shl dead and only broadcast the original sign bit.
; Expected output below was updated by hand to the 1-bit v_bfe_i32 form
; (same per-lane result as v_sext_inreg_v2i32_31); rerun
; utils/update_llc_test_checks.py to confirm.
187 define i32 @v_sext_inreg_i32_31(i32 %value) {
188 ; GCN-LABEL: v_sext_inreg_i32_31:
190 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
192 ; GCN-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10PLUS-LABEL: v_sext_inreg_i32_31:
195 ; GFX10PLUS: ; %bb.0:
196 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
198 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
199 %shl = shl i32 %value, 31
200 %ashr = ashr i32 %shl, 31
204 define amdgpu_ps i32 @s_sext_inreg_i32_2(i32 inreg %value) {
205 ; GCN-LABEL: s_sext_inreg_i32_2:
207 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1e0000
208 ; GCN-NEXT: ; return to shader part epilog
210 ; GFX10PLUS-LABEL: s_sext_inreg_i32_2:
211 ; GFX10PLUS: ; %bb.0:
212 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1e0000
213 ; GFX10PLUS-NEXT: ; return to shader part epilog
214 %shl = shl i32 %value, 2
215 %ashr = ashr i32 %shl, 2
219 define amdgpu_ps i32 @s_sext_inreg_i32_31(i32 inreg %value) {
220 ; GCN-LABEL: s_sext_inreg_i32_31:
222 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x10000
223 ; GCN-NEXT: ; return to shader part epilog
225 ; GFX10PLUS-LABEL: s_sext_inreg_i32_31:
226 ; GFX10PLUS: ; %bb.0:
227 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x10000
228 ; GFX10PLUS-NEXT: ; return to shader part epilog
229 %shl = shl i32 %value, 31
230 %ashr = ashr i32 %shl, 31
234 define <2 x i32> @v_sext_inreg_v2i32_14(<2 x i32> %value) {
235 ; GCN-LABEL: v_sext_inreg_v2i32_14:
237 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 18
239 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 18
240 ; GCN-NEXT: s_setpc_b64 s[30:31]
242 ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14:
243 ; GFX10PLUS: ; %bb.0:
244 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18
246 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18
247 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
248 %shl = shl <2 x i32> %value, <i32 14, i32 14>
249 %ashr = ashr <2 x i32> %shl, <i32 14, i32 14>
253 define <2 x i32> @v_sext_inreg_v2i32_31(<2 x i32> %value) {
254 ; GCN-LABEL: v_sext_inreg_v2i32_31:
256 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
258 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 1
259 ; GCN-NEXT: s_setpc_b64 s[30:31]
261 ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31:
262 ; GFX10PLUS: ; %bb.0:
263 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
265 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1
266 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
267 %shl = shl <2 x i32> %value, <i32 31, i32 31>
268 %shr = ashr <2 x i32> %shl, <i32 31, i32 31>
272 define amdgpu_ps <2 x i32> @s_sext_inreg_v2i32_22(<2 x i32> inreg %value) {
273 ; GCN-LABEL: s_sext_inreg_v2i32_22:
275 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000
276 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000
277 ; GCN-NEXT: ; return to shader part epilog
279 ; GFX10PLUS-LABEL: s_sext_inreg_v2i32_22:
280 ; GFX10PLUS: ; %bb.0:
281 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000
282 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000
283 ; GFX10PLUS-NEXT: ; return to shader part epilog
284 %shl = shl <2 x i32> %value, <i32 22, i32 22>
285 %ashr = ashr <2 x i32> %shl, <i32 22, i32 22>
; VGPR <3 x i32> sign-extension-in-register: each lane is shifted left and then
; arithmetically right by 16; the assertions expect one 16-bit v_bfe_i32 per lane.
; NOTE(review): %amount is declared but not referenced by the shl/ashr below;
; it looks like a leftover from a variable-shift test - confirm before removing.
289 define <3 x i32> @v_sext_inreg_v3i32_16(<3 x i32> %value, <3 x i32> %amount) {
290 ; GCN-LABEL: v_sext_inreg_v3i32_16:
292 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
294 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 16
295 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16
296 ; GCN-NEXT: s_setpc_b64 s[30:31]
298 ; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16:
299 ; GFX10PLUS: ; %bb.0:
300 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
302 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16
303 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16
304 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
305 %shl = shl <3 x i32> %value, <i32 16, i32 16, i32 16>
306 %ashr = ashr <3 x i32> %shl, <i32 16, i32 16, i32 16>
310 define amdgpu_ps <3 x i32> @s_sext_inreg_v3i32_22(<3 x i32> inreg %value) {
311 ; GCN-LABEL: s_sext_inreg_v3i32_22:
313 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000
314 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000
315 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xa0000
316 ; GCN-NEXT: ; return to shader part epilog
318 ; GFX10PLUS-LABEL: s_sext_inreg_v3i32_22:
319 ; GFX10PLUS: ; %bb.0:
320 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000
321 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000
322 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xa0000
323 ; GFX10PLUS-NEXT: ; return to shader part epilog
324 %shl = shl <3 x i32> %value, <i32 22, i32 22, i32 22>
325 %ashr = ashr <3 x i32> %shl, <i32 22, i32 22, i32 22>
329 define <4 x i32> @v_sext_inreg_v4i32_6(<4 x i32> %value) {
330 ; GCN-LABEL: v_sext_inreg_v4i32_6:
332 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 26
334 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 26
335 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 26
336 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 26
337 ; GCN-NEXT: s_setpc_b64 s[30:31]
339 ; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6:
340 ; GFX10PLUS: ; %bb.0:
341 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26
343 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26
344 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26
345 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 26
346 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
347 %shl = shl <4 x i32> %value, <i32 6, i32 6, i32 6, i32 6>
348 %ashr = ashr <4 x i32> %shl, <i32 6, i32 6, i32 6, i32 6>
352 define amdgpu_ps <4 x i32> @s_sext_inreg_v4i32_13(<4 x i32> inreg %value) {
353 ; GCN-LABEL: s_sext_inreg_v4i32_13:
355 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x130000
356 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x130000
357 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x130000
358 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x130000
359 ; GCN-NEXT: ; return to shader part epilog
361 ; GFX10PLUS-LABEL: s_sext_inreg_v4i32_13:
362 ; GFX10PLUS: ; %bb.0:
363 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x130000
364 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x130000
365 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x130000
366 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x130000
367 ; GFX10PLUS-NEXT: ; return to shader part epilog
368 %shl = shl <4 x i32> %value, <i32 13, i32 13, i32 13, i32 13>
369 %ashr = ashr <4 x i32> %shl, <i32 13, i32 13, i32 13, i32 13>
373 define <5 x i32> @v_sext_inreg_v5i32_30(<5 x i32> %value) {
374 ; GCN-LABEL: v_sext_inreg_v5i32_30:
376 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 2
378 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 2
379 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 2
380 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 2
381 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 2
382 ; GCN-NEXT: s_setpc_b64 s[30:31]
384 ; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30:
385 ; GFX10PLUS: ; %bb.0:
386 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
388 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
389 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
390 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2
391 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 2
392 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
393 %shl = shl <5 x i32> %value, <i32 30, i32 30, i32 30, i32 30, i32 30>
394 %ashr = ashr <5 x i32> %shl, <i32 30, i32 30, i32 30, i32 30, i32 30>
; Scalar (inreg) <5 x i32> sign-extension-in-register: each lane is shifted left
; by 19 and the shifted value is then shifted arithmetically right by 19,
; i.e. sign-extend the low 13 bits. The ashr must consume %shl; feeding it
; %value would leave %shl dead and test a plain s_ashr_i32 instead.
; Expected output below was updated by hand to the s_bfe_i32 form with
; immediate 0xd0000 (compare s_sext_inreg_v4i32_13, which uses 0x130000 for
; its 19-bit field); rerun utils/update_llc_test_checks.py to confirm.
398 define amdgpu_ps <5 x i32> @s_sext_inreg_v5i32_19(<5 x i32> inreg %value) {
399 ; GCN-LABEL: s_sext_inreg_v5i32_19:
401 ; GCN-NEXT: s_bfe_i32 s0, s0, 0xd0000
402 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xd0000
403 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xd0000
404 ; GCN-NEXT: s_bfe_i32 s3, s3, 0xd0000
405 ; GCN-NEXT: s_bfe_i32 s4, s4, 0xd0000
406 ; GCN-NEXT: ; return to shader part epilog
408 ; GFX10PLUS-LABEL: s_sext_inreg_v5i32_19:
409 ; GFX10PLUS: ; %bb.0:
410 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xd0000
411 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xd0000
412 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xd0000
413 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0xd0000
414 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0xd0000
415 ; GFX10PLUS-NEXT: ; return to shader part epilog
416 %shl = shl <5 x i32> %value, <i32 19, i32 19, i32 19, i32 19, i32 19>
417 %ashr = ashr <5 x i32> %shl, <i32 19, i32 19, i32 19, i32 19, i32 19>
421 define <16 x i32> @v_sext_inreg_v16i32_27(<16 x i32> %value) {
422 ; GCN-LABEL: v_sext_inreg_v16i32_27:
424 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 5
426 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 5
427 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 5
428 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 5
429 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 5
430 ; GCN-NEXT: v_bfe_i32 v5, v5, 0, 5
431 ; GCN-NEXT: v_bfe_i32 v6, v6, 0, 5
432 ; GCN-NEXT: v_bfe_i32 v7, v7, 0, 5
433 ; GCN-NEXT: v_bfe_i32 v8, v8, 0, 5
434 ; GCN-NEXT: v_bfe_i32 v9, v9, 0, 5
435 ; GCN-NEXT: v_bfe_i32 v10, v10, 0, 5
436 ; GCN-NEXT: v_bfe_i32 v11, v11, 0, 5
437 ; GCN-NEXT: v_bfe_i32 v12, v12, 0, 5
438 ; GCN-NEXT: v_bfe_i32 v13, v13, 0, 5
439 ; GCN-NEXT: v_bfe_i32 v14, v14, 0, 5
440 ; GCN-NEXT: v_bfe_i32 v15, v15, 0, 5
441 ; GCN-NEXT: s_setpc_b64 s[30:31]
443 ; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27:
444 ; GFX10PLUS: ; %bb.0:
445 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 5
447 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 5
448 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 5
449 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 5
450 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 5
451 ; GFX10PLUS-NEXT: v_bfe_i32 v5, v5, 0, 5
452 ; GFX10PLUS-NEXT: v_bfe_i32 v6, v6, 0, 5
453 ; GFX10PLUS-NEXT: v_bfe_i32 v7, v7, 0, 5
454 ; GFX10PLUS-NEXT: v_bfe_i32 v8, v8, 0, 5
455 ; GFX10PLUS-NEXT: v_bfe_i32 v9, v9, 0, 5
456 ; GFX10PLUS-NEXT: v_bfe_i32 v10, v10, 0, 5
457 ; GFX10PLUS-NEXT: v_bfe_i32 v11, v11, 0, 5
458 ; GFX10PLUS-NEXT: v_bfe_i32 v12, v12, 0, 5
459 ; GFX10PLUS-NEXT: v_bfe_i32 v13, v13, 0, 5
460 ; GFX10PLUS-NEXT: v_bfe_i32 v14, v14, 0, 5
461 ; GFX10PLUS-NEXT: v_bfe_i32 v15, v15, 0, 5
462 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
463 %shl = shl <16 x i32> %value, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
464 %ashr = ashr <16 x i32> %shl, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27>
468 define amdgpu_ps <16 x i32> @s_sext_inreg_v16i32_3(<16 x i32> inreg %value) {
469 ; GCN-LABEL: s_sext_inreg_v16i32_3:
471 ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1d0000
472 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x1d0000
473 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x1d0000
474 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x1d0000
475 ; GCN-NEXT: s_bfe_i32 s4, s4, 0x1d0000
476 ; GCN-NEXT: s_bfe_i32 s5, s5, 0x1d0000
477 ; GCN-NEXT: s_bfe_i32 s6, s6, 0x1d0000
478 ; GCN-NEXT: s_bfe_i32 s7, s7, 0x1d0000
479 ; GCN-NEXT: s_bfe_i32 s8, s8, 0x1d0000
480 ; GCN-NEXT: s_bfe_i32 s9, s9, 0x1d0000
481 ; GCN-NEXT: s_bfe_i32 s10, s10, 0x1d0000
482 ; GCN-NEXT: s_bfe_i32 s11, s11, 0x1d0000
483 ; GCN-NEXT: s_bfe_i32 s12, s12, 0x1d0000
484 ; GCN-NEXT: s_bfe_i32 s13, s13, 0x1d0000
485 ; GCN-NEXT: s_bfe_i32 s14, s14, 0x1d0000
486 ; GCN-NEXT: s_bfe_i32 s15, s15, 0x1d0000
487 ; GCN-NEXT: ; return to shader part epilog
489 ; GFX10PLUS-LABEL: s_sext_inreg_v16i32_3:
490 ; GFX10PLUS: ; %bb.0:
491 ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1d0000
492 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x1d0000
493 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x1d0000
494 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x1d0000
495 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0x1d0000
496 ; GFX10PLUS-NEXT: s_bfe_i32 s5, s5, 0x1d0000
497 ; GFX10PLUS-NEXT: s_bfe_i32 s6, s6, 0x1d0000
498 ; GFX10PLUS-NEXT: s_bfe_i32 s7, s7, 0x1d0000
499 ; GFX10PLUS-NEXT: s_bfe_i32 s8, s8, 0x1d0000
500 ; GFX10PLUS-NEXT: s_bfe_i32 s9, s9, 0x1d0000
501 ; GFX10PLUS-NEXT: s_bfe_i32 s10, s10, 0x1d0000
502 ; GFX10PLUS-NEXT: s_bfe_i32 s11, s11, 0x1d0000
503 ; GFX10PLUS-NEXT: s_bfe_i32 s12, s12, 0x1d0000
504 ; GFX10PLUS-NEXT: s_bfe_i32 s13, s13, 0x1d0000
505 ; GFX10PLUS-NEXT: s_bfe_i32 s14, s14, 0x1d0000
506 ; GFX10PLUS-NEXT: s_bfe_i32 s15, s15, 0x1d0000
507 ; GFX10PLUS-NEXT: ; return to shader part epilog
508 %shl = shl <16 x i32> %value, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
509 %ashr = ashr <16 x i32> %shl, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
513 define i16 @v_sext_inreg_i16_4(i16 %value) {
514 ; GFX6-LABEL: v_sext_inreg_i16_4:
516 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 12
518 ; GFX6-NEXT: s_setpc_b64 s[30:31]
520 ; GFX8-LABEL: v_sext_inreg_i16_4:
522 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
524 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 4, v0
525 ; GFX8-NEXT: s_setpc_b64 s[30:31]
527 ; GFX9-LABEL: v_sext_inreg_i16_4:
529 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 12
531 ; GFX9-NEXT: s_setpc_b64 s[30:31]
533 ; GFX10PLUS-LABEL: v_sext_inreg_i16_4:
534 ; GFX10PLUS: ; %bb.0:
535 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12
537 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
538 %shl = shl i16 %value, 4
539 %ashr = ashr i16 %shl, 4
543 define i16 @v_sext_inreg_i16_15(i16 %value) {
544 ; GFX6-LABEL: v_sext_inreg_i16_15:
546 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
548 ; GFX6-NEXT: s_setpc_b64 s[30:31]
550 ; GFX8-LABEL: v_sext_inreg_i16_15:
552 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0
554 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
555 ; GFX8-NEXT: s_setpc_b64 s[30:31]
557 ; GFX9-LABEL: v_sext_inreg_i16_15:
559 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
561 ; GFX9-NEXT: s_setpc_b64 s[30:31]
563 ; GFX10PLUS-LABEL: v_sext_inreg_i16_15:
564 ; GFX10PLUS: ; %bb.0:
565 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
567 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
568 %shl = shl i16 %value, 15
569 %ashr = ashr i16 %shl, 15
573 define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) {
574 ; GFX6-LABEL: s_sext_inreg_i16_9:
576 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x70000
577 ; GFX6-NEXT: ; return to shader part epilog
579 ; GFX8-LABEL: s_sext_inreg_i16_9:
581 ; GFX8-NEXT: s_lshl_b32 s0, s0, 9
582 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
583 ; GFX8-NEXT: s_ashr_i32 s0, s0, 9
584 ; GFX8-NEXT: ; return to shader part epilog
586 ; GFX9-LABEL: s_sext_inreg_i16_9:
588 ; GFX9-NEXT: s_lshl_b32 s0, s0, 9
589 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
590 ; GFX9-NEXT: s_ashr_i32 s0, s0, 9
591 ; GFX9-NEXT: ; return to shader part epilog
593 ; GFX10PLUS-LABEL: s_sext_inreg_i16_9:
594 ; GFX10PLUS: ; %bb.0:
595 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
596 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
597 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9
598 ; GFX10PLUS-NEXT: ; return to shader part epilog
599 %shl = shl i16 %value, 9
600 %ashr = ashr i16 %shl, 9
604 define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) {
605 ; GFX6-LABEL: s_sext_inreg_i16_15:
607 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
608 ; GFX6-NEXT: ; return to shader part epilog
610 ; GFX8-LABEL: s_sext_inreg_i16_15:
612 ; GFX8-NEXT: s_lshl_b32 s0, s0, 15
613 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
614 ; GFX8-NEXT: s_ashr_i32 s0, s0, 15
615 ; GFX8-NEXT: ; return to shader part epilog
617 ; GFX9-LABEL: s_sext_inreg_i16_15:
619 ; GFX9-NEXT: s_lshl_b32 s0, s0, 15
620 ; GFX9-NEXT: s_sext_i32_i16 s0, s0
621 ; GFX9-NEXT: s_ashr_i32 s0, s0, 15
622 ; GFX9-NEXT: ; return to shader part epilog
624 ; GFX10PLUS-LABEL: s_sext_inreg_i16_15:
625 ; GFX10PLUS: ; %bb.0:
626 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
627 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
628 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15
629 ; GFX10PLUS-NEXT: ; return to shader part epilog
630 %shl = shl i16 %value, 15
631 %ashr = ashr i16 %shl, 15
635 define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) {
636 ; GFX6-LABEL: v_sext_inreg_v2i16_8:
638 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
640 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
641 ; GFX6-NEXT: s_setpc_b64 s[30:31]
643 ; GFX8-LABEL: v_sext_inreg_v2i16_8:
645 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
647 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
648 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1
649 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
650 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
651 ; GFX8-NEXT: v_or_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
652 ; GFX8-NEXT: s_setpc_b64 s[30:31]
654 ; GFX9-LABEL: v_sext_inreg_v2i16_8:
656 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
658 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
659 ; GFX9-NEXT: s_setpc_b64 s[30:31]
661 ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8:
662 ; GFX10PLUS: ; %bb.0:
663 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
665 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
666 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
667 %shl = shl <2 x i16> %value, <i16 8, i16 8>
668 %ashr = ashr <2 x i16> %shl, <i16 8, i16 8>
672 define <2 x i16> @v_sext_inreg_v2i16_15(<2 x i16> %value) {
673 ; GFX6-LABEL: v_sext_inreg_v2i16_15:
675 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
677 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 1
678 ; GFX6-NEXT: s_setpc_b64 s[30:31]
680 ; GFX8-LABEL: v_sext_inreg_v2i16_15:
682 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
684 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
685 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
686 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v1
687 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
688 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
689 ; GFX8-NEXT: s_setpc_b64 s[30:31]
691 ; GFX9-LABEL: v_sext_inreg_v2i16_15:
693 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
695 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
696 ; GFX9-NEXT: s_setpc_b64 s[30:31]
698 ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15:
699 ; GFX10PLUS: ; %bb.0:
700 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
702 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
703 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
704 %shl = shl <2 x i16> %value, <i16 15, i16 15>
705 %ashr = ashr <2 x i16> %shl, <i16 15, i16 15>
709 define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) {
710 ; GFX6-LABEL: s_sext_inreg_v2i16_11:
712 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x50000
713 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000
714 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
715 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
716 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
717 ; GFX6-NEXT: s_or_b32 s0, s0, s1
718 ; GFX6-NEXT: ; return to shader part epilog
720 ; GFX8-LABEL: s_sext_inreg_v2i16_11:
722 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
723 ; GFX8-NEXT: s_lshl_b32 s0, s0, 11
724 ; GFX8-NEXT: s_lshl_b32 s1, s1, 11
725 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
726 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
727 ; GFX8-NEXT: s_ashr_i32 s0, s0, 11
728 ; GFX8-NEXT: s_ashr_i32 s1, s1, 11
729 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
730 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
731 ; GFX8-NEXT: s_or_b32 s0, s1, s0
732 ; GFX8-NEXT: ; return to shader part epilog
734 ; GFX9-LABEL: s_sext_inreg_v2i16_11:
736 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
737 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xb000b
738 ; GFX9-NEXT: s_lshl_b32 s1, s1, 11
739 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
740 ; GFX9-NEXT: s_sext_i32_i16 s1, s0
741 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
742 ; GFX9-NEXT: s_sext_i32_i16 s2, 0xb000b
743 ; GFX9-NEXT: s_ashr_i32 s1, s1, s2
744 ; GFX9-NEXT: s_ashr_i32 s0, s0, 11
745 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0
746 ; GFX9-NEXT: ; return to shader part epilog
748 ; GFX10PLUS-LABEL: s_sext_inreg_v2i16_11:
749 ; GFX10PLUS: ; %bb.0:
750 ; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16
751 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xb000b
752 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 11
753 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1
754 ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, 0xb000b
755 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0
756 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
757 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s2, s1
758 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 11
759 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s1, s0
760 ; GFX10PLUS-NEXT: ; return to shader part epilog
761 %shl = shl <2 x i16> %value, <i16 11, i16 11>
762 %ashr = ashr <2 x i16> %shl, <i16 11, i16 11>
763 %cast = bitcast <2 x i16> %ashr to i32
768 ; define <3 x i16> @v_sext_inreg_v3i16_4(<3 x i16> %value) {
769 ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4>
770 ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4>
771 ; ret <3 x i16> %ashr
774 ; define amdgpu_ps <3 x i16> @s_sext_inreg_v3i16_4(<3 x i16> inreg %value) {
775 ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4>
776 ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4>
777 ; ret <3 x i16> %ashr
780 define <2 x float> @v_sext_inreg_v4i16_3(<4 x i16> %value) {
781 ; GFX6-LABEL: v_sext_inreg_v4i16_3:
783 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 13
785 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 13
786 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
787 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 13
788 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 13
789 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
790 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
791 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
792 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
793 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
794 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
795 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
796 ; GFX6-NEXT: s_setpc_b64 s[30:31]
798 ; GFX8-LABEL: v_sext_inreg_v4i16_3:
800 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX8-NEXT: v_mov_b32_e32 v3, 3
802 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 3, v0
803 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
804 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 3, v1
805 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
806 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v2
807 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
808 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
809 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v4
810 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
811 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
812 ; GFX8-NEXT: s_setpc_b64 s[30:31]
814 ; GFX9-LABEL: v_sext_inreg_v4i16_3:
816 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1]
818 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1]
819 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1]
820 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1]
821 ; GFX9-NEXT: s_setpc_b64 s[30:31]
823 ; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3:
824 ; GFX10PLUS: ; %bb.0:
825 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1]
827 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1]
828 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1]
829 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1]
830 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
831 %shl = shl <4 x i16> %value, <i16 3, i16 3, i16 3, i16 3>
832 %ashr = ashr <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3>
833 %cast = bitcast <4 x i16> %ashr to <2 x float>
834 ret <2 x float> %cast
837 define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) {
838 ; GFX6-LABEL: s_sext_inreg_v4i16_14:
840 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x20000
841 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
842 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
843 ; GFX6-NEXT: s_bfe_i32 s2, s2, 0x20000
844 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
845 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
846 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
847 ; GFX6-NEXT: s_or_b32 s0, s0, s1
848 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
849 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
850 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
851 ; GFX6-NEXT: s_or_b32 s1, s1, s2
852 ; GFX6-NEXT: ; return to shader part epilog
854 ; GFX8-LABEL: s_sext_inreg_v4i16_14:
856 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
857 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
858 ; GFX8-NEXT: s_lshl_b32 s0, s0, 14
859 ; GFX8-NEXT: s_lshl_b32 s2, s2, 14
860 ; GFX8-NEXT: s_lshl_b32 s1, s1, 14
861 ; GFX8-NEXT: s_lshl_b32 s3, s3, 14
862 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
863 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
864 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
865 ; GFX8-NEXT: s_sext_i32_i16 s3, s3
866 ; GFX8-NEXT: s_ashr_i32 s0, s0, 14
867 ; GFX8-NEXT: s_ashr_i32 s2, s2, 14
868 ; GFX8-NEXT: s_ashr_i32 s1, s1, 14
869 ; GFX8-NEXT: s_ashr_i32 s3, s3, 14
870 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
871 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
872 ; GFX8-NEXT: s_or_b32 s0, s2, s0
873 ; GFX8-NEXT: s_lshl_b32 s2, s3, 16
874 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
875 ; GFX8-NEXT: s_or_b32 s1, s2, s1
876 ; GFX8-NEXT: ; return to shader part epilog
878 ; GFX9-LABEL: s_sext_inreg_v4i16_14:
880 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
881 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xe000e
882 ; GFX9-NEXT: s_lshl_b32 s2, s2, 14
883 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
884 ; GFX9-NEXT: s_sext_i32_i16 s2, s0
885 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
886 ; GFX9-NEXT: s_sext_i32_i16 s3, 0xe000e
887 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
888 ; GFX9-NEXT: s_ashr_i32 s0, s0, 14
889 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0
890 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
891 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0xe000e
892 ; GFX9-NEXT: s_lshl_b32 s2, s2, 14
893 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
894 ; GFX9-NEXT: s_sext_i32_i16 s2, s1
895 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
896 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3
897 ; GFX9-NEXT: s_ashr_i32 s1, s1, 14
898 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1
899 ; GFX9-NEXT: ; return to shader part epilog
901 ; GFX10PLUS-LABEL: s_sext_inreg_v4i16_14:
902 ; GFX10PLUS: ; %bb.0:
903 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
904 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 16
905 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xe000e
906 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 14
907 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0xe000e
908 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 14
909 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2
910 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s4
911 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, 0xe000e
912 ; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s0
913 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
914 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1
915 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
916 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s2
917 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 14
918 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s4, s2
919 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 14
920 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s3, s0
921 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s2, s1
922 ; GFX10PLUS-NEXT: ; return to shader part epilog
923 %shl = shl <4 x i16> %value, <i16 14, i16 14, i16 14, i16 14>
924 %ashr = ashr <4 x i16> %shl, <i16 14, i16 14, i16 14, i16 14>
925 %cast = bitcast <4 x i16> %ashr to <2 x i32>
930 ; define <5 x i16> @v_sext_inreg_v5i16(<5 x i16> %value) {
931 ; %shl = shl <5 x i16> %value, %amount
932 ; ret <5 x i16> %result
935 ; define amdgpu_ps <5 x i16> @s_sext_inreg_v5i16(<5 x i16> inreg %value) {
936 ; %shl = shl <5 x i16> %value, %amount
937 ; ret <5 x i16> %result
940 ; define <3 x float> @v_sext_inreg_v6i16(<6 x i16> %value) {
941 ; %shl = shl <6 x i16> %value, %amount
942 ; %cast = bitcast <6 x i16> %result to <3 x float>
943 ; ret <3 x float> %cast
946 ; define amdgpu_ps <3 x i32> @s_sext_inreg_v6i16(<6 x i16> inreg %value) {
947 ; %shl = shl <6 x i16> %value, %amount
948 ; %cast = bitcast <6 x i16> %result to <3 x i32>
949 ; ret <3 x i32> %cast
952 define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) { ; sign-extend each i16 lane from its low 5 bits (shl/ashr by 11), default calling convention
953 ; GFX6-LABEL: v_sext_inreg_v8i16_11:
955 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 5
957 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 5
958 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
959 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 5
960 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 5
961 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
962 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
963 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 5
964 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
965 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
966 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
967 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 5
968 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 5
969 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
970 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5
971 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 5
972 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
973 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
974 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
975 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
976 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
977 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6
978 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
979 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
980 ; GFX6-NEXT: s_setpc_b64 s[30:31]
982 ; GFX8-LABEL: v_sext_inreg_v8i16_11:
984 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX8-NEXT: v_mov_b32_e32 v5, 11
986 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 11, v0
987 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
988 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 11, v1
989 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
990 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v4
991 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
992 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, 11, v2
993 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
994 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
995 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v6
996 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
997 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, 11, v3
998 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
999 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
1000 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v7
1001 ; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1002 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
1003 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v8
1004 ; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1005 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
1006 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1008 ; GFX9-LABEL: v_sext_inreg_v8i16_11:
1010 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1]
1012 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1]
1013 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1]
1014 ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1]
1015 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1]
1016 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1]
1017 ; GFX9-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1]
1018 ; GFX9-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1]
1019 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1021 ; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11:
1022 ; GFX10PLUS: ; %bb.0:
1023 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1]
1025 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1]
1026 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1]
1027 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1]
1028 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1]
1029 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1]
1030 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1]
1031 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1]
1032 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1033 %shl = shl <8 x i16> %value, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; move bit 4 of every lane into the lane's sign position
1034 %ashr = ashr <8 x i16> %shl, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; arithmetic shift back replicates that bit into the upper 11 bits
1035 %cast = bitcast <8 x i16> %ashr to <4 x float> ; same 128 bits, returned as <4 x float>
1036 ret <4 x float> %cast
1039 define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) { ; sign-extend each i16 lane from its low 11 bits (shl/ashr by 5), inreg argument in an amdgpu_ps shader
1040 ; GFX6-LABEL: s_sext_inreg_v8i16_5:
1042 ; GFX6-NEXT: s_bfe_i32 s1, s1, 0xb0000
1043 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0xb0000
1044 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
1045 ; GFX6-NEXT: s_bfe_i32 s2, s2, 0xb0000
1046 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0xb0000
1047 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
1048 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1049 ; GFX6-NEXT: s_bfe_i32 s5, s5, 0xb0000
1050 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1051 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1052 ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
1053 ; GFX6-NEXT: s_bfe_i32 s4, s4, 0xb0000
1054 ; GFX6-NEXT: s_bfe_i32 s7, s7, 0xb0000
1055 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1056 ; GFX6-NEXT: s_and_b32 s3, s5, 0xffff
1057 ; GFX6-NEXT: s_bfe_i32 s6, s6, 0xb0000
1058 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1059 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1060 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1061 ; GFX6-NEXT: s_and_b32 s4, s7, 0xffff
1062 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1063 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1064 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
1065 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1066 ; GFX6-NEXT: ; return to shader part epilog
1068 ; GFX8-LABEL: s_sext_inreg_v8i16_5:
1070 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1071 ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1072 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
1073 ; GFX8-NEXT: s_lshl_b32 s4, s4, 5
1074 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1075 ; GFX8-NEXT: s_lshl_b32 s1, s1, 5
1076 ; GFX8-NEXT: s_lshl_b32 s5, s5, 5
1077 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1078 ; GFX8-NEXT: s_sext_i32_i16 s4, s4
1079 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1080 ; GFX8-NEXT: s_lshl_b32 s2, s2, 5
1081 ; GFX8-NEXT: s_lshl_b32 s6, s6, 5
1082 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1083 ; GFX8-NEXT: s_sext_i32_i16 s5, s5
1084 ; GFX8-NEXT: s_ashr_i32 s0, s0, 5
1085 ; GFX8-NEXT: s_ashr_i32 s4, s4, 5
1086 ; GFX8-NEXT: s_lshl_b32 s3, s3, 5
1087 ; GFX8-NEXT: s_lshl_b32 s7, s7, 5
1088 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1089 ; GFX8-NEXT: s_sext_i32_i16 s6, s6
1090 ; GFX8-NEXT: s_ashr_i32 s1, s1, 5
1091 ; GFX8-NEXT: s_ashr_i32 s5, s5, 5
1092 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1093 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
1094 ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1095 ; GFX8-NEXT: s_sext_i32_i16 s7, s7
1096 ; GFX8-NEXT: s_ashr_i32 s2, s2, 5
1097 ; GFX8-NEXT: s_ashr_i32 s6, s6, 5
1098 ; GFX8-NEXT: s_or_b32 s0, s4, s0
1099 ; GFX8-NEXT: s_lshl_b32 s4, s5, 16
1100 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
1101 ; GFX8-NEXT: s_ashr_i32 s3, s3, 5
1102 ; GFX8-NEXT: s_ashr_i32 s7, s7, 5
1103 ; GFX8-NEXT: s_or_b32 s1, s4, s1
1104 ; GFX8-NEXT: s_lshl_b32 s4, s6, 16
1105 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1106 ; GFX8-NEXT: s_or_b32 s2, s4, s2
1107 ; GFX8-NEXT: s_lshl_b32 s4, s7, 16
1108 ; GFX8-NEXT: s_and_b32 s3, s3, 0xffff
1109 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1110 ; GFX8-NEXT: ; return to shader part epilog
1112 ; GFX9-LABEL: s_sext_inreg_v8i16_5:
1114 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1115 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x50005
1116 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1117 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1118 ; GFX9-NEXT: s_sext_i32_i16 s4, s0
1119 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16
1120 ; GFX9-NEXT: s_sext_i32_i16 s5, 0x50005
1121 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1122 ; GFX9-NEXT: s_ashr_i32 s0, s0, 5
1123 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1124 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
1125 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x50005
1126 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1127 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
1128 ; GFX9-NEXT: s_sext_i32_i16 s4, s1
1129 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16
1130 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1131 ; GFX9-NEXT: s_ashr_i32 s1, s1, 5
1132 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1133 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
1134 ; GFX9-NEXT: s_lshl_b32 s2, s2, 0x50005
1135 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1136 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
1137 ; GFX9-NEXT: s_sext_i32_i16 s4, s2
1138 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16
1139 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1140 ; GFX9-NEXT: s_ashr_i32 s2, s2, 5
1141 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1142 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
1143 ; GFX9-NEXT: s_lshl_b32 s3, s3, 0x50005
1144 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5
1145 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
1146 ; GFX9-NEXT: s_sext_i32_i16 s4, s3
1147 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16
1148 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5
1149 ; GFX9-NEXT: s_ashr_i32 s3, s3, 5
1150 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3
1151 ; GFX9-NEXT: ; return to shader part epilog
1153 ; GFX10PLUS-LABEL: s_sext_inreg_v8i16_5:
1154 ; GFX10PLUS: ; %bb.0:
1155 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16
1156 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s1, 16
1157 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x50005
1158 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 5
1159 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0x50005
1160 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1161 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s4
1162 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s6
1163 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s2, 16
1164 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s0
1165 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, 0x50005
1166 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16
1167 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 0x50005
1168 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1169 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1170 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 5
1171 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s6
1172 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s3, 16
1173 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s4, s0
1174 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1
1175 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16
1176 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 0x50005
1177 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5
1178 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1179 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 5
1180 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s6
1181 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s4, s1
1182 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s2
1183 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16
1184 ; GFX10PLUS-NEXT: s_sext_i32_i16 s6, s3
1185 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16
1186 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5
1187 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 5
1188 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s6, s5
1189 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 5
1190 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s4, s2
1191 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s5, s3
1192 ; GFX10PLUS-NEXT: ; return to shader part epilog
1193 %shl = shl <8 x i16> %value, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> ; move bit 10 of every lane into the lane's sign position
1194 %ashr = ashr <8 x i16> %shl, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> ; arithmetic shift back replicates that bit into the upper 5 bits
1195 %cast = bitcast <8 x i16> %ashr to <4 x i32> ; same 128 bits viewed as <4 x i32>
1199 define i64 @v_sext_inreg_i64_23(i64 %value) { ; sign-extend i64 from its low 41 bits (shl/ashr by 23)
1200 ; GCN-LABEL: v_sext_inreg_i64_23:
1202 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 9
1204 ; GCN-NEXT: s_setpc_b64 s[30:31]
1206 ; GFX10PLUS-LABEL: v_sext_inreg_i64_23:
1207 ; GFX10PLUS: ; %bb.0:
1208 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9
1210 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1211 %shl = shl i64 %value, 23
1212 %ashr = ashr i64 %shl, 23 ; NOTE(review): the checks take the 9-bit bfe source from v0; bits 32..40 presumably live in v1 (high half) - confirm against current llc output
1216 define i64 @v_sext_inreg_i64_40(i64 %value) { ; sign-extend i64 from its low 24 bits (shl/ashr by 40)
1217 ; GCN-LABEL: v_sext_inreg_i64_40:
1219 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24
1221 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1222 ; GCN-NEXT: s_setpc_b64 s[30:31]
1224 ; GFX10PLUS-LABEL: v_sext_inreg_i64_40:
1225 ; GFX10PLUS: ; %bb.0:
1226 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
1228 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1229 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1230 %shl = shl i64 %value, 40
1231 %ashr = ashr i64 %shl, 40 ; the extension point is inside the low dword, so the high dword becomes pure sign fill
1235 define i64 @v_sext_inreg_i64_63(i64 %value) { ; sign-extend i64 from bit 0 only (shl/ashr by 63)
1236 ; GCN-LABEL: v_sext_inreg_i64_63:
1238 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
1240 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1241 ; GCN-NEXT: s_setpc_b64 s[30:31]
1243 ; GFX10PLUS-LABEL: v_sext_inreg_i64_63:
1244 ; GFX10PLUS: ; %bb.0:
1245 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
1247 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1248 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1249 %shl = shl i64 %value, 63
1250 %ashr = ashr i64 %shl, 63 ; maximum shift for i64: result is 0 or -1 depending on bit 0
1254 define i64 @v_sext_inreg_i64_33(i64 %value) { ; sign-extend i64 from its low 31 bits (shl/ashr by 33), one bit below the dword boundary
1255 ; GCN-LABEL: v_sext_inreg_i64_33:
1257 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 31
1259 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1260 ; GCN-NEXT: s_setpc_b64 s[30:31]
1262 ; GFX10PLUS-LABEL: v_sext_inreg_i64_33:
1263 ; GFX10PLUS: ; %bb.0:
1264 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 31
1266 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1267 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1268 %shl = shl i64 %value, 33
1269 %ashr = ashr i64 %shl, 33 ; the high dword of the result is entirely sign fill
1273 define i64 @v_sext_inreg_i64_32(i64 %value) { ; sign-extend i64 from its low 32 bits (shl/ashr by 32), exactly at the dword boundary
1274 ; GCN-LABEL: v_sext_inreg_i64_32:
1276 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1278 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1279 ; GCN-NEXT: s_setpc_b64 s[30:31]
1281 ; GFX10PLUS-LABEL: v_sext_inreg_i64_32:
1282 ; GFX10PLUS: ; %bb.0:
1283 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1286 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1287 %shl = shl i64 %value, 32
1288 %ashr = ashr i64 %shl, 32 ; must consume %shl (not %value) to form the sext_inreg pattern; confirm the checks by re-running utils/update_llc_test_checks.py
1292 define i64 @v_sext_inreg_i64_31(i64 %value) { ; sign-extend i64 from its low 33 bits (shl/ashr by 31), one bit above the dword boundary
1293 ; GCN-LABEL: v_sext_inreg_i64_31:
1295 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1
1297 ; GCN-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX10PLUS-LABEL: v_sext_inreg_i64_31:
1300 ; GFX10PLUS: ; %bb.0:
1301 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
1303 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1304 %shl = shl i64 %value, 31
1305 %ashr = ashr i64 %shl, 31 ; NOTE(review): the checks take the 1-bit bfe source from v0; bit 32 presumably lives in v1 (high half) - confirm against current llc output
1309 define amdgpu_ps i64 @s_sext_inreg_i64_3(i64 inreg %value) { ; sign-extend an inreg i64 from its low 61 bits (shl/ashr by 3)
1310 ; GCN-LABEL: s_sext_inreg_i64_3:
1312 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000
1313 ; GCN-NEXT: ; return to shader part epilog
1315 ; GFX10PLUS-LABEL: s_sext_inreg_i64_3:
1316 ; GFX10PLUS: ; %bb.0:
1317 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000
1318 ; GFX10PLUS-NEXT: ; return to shader part epilog
1319 %shl = shl i64 %value, 3
1320 %ashr = ashr i64 %shl, 3 ; the checks expect a single s_bfe_i64 for the shl/ashr pair
1324 define amdgpu_ps i64 @s_sext_inreg_i64_63(i64 inreg %value) { ; sign-extend an inreg i64 from bit 0 only (shl/ashr by 63)
1325 ; GCN-LABEL: s_sext_inreg_i64_63:
1327 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000
1328 ; GCN-NEXT: ; return to shader part epilog
1330 ; GFX10PLUS-LABEL: s_sext_inreg_i64_63:
1331 ; GFX10PLUS: ; %bb.0:
1332 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000
1333 ; GFX10PLUS-NEXT: ; return to shader part epilog
1334 %shl = shl i64 %value, 63
1335 %ashr = ashr i64 %shl, 63 ; maximum shift for i64: result is 0 or -1 depending on bit 0
1339 define amdgpu_ps i64 @s_sext_inreg_i64_33(i64 inreg %value) { ; sign-extend an inreg i64 from its low 31 bits (shl/ashr by 33)
1340 ; GCN-LABEL: s_sext_inreg_i64_33:
1342 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000
1343 ; GCN-NEXT: ; return to shader part epilog
1345 ; GFX10PLUS-LABEL: s_sext_inreg_i64_33:
1346 ; GFX10PLUS: ; %bb.0:
1347 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000
1348 ; GFX10PLUS-NEXT: ; return to shader part epilog
1349 %shl = shl i64 %value, 33
1350 %ashr = ashr i64 %shl, 33 ; one bit below the dword boundary
1354 define amdgpu_ps i64 @s_sext_inreg_i64_32(i64 inreg %value) { ; sign-extend an inreg i64 from its low 32 bits (shl/ashr by 32)
1355 ; GCN-LABEL: s_sext_inreg_i64_32:
1357 ; GCN-NEXT: s_ashr_i32 s1, s0, 31
1358 ; GCN-NEXT: ; return to shader part epilog
1360 ; GFX10PLUS-LABEL: s_sext_inreg_i64_32:
1361 ; GFX10PLUS: ; %bb.0:
1362 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s0, 31
1363 ; GFX10PLUS-NEXT: ; return to shader part epilog
1364 %shl = shl i64 %value, 32
1365 %ashr = ashr i64 %shl, 32 ; exactly at the dword boundary: the checks only rewrite s1 with the sign of s0
1369 define amdgpu_ps i64 @s_sext_inreg_i64_31(i64 inreg %value) { ; sign-extend an inreg i64 from its low 33 bits (shl/ashr by 31)
1370 ; GCN-LABEL: s_sext_inreg_i64_31:
1372 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000
1373 ; GCN-NEXT: ; return to shader part epilog
1375 ; GFX10PLUS-LABEL: s_sext_inreg_i64_31:
1376 ; GFX10PLUS: ; %bb.0:
1377 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000
1378 ; GFX10PLUS-NEXT: ; return to shader part epilog
1379 %shl = shl i64 %value, 31
1380 %ashr = ashr i64 %shl, 31 ; one bit above the dword boundary
1384 define <2 x i64> @v_sext_inreg_v2i64_16(<2 x i64> %value) { ; sign-extend each i64 lane from its low 48 bits (shl/ashr by 16)
1385 ; GCN-LABEL: v_sext_inreg_v2i64_16:
1387 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 16
1389 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 16
1390 ; GCN-NEXT: s_setpc_b64 s[30:31]
1392 ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16:
1393 ; GFX10PLUS: ; %bb.0:
1394 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16
1396 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16
1397 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1398 %shl = shl <2 x i64> %value, <i64 16, i64 16>
1399 %ashr = ashr <2 x i64> %shl, <i64 16, i64 16> ; NOTE(review): the checks take each lane's bfe source from v0/v2; bits 32..47 presumably live in v1/v3 (high halves) - confirm against current llc output
1403 define <2 x i64> @v_sext_inreg_v2i64_31(<2 x i64> %value) { ; sign-extend each i64 lane from its low 33 bits (shl/ashr by 31)
1404 ; GCN-LABEL: v_sext_inreg_v2i64_31:
1406 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407 ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1
1408 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 1
1409 ; GCN-NEXT: s_setpc_b64 s[30:31]
1411 ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31:
1412 ; GFX10PLUS: ; %bb.0:
1413 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1414 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
1415 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1
1416 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1417 %shl = shl <2 x i64> %value, <i64 31, i64 31>
1418 %ashr = ashr <2 x i64> %shl, <i64 31, i64 31> ; NOTE(review): the checks take each lane's bfe source from v0/v2; bit 32 presumably lives in v1/v3 (high halves) - confirm against current llc output
1422 define amdgpu_ps <2 x i64> @s_sext_inreg_v2i64_30(<2 x i64> inreg %value) { ; sign-extend each inreg i64 lane from its low 34 bits (shl/ashr by 30)
1423 ; GCN-LABEL: s_sext_inreg_v2i64_30:
1425 ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000
1426 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000
1427 ; GCN-NEXT: ; return to shader part epilog
1429 ; GFX10PLUS-LABEL: s_sext_inreg_v2i64_30:
1430 ; GFX10PLUS: ; %bb.0:
1431 ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000
1432 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000
1433 ; GFX10PLUS-NEXT: ; return to shader part epilog
1434 %shl = shl <2 x i64> %value, <i64 30, i64 30>
1435 %ashr = ashr <2 x i64> %shl, <i64 30, i64 30> ; the checks expect one s_bfe_i64 per lane
1439 define i65 @v_sext_inreg_i65_22(i65 %value) { ; sign-extend a non-power-of-2 i65 from its low 43 bits (shl/ashr by 22)
1440 ; GFX6-LABEL: v_sext_inreg_i65_22:
1442 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 22
1444 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1445 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1446 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
1447 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0
1448 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1449 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10
1450 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2
1451 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 22
1452 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
1453 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1455 ; GFX8-LABEL: v_sext_inreg_i65_22:
1457 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1458 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1459 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1460 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
1461 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
1462 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1463 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1464 ; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10
1465 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2
1466 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1467 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
1468 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX9-LABEL: v_sext_inreg_i65_22:
1472 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1474 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1475 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
1476 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1477 ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1
1478 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1479 ; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10
1480 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 10, v1
1481 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1482 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1484 ; GFX10PLUS-LABEL: v_sext_inreg_i65_22:
1485 ; GFX10PLUS: ; %bb.0:
1486 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1487 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
1488 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1489 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1490 ; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
1491 ; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10
1492 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1
1493 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2
1494 ; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1
1495 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3]
1496 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1497 %shl = shl i65 %value, 22
1498 %ashr = ashr i65 %shl, 22 ; the checks show the pair expanded into explicit multi-register shifts, unlike the i64 cases
1502 define i65 @v_sext_inreg_i65_33(i65 %value) { ; presumably meant to sign-extend i65 from its low 32 bits (shl/ashr by 33) - see the NOTE on the ashr below
1503 ; GFX6-LABEL: v_sext_inreg_i65_33:
1505 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506 ; GFX6-NEXT: v_mov_b32_e32 v3, v1
1507 ; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1
1508 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1509 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], 31
1510 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1511 ; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
1512 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1513 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1515 ; GFX8-LABEL: v_sext_inreg_i65_33:
1517 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518 ; GFX8-NEXT: v_mov_b32_e32 v3, v1
1519 ; GFX8-NEXT: v_bfe_i32 v1, v2, 0, 1
1520 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1521 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1522 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1523 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
1524 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1525 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1527 ; GFX9-LABEL: v_sext_inreg_i65_33:
1529 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1531 ; GFX9-NEXT: v_bfe_i32 v1, v2, 0, 1
1532 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1533 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1534 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1535 ; GFX9-NEXT: v_or_b32_e32 v0, v3, v0
1536 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1537 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1539 ; GFX10PLUS-LABEL: v_sext_inreg_i65_33:
1540 ; GFX10PLUS: ; %bb.0:
1541 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1542 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
1543 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1
1544 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1545 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1546 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
1547 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 1, v2
1548 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v3, v0
1549 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1550 %shl = shl i65 %value, 33
1551 %ashr = ashr i65 %value, 33 ; NOTE(review): operand is %value, not %shl, so as written this is a plain ashr rather than a sext_inreg (@s_sext_inreg_i65_33 uses %shl) - likely a typo; fixing it requires regenerating the checks
1555 define amdgpu_ps i65 @s_sext_inreg_i65_18(i65 inreg %value) { ; sign-extend an inreg i65 from its low 47 bits (shl/ashr by 18)
1556 ; GCN-LABEL: s_sext_inreg_i65_18:
1558 ; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1559 ; GCN-NEXT: s_lshr_b32 s4, s1, 14
1560 ; GCN-NEXT: s_mov_b32 s5, 0
1561 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1562 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1563 ; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1564 ; GCN-NEXT: s_lshl_b32 s7, s2, 14
1565 ; GCN-NEXT: s_mov_b32 s6, s5
1566 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
1567 ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
1568 ; GCN-NEXT: ; return to shader part epilog
1570 ; GFX10PLUS-LABEL: s_sext_inreg_i65_18:
1571 ; GFX10PLUS: ; %bb.0:
1572 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1573 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 14
1574 ; GFX10PLUS-NEXT: s_mov_b32 s5, 0
1575 ; GFX10PLUS-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1576 ; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1577 ; GFX10PLUS-NEXT: s_mov_b32 s6, s5
1578 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1579 ; GFX10PLUS-NEXT: s_lshl_b32 s7, s2, 14
1580 ; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
1581 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
1582 ; GFX10PLUS-NEXT: ; return to shader part epilog
1583 %shl = shl i65 %value, 18
1584 %ashr = ashr i65 %shl, 18 ; the checks show the pair expanded into explicit multi-register shifts, unlike the i64 cases
1588 define amdgpu_ps i65 @s_sext_inreg_i65_33(i65 inreg %value) { ; sign-extend an inreg i65 from its low 32 bits (shl/ashr by 33)
1589 ; GCN-LABEL: s_sext_inreg_i65_33:
1591 ; GCN-NEXT: s_lshl_b32 s3, s2, 1
1592 ; GCN-NEXT: s_mov_b32 s2, 0
1593 ; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], 31
1594 ; GCN-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
1595 ; GCN-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1596 ; GCN-NEXT: s_bfe_u32 s0, s0, 0x1f0000
1597 ; GCN-NEXT: s_mov_b32 s1, s2
1598 ; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 31
1599 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1600 ; GCN-NEXT: s_ashr_i32 s2, s5, 1
1601 ; GCN-NEXT: ; return to shader part epilog
1603 ; GFX10PLUS-LABEL: s_sext_inreg_i65_33:
1604 ; GFX10PLUS: ; %bb.0:
1605 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s2, 1
1606 ; GFX10PLUS-NEXT: s_mov_b32 s2, 0
1607 ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], 31
1608 ; GFX10PLUS-NEXT: s_bfe_u32 s0, s0, 0x1f0000
1609 ; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
1610 ; GFX10PLUS-NEXT: s_mov_b32 s1, s2
1611 ; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1612 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 31
1613 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
1614 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1
1615 ; GFX10PLUS-NEXT: ; return to shader part epilog
1616 %shl = shl i65 %value, 33
1617 %ashr = ashr i65 %shl, 33 ; consumes %shl, so this forms the shl/ashr sext_inreg pattern
1621 ; FIXME: Argument lowering asserts
1622 ; define <2 x i65> @v_sext_inreg_v2i65_36(<2 x i65> %value) {
1623 ; %shl = shl <2 x i65> %value, <i65 36, i65 36>
1624 ; %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
1625 ; ret <2 x i65> %ashr
1628 ; define amdgpu_ps <2 x i65> @s_sext_inreg_v2i65_36(<2 x i65> inreg %value) {
1629 ; %shl = shl <2 x i65> %value, <i65 36, i65 36>
1630 ; %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
1631 ; ret <2 x i65> %ashr
1633 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: