; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s

define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; SI-LABEL: bfe_u32_arg_arg_arg:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_bfe_u32 v0, v0, s5, s5
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_arg_arg_arg:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_bfe_u32 v0, v0, s1, s1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_arg_imm:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_bfe_u32 v0, s4, v1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_arg_arg_imm:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    v_mov_b32_e32 v1, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s1
; VI-NEXT:    v_bfe_u32 v0, s0, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
; SI-LABEL: bfe_u32_arg_imm_arg:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_bfe_u32 v0, s4, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_arg_imm_arg:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    v_mov_b32_e32 v0, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_bfe_u32 v0, s0, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
; SI-LABEL: bfe_u32_imm_arg_arg:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_movk_i32 s6, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_bfe_u32 v0, s6, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_imm_arg_arg:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    s_movk_i32 s2, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_bfe_u32 v0, s2, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
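; A width of zero always yields 0, so the next two kernels fold the bfe
; away completely; both check blocks just store a literal 0.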
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_0_width_reg_offset:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_arg_0_width_reg_offset:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_0_width_imm_offset:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_arg_0_width_imm_offset:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
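; The zext of the i8 load already leaves only bits [7:0], so extracting
; bits [7:0] with the bfe is a no-op here; neither check block contains
; a v_bfe_u32.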
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zextload_i8:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zextload_i8:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; FIXME: Should be using s_add_i32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i8:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i16:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_1:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_1:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
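; After the shl by 31 only bit 31 can be set, so bits [7:0] (test 2) and
; bit 0 (test 3) are known zero and the bfe constant-folds to 0.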
define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_2:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_test_2:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_3:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_test_3:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
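; (%x << 31) >> 31 is at most 1, so reading bit 31 of it with the bfe
; always gives 0.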
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_4:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_test_4:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_5:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_5:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
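; With offset 1 and width 31 the bfe covers bits [31:1], so it reduces to
; a plain lshr by 1 and is lowered as the shift pair checked below.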
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_6:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_6:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; VI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_7:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_7:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_8:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_8:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_9:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_9:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_10:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_10:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
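; When offset + width == 32 the bfe degenerates to a single right shift;
; tests 11 and 12 expect only a v_lshrrev_b32 by 8 and 24 respectively.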
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_11:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_11:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_12:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_12:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_13:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: bfe_u32_test_13:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: bfe_u32_test_14:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_test_14:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_1:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_1:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_2:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_2:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_3:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_3:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_4:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_4:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
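; ubfe(128, 7, 1) = (128 >> 7) & 1 = 1.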
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_5:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_5:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_6:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x80
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_6:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x80
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_7:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_7:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_8:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_8:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_9:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_9:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_10:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_10:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
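; ubfe(160, 4, 4) = (0xa0 >> 4) & 0xf = 0xa = 10.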
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_11:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_11:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_12:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_12:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
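; ubfe(131070, 16, 16) = (0x1fffe >> 16) & 0xffff = 1.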
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_13:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_13:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
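; ubfe(160, 2, 30) = 160 >> 2 = 40, since offset + width == 32.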
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_14:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 40
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_14:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 40
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_15:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_15:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
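; ubfe(0xffffffff, 1, 7) = 0x7f; test 17 extracts bits [31:1] of 255,
; which is also 0x7f.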
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_16:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_16:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_17:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_17:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_18:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: bfe_u32_constant_fold_test_18:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.
;
; XXX: The operand to v_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
; SI-LABEL: simplify_bfe_u32_multi_use_arg:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_mov_b32 s0, s8
; SI-NEXT:    s_mov_b32 s1, s9
; SI-NEXT:    s_mov_b32 s4, s10
; SI-NEXT:    s_mov_b32 s5, s11
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 63, v0
; SI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-LABEL: simplify_bfe_u32_multi_use_arg:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT:    s_mov_b32 s11, 0xf000
; VI-NEXT:    s_mov_b32 s10, -1
; VI-NEXT:    s_mov_b32 s2, s10
; VI-NEXT:    s_mov_b32 s3, s11
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; VI-NEXT:    s_mov_b32 s8, s4
; VI-NEXT:    s_mov_b32 s9, s5
; VI-NEXT:    s_mov_b32 s0, s6
; VI-NEXT:    s_mov_b32 s1, s7
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 63, v0
; VI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; VI-NEXT:    buffer_store_dword v1, off, s[8:11], 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}
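; The lshr+and pairs in the next kernels match the scalar bfe pattern.
; s_bfe_u32 packs its field descriptor into one operand as
; (width << 16) | offset, so 0x30006 below encodes width 3 at offset 6,
; i.e. (%a >> 6) & 7.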
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
; SI-LABEL: lshr_and:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: lshr_and:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s0, s0, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; SI-LABEL: v_lshr_and:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_lshr_b32 s2, s4, s5
; SI-NEXT:    s_and_b32 s4, s2, 7
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: v_lshr_and:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshr_b32 s0, s0, s1
; VI-NEXT:    s_and_b32 s0, s0, 7
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}
define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; SI-LABEL: and_lshr:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: and_lshr:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s0, s0, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
; SI-LABEL: and_lshr2:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: and_lshr2:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s0, s0, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
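; shl by 9 followed by lshr by 11 selects bits [22:2] of %a, i.e. offset 2
; and width 21, matching the 0x150002 ((21 << 16) | 2) operand checked below.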
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; SI-LABEL: shl_lshr:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x150002
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-LABEL: shl_lshr:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s0, s0, 0x150002
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }