1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; SI-LABEL: bfe_u32_arg_arg_arg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_bfe_u32 v0, v0, s3, s3
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_arg_arg_arg:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_bfe_u32 v0, v0, s3, s3
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
; NOTE(review): the width operand reuses %src1 and %src2 is unused — the
; generated checks (s3, s3) agree with this, so it appears intentional; confirm
; against the upstream test before "fixing".
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_arg_imm:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    v_bfe_u32 v0, s2, v1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_arg_arg_imm:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_mov_b32_e32 v1, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s3
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_bfe_u32 v0, s2, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
; SI-LABEL: bfe_u32_arg_imm_arg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    v_bfe_u32 v0, s2, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_arg_imm_arg:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_bfe_u32 v0, s2, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
; SI-LABEL: bfe_u32_imm_arg_arg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_movk_i32 s8, 0x7b
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    v_bfe_u32 v0, s8, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_imm_arg_arg:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_movk_i32 s8, 0x7b
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_bfe_u32 v0, s8, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_0_width_reg_offset:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_arg_0_width_reg_offset:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; SI-LABEL: bfe_u32_arg_0_width_imm_offset:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_arg_0_width_imm_offset:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zextload_i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zextload_i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
228 ; FIXME: Should be using s_add_i32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_5:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_5:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_6:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
; SI-NEXT:    v_and_b32_e32 v0, 2.0, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_6:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
; VI-NEXT:    v_and_b32_e32 v0, 2.0, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_9:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_9:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_10:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_10:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_11:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_11:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_12:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_12:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_13:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_13:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
; Extracting the single bit 31 of (ashr x, 31) is just bit 31 of x, so the
; checks expect one v_lshrrev_b32 by 31 and no arithmetic shift.
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_14:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_14:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; (lshr x, 31) leaves only bit 0 set, so extracting bit 31 of it is always 0;
; the whole computation (including the load's use) folds to storing 0.
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(0, 0, 0): zero width extracts nothing -> constant-folds to 0.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(12334, 0, 0): zero width -> 0 regardless of the source value.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(0, 0, 1): any field of a zero source is 0.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(1, 0, 1) = bit 0 of 1 = 1.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; NOTE(review): the checks expect -1, but ubfe(0xffffffff, 0, 1) would fold
; to 1; the width operand here may have lost a '-' (an out-of-range width
; returning the whole source) in this copy — verify against upstream.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_5:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_5:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(128, 7, 1) = (128 >> 7) & 1 = 1.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_6:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x80
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_6:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x80
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(128, 0, 8) = low byte of 128 = 0x80.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(127, 0, 8) = low byte of 127 = 0x7f.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(127, 6, 8) = (127 >> 6) & 0xff = 1.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_9:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_9:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(65536, 16, 8) = (0x10000 >> 16) & 0xff = 1.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_10:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_10:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(65535, 16, 16) = (0xffff >> 16) & 0xffff = 0.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_11:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_11:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(160, 4, 4) = (0xa0 >> 4) & 0xf = 10.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_12:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_12:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(160, 31, 1) = bit 31 of 160 = 0.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_13:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_13:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(131070, 16, 16) = (0x1fffe >> 16) & 0xffff = 1.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_14:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 40
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_14:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 40
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(160, 2, 30) = 160 >> 2 = 40 (the 30-bit field covers all set bits).
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_15:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_15:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(160, 4, 28) = 160 >> 4 = 10 (the 28-bit field covers all set bits).
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(0xffffffff, 1, 7) = 7 set bits = 0x7f.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_17:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_17:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(255, 1, 31) = 255 >> 1 = 0x7f.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_18:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_18:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; ubfe(255, 31, 1) = bit 31 of 255 = 0.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}
; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.
;
; XXX: The operand to v_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
; SI-LABEL: simplify_bfe_u32_multi_use_arg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_mov_b32 s0, s8
; SI-NEXT:    s_mov_b32 s1, s9
; SI-NEXT:    s_mov_b32 s4, s10
; SI-NEXT:    s_mov_b32 s5, s11
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 63, v0
; SI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: simplify_bfe_u32_multi_use_arg:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s10, s2
; VI-NEXT:    s_mov_b32 s11, s3
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s8, s6
; VI-NEXT:    s_mov_b32 s9, s7
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 63, v0
; VI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT:    s_endpgm
                                                          ptr addrspace(1) %out1,
                                                          ptr addrspace(1) %in) #0 {
; The and has a second user (the store to %out1), so it must keep all 6 bits
; rather than being narrowed to the 2 bits the bfe demands.
  %src = load i32, ptr addrspace(1) %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
  store i32 %and, ptr addrspace(1) %out1, align 4
  ret void
}
define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: lshr_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: lshr_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s4, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; (a >> 6) & 7 matches s_bfe_u32 with encoding 0x30006 (width 3, offset 6).
; NOTE(review): the two IR lines below were dropped from this copy and are
; reconstructed from the checked bfe encoding — verify against upstream.
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}
define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; SI-LABEL: v_lshr_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_lshr_b32 s2, s2, s3
; SI-NEXT:    s_and_b32 s2, s2, 7
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: v_lshr_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_lshr_b32 s0, s2, s3
; VI-NEXT:    s_and_b32 s0, s0, 7
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
; Variable shift amount: no bfe pattern, so a shift + and pair is expected.
; NOTE(review): the `and` line below was dropped from this copy and is
; reconstructed from the checked s_and_b32 by 7 — verify against upstream.
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, ptr addrspace(1) %out, align 8
  ret void
}
define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: and_lshr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: and_lshr:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s4, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; (a & 448) >> 6 (448 = 0b111000000) also matches s_bfe_u32 0x30006.
; NOTE(review): the `lshr` line below was dropped from this copy and is
; reconstructed from the checked bfe encoding — verify against upstream.
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}
define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: and_lshr2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: and_lshr2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s4, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; (a & 511) >> 6 keeps only bits [8:6], again s_bfe_u32 0x30006.
; NOTE(review): the `lshr` line below was dropped from this copy and is
; reconstructed from the checked bfe encoding — verify against upstream.
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}
define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: shl_lshr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x150002
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_lshr:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s4, 0x150002
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; (a << 9) >> 11 extracts bits [22:2]: s_bfe_u32 0x150002 (width 21, offset 2).
; NOTE(review): the `shl` line below was dropped from this copy and is
; reconstructed from the checked bfe encoding — verify against upstream.
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}
; Intrinsic declaration and shared attribute groups for the whole file.
declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }