; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
; GCN: buffer_load_ubyte
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; GCN: buffer_load_dword
; FIXME: Should be using s_add_i32
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
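; The and with 255 folds with the extract at offset 3: only bits [7:3] can reach
; the result, so the mask narrows to 0xf8 before the field is shifted down.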
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
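; At offset 7 only bit 7 of the masked byte can contribute, so the mask narrows to 0x80.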
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_1:
; GCN: buffer_load_dword
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_2:
define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_3:
define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
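; (x << 31) >> 31 keeps only bit 0, so extracting bit 31 of it is always 0 and
; the whole sequence folds to a constant store of 0.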
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_5:
; GCN: buffer_load_dword
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
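; An extract of width 31 at offset 1 reaches bit 31, so it reduces to a plain
; logical shift right by 1 after the shift left.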
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_7:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
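; When offset + width covers bit 31, the extract is just a logical shift right
; by the offset; tests 9 through 12 only vary where the split falls.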
define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_14:
; GCN-NOT: {{[^@]}}bfe
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
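; 127 is 0b111_1111, so the 8-bit field at offset 6 holds only a single set bit: 1.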
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
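; 65536 is 1 << 16, so the byte extracted at offset 16 folds to 1.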
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
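; 160 is 0xa0, so the 4-bit field at offset 4 is 0xa, i.e. 10.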
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
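; 131070 is 0x1fffe, so the upper halfword extracted at offset 16 is 1.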
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
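; A 30-bit field at offset 2 keeps everything above bit 1: 160 >> 2 = 40.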
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
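; A 28-bit field at offset 4 keeps everything above bit 3: 160 >> 4 = 10.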
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
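; Every bit of the source is set, so a 7-bit field at offset 1 is all ones: 0x7f.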
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
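; 255 >> 1 = 127 = 0x7f; the 31-bit width covers every remaining bit.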
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; GCN: buffer_load_dword [[ARG:v[0-9]+]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; GCN-DAG: buffer_store_dword [[AND]]
; GCN-DAG: buffer_store_dword [[BFE]]
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; GCN-LABEL: {{^}}lshr_and:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
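; s_bfe_u32 packs its field descriptor into the second operand: the offset in the
; low bits and the width in bits [22:16], so 0x30006 extracts 3 bits starting at bit 6.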
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}v_lshr_and:
; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; GCN: buffer_store_dword
define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr2:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}shl_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; GCN: buffer_store_dword
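; 0x150002 encodes a width of 21 (0x15) and an offset of 2.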
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }