1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; Default calling convention, all three llvm.amdgcn.sbfe.i32 operands are
; plain function arguments. The checks expect a single three-operand
; v_bfe_i32 on v0, v1, v2 with no extra operand packing.
4 define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5 ; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
7 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; GFX6-NEXT: v_bfe_i32 v0, v0, v1, v2
9 ; GFX6-NEXT: s_setpc_b64 s[30:31]
10 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
; amdgpu_ps variant with all three operands passed inreg. The checks expect
; the second operand to be masked with 63, the third shifted left by 16, and
; the two OR'd into the single packed control operand of s_bfe_i32.
14 define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15 ; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
17 ; GFX6-NEXT: s_and_b32 s1, s1, 63
18 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
19 ; GFX6-NEXT: s_or_b32 s1, s1, s2
20 ; GFX6-NEXT: s_bfe_i32 s0, s0, s1
21 ; GFX6-NEXT: ; return to shader part epilog
22 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
26 ; TODO: The i64 case with ordinary (non-inreg) arguments still needs to be expanded; the test below stays disabled until then.
27 ; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28 ; %bfe_i64 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
; 64-bit amdgpu_ps variant calling llvm.amdgcn.sbfe.i64 with inreg operands.
; The checks expect the same mask-with-63 / shift-by-16 / OR packing as the
; 32-bit case, feeding s_bfe_i64 on the s[0:1] pair.
; Note: the result value is named %bfe_i32 although its type is i64.
32 define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33 ; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
35 ; GFX6-NEXT: s_and_b32 s2, s2, 63
36 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
37 ; GFX6-NEXT: s_or_b32 s2, s2, s3
38 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], s2
39 ; GFX6-NEXT: ; return to shader part epilog
40 %bfe_i32 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
; Kernel with the third sbfe operand fixed to the constant 123. The checks
; expect the second operand masked with 63 and then OR'd with the literal
; 0x7b0000 (123 << 16) to form the s_bfe_i32 control operand.
44 define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
45 ; GFX6-LABEL: bfe_i32_arg_arg_imm:
47 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
48 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
49 ; GFX6-NEXT: s_and_b32 s3, s3, 63
50 ; GFX6-NEXT: s_or_b32 s3, s3, 0x7b0000
51 ; GFX6-NEXT: s_bfe_i32 s3, s2, s3
52 ; GFX6-NEXT: s_mov_b32 s2, -1
53 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
54 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
55 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
57 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
58 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; Kernel with the second sbfe operand fixed to the constant 123. The checks
; expect the constant to appear already masked as 59 (123 & 63) and OR'd with
; the third operand shifted left by 16.
62 define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
63 ; GFX6-LABEL: bfe_i32_arg_imm_arg:
65 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
66 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
67 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
68 ; GFX6-NEXT: s_or_b32 s3, 59, s3
69 ; GFX6-NEXT: s_bfe_i32 s3, s2, s3
70 ; GFX6-NEXT: s_mov_b32 s2, -1
71 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
72 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
73 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
75 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
76 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; Kernel with the first (source) sbfe operand fixed to the constant 123. The
; checks expect the usual mask / shift / OR packing of the two variable
; operands and the literal 0x7b used directly as the s_bfe_i32 source.
80 define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
81 ; GFX6-LABEL: bfe_i32_imm_arg_arg:
83 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
84 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
85 ; GFX6-NEXT: s_and_b32 s4, s2, 63
86 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
87 ; GFX6-NEXT: s_or_b32 s3, s4, s3
88 ; GFX6-NEXT: s_bfe_i32 s3, 0x7b, s3
89 ; GFX6-NEXT: s_mov_b32 s2, -1
90 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
91 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
92 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
94 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
95 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; Loads an i32 from %src0 and applies sbfe with constant operands 2 and 8.
; The checks expect a scalar load followed by s_bfe_i32 with the packed
; immediate 0x80002 (8 << 16 | 2).
99 define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 {
100 ; GFX6-LABEL: v_bfe_print_arg:
102 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
103 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
104 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
105 ; GFX6-NEXT: s_mov_b32 s2, -1
106 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
107 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80002
108 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
109 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
110 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
111 ; GFX6-NEXT: s_endpgm
112 %load = load i32, ptr addrspace(1) %src0, align 4
113 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
114 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; Third sbfe operand is the constant 0 while the second comes from a kernel
; argument. The checks expect only the mask-with-63 of the second operand (no
; shift/OR for the zero constant) and the s_bfe_i32 is still emitted.
; Note: the result value is named %bfe_u32 although the signed intrinsic is called.
118 define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
119 ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset:
121 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
122 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
123 ; GFX6-NEXT: s_and_b32 s3, s3, 63
124 ; GFX6-NEXT: s_bfe_i32 s3, s2, s3
125 ; GFX6-NEXT: s_mov_b32 s2, -1
126 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
127 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
128 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
129 ; GFX6-NEXT: s_endpgm
130 %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
131 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Second and third sbfe operands are the constants 8 and 0; %src1 is not used
; by the body. The checks expect s_bfe_i32 with the plain immediate 8 as its
; control operand, i.e. the call is not folded away.
; Note: the result value is named %bfe_u32 although the signed intrinsic is called.
135 define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
136 ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset:
138 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
139 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
140 ; GFX6-NEXT: s_mov_b32 s2, -1
141 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
142 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
143 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
144 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
145 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
146 ; GFX6-NEXT: s_endpgm
147 %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
148 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Loaded value is shifted left by 31 and then fed to sbfe with operands 1 and
; 31. The checks expect the shift to remain (s_lshl_b32 by 31) followed by
; s_bfe_i32 with immediate 0x1f0001 (31 << 16 | 1).
152 define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
153 ; GFX6-LABEL: bfe_i32_test_6:
155 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
156 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
157 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
158 ; GFX6-NEXT: s_mov_b32 s2, -1
159 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
160 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
161 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001
162 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
163 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
164 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
165 ; GFX6-NEXT: s_endpgm
166 %x = load i32, ptr addrspace(1) %in, align 4
167 %shl = shl i32 %x, 31
168 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
169 store i32 %bfe, ptr addrspace(1) %out, align 4
; Loaded value is shifted left by 31 and then fed to sbfe with operands 0 and
; 31. The checks expect s_lshl_b32 by 31 followed by s_bfe_i32 with immediate
; 0x1f0000 (31 << 16 | 0).
173 define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
174 ; GFX6-LABEL: bfe_i32_test_7:
176 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
177 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
178 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
179 ; GFX6-NEXT: s_mov_b32 s2, -1
180 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
181 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
182 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0000
183 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
184 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
185 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
186 ; GFX6-NEXT: s_endpgm
187 %x = load i32, ptr addrspace(1) %in, align 4
188 %shl = shl i32 %x, 31
189 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
190 store i32 %bfe, ptr addrspace(1) %out, align 4
; Loaded value is shifted left by 31 and then fed to sbfe with operands 31 and
; 1. The checks expect s_lshl_b32 by 31 followed by s_bfe_i32 with immediate
; 0x1001f (1 << 16 | 31).
194 define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
195 ; GFX6-LABEL: bfe_i32_test_8:
197 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
198 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
199 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
200 ; GFX6-NEXT: s_mov_b32 s2, -1
201 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
202 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
203 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
204 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
205 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
206 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
207 ; GFX6-NEXT: s_endpgm
208 %x = load i32, ptr addrspace(1) %in, align 4
209 %shl = shl i32 %x, 31
210 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
211 store i32 %bfe, ptr addrspace(1) %out, align 4
; sbfe applied directly to the loaded value with operands 31 and 1. The checks
; expect a single s_bfe_i32 with immediate 0x1001f (1 << 16 | 31).
215 define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
216 ; GFX6-LABEL: bfe_i32_test_9:
218 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
219 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
220 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
221 ; GFX6-NEXT: s_mov_b32 s2, -1
222 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
223 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
224 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
225 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
226 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
227 ; GFX6-NEXT: s_endpgm
228 %x = load i32, ptr addrspace(1) %in, align 4
229 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
230 store i32 %bfe, ptr addrspace(1) %out, align 4
; sbfe applied directly to the loaded value with operands 1 and 31. The checks
; expect a single s_bfe_i32 with immediate 0x1f0001 (31 << 16 | 1).
234 define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
235 ; GFX6-LABEL: bfe_i32_test_10:
237 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
238 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
239 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
240 ; GFX6-NEXT: s_mov_b32 s2, -1
241 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
242 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001
243 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
244 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
245 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
246 ; GFX6-NEXT: s_endpgm
247 %x = load i32, ptr addrspace(1) %in, align 4
248 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
249 store i32 %bfe, ptr addrspace(1) %out, align 4
; sbfe applied directly to the loaded value with operands 8 and 24. The checks
; expect a single s_bfe_i32 with immediate 0x180008 (24 << 16 | 8).
253 define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
254 ; GFX6-LABEL: bfe_i32_test_11:
256 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
257 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
258 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
259 ; GFX6-NEXT: s_mov_b32 s2, -1
260 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
261 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180008
262 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
263 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
264 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
265 ; GFX6-NEXT: s_endpgm
266 %x = load i32, ptr addrspace(1) %in, align 4
267 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
268 store i32 %bfe, ptr addrspace(1) %out, align 4
; sbfe applied directly to the loaded value with operands 24 and 8. The checks
; expect a single s_bfe_i32 with immediate 0x80018 (8 << 16 | 24).
272 define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
273 ; GFX6-LABEL: bfe_i32_test_12:
275 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
276 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
277 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
278 ; GFX6-NEXT: s_mov_b32 s2, -1
279 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
280 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80018
281 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
282 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
283 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
284 ; GFX6-NEXT: s_endpgm
285 %x = load i32, ptr addrspace(1) %in, align 4
286 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
287 store i32 %bfe, ptr addrspace(1) %out, align 4
; Loaded value is arithmetic-shifted right by 31 and then fed to sbfe with
; operands 31 and 1. The checks expect s_ashr_i32 by 31 followed by s_bfe_i32
; with immediate 0x1001f.
; Note: the intermediate is named %shl although it holds an ashr result.
291 define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
292 ; GFX6-LABEL: bfe_i32_test_13:
294 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
295 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
296 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
297 ; GFX6-NEXT: s_mov_b32 s2, -1
298 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
299 ; GFX6-NEXT: s_ashr_i32 s3, s3, 31
300 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
301 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
302 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
303 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
304 ; GFX6-NEXT: s_endpgm
305 %x = load i32, ptr addrspace(1) %in, align 4
306 %shl = ashr i32 %x, 31
307 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
308 store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
; Loaded value is logical-shifted right by 31 and then fed to sbfe with
; operands 31 and 1. The checks expect s_lshr_b32 by 31 followed by s_bfe_i32
; with immediate 0x1001f.
; Note: the intermediate is named %shl although it holds an lshr result.
311 define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
312 ; GFX6-LABEL: bfe_i32_test_14:
314 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
315 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
316 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
317 ; GFX6-NEXT: s_mov_b32 s2, -1
318 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
319 ; GFX6-NEXT: s_lshr_b32 s3, s3, 31
320 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
321 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
322 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
323 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
324 ; GFX6-NEXT: s_endpgm
325 %x = load i32, ptr addrspace(1) %in, align 4
326 %shl = lshr i32 %x, 31
327 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
328 store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
; All-constant call sbfe(0, 0, 0). The checks show the call is not constant
; folded on this path: an s_bfe_i32 with operands 0, 0 is still emitted.
331 define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
332 ; GFX6-LABEL: bfe_i32_constant_fold_test_0:
334 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
335 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0
336 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
337 ; GFX6-NEXT: s_mov_b32 s2, -1
338 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
339 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
340 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
341 ; GFX6-NEXT: s_endpgm
342 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
343 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(12334, 0, 0). The checks expect an unfolded
; s_bfe_i32 with source literal 0x302e (12334) and control operand 0.
347 define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
348 ; GFX6-LABEL: bfe_i32_constant_fold_test_1:
350 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
351 ; GFX6-NEXT: s_bfe_i32 s2, 0x302e, 0
352 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
353 ; GFX6-NEXT: s_mov_b32 s2, -1
354 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
355 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
356 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
357 ; GFX6-NEXT: s_endpgm
358 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
359 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(0, 0, 1). The checks expect an unfolded s_bfe_i32
; with source 0 and control immediate 0x10000 (1 << 16 | 0).
363 define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
364 ; GFX6-LABEL: bfe_i32_constant_fold_test_2:
366 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
367 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0x10000
368 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
369 ; GFX6-NEXT: s_mov_b32 s2, -1
370 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
371 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
372 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
373 ; GFX6-NEXT: s_endpgm
374 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
375 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(1, 0, 1). The checks expect an unfolded s_bfe_i32
; with source 1 and control immediate 0x10000 (1 << 16 | 0).
379 define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
380 ; GFX6-LABEL: bfe_i32_constant_fold_test_3:
382 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
383 ; GFX6-NEXT: s_bfe_i32 s2, 1, 0x10000
384 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
385 ; GFX6-NEXT: s_mov_b32 s2, -1
386 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
387 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
388 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
389 ; GFX6-NEXT: s_endpgm
390 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
391 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(4294967295, 0, 1). The checks expect an unfolded
; s_bfe_i32 with the source printed as -1 and control immediate 0x10000.
395 define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
396 ; GFX6-LABEL: bfe_i32_constant_fold_test_4:
398 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
399 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x10000
400 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
401 ; GFX6-NEXT: s_mov_b32 s2, -1
402 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
403 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
405 ; GFX6-NEXT: s_endpgm
406 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
407 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(128, 7, 1). The checks expect the control word
; 0x10007 (1 << 16 | 7) to be materialized in s2 with s_mov_b32 and then an
; unfolded s_bfe_i32 with source literal 0x80.
411 define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
412 ; GFX6-LABEL: bfe_i32_constant_fold_test_5:
414 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
415 ; GFX6-NEXT: s_mov_b32 s2, 0x10007
416 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2
417 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
418 ; GFX6-NEXT: s_mov_b32 s2, -1
419 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
420 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
421 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
422 ; GFX6-NEXT: s_endpgm
423 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
424 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(128, 0, 8). The checks expect the control word
; 0x80000 (8 << 16 | 0) in s2 and an unfolded s_bfe_i32 with source literal 0x80.
428 define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
429 ; GFX6-LABEL: bfe_i32_constant_fold_test_6:
431 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
432 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
433 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2
434 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
435 ; GFX6-NEXT: s_mov_b32 s2, -1
436 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
437 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
438 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
439 ; GFX6-NEXT: s_endpgm
440 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
441 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(127, 0, 8). The checks expect the control word
; 0x80000 (8 << 16 | 0) in s2 and an unfolded s_bfe_i32 with source literal 0x7f.
445 define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
446 ; GFX6-LABEL: bfe_i32_constant_fold_test_7:
448 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
449 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
450 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2
451 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
452 ; GFX6-NEXT: s_mov_b32 s2, -1
453 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
454 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
455 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
456 ; GFX6-NEXT: s_endpgm
457 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
458 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(127, 6, 8). The checks expect the control word
; 0x80006 (8 << 16 | 6) in s2 and an unfolded s_bfe_i32 with source literal 0x7f.
462 define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
463 ; GFX6-LABEL: bfe_i32_constant_fold_test_8:
465 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
466 ; GFX6-NEXT: s_mov_b32 s2, 0x80006
467 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2
468 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
469 ; GFX6-NEXT: s_mov_b32 s2, -1
470 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
471 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
472 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
473 ; GFX6-NEXT: s_endpgm
474 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
475 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(65536, 16, 8). The checks expect the control word
; 0x80010 (8 << 16 | 16) in s2 and an unfolded s_bfe_i32 with source literal
; 0x10000.
479 define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
480 ; GFX6-LABEL: bfe_i32_constant_fold_test_9:
482 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
483 ; GFX6-NEXT: s_mov_b32 s2, 0x80010
484 ; GFX6-NEXT: s_bfe_i32 s2, 0x10000, s2
485 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
486 ; GFX6-NEXT: s_mov_b32 s2, -1
487 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
488 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
489 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
490 ; GFX6-NEXT: s_endpgm
491 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
492 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(65535, 16, 16). The checks expect the control word
; 0x100010 (16 << 16 | 16) in s2 and an unfolded s_bfe_i32 with source literal
; 0xffff.
496 define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
497 ; GFX6-LABEL: bfe_i32_constant_fold_test_10:
499 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
500 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
501 ; GFX6-NEXT: s_bfe_i32 s2, 0xffff, s2
502 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
503 ; GFX6-NEXT: s_mov_b32 s2, -1
504 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
505 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
506 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
507 ; GFX6-NEXT: s_endpgm
508 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
509 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(160, 4, 4). The checks expect the control word
; 0x40004 (4 << 16 | 4) in s2 and an unfolded s_bfe_i32 with source literal 0xa0.
513 define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
514 ; GFX6-LABEL: bfe_i32_constant_fold_test_11:
516 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
517 ; GFX6-NEXT: s_mov_b32 s2, 0x40004
518 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
519 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
520 ; GFX6-NEXT: s_mov_b32 s2, -1
521 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
522 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
523 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
524 ; GFX6-NEXT: s_endpgm
525 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
526 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(160, 31, 1). The checks expect the control word
; 0x1001f (1 << 16 | 31) in s2 and an unfolded s_bfe_i32 with source literal 0xa0.
530 define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
531 ; GFX6-LABEL: bfe_i32_constant_fold_test_12:
533 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
534 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
535 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
536 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
537 ; GFX6-NEXT: s_mov_b32 s2, -1
538 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
539 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
540 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
541 ; GFX6-NEXT: s_endpgm
542 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
543 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(131070, 16, 16). The checks expect the control word
; 0x100010 (16 << 16 | 16) in s2 and an unfolded s_bfe_i32 with source literal
; 0x1fffe.
547 define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
548 ; GFX6-LABEL: bfe_i32_constant_fold_test_13:
550 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
551 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
552 ; GFX6-NEXT: s_bfe_i32 s2, 0x1fffe, s2
553 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
554 ; GFX6-NEXT: s_mov_b32 s2, -1
555 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
556 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
557 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
558 ; GFX6-NEXT: s_endpgm
559 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
560 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(160, 2, 30). The checks expect the control word
; 0x1e0002 (30 << 16 | 2) in s2 and an unfolded s_bfe_i32 with source literal
; 0xa0.
564 define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
565 ; GFX6-LABEL: bfe_i32_constant_fold_test_14:
567 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
568 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002
569 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
570 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
571 ; GFX6-NEXT: s_mov_b32 s2, -1
572 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
573 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
574 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
575 ; GFX6-NEXT: s_endpgm
576 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
577 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(160, 4, 28). The checks expect the control word
; 0x1c0004 (28 << 16 | 4) in s2 and an unfolded s_bfe_i32 with source literal
; 0xa0.
581 define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
582 ; GFX6-LABEL: bfe_i32_constant_fold_test_15:
584 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
585 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004
586 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
587 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
588 ; GFX6-NEXT: s_mov_b32 s2, -1
589 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
590 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
591 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
592 ; GFX6-NEXT: s_endpgm
593 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
594 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(4294967295, 1, 7). The checks expect an unfolded
; s_bfe_i32 with the source printed as -1 and the control immediate 0x70001
; (7 << 16 | 1) used directly, without a separate s_mov_b32.
598 define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
599 ; GFX6-LABEL: bfe_i32_constant_fold_test_16:
601 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
602 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x70001
603 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
604 ; GFX6-NEXT: s_mov_b32 s2, -1
605 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
606 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
607 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
608 ; GFX6-NEXT: s_endpgm
609 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
610 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(255, 1, 31). The checks expect the control word
; 0x1f0001 (31 << 16 | 1) in s2 and an unfolded s_bfe_i32 with source literal
; 0xff.
614 define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
615 ; GFX6-LABEL: bfe_i32_constant_fold_test_17:
617 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
618 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001
619 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2
620 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
621 ; GFX6-NEXT: s_mov_b32 s2, -1
622 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
623 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
624 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
625 ; GFX6-NEXT: s_endpgm
626 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
627 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; All-constant call sbfe(255, 31, 1). The checks expect the control word
; 0x1001f (1 << 16 | 31) in s2 and an unfolded s_bfe_i32 with source literal
; 0xff.
631 define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
632 ; GFX6-LABEL: bfe_i32_constant_fold_test_18:
634 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
635 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
636 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2
637 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
638 ; GFX6-NEXT: s_mov_b32 s2, -1
639 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
640 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
641 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
642 ; GFX6-NEXT: s_endpgm
643 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
644 store i32 %bfe_i32, ptr addrspace(1) %out, align 4
; sbfe(x, 0, 24) followed by a shl 8 / ashr 8 pair on the result. The checks
; expect two identical s_bfe_i32 instructions with immediate 0x180000
; (24 << 16 | 0), i.e. the shift pair shows up as a second BFE rather than
; being removed.
648 define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
649 ; GFX6-LABEL: bfe_sext_in_reg_i24:
651 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
652 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
653 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
654 ; GFX6-NEXT: s_mov_b32 s2, -1
655 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
656 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000
657 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000
658 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
659 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
660 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
661 ; GFX6-NEXT: s_endpgm
662 %x = load i32, ptr addrspace(1) %in, align 4
663 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
664 %shl = shl i32 %bfe, 8
665 %ashr = ashr i32 %shl, 8
666 store i32 %ashr, ptr addrspace(1) %out, align 4
; sbfe(src, 1, 16) whose result is divided by the constant 2 with sdiv. The
; checks expect s_bfe_i32 with immediate 0x100001 (16 << 16 | 1) and a fully
; expanded division sequence (reciprocal of 2.0, multiply-high, compare and
; select corrections, sign fix-up) rather than a simplified form.
670 define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
671 ; GFX6-LABEL: simplify_demanded_bfe_sdiv:
673 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
674 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
675 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
676 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
677 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
678 ; GFX6-NEXT: s_load_dword s0, s[6:7], 0x0
679 ; GFX6-NEXT: s_mov_b32 s6, -1
680 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
681 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, -2
682 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
683 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x100001
684 ; GFX6-NEXT: s_ashr_i32 s2, s0, 31
685 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
686 ; GFX6-NEXT: s_add_i32 s0, s0, s2
687 ; GFX6-NEXT: s_xor_b32 s0, s0, s2
688 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
689 ; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
690 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0
691 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
692 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
693 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
694 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
695 ; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1
696 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
697 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
698 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
699 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
700 ; GFX6-NEXT: v_xor_b32_e32 v0, s2, v0
701 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
702 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
703 ; GFX6-NEXT: s_endpgm
704 %src = load i32, ptr addrspace(1) %in, align 4
705 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
706 %div = sdiv i32 %bfe, 2
707 store i32 %div, ptr addrspace(1) %out, align 4
; sbfe applied to a loaded value with operands 8 and 0. The checks expect the
; load to be kept and an s_bfe_i32 with the plain immediate 8, i.e. the call
; is not folded away.
711 define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
712 ; GFX6-LABEL: bfe_0_width:
714 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
715 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
716 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
717 ; GFX6-NEXT: s_mov_b32 s2, -1
718 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
719 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
720 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
721 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
722 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
723 ; GFX6-NEXT: s_endpgm
724 %load = load i32, ptr addrspace(1) %ptr, align 4
725 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
726 store i32 %bfe, ptr addrspace(1) %out, align 4
; Two chained sbfe calls, both with operands 0 and 8. The checks expect two
; back-to-back s_bfe_i32 instructions with immediate 0x80000 (8 << 16 | 0);
; the second one is not merged into the first.
730 define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
731 ; GFX6-LABEL: bfe_8_bfe_8:
733 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
734 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
735 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
736 ; GFX6-NEXT: s_mov_b32 s2, -1
737 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
738 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
739 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
740 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
741 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
742 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
743 ; GFX6-NEXT: s_endpgm
744 %load = load i32, ptr addrspace(1) %ptr, align 4
745 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
746 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
747 store i32 %bfe1, ptr addrspace(1) %out, align 4
; Two chained sbfe calls: operands (0, 8) first, then (0, 16) on the result.
; The checks expect s_bfe_i32 with 0x80000 followed by s_bfe_i32 with
; 0x100000 (16 << 16 | 0); both instructions remain.
751 define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
752 ; GFX6-LABEL: bfe_8_bfe_16:
754 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
755 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
756 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
757 ; GFX6-NEXT: s_mov_b32 s2, -1
758 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
759 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
760 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000
761 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
762 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
763 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
764 ; GFX6-NEXT: s_endpgm
765 %load = load i32, ptr addrspace(1) %ptr, align 4
766 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
767 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
768 store i32 %bfe1, ptr addrspace(1) %out, align 4
772 ; The two chained BFEs below really should be folded into a single one.
; Two chained sbfe calls: operands (0, 16) first, then (0, 8) on the result.
; The checks currently expect both instructions to remain: s_bfe_i32 with
; 0x100000 followed by s_bfe_i32 with 0x80000.
773 define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
774 ; GFX6-LABEL: bfe_16_bfe_8:
776 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
777 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
778 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
779 ; GFX6-NEXT: s_mov_b32 s2, -1
780 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
781 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000
782 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
783 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
784 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
785 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
786 ; GFX6-NEXT: s_endpgm
787 %load = load i32, ptr addrspace(1) %ptr, align 4
788 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
789 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
790 store i32 %bfe1, ptr addrspace(1) %out, align 4
794 ; Make sure there isn't a redundant BFE
; sbfe(a + b, 0, 8) followed by a shl 24 / ashr 24 pair. The checks currently
; expect s_bfe_i32 with immediate 0x80000 followed by s_sext_i32_i8, so both
; the BFE and the sign extension are present in the output.
795 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
796 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
798 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
799 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
800 ; GFX6-NEXT: s_add_i32 s3, s2, s3
801 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
802 ; GFX6-NEXT: s_sext_i32_i8 s3, s3
803 ; GFX6-NEXT: s_mov_b32 s2, -1
804 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
805 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
806 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
807 ; GFX6-NEXT: s_endpgm
808 %c = add i32 %a, %b ; add to prevent folding into extload
809 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
810 %shl = shl i32 %bfe, 24
811 %ashr = ashr i32 %shl, 24
812 store i32 %ashr, ptr addrspace(1) %out, align 4
; Same shape as sext_in_reg_i8_to_i32_bfe but with the constant sbfe operands
; swapped to (8, 0). The checks expect s_bfe_i32 with the plain immediate 8
; followed by s_sext_i32_i8.
816 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
817 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
819 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
820 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
821 ; GFX6-NEXT: s_add_i32 s3, s2, s3
822 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
823 ; GFX6-NEXT: s_sext_i32_i8 s3, s3
824 ; GFX6-NEXT: s_mov_b32 s2, -1
825 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
826 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
827 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
828 ; GFX6-NEXT: s_endpgm
829 %c = add i32 %a, %b ; add to prevent folding into extload
830 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
831 %shl = shl i32 %bfe, 24
832 %ashr = ashr i32 %shl, 24
833 store i32 %ashr, ptr addrspace(1) %out, align 4
; i8 load sign-extended to i32, then sbfe(sext, 0, 8), then shl 24 / ashr 24.
; The checks expect a buffer_load_sbyte followed by two identical
; v_bfe_i32 v0, v0, 0, 8 instructions (vector form, three separate operands).
837 define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
838 ; GFX6-LABEL: sextload_i8_to_i32_bfe:
840 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
841 ; GFX6-NEXT: s_mov_b32 s6, -1
842 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
843 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
844 ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
845 ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0
846 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
847 ; GFX6-NEXT: s_waitcnt vmcnt(0)
848 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
849 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
850 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
851 ; GFX6-NEXT: s_endpgm
852 %load = load i8, ptr addrspace(1) %ptr, align 1
853 %sext = sext i8 %load to i32
854 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
855 %shl = shl i32 %bfe, 24
856 %ashr = ashr i32 %shl, 24
857 store i32 %ashr, ptr addrspace(1) %out, align 4
; Same shape as sextload_i8_to_i32_bfe but with the constant sbfe operands
; swapped to (8, 0). The checks expect v_bfe_i32 v0, v0, 8, 0 for the
; intrinsic followed by v_bfe_i32 v0, v0, 0, 8 for the shl/ashr pair.
861 define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
862 ; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
864 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
865 ; GFX6-NEXT: s_mov_b32 s6, -1
866 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
867 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
868 ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
869 ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0
870 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
871 ; GFX6-NEXT: s_waitcnt vmcnt(0)
872 ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0
873 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
874 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
875 ; GFX6-NEXT: s_endpgm
876 %load = load i8, ptr addrspace(1) %ptr, align 1
877 %sext = sext i8 %load to i32
878 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
879 %shl = shl i32 %bfe, 24
880 %ashr = ashr i32 %shl, 24
881 store i32 %ashr, ptr addrspace(1) %out, align 4
; shl 31 / ashr 31 on the loaded value, then sbfe(shr, 0, 1). The checks
; expect two identical s_bfe_i32 instructions with immediate 0x10000
; (1 << 16 | 0): one for the shift pair and one for the intrinsic.
885 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
886 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
888 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
889 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
890 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
891 ; GFX6-NEXT: s_mov_b32 s2, -1
892 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
893 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000
894 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000
895 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
896 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
897 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
898 ; GFX6-NEXT: s_endpgm
899 %x = load i32, ptr addrspace(1) %in, align 4
900 %shl = shl i32 %x, 31
901 %shr = ashr i32 %shl, 31
902 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
903 store i32 %bfe, ptr addrspace(1) %out, align 4
; shl 30 / ashr 30 on the loaded value, then sbfe(shr, 1, 1). The checks
; expect s_bfe_i32 with 0x20000 (2 << 16 | 0) for the shift pair followed by
; s_bfe_i32 with 0x10001 (1 << 16 | 1) for the intrinsic.
907 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
908 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
910 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
911 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
912 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
913 ; GFX6-NEXT: s_mov_b32 s2, -1
914 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
915 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
916 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10001
917 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
918 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
919 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
920 ; GFX6-NEXT: s_endpgm
921 %x = load i32, ptr addrspace(1) %in, align 4
922 %shl = shl i32 %x, 30
923 %shr = ashr i32 %shl, 30
924 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
925 store i32 %bfe, ptr addrspace(1) %out, align 4
; shl 30 / ashr 30 on the loaded value, then sbfe(shr, 1, 2). The checks
; expect s_bfe_i32 with 0x20000 (2 << 16 | 0) for the shift pair followed by
; s_bfe_i32 with 0x20001 (2 << 16 | 1) for the intrinsic.
929 define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
930 ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1:
932 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
933 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
934 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
935 ; GFX6-NEXT: s_mov_b32 s2, -1
936 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
937 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
938 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20001
939 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
940 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
941 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
942 ; GFX6-NEXT: s_endpgm
943 %x = load i32, ptr addrspace(1) %in, align 4
944 %shl = shl i32 %x, 30
945 %shr = ashr i32 %shl, 30
946 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
947 store i32 %bfe, ptr addrspace(1) %out, align 4
951 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
952 declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1
954 attributes #0 = { nounwind }
955 attributes #1 = { nounwind readnone }