1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; VALU case: default calling convention, so the three operands arrive in v0-v2.
; The checks expect the intrinsic to map onto one three-operand v_bfe_i32.
4 define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5 ; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
7 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; GFX6-NEXT: v_bfe_i32 v0, v0, v1, v2
9 ; GFX6-NEXT: s_setpc_b64 s[30:31]
10 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
; SALU case: amdgpu_ps with inreg arguments, which the checks show in s0-s2.
; s_bfe_i32 takes one packed control operand, so the checks expect it to be
; built at run time as (%src1 & 63) | (%src2 << 16) before the extract.
14 define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15 ; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
17 ; GFX6-NEXT: s_and_b32 s1, s1, 63
18 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
19 ; GFX6-NEXT: s_or_b32 s1, s1, s2
20 ; GFX6-NEXT: s_bfe_i32 s0, s0, s1
21 ; GFX6-NEXT: ; return to shader part epilog
22 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
26 ; TODO: The 64-bit VGPR (v_) case is not handled yet and needs to be expanded.
27 ; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28 ; %bfe_i64 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
; 64-bit SALU case: the source occupies the pair s[0:1]; offset and width arrive
; in s2 and s3. The checks expect the control operand to be packed as
; (%src1 & 63) | (%src2 << 16) and consumed by a single s_bfe_i64.
; NOTE(review): the i64 result is named %bfe_i32; consider renaming it to
; %bfe_i64 together with every use.
32 define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33 ; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
35 ; GFX6-NEXT: s_and_b32 s2, s2, 63
36 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
37 ; GFX6-NEXT: s_or_b32 s2, s2, s3
38 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], s2
39 ; GFX6-NEXT: ; return to shader part epilog
40 %bfe_i32 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
; Kernel with a variable source and offset and a constant width of 123.
; The checks expect the offset to be masked with 63 and the width to be OR'ed in
; as the immediate 0x7b0000 (123 << 16).
44 define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
45 ; GFX6-LABEL: bfe_i32_arg_arg_imm:
47 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3
48 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2
49 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
50 ; GFX6-NEXT: s_mov_b32 s2, -1
51 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
52 ; GFX6-NEXT: s_and_b32 s3, s3, 63
53 ; GFX6-NEXT: s_or_b32 s3, s3, 0x7b0000
54 ; GFX6-NEXT: s_bfe_i32 s3, s4, s3
55 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
56 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
57 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
59 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
60 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Kernel with a variable source and width and a constant offset of 123.
; The checks expect the offset to appear already masked as the immediate 59
; (123 & 63), OR'ed with the width shifted left by 16.
64 define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
65 ; GFX6-LABEL: bfe_i32_arg_imm_arg:
67 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3
68 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2
69 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
70 ; GFX6-NEXT: s_mov_b32 s2, -1
71 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
72 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
73 ; GFX6-NEXT: s_or_b32 s3, 59, s3
74 ; GFX6-NEXT: s_bfe_i32 s3, s4, s3
75 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
76 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
77 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
79 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
80 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Kernel with a constant source of 123 (0x7b) and a variable offset and width.
; The checks expect the control operand to be packed at run time as
; (%src1 & 63) | (%src2 << 16), with the source passed as a literal.
84 define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
85 ; GFX6-LABEL: bfe_i32_imm_arg_arg:
87 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
88 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3
89 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
90 ; GFX6-NEXT: s_mov_b32 s2, -1
91 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
92 ; GFX6-NEXT: s_and_b32 s3, s3, 63
93 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16
94 ; GFX6-NEXT: s_or_b32 s3, s3, s4
95 ; GFX6-NEXT: s_bfe_i32 s3, 0x7b, s3
96 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
97 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
98 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
100 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
101 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Source loaded from memory, constant offset 2 and width 8.
; The checks expect one s_bfe_i32 with the packed immediate 0x80002
; (8 << 16 | 2). Despite the v_ prefix in the name, the expected extract is the
; scalar instruction.
105 define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
106 ; GFX6-LABEL: v_bfe_print_arg:
108 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
109 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
110 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
111 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
112 ; GFX6-NEXT: s_mov_b32 s2, -1
113 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
114 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80002
115 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
116 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
117 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
118 ; GFX6-NEXT: s_endpgm
119 %load = load i32, i32 addrspace(1)* %src0, align 4
120 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
121 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Variable source and offset with a constant width of 0.
; The checks expect only the offset mask (& 63) to feed the control operand;
; nothing is OR'ed in for the zero width, and the BFE is still emitted.
; The result is named %bfe_i32 (not %bfe_u32) because this is the signed
; extract intrinsic.
125 define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
126 ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset:
128 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3
129 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2
130 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
131 ; GFX6-NEXT: s_mov_b32 s2, -1
132 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
133 ; GFX6-NEXT: s_and_b32 s3, s3, 63
134 ; GFX6-NEXT: s_bfe_i32 s3, s4, s3
135 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
136 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
137 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
138 ; GFX6-NEXT: s_endpgm
139 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
140 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Variable source with a constant offset of 8 and a constant width of 0.
; The checks expect the control operand to collapse to the immediate 8
; (0 << 16 | 8); the zero-width BFE is still emitted rather than folded.
; The result is named %bfe_i32 (not %bfe_u32) because this is the signed
; extract intrinsic. %src1 is unused.
144 define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
145 ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset:
147 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
148 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
149 ; GFX6-NEXT: s_mov_b32 s2, -1
150 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
151 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
152 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
153 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
154 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
155 ; GFX6-NEXT: s_endpgm
156 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
157 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Source is (x << 31); extract with offset 1 and width 31.
; The checks expect the shift to remain, followed by a BFE with control word
; 0x1f0001 (31 << 16 | 1).
161 define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
162 ; GFX6-LABEL: bfe_i32_test_6:
164 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
165 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
166 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
167 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
168 ; GFX6-NEXT: s_mov_b32 s2, -1
169 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
171 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001
172 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
173 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
174 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
175 ; GFX6-NEXT: s_endpgm
176 %x = load i32, i32 addrspace(1)* %in, align 4
177 %shl = shl i32 %x, 31
178 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
179 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Source is (x << 31); extract with offset 0 and width 31.
; The checks expect the shift to remain, followed by a BFE with control word
; 0x1f0000 (31 << 16 | 0).
183 define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
184 ; GFX6-LABEL: bfe_i32_test_7:
186 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
187 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
188 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
189 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
190 ; GFX6-NEXT: s_mov_b32 s2, -1
191 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
192 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
193 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0000
194 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
195 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
196 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
197 ; GFX6-NEXT: s_endpgm
198 %x = load i32, i32 addrspace(1)* %in, align 4
199 %shl = shl i32 %x, 31
200 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
201 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Source is (x << 31); extract the single bit at offset 31 (width 1).
; The checks expect the shift to remain, followed by a BFE with control word
; 0x1001f (1 << 16 | 31).
205 define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
206 ; GFX6-LABEL: bfe_i32_test_8:
208 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
209 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
210 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
211 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
212 ; GFX6-NEXT: s_mov_b32 s2, -1
213 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
214 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
215 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
216 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
217 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
218 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
219 ; GFX6-NEXT: s_endpgm
220 %x = load i32, i32 addrspace(1)* %in, align 4
221 %shl = shl i32 %x, 31
222 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
223 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Loaded source, extract the single bit at offset 31 (width 1).
; The checks expect one BFE with control word 0x1001f (1 << 16 | 31).
227 define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
228 ; GFX6-LABEL: bfe_i32_test_9:
230 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
231 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
232 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
233 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
234 ; GFX6-NEXT: s_mov_b32 s2, -1
235 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
236 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
237 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
238 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
239 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
240 ; GFX6-NEXT: s_endpgm
241 %x = load i32, i32 addrspace(1)* %in, align 4
242 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
243 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Loaded source, extract with offset 1 and width 31.
; The checks expect one BFE with control word 0x1f0001 (31 << 16 | 1).
247 define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
248 ; GFX6-LABEL: bfe_i32_test_10:
250 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
251 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
252 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
253 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
254 ; GFX6-NEXT: s_mov_b32 s2, -1
255 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
256 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001
257 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
258 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
259 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
260 ; GFX6-NEXT: s_endpgm
261 %x = load i32, i32 addrspace(1)* %in, align 4
262 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
263 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Loaded source, extract with offset 8 and width 24.
; The checks expect one BFE with control word 0x180008 (24 << 16 | 8).
267 define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
268 ; GFX6-LABEL: bfe_i32_test_11:
270 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
271 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
272 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
273 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
274 ; GFX6-NEXT: s_mov_b32 s2, -1
275 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
276 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180008
277 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
278 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
279 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
280 ; GFX6-NEXT: s_endpgm
281 %x = load i32, i32 addrspace(1)* %in, align 4
282 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
283 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Loaded source, extract with offset 24 and width 8 (the top byte).
; The checks expect one BFE with control word 0x80018 (8 << 16 | 24).
287 define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
288 ; GFX6-LABEL: bfe_i32_test_12:
290 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
291 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
292 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
293 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
294 ; GFX6-NEXT: s_mov_b32 s2, -1
295 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
296 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80018
297 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
298 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
299 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
300 ; GFX6-NEXT: s_endpgm
301 %x = load i32, i32 addrspace(1)* %in, align 4
302 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
303 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Source is (x ashr 31); extract the single bit at offset 31 (width 1).
; The checks expect the arithmetic shift to remain, followed by a BFE with
; control word 0x1001f (1 << 16 | 31).
; The shifted value is named %ashr to match the instruction that produces it.
307 define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
308 ; GFX6-LABEL: bfe_i32_test_13:
310 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
311 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
312 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
313 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
314 ; GFX6-NEXT: s_mov_b32 s2, -1
315 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
316 ; GFX6-NEXT: s_ashr_i32 s3, s3, 31
317 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
318 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
319 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
320 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
321 ; GFX6-NEXT: s_endpgm
322 %x = load i32, i32 addrspace(1)* %in, align 4
323 %ashr = ashr i32 %x, 31
324 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %ashr, i32 31, i32 1)
325 store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
; Source is (x lshr 31); extract the single bit at offset 31 (width 1).
; The checks expect the logical shift to remain, followed by a BFE with
; control word 0x1001f (1 << 16 | 31).
; The shifted value is named %lshr to match the instruction that produces it.
328 define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
329 ; GFX6-LABEL: bfe_i32_test_14:
331 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
332 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
333 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
334 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
335 ; GFX6-NEXT: s_mov_b32 s2, -1
336 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
337 ; GFX6-NEXT: s_lshr_b32 s3, s3, 31
338 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f
339 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
340 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
341 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
342 ; GFX6-NEXT: s_endpgm
343 %x = load i32, i32 addrspace(1)* %in, align 4
344 %lshr = lshr i32 %x, 31
345 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %lshr, i32 31, i32 1)
346 store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
; All-constant operands: src=0, offset=0, width=0.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted on
; the immediates 0, 0.
349 define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
350 ; GFX6-LABEL: bfe_i32_constant_fold_test_0:
352 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
353 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0
354 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
355 ; GFX6-NEXT: s_mov_b32 s2, -1
356 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
357 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
358 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
359 ; GFX6-NEXT: s_endpgm
360 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
361 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=12334 (0x302e), offset=0, width=0.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted
; with the source as a literal and a control word of 0.
365 define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
366 ; GFX6-LABEL: bfe_i32_constant_fold_test_1:
368 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
369 ; GFX6-NEXT: s_bfe_i32 s2, 0x302e, 0
370 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
371 ; GFX6-NEXT: s_mov_b32 s2, -1
372 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
373 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
374 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
375 ; GFX6-NEXT: s_endpgm
376 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
377 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=0, offset=0, width=1.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted
; with control word 0x10000 (1 << 16 | 0).
381 define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
382 ; GFX6-LABEL: bfe_i32_constant_fold_test_2:
384 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
385 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0x10000
386 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
387 ; GFX6-NEXT: s_mov_b32 s2, -1
388 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
389 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
390 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
391 ; GFX6-NEXT: s_endpgm
392 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
393 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=1, offset=0, width=1.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted
; with control word 0x10000 (1 << 16 | 0).
397 define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
398 ; GFX6-LABEL: bfe_i32_constant_fold_test_3:
400 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
401 ; GFX6-NEXT: s_bfe_i32 s2, 1, 0x10000
402 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
403 ; GFX6-NEXT: s_mov_b32 s2, -1
404 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
405 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
406 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
407 ; GFX6-NEXT: s_endpgm
408 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
409 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=4294967295 (printed as -1), offset=0, width=1.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted
; with control word 0x10000 (1 << 16 | 0).
413 define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
414 ; GFX6-LABEL: bfe_i32_constant_fold_test_4:
416 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
417 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x10000
418 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
419 ; GFX6-NEXT: s_mov_b32 s2, -1
420 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
421 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
422 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
423 ; GFX6-NEXT: s_endpgm
424 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
425 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=128 (0x80), offset=7, width=1.
; The checks do not expect a folded constant yet: the control word 0x10007
; (1 << 16 | 7) is moved into s2 and s_bfe_i32 is still emitted.
429 define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
430 ; GFX6-LABEL: bfe_i32_constant_fold_test_5:
432 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
433 ; GFX6-NEXT: s_mov_b32 s2, 0x10007
434 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2
435 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
436 ; GFX6-NEXT: s_mov_b32 s2, -1
437 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
438 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
439 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
440 ; GFX6-NEXT: s_endpgm
441 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
442 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=128 (0x80), offset=0, width=8.
; The checks do not expect a folded constant yet: the control word 0x80000
; (8 << 16 | 0) is moved into s2 and s_bfe_i32 is still emitted.
446 define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
447 ; GFX6-LABEL: bfe_i32_constant_fold_test_6:
449 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
450 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
451 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2
452 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
453 ; GFX6-NEXT: s_mov_b32 s2, -1
454 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
455 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
456 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
457 ; GFX6-NEXT: s_endpgm
458 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
459 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=127 (0x7f), offset=0, width=8.
; The checks do not expect a folded constant yet: the control word 0x80000
; (8 << 16 | 0) is moved into s2 and s_bfe_i32 is still emitted.
463 define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
464 ; GFX6-LABEL: bfe_i32_constant_fold_test_7:
466 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
467 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
468 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2
469 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
470 ; GFX6-NEXT: s_mov_b32 s2, -1
471 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
472 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
473 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
474 ; GFX6-NEXT: s_endpgm
475 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
476 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=127 (0x7f), offset=6, width=8.
; The checks do not expect a folded constant yet: the control word 0x80006
; (8 << 16 | 6) is moved into s2 and s_bfe_i32 is still emitted.
480 define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
481 ; GFX6-LABEL: bfe_i32_constant_fold_test_8:
483 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
484 ; GFX6-NEXT: s_mov_b32 s2, 0x80006
485 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2
486 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
487 ; GFX6-NEXT: s_mov_b32 s2, -1
488 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
489 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
490 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
491 ; GFX6-NEXT: s_endpgm
492 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
493 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=65536 (0x10000), offset=16, width=8.
; The checks do not expect a folded constant yet: the control word 0x80010
; (8 << 16 | 16) is moved into s2 and s_bfe_i32 is still emitted.
497 define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
498 ; GFX6-LABEL: bfe_i32_constant_fold_test_9:
500 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
501 ; GFX6-NEXT: s_mov_b32 s2, 0x80010
502 ; GFX6-NEXT: s_bfe_i32 s2, 0x10000, s2
503 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
504 ; GFX6-NEXT: s_mov_b32 s2, -1
505 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
506 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
507 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
508 ; GFX6-NEXT: s_endpgm
509 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
510 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=65535 (0xffff), offset=16, width=16.
; The checks do not expect a folded constant yet: the control word 0x100010
; (16 << 16 | 16) is moved into s2 and s_bfe_i32 is still emitted.
514 define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
515 ; GFX6-LABEL: bfe_i32_constant_fold_test_10:
517 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
518 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
519 ; GFX6-NEXT: s_bfe_i32 s2, 0xffff, s2
520 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
521 ; GFX6-NEXT: s_mov_b32 s2, -1
522 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
523 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
524 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
525 ; GFX6-NEXT: s_endpgm
526 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
527 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=160 (0xa0), offset=4, width=4.
; The checks do not expect a folded constant yet: the control word 0x40004
; (4 << 16 | 4) is moved into s2 and s_bfe_i32 is still emitted.
531 define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
532 ; GFX6-LABEL: bfe_i32_constant_fold_test_11:
534 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
535 ; GFX6-NEXT: s_mov_b32 s2, 0x40004
536 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
537 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
538 ; GFX6-NEXT: s_mov_b32 s2, -1
539 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
540 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
541 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
542 ; GFX6-NEXT: s_endpgm
543 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
544 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=160 (0xa0), offset=31, width=1.
; The checks do not expect a folded constant yet: the control word 0x1001f
; (1 << 16 | 31) is moved into s2 and s_bfe_i32 is still emitted.
548 define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
549 ; GFX6-LABEL: bfe_i32_constant_fold_test_12:
551 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
552 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
553 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
554 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
555 ; GFX6-NEXT: s_mov_b32 s2, -1
556 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
557 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
558 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
559 ; GFX6-NEXT: s_endpgm
560 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
561 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=131070 (0x1fffe), offset=16, width=16.
; The checks do not expect a folded constant yet: the control word 0x100010
; (16 << 16 | 16) is moved into s2 and s_bfe_i32 is still emitted.
565 define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
566 ; GFX6-LABEL: bfe_i32_constant_fold_test_13:
568 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
569 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
570 ; GFX6-NEXT: s_bfe_i32 s2, 0x1fffe, s2
571 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
572 ; GFX6-NEXT: s_mov_b32 s2, -1
573 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
574 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
575 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
576 ; GFX6-NEXT: s_endpgm
577 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
578 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=160 (0xa0), offset=2, width=30.
; The checks do not expect a folded constant yet: the control word 0x1e0002
; (30 << 16 | 2) is moved into s2 and s_bfe_i32 is still emitted.
582 define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
583 ; GFX6-LABEL: bfe_i32_constant_fold_test_14:
585 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
586 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002
587 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
588 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
589 ; GFX6-NEXT: s_mov_b32 s2, -1
590 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
591 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
592 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
593 ; GFX6-NEXT: s_endpgm
594 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
595 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=160 (0xa0), offset=4, width=28.
; The checks do not expect a folded constant yet: the control word 0x1c0004
; (28 << 16 | 4) is moved into s2 and s_bfe_i32 is still emitted.
599 define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
600 ; GFX6-LABEL: bfe_i32_constant_fold_test_15:
602 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
603 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004
604 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2
605 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
606 ; GFX6-NEXT: s_mov_b32 s2, -1
607 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
608 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
609 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
610 ; GFX6-NEXT: s_endpgm
611 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
612 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=4294967295 (printed as -1), offset=1, width=7.
; The checks do not expect a folded constant yet: s_bfe_i32 is still emitted
; with control word 0x70001 (7 << 16 | 1) as a literal.
616 define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
617 ; GFX6-LABEL: bfe_i32_constant_fold_test_16:
619 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
620 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x70001
621 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
622 ; GFX6-NEXT: s_mov_b32 s2, -1
623 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
624 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
625 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
626 ; GFX6-NEXT: s_endpgm
627 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
628 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=255 (0xff), offset=1, width=31.
; The checks do not expect a folded constant yet: the control word 0x1f0001
; (31 << 16 | 1) is moved into s2 and s_bfe_i32 is still emitted.
632 define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
633 ; GFX6-LABEL: bfe_i32_constant_fold_test_17:
635 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
636 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001
637 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2
638 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
639 ; GFX6-NEXT: s_mov_b32 s2, -1
640 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
641 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
642 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
643 ; GFX6-NEXT: s_endpgm
644 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
645 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; All-constant operands: src=255 (0xff), offset=31, width=1.
; The checks do not expect a folded constant yet: the control word 0x1001f
; (1 << 16 | 31) is moved into s2 and s_bfe_i32 is still emitted.
649 define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
650 ; GFX6-LABEL: bfe_i32_constant_fold_test_18:
652 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
653 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
654 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2
655 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
656 ; GFX6-NEXT: s_mov_b32 s2, -1
657 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
658 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
659 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
660 ; GFX6-NEXT: s_endpgm
661 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
662 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; A 24-bit signed extract at offset 0 followed by shl 8 / ashr 8, which
; sign-extends from the same 24 bits again.
; The checks expect two identical s_bfe_i32 with control word 0x180000
; (24 << 16 | 0), so the second extension is not removed.
666 define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
667 ; GFX6-LABEL: bfe_sext_in_reg_i24:
669 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
670 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
671 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
672 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
673 ; GFX6-NEXT: s_mov_b32 s2, -1
674 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
675 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000
676 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000
677 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
678 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
679 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
680 ; GFX6-NEXT: s_endpgm
681 %x = load i32, i32 addrspace(1)* %in, align 4
682 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
683 %shl = shl i32 %bfe, 8
684 %ashr = ashr i32 %shl, 8
685 store i32 %ashr, i32 addrspace(1)* %out, align 4
; A 16-bit signed extract at offset 1 feeding an sdiv by the constant 2.
; The checks expect the BFE (control word 0x100001 = 16 << 16 | 1) followed by
; a fully expanded division: a reciprocal-based quotient estimate
; (v_rcp_iflag_f32 / v_mul_hi_u32), two compare-and-select correction steps,
; and sign handling through s3 (s_ashr_i32 by 31, then xor/subtract).
689 define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
690 ; GFX6-LABEL: simplify_demanded_bfe_sdiv:
692 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
693 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
694 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
695 ; GFX6-NEXT: s_mov_b32 s6, -1
696 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
697 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
698 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
699 ; GFX6-NEXT: s_load_dword s2, s[2:3], 0x0
700 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
701 ; GFX6-NEXT: v_mul_lo_u32 v1, -2, v0
702 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
703 ; GFX6-NEXT: s_bfe_i32 s2, s2, 0x100001
704 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31
705 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
706 ; GFX6-NEXT: s_add_i32 s2, s2, s3
707 ; GFX6-NEXT: s_xor_b32 s2, s2, s3
708 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
709 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
710 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0
711 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
712 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1
713 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
714 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
715 ; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1
716 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
717 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
718 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
719 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
720 ; GFX6-NEXT: v_xor_b32_e32 v0, s3, v0
721 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
722 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
723 ; GFX6-NEXT: s_endpgm
724 %src = load i32, i32 addrspace(1)* %in, align 4
725 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
726 %div = sdiv i32 %bfe, 2
727 store i32 %div, i32 addrspace(1)* %out, align 4
; Loaded source with offset 8 and width 0.
; The checks expect the zero-width BFE to be emitted as-is, with the control
; word reduced to the immediate 8 (0 << 16 | 8).
731 define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
732 ; GFX6-LABEL: bfe_0_width:
734 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
735 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
736 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
737 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
738 ; GFX6-NEXT: s_mov_b32 s2, -1
739 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
740 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
741 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
742 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
743 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
744 ; GFX6-NEXT: s_endpgm
745 %load = load i32, i32 addrspace(1)* %ptr, align 4
746 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
747 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Two chained 8-bit signed extracts at offset 0.
; The checks expect both BFEs to remain, sharing one control word 0x80000
; (8 << 16 | 0) that is materialized once in s4.
751 define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
752 ; GFX6-LABEL: bfe_8_bfe_8:
754 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
755 ; GFX6-NEXT: s_mov_b32 s4, 0x80000
756 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
757 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
758 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
759 ; GFX6-NEXT: s_mov_b32 s2, -1
760 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
761 ; GFX6-NEXT: s_bfe_i32 s3, s3, s4
762 ; GFX6-NEXT: s_bfe_i32 s3, s3, s4
763 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
764 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
765 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
766 ; GFX6-NEXT: s_endpgm
767 %load = load i32, i32 addrspace(1)* %ptr, align 4
768 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
769 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
770 store i32 %bfe1, i32 addrspace(1)* %out, align 4
; An 8-bit signed extract followed by a 16-bit signed extract, both at offset 0.
; The checks expect both BFEs to remain, with control words 0x80000 and
; 0x100000 respectively.
774 define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
775 ; GFX6-LABEL: bfe_8_bfe_16:
777 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
778 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
779 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
780 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
781 ; GFX6-NEXT: s_mov_b32 s2, -1
782 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
783 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
784 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000
785 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
786 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
787 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
788 ; GFX6-NEXT: s_endpgm
789 %load = load i32, i32 addrspace(1)* %ptr, align 4
790 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
791 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
792 store i32 %bfe1, i32 addrspace(1)* %out, align 4
796 ; The two BFEs here really should be folded into a single one. A 16-bit
; signed extract is followed by an 8-bit signed extract, both at offset 0; the
; checks currently expect both to remain (control words 0x100000, 0x80000).
797 define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
798 ; GFX6-LABEL: bfe_16_bfe_8:
800 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
801 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
802 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
803 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
804 ; GFX6-NEXT: s_mov_b32 s2, -1
805 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
806 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000
807 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
808 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
809 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
810 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
811 ; GFX6-NEXT: s_endpgm
812 %load = load i32, i32 addrspace(1)* %ptr, align 4
813 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
814 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
815 store i32 %bfe1, i32 addrspace(1)* %out, align 4
819 ; Make sure there isn't a redundant BFE: an 8-bit signed extract at offset 0
; is followed by shl 24 / ashr 24, which sign-extends from the same 8 bits.
; NOTE(review): the checks currently contain both an s_bfe_i32 (0x80000) and an
; s_sext_i32_i8, so the redundant extension is not removed yet.
820 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
821 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
823 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
824 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3
825 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
826 ; GFX6-NEXT: s_mov_b32 s2, -1
827 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
828 ; GFX6-NEXT: s_add_i32 s3, s3, s4
829 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000
830 ; GFX6-NEXT: s_sext_i32_i8 s3, s3
831 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
832 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
833 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
834 ; GFX6-NEXT: s_endpgm
835 %c = add i32 %a, %b ; add to prevent folding into extload
836 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
837 %shl = shl i32 %bfe, 24
838 %ashr = ashr i32 %shl, 24
839 store i32 %ashr, i32 addrspace(1)* %out, align 4
; Same shl 24 / ashr 24 pattern, but the extract uses offset 8 and width 0.
; The checks expect the BFE (control word 8) and the s_sext_i32_i8 to both be
; emitted.
; NOTE(review): presumably "wrong" refers to the offset/width pair not
; describing an 8-bit extract at offset 0 - confirm.
843 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
844 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
846 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
847 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3
848 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
849 ; GFX6-NEXT: s_mov_b32 s2, -1
850 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
851 ; GFX6-NEXT: s_add_i32 s3, s3, s4
852 ; GFX6-NEXT: s_bfe_i32 s3, s3, 8
853 ; GFX6-NEXT: s_sext_i32_i8 s3, s3
854 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
855 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
856 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
857 ; GFX6-NEXT: s_endpgm
858 %c = add i32 %a, %b ; add to prevent folding into extload
859 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
860 %shl = shl i32 %bfe, 24
861 %ashr = ashr i32 %shl, 24
862 store i32 %ashr, i32 addrspace(1)* %out, align 4
; sextload_i8_to_i32_bfe: loads an i8 from %ptr, sign-extends it to i32, applies
; sbfe with (offset 0, width 8), then a shl/ashr-by-24 pair, and stores to %out.
; Expected output: a buffer_load_sbyte for the load, followed by two identical
; "v_bfe_i32 v0, v0, 0, 8" instructions (one for the sbfe call, one for the
; shl/ashr pair); neither is currently removed.
866 define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
867 ; GFX6-LABEL: sextload_i8_to_i32_bfe:
869 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
870 ; GFX6-NEXT: s_mov_b32 s6, -1
871 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
872 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
873 ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0
874 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
875 ; GFX6-NEXT: s_waitcnt vmcnt(0)
876 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
877 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
878 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
879 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
880 ; GFX6-NEXT: s_endpgm
881 %load = load i8, i8 addrspace(1)* %ptr, align 1
882 %sext = sext i8 %load to i32
883 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
; shl/ashr by 24 sign-extends the low 8 bits of %bfe to 32 bits.
884 %shl = shl i32 %bfe, 24
885 %ashr = ashr i32 %shl, 24
886 store i32 %ashr, i32 addrspace(1)* %out, align 4
; sextload_i8_to_i32_bfe_0: same as the sign-extending i8 load test with
; sbfe (offset 0, width 8), except the sbfe operands are (8, 0), i.e. offset 8
; and width 0.
; Expected output: buffer_load_sbyte, then "v_bfe_i32 v0, v0, 8, 0" for the
; sbfe call, then "v_bfe_i32 v0, v0, 0, 8" for the shl/ashr-by-24 pair.
890 define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
891 ; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
893 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
894 ; GFX6-NEXT: s_mov_b32 s6, -1
895 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
896 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
897 ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0
898 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
899 ; GFX6-NEXT: s_waitcnt vmcnt(0)
900 ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0
901 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
902 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
903 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
904 ; GFX6-NEXT: s_endpgm
905 %load = load i8, i8 addrspace(1)* %ptr, align 1
906 %sext = sext i8 %load to i32
; Operands are (offset 8, width 0) here.
907 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
908 %shl = shl i32 %bfe, 24
909 %ashr = ashr i32 %shl, 24
910 store i32 %ashr, i32 addrspace(1)* %out, align 4
; sext_in_reg_i1_bfe_offset_0: loads an i32 from %in, sign-extends its low bit
; with a shl/ashr-by-31 pair, then applies sbfe with (offset 0, width 1) and
; stores the result to %out.
; Expected output: two identical s_bfe_i32 instructions with immediate 0x10000
; ((1 << 16) | 0) -- one for the shl/ashr pair and one for the sbfe call.
914 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
915 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
917 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
918 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
919 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
920 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
921 ; GFX6-NEXT: s_mov_b32 s2, -1
922 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
923 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000
924 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000
925 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
926 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
927 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
928 ; GFX6-NEXT: s_endpgm
929 %x = load i32, i32 addrspace(1)* %in, align 4
; shl/ashr by 31 sign-extends bit 0 of %x across all 32 bits.
930 %shl = shl i32 %x, 31
931 %shr = ashr i32 %shl, 31
932 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
933 store i32 %bfe, i32 addrspace(1)* %out, align 4
; sext_in_reg_i1_bfe_offset_1: loads an i32 from %in, sign-extends its low two
; bits with a shl/ashr-by-30 pair, then applies sbfe with (offset 1, width 1)
; and stores the result to %out.
; Expected output: s_bfe_i32 with immediate 0x20000 ((2 << 16) | 0) for the
; shl/ashr pair, then s_bfe_i32 with immediate 0x10001 ((1 << 16) | 1) for the
; sbfe call.
937 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
938 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
940 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
941 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
942 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
943 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
944 ; GFX6-NEXT: s_mov_b32 s2, -1
945 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
946 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
947 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10001
948 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
949 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
950 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
951 ; GFX6-NEXT: s_endpgm
952 %x = load i32, i32 addrspace(1)* %in, align 4
; shl/ashr by 30 sign-extends the low 2 bits of %x to 32 bits.
953 %shl = shl i32 %x, 30
954 %shr = ashr i32 %shl, 30
955 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
956 store i32 %bfe, i32 addrspace(1)* %out, align 4
; sext_in_reg_i2_bfe_offset_1: loads an i32 from %in, sign-extends its low two
; bits with a shl/ashr-by-30 pair, then applies sbfe with (offset 1, width 2)
; and stores the result to %out.
; Expected output: s_bfe_i32 with immediate 0x20000 ((2 << 16) | 0) for the
; shl/ashr pair, then s_bfe_i32 with immediate 0x20001 ((2 << 16) | 1) for the
; sbfe call.
960 define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
961 ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1:
963 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
964 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
965 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
966 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
967 ; GFX6-NEXT: s_mov_b32 s2, -1
968 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
969 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000
970 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20001
971 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
972 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
973 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
974 ; GFX6-NEXT: s_endpgm
975 %x = load i32, i32 addrspace(1)* %in, align 4
; shl/ashr by 30 sign-extends the low 2 bits of %x to 32 bits.
976 %shl = shl i32 %x, 30
977 %shr = ashr i32 %shl, 30
978 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
979 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Declarations of the signed bitfield-extract intrinsics exercised by this file.
; As used by the tests, the operands are (source, offset, width); the i64
; variant takes an i64 source with i32 offset and width.
983 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
984 declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1
; Attribute group #0 is attached to the test functions; #1 to the intrinsic
; declarations above.
986 attributes #0 = { nounwind }
987 attributes #1 = { nounwind readnone }