1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; VGPR case: all three ubfe operands are plain function arguments. The checks
; expect one v_bfe_u32 taking source, offset and width as separate operands.
; NOTE(review): the name says "i32" but the call is to the unsigned ubfe intrinsic.
4 define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5 ; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
7 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; GFX6-NEXT: v_bfe_u32 v0, v0, v1, v2
9 ; GFX6-NEXT: s_setpc_b64 s[30:31]
10 %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
; SGPR case (amdgpu_ps, inreg arguments). The checks show the scalar form taking
; one packed control operand: %src1 masked with 63 in the low bits, ORed with
; %src2 shifted left by 16, then fed to s_bfe_u32.
; NOTE(review): the name says "i32" but the call is to the unsigned ubfe intrinsic.
14 define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15 ; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
17 ; GFX6-NEXT: s_and_b32 s1, s1, 63
18 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
19 ; GFX6-NEXT: s_or_b32 s1, s1, s2
20 ; GFX6-NEXT: s_bfe_u32 s0, s0, s1
21 ; GFX6-NEXT: ; return to shader part epilog
22 %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
26 ; TODO: Need to expand this.
27 ; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28 ; %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
; 64-bit SGPR case: the control operand is packed the same way as in the 32-bit
; scalar test (offset & 63, ORed with width << 16) and the extract is done by
; s_bfe_u64 on the s[0:1] register pair.
; The result value is named %bfe_i32 although it has type i64.
32 define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33 ; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
35 ; GFX6-NEXT: s_and_b32 s2, s2, 63
36 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
37 ; GFX6-NEXT: s_or_b32 s2, s2, s3
38 ; GFX6-NEXT: s_bfe_u64 s[0:1], s[0:1], s2
39 ; GFX6-NEXT: ; return to shader part epilog
40 %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
; Kernel variant with all ubfe operands taken from kernel arguments; the result
; is stored to %out.
; NOTE(review): the call passes %src1 as BOTH offset and width and never uses
; %src2; the checks accordingly derive both fields from s3. This looks like a
; typo for %src2 - confirm, and regenerate the checks with
; update_llc_test_checks.py if the IR is changed.
44 define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45 ; GFX6-LABEL: bfe_u32_arg_arg_arg:
47 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
48 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
49 ; GFX6-NEXT: s_and_b32 s4, s3, 63
50 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
51 ; GFX6-NEXT: s_or_b32 s3, s4, s3
52 ; GFX6-NEXT: s_bfe_u32 s3, s2, s3
53 ; GFX6-NEXT: s_mov_b32 s2, -1
54 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
55 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
56 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
58 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
59 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Register source and offset, immediate width 123. The checks show the width
; folded into the packed operand as the constant 0x7b0000 (123 << 16), ORed with
; the offset masked to 6 bits.
63 define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
64 ; GFX6-LABEL: bfe_u32_arg_arg_imm:
66 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
67 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
68 ; GFX6-NEXT: s_and_b32 s3, s3, 63
69 ; GFX6-NEXT: s_or_b32 s3, s3, 0x7b0000
70 ; GFX6-NEXT: s_bfe_u32 s3, s2, s3
71 ; GFX6-NEXT: s_mov_b32 s2, -1
72 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
73 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
74 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
76 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
77 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Register source and width, immediate offset 123. The checks show the offset
; already reduced to 59 (123 & 63) and ORed with the width shifted left by 16.
81 define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
82 ; GFX6-LABEL: bfe_u32_arg_imm_arg:
84 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
85 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
86 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
87 ; GFX6-NEXT: s_or_b32 s3, 59, s3
88 ; GFX6-NEXT: s_bfe_u32 s3, s2, s3
89 ; GFX6-NEXT: s_mov_b32 s2, -1
90 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
91 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
92 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
94 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
95 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Immediate source 123 with register offset and width. The checks show the
; source passed as the literal 0x7b and the control operand packed from the two
; kernel arguments (offset & 63, ORed with width << 16).
99 define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
100 ; GFX6-LABEL: bfe_u32_imm_arg_arg:
102 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
103 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
104 ; GFX6-NEXT: s_and_b32 s4, s2, 63
105 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
106 ; GFX6-NEXT: s_or_b32 s3, s4, s3
107 ; GFX6-NEXT: s_bfe_u32 s3, 0x7b, s3
108 ; GFX6-NEXT: s_mov_b32 s2, -1
109 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
110 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
111 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
112 ; GFX6-NEXT: s_endpgm
113 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
114 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Width is the constant 0 and the offset comes from a register. The checks show
; the s_bfe_u32 is still emitted, with a control operand that is just the
; offset masked with 63 (no width bits ORed in).
118 define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
119 ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
121 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
122 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
123 ; GFX6-NEXT: s_and_b32 s3, s3, 63
124 ; GFX6-NEXT: s_bfe_u32 s3, s2, s3
125 ; GFX6-NEXT: s_mov_b32 s2, -1
126 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
127 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
128 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
129 ; GFX6-NEXT: s_endpgm
130 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
131 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; Width 0 with immediate offset 8: the packed control operand is the plain
; constant 8, and the checks show the s_bfe_u32 is still emitted.
; %src1 is declared but not used by the call.
135 define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
136 ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
138 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x2
139 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
140 ; GFX6-NEXT: s_mov_b32 s2, -1
141 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
142 ; GFX6-NEXT: s_bfe_u32 s3, s4, 8
143 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
144 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
145 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
146 ; GFX6-NEXT: s_endpgm
147 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
148 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 8) applied to a zero-extended i8 load. The value is
; loaded with buffer_load_ubyte into a VGPR, and the checks show the
; v_bfe_u32 v0, v0, 0, 8 is still emitted after the load.
152 define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
153 ; GFX6-LABEL: bfe_u32_zextload_i8:
155 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
156 ; GFX6-NEXT: s_mov_b32 s6, -1
157 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
158 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
159 ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
160 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
161 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
162 ; GFX6-NEXT: s_waitcnt vmcnt(0)
163 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8
164 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
165 ; GFX6-NEXT: s_endpgm
166 %load = load i8, ptr addrspace(1) %in
167 %ext = zext i8 %load to i32
168 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
169 store i32 %bfe, ptr addrspace(1) %out, align 4
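173 ; FIXME: Should be using s_add_i32 (NOTE(review): the GFX6 checks for bfe_u32_zext_in_reg_i8 below already show s_add_i32, so this FIXME may be stale - confirm before removing)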
; ubfe(offset 0, width 8) on (load + 1) & 255. The checks show the add, the
; and with 0xff, and the s_bfe_u32 with packed operand 0x80000 (8 << 16 | 0)
; all emitted.
174 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
175 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
177 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
178 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
179 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
180 ; GFX6-NEXT: s_mov_b32 s2, -1
181 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
182 ; GFX6-NEXT: s_add_i32 s3, s3, 1
183 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
184 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80000
185 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
186 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
187 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
188 ; GFX6-NEXT: s_endpgm
189 %load = load i32, ptr addrspace(1) %in, align 4
190 %add = add i32 %load, 1
191 %ext = and i32 %add, 255
192 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
193 store i32 %bfe, ptr addrspace(1) %out, align 4
; 16-bit counterpart of bfe_u32_zext_in_reg_i8: ubfe(offset 0, width 16) on
; (load + 1) & 65535. The checks show the and with 0xffff followed by
; s_bfe_u32 with packed operand 0x100000 (16 << 16 | 0).
197 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
198 ; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
200 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
201 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
202 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
203 ; GFX6-NEXT: s_mov_b32 s2, -1
204 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
205 ; GFX6-NEXT: s_add_i32 s3, s3, 1
206 ; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
207 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
208 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
209 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
210 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
211 ; GFX6-NEXT: s_endpgm
212 %load = load i32, ptr addrspace(1) %in, align 4
213 %add = add i32 %load, 1
214 %ext = and i32 %add, 65535
215 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
216 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 1, width 8) on a value already masked to 8 bits. The checks show
; the and with 0xff kept and s_bfe_u32 using packed operand 0x80001
; (8 << 16 | 1).
220 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
221 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
223 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
224 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
225 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
226 ; GFX6-NEXT: s_mov_b32 s2, -1
227 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
228 ; GFX6-NEXT: s_add_i32 s3, s3, 1
229 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
230 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80001
231 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
232 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
233 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
234 ; GFX6-NEXT: s_endpgm
235 %load = load i32, ptr addrspace(1) %in, align 4
236 %add = add i32 %load, 1
237 %ext = and i32 %add, 255
238 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
239 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 3, width 8) on a value already masked to 8 bits. The checks show
; the and with 0xff kept and s_bfe_u32 using packed operand 0x80003
; (8 << 16 | 3).
243 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
244 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
246 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
247 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
248 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
249 ; GFX6-NEXT: s_mov_b32 s2, -1
250 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX6-NEXT: s_add_i32 s3, s3, 1
252 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
253 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80003
254 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
255 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
256 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
257 ; GFX6-NEXT: s_endpgm
258 %load = load i32, ptr addrspace(1) %in, align 4
259 %add = add i32 %load, 1
260 %ext = and i32 %add, 255
261 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
262 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 7, width 8) on a value already masked to 8 bits. The checks show
; the and with 0xff kept and s_bfe_u32 using packed operand 0x80007
; (8 << 16 | 7).
266 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
267 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
269 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
270 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
271 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
272 ; GFX6-NEXT: s_mov_b32 s2, -1
273 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
274 ; GFX6-NEXT: s_add_i32 s3, s3, 1
275 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
276 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80007
277 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
278 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
279 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
280 ; GFX6-NEXT: s_endpgm
281 %load = load i32, ptr addrspace(1) %in, align 4
282 %add = add i32 %load, 1
283 %ext = and i32 %add, 255
284 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
285 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 8, width 8) on a value masked to 16 bits, i.e. extracting the
; upper byte of the masked halfword. The checks show the and with 0xffff kept
; and s_bfe_u32 using packed operand 0x80008 (8 << 16 | 8).
289 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
290 ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
292 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
293 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
294 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
295 ; GFX6-NEXT: s_mov_b32 s2, -1
296 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
297 ; GFX6-NEXT: s_add_i32 s3, s3, 1
298 ; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
299 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80008
300 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
301 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
302 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
303 ; GFX6-NEXT: s_endpgm
304 %load = load i32, ptr addrspace(1) %in, align 4
305 %add = add i32 %load, 1
306 %ext = and i32 %add, 65535
307 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
308 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 1) directly on a loaded value: a single s_bfe_u32 with
; packed operand 0x10000 (1 << 16 | 0).
312 define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
313 ; GFX6-LABEL: bfe_u32_test_1:
315 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
316 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
317 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
318 ; GFX6-NEXT: s_mov_b32 s2, -1
319 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
320 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000
321 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
322 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
323 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
324 ; GFX6-NEXT: s_endpgm
325 %x = load i32, ptr addrspace(1) %in, align 4
326 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 8)
327 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 8) on (x << 31). The checks show both the s_lshl_b32 by
; 31 and the s_bfe_u32 with packed operand 0x80000 emitted.
331 define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
332 ; GFX6-LABEL: bfe_u32_test_2:
334 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
335 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
336 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
337 ; GFX6-NEXT: s_mov_b32 s2, -1
338 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
339 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
340 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80000
341 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
342 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
343 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
344 ; GFX6-NEXT: s_endpgm
345 %x = load i32, ptr addrspace(1) %in, align 4
346 %shl = shl i32 %x, 31
347 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
348 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 1) on (x << 31). The checks show both the s_lshl_b32 by
; 31 and the s_bfe_u32 with packed operand 0x10000 emitted.
352 define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
353 ; GFX6-LABEL: bfe_u32_test_3:
355 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
356 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
357 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
358 ; GFX6-NEXT: s_mov_b32 s2, -1
359 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
360 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
361 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000
362 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
363 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
364 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
365 ; GFX6-NEXT: s_endpgm
366 %x = load i32, ptr addrspace(1) %in, align 4
367 %shl = shl i32 %x, 31
368 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
369 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 31, width 1) on ((x << 31) >> 31, logical). The checks show two
; s_bfe_u32 instructions: one with operand 0x10000 (presumably the shl/lshr
; pair selected as a 1-bit extract) followed by the intrinsic's own extract
; with operand 0x1001f (1 << 16 | 31).
373 define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
374 ; GFX6-LABEL: bfe_u32_test_4:
376 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
377 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
378 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
379 ; GFX6-NEXT: s_mov_b32 s2, -1
380 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
381 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000
382 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f
383 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
384 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
385 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
386 ; GFX6-NEXT: s_endpgm
387 %x = load i32, ptr addrspace(1) %in, align 4
388 %shl = shl i32 %x, 31
389 %shr = lshr i32 %shl, 31
390 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
391 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 1) on ((x << 31) >> 31, arithmetic). The checks show an
; s_bfe_i32 with operand 0x10000 (presumably the shl/ashr pair selected as a
; signed 1-bit extract) followed by the intrinsic's s_bfe_u32 with the same
; packed operand.
395 define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
396 ; GFX6-LABEL: bfe_u32_test_5:
398 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
399 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
400 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
401 ; GFX6-NEXT: s_mov_b32 s2, -1
402 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
403 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000
404 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000
405 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
406 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
407 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
408 ; GFX6-NEXT: s_endpgm
409 %x = load i32, ptr addrspace(1) %in, align 4
410 %shl = shl i32 %x, 31
411 %shr = ashr i32 %shl, 31
412 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
413 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 1, width 31) on (x << 31): the shift is kept and the extract uses
; packed operand 0x1f0001 (31 << 16 | 1).
417 define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
418 ; GFX6-LABEL: bfe_u32_test_6:
420 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
421 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
422 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
423 ; GFX6-NEXT: s_mov_b32 s2, -1
424 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
425 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
426 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1f0001
427 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
428 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
429 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
430 ; GFX6-NEXT: s_endpgm
431 %x = load i32, ptr addrspace(1) %in, align 4
432 %shl = shl i32 %x, 31
433 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
434 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 0, width 31) on (x << 31): the shift is kept and the extract uses
; packed operand 0x1f0000 (31 << 16 | 0).
438 define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
439 ; GFX6-LABEL: bfe_u32_test_7:
441 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
442 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
443 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
444 ; GFX6-NEXT: s_mov_b32 s2, -1
445 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
446 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
447 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1f0000
448 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
449 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
450 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
451 ; GFX6-NEXT: s_endpgm
452 %x = load i32, ptr addrspace(1) %in, align 4
453 %shl = shl i32 %x, 31
454 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
455 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 31, width 1) on (x << 31): the shift is kept and the extract uses
; packed operand 0x1001f (1 << 16 | 31).
459 define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
460 ; GFX6-LABEL: bfe_u32_test_8:
462 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
463 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
464 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
465 ; GFX6-NEXT: s_mov_b32 s2, -1
466 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
467 ; GFX6-NEXT: s_lshl_b32 s3, s3, 31
468 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f
469 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
470 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
471 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
472 ; GFX6-NEXT: s_endpgm
473 %x = load i32, ptr addrspace(1) %in, align 4
474 %shl = shl i32 %x, 31
475 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
476 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 31, width 1) directly on a loaded value: a single s_bfe_u32 with
; packed operand 0x1001f (1 << 16 | 31).
480 define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
481 ; GFX6-LABEL: bfe_u32_test_9:
483 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
484 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
485 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
486 ; GFX6-NEXT: s_mov_b32 s2, -1
487 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
488 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f
489 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
490 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
491 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
492 ; GFX6-NEXT: s_endpgm
493 %x = load i32, ptr addrspace(1) %in, align 4
494 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
495 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 1, width 31) directly on a loaded value: a single s_bfe_u32 with
; packed operand 0x1f0001 (31 << 16 | 1).
499 define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
500 ; GFX6-LABEL: bfe_u32_test_10:
502 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
503 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
504 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
505 ; GFX6-NEXT: s_mov_b32 s2, -1
506 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
507 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1f0001
508 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
509 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
510 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
511 ; GFX6-NEXT: s_endpgm
512 %x = load i32, ptr addrspace(1) %in, align 4
513 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
514 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 8, width 24) directly on a loaded value: a single s_bfe_u32 with
; packed operand 0x180008 (24 << 16 | 8).
518 define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
519 ; GFX6-LABEL: bfe_u32_test_11:
521 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
522 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
523 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
524 ; GFX6-NEXT: s_mov_b32 s2, -1
525 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
526 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x180008
527 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
528 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
529 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
530 ; GFX6-NEXT: s_endpgm
531 %x = load i32, ptr addrspace(1) %in, align 4
532 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
533 store i32 %bfe, ptr addrspace(1) %out, align 4
; ubfe(offset 24, width 8) directly on a loaded value: a single s_bfe_u32 with
; packed operand 0x80018 (8 << 16 | 24).
537 define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
538 ; GFX6-LABEL: bfe_u32_test_12:
540 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
541 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
542 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
543 ; GFX6-NEXT: s_mov_b32 s2, -1
544 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
545 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80018
546 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
547 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
548 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
549 ; GFX6-NEXT: s_endpgm
550 %x = load i32, ptr addrspace(1) %in, align 4
551 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
552 store i32 %bfe, ptr addrspace(1) %out, align 4
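556 ; NOTE(review): appears to be a leftover hand-written pattern, not an active check (the GFX6 output below uses the scalar s_ashr_i32): V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}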
; ubfe(offset 31, width 1) on (x >> 31, arithmetic). The checks show the
; s_ashr_i32 kept, followed by s_bfe_u32 with packed operand 0x1001f.
; The value named %shl actually holds the result of an ashr.
557 define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
558 ; GFX6-LABEL: bfe_u32_test_13:
560 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
561 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
562 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
563 ; GFX6-NEXT: s_mov_b32 s2, -1
564 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
565 ; GFX6-NEXT: s_ashr_i32 s3, s3, 31
566 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f
567 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
568 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
569 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
570 ; GFX6-NEXT: s_endpgm
571 %x = load i32, ptr addrspace(1) %in, align 4
572 %shl = ashr i32 %x, 31
573 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
574 store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
; ubfe(offset 31, width 1) on (x >> 31, logical). The checks show the
; s_lshr_b32 kept, followed by s_bfe_u32 with packed operand 0x1001f.
; The value named %shl actually holds the result of an lshr.
577 define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
578 ; GFX6-LABEL: bfe_u32_test_14:
580 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
581 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
582 ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
583 ; GFX6-NEXT: s_mov_b32 s2, -1
584 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
585 ; GFX6-NEXT: s_lshr_b32 s3, s3, 31
586 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f
587 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
588 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
589 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
590 ; GFX6-NEXT: s_endpgm
591 %x = load i32, ptr addrspace(1) %in, align 4
592 %shl = lshr i32 %x, 31
593 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
594 store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
; All-constant ubfe(0, offset 0, width 0). Despite the test name, the checks
; show an s_bfe_u32 with constant operands is still emitted (not folded).
597 define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
598 ; GFX6-LABEL: bfe_u32_constant_fold_test_0:
600 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
601 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0
602 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
603 ; GFX6-NEXT: s_mov_b32 s2, -1
604 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
605 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
606 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
607 ; GFX6-NEXT: s_endpgm
608 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
609 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(12334, offset 0, width 0). The checks show the source as
; the literal 0x302e and the s_bfe_u32 still emitted (not folded).
613 define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
614 ; GFX6-LABEL: bfe_u32_constant_fold_test_1:
616 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
617 ; GFX6-NEXT: s_bfe_u32 s2, 0x302e, 0
618 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
619 ; GFX6-NEXT: s_mov_b32 s2, -1
620 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
621 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
622 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
623 ; GFX6-NEXT: s_endpgm
624 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
625 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(0, offset 0, width 1). The checks show s_bfe_u32 with
; source 0 and packed operand 0x10000 still emitted (not folded).
629 define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
630 ; GFX6-LABEL: bfe_u32_constant_fold_test_2:
632 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
633 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0x10000
634 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
635 ; GFX6-NEXT: s_mov_b32 s2, -1
636 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
637 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
638 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
639 ; GFX6-NEXT: s_endpgm
640 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
641 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(1, offset 0, width 1). The checks show s_bfe_u32 with
; source 1 and packed operand 0x10000 still emitted (not folded).
645 define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
646 ; GFX6-LABEL: bfe_u32_constant_fold_test_3:
648 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
649 ; GFX6-NEXT: s_bfe_u32 s2, 1, 0x10000
650 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
651 ; GFX6-NEXT: s_mov_b32 s2, -1
652 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
653 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
654 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
655 ; GFX6-NEXT: s_endpgm
656 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
657 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(4294967295, offset 0, width 1). The checks show the source
; printed as -1 and s_bfe_u32 with packed operand 0x10000 still emitted.
661 define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
662 ; GFX6-LABEL: bfe_u32_constant_fold_test_4:
664 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
665 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x10000
666 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
667 ; GFX6-NEXT: s_mov_b32 s2, -1
668 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
669 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
670 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
671 ; GFX6-NEXT: s_endpgm
672 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
673 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(128, offset 7, width 1). The checks show the packed operand
; 0x10007 (1 << 16 | 7) moved into s2 first, then s_bfe_u32 on the literal
; source 0x80; the extract is not folded.
677 define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
678 ; GFX6-LABEL: bfe_u32_constant_fold_test_5:
680 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
681 ; GFX6-NEXT: s_mov_b32 s2, 0x10007
682 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2
683 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
684 ; GFX6-NEXT: s_mov_b32 s2, -1
685 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
686 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
687 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
688 ; GFX6-NEXT: s_endpgm
689 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
690 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(128, offset 0, width 8). The checks show the packed operand
; 0x80000 (8 << 16 | 0) moved into s2 first, then s_bfe_u32 on the literal
; source 0x80; the extract is not folded.
694 define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
695 ; GFX6-LABEL: bfe_u32_constant_fold_test_6:
697 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
698 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
699 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2
700 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
701 ; GFX6-NEXT: s_mov_b32 s2, -1
702 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
703 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
704 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
705 ; GFX6-NEXT: s_endpgm
706 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
707 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(127, offset 0, width 8). The checks show the packed operand
; 0x80000 (8 << 16 | 0) moved into s2 first, then s_bfe_u32 on the literal
; source 0x7f; the extract is not folded.
711 define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
712 ; GFX6-LABEL: bfe_u32_constant_fold_test_7:
714 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
715 ; GFX6-NEXT: s_mov_b32 s2, 0x80000
716 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2
717 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
718 ; GFX6-NEXT: s_mov_b32 s2, -1
719 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
720 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
721 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
722 ; GFX6-NEXT: s_endpgm
723 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
724 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(127, offset 6, width 8). The checks show the packed operand
; 0x80006 (8 << 16 | 6) moved into s2 first, then s_bfe_u32 on the literal
; source 0x7f; the extract is not folded.
728 define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
729 ; GFX6-LABEL: bfe_u32_constant_fold_test_8:
731 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
732 ; GFX6-NEXT: s_mov_b32 s2, 0x80006
733 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2
734 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
735 ; GFX6-NEXT: s_mov_b32 s2, -1
736 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
737 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
738 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
739 ; GFX6-NEXT: s_endpgm
740 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
741 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(65536, offset 16, width 8). The checks show the packed
; operand 0x80010 (8 << 16 | 16) moved into s2 first, then s_bfe_u32 on the
; literal source 0x10000; the extract is not folded.
745 define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
746 ; GFX6-LABEL: bfe_u32_constant_fold_test_9:
748 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
749 ; GFX6-NEXT: s_mov_b32 s2, 0x80010
750 ; GFX6-NEXT: s_bfe_u32 s2, 0x10000, s2
751 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
752 ; GFX6-NEXT: s_mov_b32 s2, -1
753 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
754 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
755 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
756 ; GFX6-NEXT: s_endpgm
757 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
758 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(65535, offset 16, width 16). The checks show the packed
; operand 0x100010 (16 << 16 | 16) moved into s2 first, then s_bfe_u32 on the
; literal source 0xffff; the extract is not folded.
762 define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
763 ; GFX6-LABEL: bfe_u32_constant_fold_test_10:
765 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
766 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
767 ; GFX6-NEXT: s_bfe_u32 s2, 0xffff, s2
768 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
769 ; GFX6-NEXT: s_mov_b32 s2, -1
770 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
771 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
772 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
773 ; GFX6-NEXT: s_endpgm
774 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
775 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(160, offset 4, width 4). The checks show the packed operand
; 0x40004 (4 << 16 | 4) moved into s2 first, then s_bfe_u32 on the literal
; source 0xa0; the extract is not folded.
779 define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
780 ; GFX6-LABEL: bfe_u32_constant_fold_test_11:
782 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
783 ; GFX6-NEXT: s_mov_b32 s2, 0x40004
784 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2
785 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
786 ; GFX6-NEXT: s_mov_b32 s2, -1
787 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
788 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
789 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
790 ; GFX6-NEXT: s_endpgm
791 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
792 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(160, offset 31, width 1). The checks show the packed
; operand 0x1001f (1 << 16 | 31) moved into s2 first, then s_bfe_u32 on the
; literal source 0xa0; the extract is not folded.
796 define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
797 ; GFX6-LABEL: bfe_u32_constant_fold_test_12:
799 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
800 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
801 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2
802 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
803 ; GFX6-NEXT: s_mov_b32 s2, -1
804 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
805 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
806 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
807 ; GFX6-NEXT: s_endpgm
808 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
809 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(131070, offset 16, width 16). The checks show the packed
; operand 0x100010 (16 << 16 | 16) moved into s2 first, then s_bfe_u32 on the
; literal source 0x1fffe; the extract is not folded.
813 define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
814 ; GFX6-LABEL: bfe_u32_constant_fold_test_13:
816 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
817 ; GFX6-NEXT: s_mov_b32 s2, 0x100010
818 ; GFX6-NEXT: s_bfe_u32 s2, 0x1fffe, s2
819 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
820 ; GFX6-NEXT: s_mov_b32 s2, -1
821 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
822 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
823 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
824 ; GFX6-NEXT: s_endpgm
825 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
826 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(160, offset 2, width 30). The checks show the packed
; operand 0x1e0002 (30 << 16 | 2) moved into s2 first, then s_bfe_u32 on the
; literal source 0xa0; the extract is not folded.
830 define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
831 ; GFX6-LABEL: bfe_u32_constant_fold_test_14:
833 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
834 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002
835 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2
836 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
837 ; GFX6-NEXT: s_mov_b32 s2, -1
838 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
839 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
840 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
841 ; GFX6-NEXT: s_endpgm
842 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
843 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(160, offset 4, width 28). The checks show the packed
; operand 0x1c0004 (28 << 16 | 4) moved into s2 first, then s_bfe_u32 on the
; literal source 0xa0; the extract is not folded.
847 define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
848 ; GFX6-LABEL: bfe_u32_constant_fold_test_15:
850 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
851 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004
852 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2
853 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
854 ; GFX6-NEXT: s_mov_b32 s2, -1
855 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
856 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
857 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
858 ; GFX6-NEXT: s_endpgm
859 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
860 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; All-constant ubfe(4294967295, offset 1, width 7). The checks show the source
; printed as -1 and the packed operand 0x70001 (7 << 16 | 1) used directly in
; the s_bfe_u32; the extract is not folded.
864 define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
865 ; GFX6-LABEL: bfe_u32_constant_fold_test_16:
867 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
868 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x70001
869 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
870 ; GFX6-NEXT: s_mov_b32 s2, -1
871 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
872 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
873 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
874 ; GFX6-NEXT: s_endpgm
875 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
876 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; ubfe with all-constant operands: source 255 (0xff), second argument 1,
; third argument 31.
; The GFX6 checks expect an s_bfe_u32 on immediates (no precomputed result),
; with control operand 0x1f0001 = (31 << 16) | 1.
880 define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
881 ; GFX6-LABEL: bfe_u32_constant_fold_test_17:
883 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
884 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001
885 ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2
886 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
887 ; GFX6-NEXT: s_mov_b32 s2, -1
888 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
889 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
890 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
891 ; GFX6-NEXT: s_endpgm
892 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
893 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
; ubfe with all-constant operands: source 255 (0xff), second argument 31,
; third argument 1 (the previous test's last two arguments swapped).
; The GFX6 checks expect an s_bfe_u32 on immediates (no precomputed result),
; with control operand 0x1001f = (1 << 16) | 31.
897 define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
898 ; GFX6-LABEL: bfe_u32_constant_fold_test_18:
900 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
901 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f
902 ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2
903 ; GFX6-NEXT: v_mov_b32_e32 v0, s2
904 ; GFX6-NEXT: s_mov_b32 s2, -1
905 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
906 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
907 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
908 ; GFX6-NEXT: s_endpgm
909 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
910 store i32 %bfe_u32, ptr addrspace(1) %out, align 4
914 ; Make sure that SimplifyDemandedBits doesn't cause the and to be
915 ; reduced to the bits demanded by the bfe.
917 ; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; NOTE(review): in the GFX6 checks below the extract is selected as the scalar
; s_bfe_u32 (not v_bfe_u32). It reads s0, the result of the s_and_b32 with 63,
; rather than s8, the loaded value, and the full and-result is what gets
; stored through the second buffer_store_dword.
918 define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
919 ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
921 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x4
922 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
923 ; GFX6-NEXT: s_mov_b32 s6, -1
924 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
925 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
926 ; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
927 ; GFX6-NEXT: s_mov_b64 s[4:5], s[0:1]
928 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
929 ; GFX6-NEXT: s_and_b32 s0, s8, 63
930 ; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002
931 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
932 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
933 ; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0
934 ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
935 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
936 ; GFX6-NEXT: s_endpgm
937 ptr addrspace(1) %out1,
938 ptr addrspace(1) %in) #0 {
939 %src = load i32, ptr addrspace(1) %in, align 4
; %and has two users: the ubfe call (stored to %out0) and the direct store to
; %out1, so the full 6-bit mask value must remain available.
940 %and = and i32 %src, 63
941 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
942 store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
943 store i32 %and, ptr addrspace(1) %out1, align 4
; The GFX6 checks expect the computation on %a to be selected as a single
; s_bfe_u32 with control operand 0x30006 = (3 << 16) | 6, followed by a store
; of the result to %out.
; NOTE(review): the IR instructions defining %c are not visible in this chunk;
; going by the function name they are presumably an lshr followed by an and -
; confirm against the full file.
947 define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
948 ; GFX6-LABEL: lshr_and:
950 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x2
951 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
952 ; GFX6-NEXT: s_mov_b32 s2, -1
953 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
954 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0x30006
955 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
956 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
957 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
958 ; GFX6-NEXT: s_endpgm
961 store i32 %c, ptr addrspace(1) %out, align 8
; Variant taking two i32 arguments. In the GFX6 checks no bitfield extract is
; formed: the output is a separate s_lshr_b32 whose shift amount is a register
; (s3), followed by s_and_b32 with 7.
; NOTE(review): the IR instructions defining %d are not visible in this chunk;
; presumably %a is shifted right by %b and then masked - confirm against the
; full file.
965 define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
966 ; GFX6-LABEL: v_lshr_and:
968 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
969 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
970 ; GFX6-NEXT: s_lshr_b32 s3, s2, s3
971 ; GFX6-NEXT: s_and_b32 s3, s3, 7
972 ; GFX6-NEXT: s_mov_b32 s2, -1
973 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
974 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
975 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
976 ; GFX6-NEXT: s_endpgm
979 store i32 %d, ptr addrspace(1) %out, align 8
; The GFX6 checks expect a single s_bfe_u32 with control operand
; 0x30006 = (3 << 16) | 6, the same instruction sequence the lshr_and test
; expects.
; NOTE(review): the IR instructions defining %c are not visible in this chunk;
; going by the function name they are presumably an and followed by an lshr -
; confirm against the full file.
983 define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
984 ; GFX6-LABEL: and_lshr:
986 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x2
987 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
988 ; GFX6-NEXT: s_mov_b32 s2, -1
989 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
990 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0x30006
991 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
992 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
993 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
994 ; GFX6-NEXT: s_endpgm
997 store i32 %c, ptr addrspace(1) %out, align 8
; Masks %a with 511 (0x1ff, the low nine bits) before producing %c. The GFX6
; checks expect this to be selected as a single s_bfe_u32 with control operand
; 0x30006 = (3 << 16) | 6.
; NOTE(review): the instruction defining %c is not visible in this chunk;
; presumably it is a right shift of %b - confirm against the full file.
1001 define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
1002 ; GFX6-LABEL: and_lshr2:
1004 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x2
1005 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
1006 ; GFX6-NEXT: s_mov_b32 s2, -1
1007 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1008 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0x30006
1009 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
1010 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1011 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
1012 ; GFX6-NEXT: s_endpgm
1013 %b = and i32 %a, 511
1015 store i32 %c, ptr addrspace(1) %out, align 8
; Shifts %b right by 11 to produce %c. The GFX6 checks expect the whole
; computation to be selected as a single s_bfe_u32 with control operand
; 0x150002 = (21 << 16) | 2.
; NOTE(review): the instruction defining %b is not visible in this chunk;
; going by the function name it is presumably a shl of %a - confirm against
; the full file.
1019 define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
1020 ; GFX6-LABEL: shl_lshr:
1022 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x2
1023 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
1024 ; GFX6-NEXT: s_mov_b32 s2, -1
1025 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1026 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0x150002
1027 ; GFX6-NEXT: v_mov_b32_e32 v0, s3
1028 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1029 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
1030 ; GFX6-NEXT: s_endpgm
1032 %c = lshr i32 %b, 11
1033 store i32 %c, ptr addrspace(1) %out, align 8
; Intrinsic declarations used by the tests in this file: the 32-bit and 64-bit
; forms of llvm.amdgcn.ubfe, each taking a source value plus two i32 operands.
1037 declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
1038 declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1

; Attribute group #0 is attached to the test functions; #1 is attached to the
; intrinsic declarations above.
1040 attributes #0 = { nounwind }
1041 attributes #1 = { nounwind readnone }