1 ; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
3 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
4 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
6 ; Sinking the shift into both successor blocks actually makes this
7 ; particular case worse in terms of code size.
; Unsigned bitfield extract on i32: one lshr by 8 feeds an `and` with 255 on
; the %bb0 path and an `and` with 127 on the %bb1 path. The opt checks below
; expect a separate copy of the lshr immediately before each `and`.
9 ; OPT-LABEL: @sink_ubfe_i32(
14 ; OPT: %0 = lshr i32 %arg1, 8
15 ; OPT-NEXT: %val0 = and i32 %0, 255
19 ; OPT: %1 = lshr i32 %arg1, 8
20 ; OPT-NEXT: %val1 = and i32 %1, 127
; NOTE(review): the s_bfe_u32 immediates below read as (width << 16) | offset,
; i.e. 0x80008 = 8 bits at offset 8 and 0x70008 = 7 bits at offset 8, which
; matches the shift amount and masks in the IR -- confirm against the ISA docs.
28 ; GCN-LABEL: {{^}}sink_ubfe_i32:
32 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
34 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
37 ; GCN: buffer_store_dword
39 define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
41 %shr = lshr i32 %arg1, 8 ; the single shift that both masks below consume
42 br i1 undef, label %bb0, label %bb1
45 %val0 = and i32 %shr, 255 ; 8-bit field; reaches the phi from %bb0
46 store volatile i32 0, i32 addrspace(1)* undef
50 %val1 = and i32 %shr, 127 ; 7-bit field; reaches the phi from %bb1
51 store volatile i32 0, i32 addrspace(1)* undef
55 %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
56 store i32 %phi, i32 addrspace(1)* %out ; the selected field is written to %out
; Same shape as the unsigned i32 case but the shared shift is an ashr by 8.
; The opt checks below expect a separate copy of the ashr immediately before
; each `and`. Only the function label is checked in the llc output visible
; here; no instruction-level checks follow it.
60 ; OPT-LABEL: @sink_sbfe_i32(
65 ; OPT: %0 = ashr i32 %arg1, 8
66 ; OPT-NEXT: %val0 = and i32 %0, 255
70 ; OPT: %1 = ashr i32 %arg1, 8
71 ; OPT-NEXT: %val1 = and i32 %1, 127
78 ; GCN-LABEL: {{^}}sink_sbfe_i32:
79 define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
81 %shr = ashr i32 %arg1, 8 ; arithmetic shift shared by both masks below
82 br i1 undef, label %bb0, label %bb1
85 %val0 = and i32 %shr, 255 ; low 8 bits of the shifted value; reaches the phi from %bb0
86 store volatile i32 0, i32 addrspace(1)* undef
90 %val1 = and i32 %shr, 127 ; low 7 bits of the shifted value; reaches the phi from %bb1
91 store volatile i32 0, i32 addrspace(1)* undef
95 %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
96 store i32 %phi, i32 addrspace(1)* %out ; the selected field is written to %out
; Unsigned bitfield extract on i16: one lshr by 4 feeds an `and` with 255 on
; the %bb0 path and an `and` with 127 on the %bb1 path. The opt checks below
; expect a separate copy of the lshr immediately before each `and`.
101 ; OPT-LABEL: @sink_ubfe_i16(
106 ; OPT: %0 = lshr i16 %arg1, 4
107 ; OPT-NEXT: %val0 = and i16 %0, 255
111 ; OPT: %1 = lshr i16 %arg1, 4
112 ; OPT-NEXT: %val1 = and i16 %1, 127
119 ; For GFX8: since i16 is a legal type, we cannot sink the lshr into the basic blocks.
; The llc checks therefore differ per run line. For the tonga run a single
; s_bfe_u32 is expected ahead of the conditional branch, followed by the
; 0xff and 0x7f mask constants being materialized with v_mov_b32. For the
; default-CPU run one s_bfe_u32 per mask is expected after the branch.
; NOTE(review): the immediates read as (width << 16) | offset, so 0xc0004
; would be 12 bits at offset 4 (everything left of an i16 after the shift)
; and 0x80004 / 0x70004 the 8- and 7-bit fields -- confirm against the ISA docs.
121 ; GCN-LABEL: {{^}}sink_ubfe_i16:
123 ; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
124 ; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
125 ; GCN: s_cbranch_scc1
127 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
128 ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
131 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
132 ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
135 ; GCN: buffer_store_short
137 define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
139 %shr = lshr i16 %arg1, 4 ; the single shift that both masks below consume
140 br i1 undef, label %bb0, label %bb1
143 %val0 = and i16 %shr, 255 ; 8-bit field; reaches the phi from %bb0
144 store volatile i16 0, i16 addrspace(1)* undef
148 %val1 = and i16 %shr, 127 ; 7-bit field; reaches the phi from %bb1
149 store volatile i16 0, i16 addrspace(1)* undef
153 %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
154 store i16 %phi, i16 addrspace(1)* %out ; the selected field is written to %out
158 ; We don't really want to sink this one since it isn't reducible to a
159 ; 32-bit BFE on one half of the integer.
; i64 case where the extracted field straddles the 32-bit boundary: a shift
; by 30 followed by an 8-bit mask selects bits 30..37 of %arg1 (bits 30..36
; for the 7-bit mask). The opt checks below still expect a separate copy of
; the lshr immediately before each `and`.
161 ; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
167 ; OPT: %0 = lshr i64 %arg1, 30
168 ; OPT-NEXT: %val0 = and i64 %0, 255
171 ; OPT: %1 = lshr i64 %arg1, 30
172 ; OPT-NEXT: %val1 = and i64 %1, 127
; In the llc output a single v_alignbit_b32 by 30 is expected ahead of the
; branch, and both v_and_b32 masks must read its result (captured as LO).
; NOTE(review): the literal BB3_2 label presumably tracks this function's
; position in the file; re-check it if functions are added or reordered.
178 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
180 ; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
181 ; GCN: s_cbranch_scc1 BB3_2
182 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
185 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
188 ; GCN: buffer_store_dwordx2
189 define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
191 %shr = lshr i64 %arg1, 30 ; field starts at bit 30, so it crosses bit 32
192 br i1 undef, label %bb0, label %bb1
195 %val0 = and i64 %shr, 255 ; 8-bit field; reaches the phi from %bb0
196 store volatile i32 0, i32 addrspace(1)* undef
200 %val1 = and i64 %shr, 127 ; 7-bit field; reaches the phi from %bb1
201 store volatile i32 0, i32 addrspace(1)* undef
205 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
206 store i64 %phi, i64 addrspace(1)* %out ; the selected field is written to %out as an i64
; i64 case where the extracted field lies entirely in the low 32 bits: a
; shift by 15 followed by an 8-bit mask selects bits 15..22 of %arg1 (bits
; 15..21 for the 7-bit mask). The opt checks below expect a separate copy of
; the lshr immediately before each `and`.
210 ; OPT-LABEL: @sink_ubfe_i64_low32(
216 ; OPT: %0 = lshr i64 %arg1, 15
217 ; OPT-NEXT: %val0 = and i64 %0, 255
220 ; OPT: %1 = lshr i64 %arg1, 15
221 ; OPT-NEXT: %val1 = and i64 %1, 127
; In the llc output one 32-bit s_bfe_u32 per mask is expected after the branch.
; NOTE(review): the immediates read as (width << 16) | offset, i.e. 0x8000f =
; 8 bits at offset 15 and 0x7000f = 7 bits at offset 15 -- confirm against
; the ISA docs. The literal BB4_2 label presumably tracks this function's
; position in the file; re-check it if functions are added or reordered.
227 ; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
229 ; GCN: s_cbranch_scc1 BB4_2
231 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
234 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
237 ; GCN: buffer_store_dwordx2
238 define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
240 %shr = lshr i64 %arg1, 15 ; field starts at bit 15, inside the low dword
241 br i1 undef, label %bb0, label %bb1
244 %val0 = and i64 %shr, 255 ; 8-bit field; reaches the phi from %bb0
245 store volatile i32 0, i32 addrspace(1)* undef
249 %val1 = and i64 %shr, 127 ; 7-bit field; reaches the phi from %bb1
250 store volatile i32 0, i32 addrspace(1)* undef
254 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
255 store i64 %phi, i64 addrspace(1)* %out ; the selected field is written to %out as an i64
; i64 case where the extracted field lies entirely in the high 32 bits: a
; shift by 35 followed by an 8-bit mask selects bits 35..42 of %arg1 (bits
; 35..41 for the 7-bit mask). The opt checks below expect a separate copy of
; the lshr immediately before each `and`.
259 ; OPT-LABEL: @sink_ubfe_i64_high32(
265 ; OPT: %0 = lshr i64 %arg1, 35
266 ; OPT-NEXT: %val0 = and i64 %0, 255
269 ; OPT: %1 = lshr i64 %arg1, 35
270 ; OPT-NEXT: %val1 = and i64 %1, 127
; In the llc output one 32-bit s_bfe_u32 per mask is expected after the branch.
; NOTE(review): the immediates read as (width << 16) | offset, i.e. 0x80003 =
; 8 bits at offset 3 and 0x70003 = 7 bits at offset 3, where 3 = 35 - 32 is
; the field position within the upper dword -- confirm against the ISA docs.
; The literal BB5_2 label presumably tracks this function's position in the
; file; re-check it if functions are added or reordered.
276 ; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
277 ; GCN: s_cbranch_scc1 BB5_2
278 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
281 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
284 ; GCN: buffer_store_dwordx2
285 define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
287 %shr = lshr i64 %arg1, 35 ; field starts at bit 35, inside the high dword
288 br i1 undef, label %bb0, label %bb1
291 %val0 = and i64 %shr, 255 ; 8-bit field; reaches the phi from %bb0
292 store volatile i32 0, i32 addrspace(1)* undef
296 %val1 = and i64 %shr, 127 ; 7-bit field; reaches the phi from %bb1
297 store volatile i32 0, i32 addrspace(1)* undef
301 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
302 store i64 %phi, i64 addrspace(1)* %out ; the selected field is written to %out as an i64
306 attributes #0 = { nounwind }