1 ; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
3 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
4 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
6 ; Sinking the shift into both successor blocks actually makes this
7 ; particular case worse in terms of code size.
9 ; OPT-LABEL: @sink_ubfe_i32(
14 ; OPT: %0 = lshr i32 %arg1, 8
15 ; OPT-NEXT: %val0 = and i32 %0, 255
19 ; OPT: %1 = lshr i32 %arg1, 8
20 ; OPT-NEXT: %val1 = and i32 %1, 127
28 ; GCN-LABEL: {{^}}sink_ubfe_i32:
30 ; GCN: s_cbranch_scc{{[0-1]}}
32 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
34 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
36 ; GCN: buffer_store_dword
; Test kernel: a single `lshr i32 %arg1, 8` is defined ahead of the branch and
; is masked differently in each successor (255 in %bb0, 127 in %bb1). The two
; masked values meet in a phi whose result is stored to %out.
; The opt-output checks above expect a copy of the shift directly in front of
; each `and`; the llc-output checks expect one s_bfe_u32 per mask.
38 define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
40 %shr = lshr i32 %arg1, 8
; The condition is undef, so the IR itself does not favour either successor.
41 br i1 undef, label %bb0, label %bb1
; 8-bit mask: together with the shift this selects bits 8..15 of %arg1.
44 %val0 = and i32 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm.
45 store volatile i32 0, i32 addrspace(1)* undef
; 7-bit mask: together with the shift this selects bits 8..14 of %arg1.
49 %val1 = and i32 %shr, 127
45 store volatile i32 0, i32 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
54 %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
55 store i32 %phi, i32 addrspace(1)* %out
59 ; OPT-LABEL: @sink_sbfe_i32(
64 ; OPT: %0 = ashr i32 %arg1, 8
65 ; OPT-NEXT: %val0 = and i32 %0, 255
69 ; OPT: %1 = ashr i32 %arg1, 8
70 ; OPT-NEXT: %val1 = and i32 %1, 127
77 ; GCN-LABEL: {{^}}sink_sbfe_i32:
; Test kernel: same shape as the i32 lshr case, but the shared shift is an
; arithmetic one (`ashr i32 %arg1, 8`). Each successor masks it with a
; different constant (255 in %bb0, 127 in %bb1) and the results meet in a phi
; that is stored to %out.
; The opt-output checks above expect a copy of the ashr directly in front of
; each `and`. Only the function label is checked in the llc output visible here.
78 define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
80 %shr = ashr i32 %arg1, 8
; The condition is undef, so the IR itself does not favour either successor.
81 br i1 undef, label %bb0, label %bb1
; The mask keeps only the low 8 bits, so the sign bits shifted in by the ashr
; are discarded.
84 %val0 = and i32 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm.
85 store volatile i32 0, i32 addrspace(1)* undef
; Same as above with a 7-bit mask.
89 %val1 = and i32 %shr, 127
90 store volatile i32 0, i32 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
94 %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
95 store i32 %phi, i32 addrspace(1)* %out
100 ; OPT-LABEL: @sink_ubfe_i16(
105 ; OPT: %0 = lshr i16 %arg1, 4
106 ; OPT-NEXT: %val0 = and i16 %0, 255
110 ; OPT: %1 = lshr i16 %arg1, 4
111 ; OPT-NEXT: %val1 = and i16 %1, 127
118 ; For GFX8: since i16 is a legal type, we cannot sink the lshr into the BBs.
120 ; GCN-LABEL: {{^}}sink_ubfe_i16:
122 ; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
123 ; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
124 ; GCN: s_cbranch_scc{{[0-1]}}
126 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
127 ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
130 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
131 ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
133 ; GCN: buffer_store_short
; Test kernel: 16-bit variant. A single `lshr i16 %arg1, 4` is masked with 255
; in %bb0 and with 127 in %bb1; the results meet in a phi that is stored to
; %out as an i16.
; The opt-output checks above expect a copy of the shift in front of each
; `and`. The llc-output checks differ per run line: the default-CPU run expects
; one s_bfe_u32 per mask after the branch, while the -mcpu=tonga run expects a
; single s_bfe_u32 (0xc0004) ahead of the branch and the mask constants
; (0x7f / 0xff) moved into VGPRs afterwards.
135 define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
137 %shr = lshr i16 %arg1, 4
; The condition is undef, so the IR itself does not favour either successor.
138 br i1 undef, label %bb0, label %bb1
; 8-bit mask: together with the shift this selects bits 4..11 of %arg1.
141 %val0 = and i16 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm.
142 store volatile i16 0, i16 addrspace(1)* undef
; 7-bit mask: together with the shift this selects bits 4..10 of %arg1.
146 %val1 = and i16 %shr, 127
147 store volatile i16 0, i16 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
151 %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
152 store i16 %phi, i16 addrspace(1)* %out
156 ; We don't really want to sink this one since it isn't reducible to a
157 ; 32-bit BFE on one half of the integer.
159 ; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
165 ; OPT: %0 = lshr i64 %arg1, 30
166 ; OPT-NEXT: %val0 = and i64 %0, 255
169 ; OPT: %1 = lshr i64 %arg1, 30
170 ; OPT-NEXT: %val1 = and i64 %1, 127
176 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
178 ; GCN: s_cbranch_scc{{[0-1]}} BB3_2
179 ; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
180 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
183 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
185 ; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant whose extracted field straddles bit 32. A single
; `lshr i64 %arg1, 30` is masked with 255 in %bb0 and with 127 in %bb1, so the
; selected bits start at bit 30 and run past the 32-bit boundary.
; The opt-output checks above still expect a copy of the shift in front of each
; `and`. The llc-output checks expect a v_alignbit_b32 by 30 followed by
; v_and_b32 with 0x7f / 0xff rather than a BFE instruction.
186 define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
188 %shr = lshr i64 %arg1, 30
; The condition is undef, so the IR itself does not favour either successor.
189 br i1 undef, label %bb0, label %bb1
; 8-bit mask: together with the shift this selects bits 30..37 of %arg1.
192 %val0 = and i64 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm. Note it stores an i32, not
; an i64.
193 store volatile i32 0, i32 addrspace(1)* undef
; 7-bit mask: together with the shift this selects bits 30..36 of %arg1.
197 %val1 = and i64 %shr, 127
198 store volatile i32 0, i32 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
202 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
203 store i64 %phi, i64 addrspace(1)* %out
207 ; OPT-LABEL: @sink_ubfe_i64_low32(
213 ; OPT: %0 = lshr i64 %arg1, 15
214 ; OPT-NEXT: %val0 = and i64 %0, 255
217 ; OPT: %1 = lshr i64 %arg1, 15
218 ; OPT-NEXT: %val1 = and i64 %1, 127
224 ; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
226 ; GCN: s_cbranch_scc{{[0-1]}} BB4_2
228 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
231 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
233 ; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant whose extracted field lies entirely in the low
; 32 bits. A single `lshr i64 %arg1, 15` is masked with 255 in %bb0 and with
; 127 in %bb1; the results meet in a phi that is stored to %out.
; The opt-output checks above expect a copy of the shift in front of each
; `and`. The llc-output checks expect one 32-bit s_bfe_u32 per mask, with
; immediates 0x7000f and 0x8000f (low half 0xf matches the shift amount 15).
234 define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
236 %shr = lshr i64 %arg1, 15
; The condition is undef, so the IR itself does not favour either successor.
237 br i1 undef, label %bb0, label %bb1
; 8-bit mask: together with the shift this selects bits 15..22 of %arg1.
240 %val0 = and i64 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm.
241 store volatile i32 0, i32 addrspace(1)* undef
; 7-bit mask: together with the shift this selects bits 15..21 of %arg1.
245 %val1 = and i64 %shr, 127
246 store volatile i32 0, i32 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
250 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
251 store i64 %phi, i64 addrspace(1)* %out
255 ; OPT-LABEL: @sink_ubfe_i64_high32(
261 ; OPT: %0 = lshr i64 %arg1, 35
262 ; OPT-NEXT: %val0 = and i64 %0, 255
265 ; OPT: %1 = lshr i64 %arg1, 35
266 ; OPT-NEXT: %val1 = and i64 %1, 127
272 ; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
273 ; GCN: s_cbranch_scc{{[0-1]}} BB5_2
274 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
277 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
279 ; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant whose extracted field lies entirely in the high
; 32 bits. A single `lshr i64 %arg1, 35` is masked with 255 in %bb0 and with
; 127 in %bb1; the results meet in a phi that is stored to %out.
; The opt-output checks above expect a copy of the shift in front of each
; `and`. The llc-output checks expect one 32-bit s_bfe_u32 per mask, with
; immediates 0x70003 and 0x80003 (low half 3 matches 35 - 32, i.e. the offset
; inside the upper dword).
280 define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
; Shift shared by both successors; its only users are the two `and`s below.
282 %shr = lshr i64 %arg1, 35
; The condition is undef, so the IR itself does not favour either successor.
283 br i1 undef, label %bb0, label %bb1
; 8-bit mask: together with the shift this selects bits 35..42 of %arg1.
286 %val0 = and i64 %shr, 255
; NOTE(review): the volatile store presumably only gives the block a side
; effect so it is not simplified away -- confirm.
287 store volatile i32 0, i32 addrspace(1)* undef
; 7-bit mask: together with the shift this selects bits 35..41 of %arg1.
291 %val1 = and i64 %shr, 127
292 store volatile i32 0, i32 addrspace(1)* undef
; Merge the masked value from whichever predecessor ran and write it out.
296 %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
297 store i64 %phi, i64 addrspace(1)* %out
; Attribute group #0: referenced by every kernel definition visible in this
; file; it marks them nounwind.
301 attributes #0 = { nounwind }