1 ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3 ; TODO: Test with flat scratch
; Stores the address of an addrspace(5) alloca to an addrspace(1) pointer
; while the alloca is bracketed by lifetime.start / lifetime.end markers.
; The check lines expect the stored value to be a VGPR materialized as 0.
5 ; GCN-LABEL: {{^}}store_fi_lifetime:
6 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
7 ; GCN: buffer_store_dword [[FI]]
8 define amdgpu_kernel void @store_fi_lifetime(ptr addrspace(1) %out, i32 %in) #0 {
10 %b = alloca i8, addrspace(5)
11 call void @llvm.lifetime.start.p5(i64 1, ptr addrspace(5) %b)
; The store is volatile and its destination is undef; %out and %in are not
; referenced by the instructions visible here.
12 store volatile ptr addrspace(5) %b, ptr addrspace(1) undef
13 call void @llvm.lifetime.end.p5(i64 1, ptr addrspace(5) %b)
; Stores the address of a single addrspace(5) alloca through the addrspace(3)
; pointer argument. The check lines expect the pointer argument to be loaded
; into an SGPR, copied to a VGPR, and used as the address of a ds_write_b32
; whose data operand is a register holding 0.
17 ; GCN-LABEL: {{^}}stored_fi_to_lds:
18 ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
19 ; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
20 ; GCN: buffer_store_dword v{{[0-9]+}}, off,
21 ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
22 ; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
23 define amdgpu_kernel void @stored_fi_to_lds(ptr addrspace(3) %ptr) #0 {
24 %tmp = alloca float, addrspace(5)
; Initialize the alloca first, then store its address (not its contents).
25 store float 4.0, ptr addrspace(5) %tmp
26 store ptr addrspace(5) %tmp, ptr addrspace(3) %ptr
; Two float allocas in addrspace(5); the address of each is stored (volatile)
; through the same addrspace(3) pointer argument. The check lines expect two
; buffer stores at offsets 0 and 4, followed by two ds_write_b32 instructions
; whose data operands hold 0 and 4 respectively.
31 ; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
32 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
33 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
34 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
36 ; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]
38 ; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
39 ; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO]]
41 ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
42 ; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
43 define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(ptr addrspace(3) %ptr) #0 {
44 %tmp0 = alloca float, addrspace(5)
45 %tmp1 = alloca float, addrspace(5)
46 store float 4.0, ptr addrspace(5) %tmp0
47 store float 4.0, ptr addrspace(5) %tmp1
; Both address stores are volatile and target the same location, so neither
; may be dropped even though the second overwrites the first.
48 store volatile ptr addrspace(5) %tmp0, ptr addrspace(3) %ptr
49 store volatile ptr addrspace(5) %tmp1, ptr addrspace(3) %ptr
53 ; Same frame index is used multiple times in the store
; The alloca holds a pointer, and its own address is stored into it. The
; check lines expect a store of 0x4d2 (decimal 1234) followed by a store of 0,
; both at offset 0.
54 ; GCN-LABEL: {{^}}stored_fi_to_self:
55 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}}
56 ; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
57 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
58 ; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
59 define amdgpu_kernel void @stored_fi_to_self() #0 {
60 %tmp = alloca ptr addrspace(5), addrspace(5)
62 ; Avoid optimizing everything out
63 store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp
; Value operand and pointer operand are the same alloca.
64 store volatile ptr addrspace(5) %tmp, ptr addrspace(5) %tmp
; Like stored_fi_to_self, but a [512 x i32] alloca (512 * 4 = 2048 bytes) is
; declared before the pointer alloca. The check lines expect the self-store
; to write the value 0x800 (decimal 2048) at offset:2048.
68 ; GCN-LABEL: {{^}}stored_fi_to_self_offset:
69 ; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}}
70 ; GCN: buffer_store_dword [[K0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
72 ; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}}
73 ; GCN: buffer_store_dword [[K1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2048{{$}}
75 ; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}}
76 ; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2048{{$}}
77 define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
78 %tmp0 = alloca [512 x i32], addrspace(5)
79 %tmp1 = alloca ptr addrspace(5), addrspace(5)
81 ; Avoid optimizing everything out
82 store volatile i32 32, ptr addrspace(5) %tmp0
; 1234 is the 0x4d2 constant expected by the check lines.
84 store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp1
; Self-store: the address of %tmp1 is written into %tmp1.
86 store volatile ptr addrspace(5) %tmp1, ptr addrspace(5) %tmp1
; Three pointer-sized allocas. Each is first initialized with a constant, then
; the address of %tmp1 is stored into %tmp2 and the address of %tmp2 is stored
; into %tmp1. The check lines expect the initializing stores at offsets 0, 4
; and 8, then the value 4 stored at offset:8 and the value 8 stored at
; offset:4.
90 ; GCN-LABEL: {{^}}stored_fi_to_fi:
91 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
92 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
93 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
95 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
96 ; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
98 ; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
99 ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
100 define amdgpu_kernel void @stored_fi_to_fi() #0 {
101 %tmp0 = alloca ptr addrspace(5), addrspace(5)
102 %tmp1 = alloca ptr addrspace(5), addrspace(5)
103 %tmp2 = alloca ptr addrspace(5), addrspace(5)
104 store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp0
105 store volatile ptr addrspace(5) inttoptr (i32 5678 to ptr addrspace(5)), ptr addrspace(5) %tmp1
106 store volatile ptr addrspace(5) inttoptr (i32 9999 to ptr addrspace(5)), ptr addrspace(5) %tmp2
109 store volatile ptr addrspace(5) %tmp1, ptr addrspace(5) %tmp2 ; store offset 4 at offset 8 (per the checks above)
110 store volatile ptr addrspace(5) %tmp2, ptr addrspace(5) %tmp1 ; store offset 8 at offset 4 (per the checks above)
; Stores the address of a single addrspace(5) alloca through the addrspace(1)
; pointer argument. The check lines expect the initializing store at offset 0
; and then a buffer store whose data register was set to 0.
114 ; GCN-LABEL: {{^}}stored_fi_to_global:
115 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
116 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
117 ; GCN: buffer_store_dword [[FI]]
118 define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
119 %tmp = alloca float, addrspace(5)
; Neither store in this test is volatile.
120 store float 0.0, ptr addrspace(5) %tmp
121 store ptr addrspace(5) %tmp, ptr addrspace(1) %ptr
; Three float allocas; the addresses of %tmp1 and %tmp2 (but not %tmp0) are
; stored through the addrspace(1) pointer argument. The check lines expect
; initializing stores at offsets 0, 4 and 8, then stores of the values 4 and 8.
126 ; GCN-LABEL: {{^}}stored_fi_to_global_2_small_objects:
127 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
128 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
129 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
131 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
132 ; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
134 ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
135 ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
136 define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1) %ptr) #0 {
137 %tmp0 = alloca float, addrspace(5)
138 %tmp1 = alloca float, addrspace(5)
139 %tmp2 = alloca float, addrspace(5)
; All stores are volatile, so each one must appear in the output.
140 store volatile float 0.0, ptr addrspace(5) %tmp0
141 store volatile float 0.0, ptr addrspace(5) %tmp1
142 store volatile float 0.0, ptr addrspace(5) %tmp2
143 store volatile ptr addrspace(5) %tmp1, ptr addrspace(1) %ptr
144 store volatile ptr addrspace(5) %tmp2, ptr addrspace(1) %ptr
; Kernel variant with two [4096 x i32] allocas (16384 bytes each). Writes 0 to
; the start of %tmp0 and 999 (0x3e7) to element 4095 of %tmp0, then stores a
; pointer to element 14 of %tmp0 through the addrspace(1) argument. The check
; lines expect the 999 store to use a VGPR offset of 0x4000 with the offen
; addressing form.
148 ; GCN-LABEL: {{^}}kernel_stored_fi_to_global_huge_frame_offset:
149 ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
151 ; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
153 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
154 ; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF:v[0-9]+]], 0x4000{{$}}
155 ; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
157 ; GCN: buffer_store_dword [[V_BASE_1_OFF]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
158 define amdgpu_kernel void @kernel_stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
159 %tmp0 = alloca [4096 x i32], addrspace(5)
160 %tmp1 = alloca [4096 x i32], addrspace(5)
161 store volatile i32 0, ptr addrspace(5) %tmp0
162 %gep1.tmp0 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 4095
163 store volatile i32 999, ptr addrspace(5) %gep1.tmp0
; NOTE(review): despite its name, %gep0.tmp1 is computed from %tmp0, and
; %tmp1 is not referenced by any instruction visible here. Confirm this is
; intentional before changing it; the check lines depend on the current IR.
164 %gep0.tmp1 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 14
165 store ptr addrspace(5) %gep0.tmp1, ptr addrspace(1) %ptr
169 ; FIXME: Shift of SP repeated twice
; Non-kernel variant of kernel_stored_fi_to_global_huge_frame_offset with the
; same body. The check lines expect addresses to be formed from s32 shifted
; right by 6 (this shift appears twice, see the FIXME above), with 0x4000
; added for the 999 store and 60 added for the pointer stored to global
; memory. 60 is consistent with element 14 (14 * 4 = 56) of an object whose
; first element is written at offset:4.
170 ; GCN-LABEL: {{^}}func_stored_fi_to_global_huge_frame_offset:
171 ; GCN-DAG: v_lshr_b32_e64 [[FI_TMP_0:v[0-9]+]], s32, 6
172 ; GCN-DAG: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
173 ; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4{{$}}
176 ; GCN-DAG: v_add_i32_e32 [[FI_0:v[0-9]+]], vcc, 0x4000, [[FI_TMP_0]]{{$}}
177 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
179 ; GCN: buffer_store_dword [[K]], [[FI_0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
180 ; GCN: v_lshr_b32_e64 [[FI_TMP_1:v[0-9]+]], s32, 6
181 ; GCN: v_add_i32_e32 [[BASE_0_1:v[0-9]+]], vcc, 60, [[FI_TMP_1]]{{$}}
182 ; GCN: buffer_store_dword [[BASE_0_1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
183 define void @func_stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
184 %tmp0 = alloca [4096 x i32], addrspace(5)
185 %tmp1 = alloca [4096 x i32], addrspace(5)
186 store volatile i32 0, ptr addrspace(5) %tmp0
187 %gep1.tmp0 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 4095
188 store volatile i32 999, ptr addrspace(5) %gep1.tmp0
; NOTE(review): %gep0.tmp1 is derived from %tmp0, not %tmp1; %tmp1 is not
; referenced by any instruction visible here. Confirm before changing.
189 %gep0.tmp1 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 14
190 store ptr addrspace(5) %gep0.tmp1, ptr addrspace(1) %ptr
; External addrspace(1) global holding an addrspace(5) pointer; loaded by the
; test below.
194 @g1 = external addrspace(1) global ptr addrspace(5)
196 ; This was leaving a dead node around resulting in failing to select
197 ; on the leftover AssertZext's ValueType operand.
; The check lines expect the address of @g1 to be formed with s_getpc_b64 plus
; gotpcrel32 lo/hi relocations, and the address of the alloca %b to be stored
; as the constant 0.
199 ; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
200 ; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
201 ; GCN: s_add_u32 s{{[0-9]+}}, s[[PC_LO]], g1@gotpcrel32@lo+4
202 ; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+12
203 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
204 ; GCN: buffer_store_dword [[FI]]
205 define amdgpu_kernel void @cannot_select_assertzext_valuetype(ptr addrspace(1) %out, i32 %idx) #0 {
207 %b = alloca i32, align 4, addrspace(5)
208 %tmp1 = load volatile ptr addrspace(5), ptr addrspace(1) @g1, align 4
209 %arrayidx = getelementptr inbounds i32, ptr addrspace(5) %tmp1, i32 %idx
; %tmp2 is a non-volatile load whose result is not used by any instruction
; visible here.
210 %tmp2 = load i32, ptr addrspace(5) %arrayidx, align 4
211 store volatile ptr addrspace(5) %b, ptr addrspace(1) undef
; Passes the address of the function's only alloca to inline asm through an
; "s" constraint. The check line expects the address to be produced by a
; scalar right shift of s32 by 6.
215 ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_sgpr:
216 ; GCN: s_lshr_b32 [[FI:s[0-9]+]], s32, 6
219 define void @func_alloca_offset0__use_asm_sgpr() {
220 %alloca = alloca i32, addrspace(5)
221 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca)
; Same as func_alloca_offset0__use_asm_sgpr but with a "v" constraint. The
; check lines expect a vector right shift of s32 by 6 immediately followed by
; the inline asm block using that register.
225 ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_vgpr:
226 ; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
227 ; GCN-NEXT: ;;#ASMSTART
228 ; GCN-NEXT: ; use [[FI]]
229 define void @func_alloca_offset0__use_asm_vgpr() {
230 %alloca = alloca i32, addrspace(5)
231 call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca)
; Passes the alloca address to inline asm through the explicit register
; constraint "{s8}". The check lines expect a scalar right shift of s32 by 6
; immediately followed by the inline asm block using the shift result. Note
; that the pattern accepts any SGPR number rather than requiring s8.
235 ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_sgpr:
236 ; GCN: s_lshr_b32 [[FI:s[0-9]+]], s32, 6
237 ; GCN-NEXT: ;;#ASMSTART
238 ; GCN-NEXT: ; use [[FI]]
239 define void @func_alloca_offset0__use_asm_phys_sgpr() {
240 %alloca = alloca i32, addrspace(5)
241 call void asm sideeffect "; use $0", "{s8}"(ptr addrspace(5) %alloca)
; Passes the alloca address to inline asm through the explicit register
; constraint "{v8}". The check lines require the shift of s32 by 6 to write
; v8 directly, immediately before the inline asm block.
245 ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_vgpr:
246 ; GCN: v_lshr_b32_e64 v8, s32, 6
247 ; GCN-NEXT: ;;#ASMSTART
249 define void @func_alloca_offset0__use_asm_phys_vgpr() {
250 %alloca = alloca i32, addrspace(5)
251 call void asm sideeffect "; use $0", "{v8}"(ptr addrspace(5) %alloca)
; Two allocas, a 16-byte-aligned [4096 x i32] (16384 bytes) and an i32, each
; passed to its own inline asm through an "s" constraint. The check lines
; expect s32 shifted right by 6 with 16 added for the first use, and a second
; shift with 0x4010 added (0x4010 = 16 + 16384) for the second use.
255 ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_sgpr:
256 ; GCN: s_lshr_b32 [[FI0_TMP0:s[0-9]+]], s32, 6
257 ; GCN-NEXT: s_add_i32 [[FI0:s[0-9]+]], [[FI0_TMP0]], 16
259 ; GCN: s_lshr_b32 [[TMP:s[0-9]+]], s32, 6
260 ; GCN-NEXT: s_addk_i32 [[TMP]], 0x4010
261 ; GCN-NEXT: ;;#ASMSTART
263 define void @func_alloca_offset_use_asm_sgpr() {
264 %alloca0 = alloca [4096 x i32], align 16, addrspace(5)
265 %alloca1 = alloca i32, addrspace(5)
266 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca0)
267 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca1)
; Same allocas as func_alloca_offset_use_asm_sgpr, but the first address is
; used through an "s" constraint and the second through a "v" constraint.
; The check lines expect the scalar address to be s32 >> 6 plus 16, and the
; vector address to be s32 >> 6 plus 0x4010, with the 0x4010 constant held in
; vcc_lo.
271 ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_vgpr:
272 ; GCN: s_lshr_b32 [[S_FI:s[0-9]+]], s32, 6
273 ; GCN: v_lshr_b32_e64 [[V_FI:v[0-9]+]], s32, 6
274 ; GCN: s_movk_i32 vcc_lo, 0x4010
275 ; GCN: s_add_i32 [[S_FI]], [[S_FI]], 16
276 ; GCN-NEXT: ;;#ASMSTART
277 ; GCN-NEXT: ; use [[S_FI]]
278 ; GCN-NEXT: ;;#ASMEND
279 ; GCN-NEXT: v_add_i32_e32 [[V_FI:v[0-9]+]], vcc, vcc_lo, [[V_FI]]
280 ; GCN-NEXT: ;;#ASMSTART
281 ; GCN-NEXT: ; use [[V_FI]]
282 ; GCN-NEXT: ;;#ASMEND
283 define void @func_alloca_offset_use_asm_vgpr() {
284 %alloca0 = alloca [4096 x i32], align 16, addrspace(5)
285 %alloca1 = alloca i32, addrspace(5)
286 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca0)
287 call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca1)
; Kernel variant of func_alloca_offset_use_asm_sgpr. The check lines expect
; the two addresses to be materialized directly as the scalar constants 16 and
; 0x4010, with no shift of s32.
291 ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_sgpr:
292 ; GCN: s_mov_b32 [[FI0:s[0-9]+]], 16
295 ; GCN-NEXT: ; use [[FI0]]
296 ; GCN-NEXT: ;;#ASMEND
297 ; GCN: s_movk_i32 [[FI1:s[0-9]+]], 0x4010
298 ; GCN-NEXT: ;;#ASMSTART
299 ; GCN-NEXT: ; use [[FI1]]
300 ; GCN-NEXT: ;;#ASMEND
301 define amdgpu_kernel void @kernel_alloca_offset_use_asm_sgpr() {
302 %alloca0 = alloca [4096 x i32], align 16, addrspace(5)
303 %alloca1 = alloca i32, addrspace(5)
304 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca0)
305 call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca1)
; Kernel variant using "v" constraints for both inline asm uses. The check
; lines expect v0 to be loaded with the constant 16 for the first use and with
; 0x4010 for the second.
309 ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_vgpr:
310 ; GCN: v_mov_b32_e32 v0, 16
314 ; GCN-NEXT: ;;#ASMEND
316 ; GCN: v_mov_b32_e32 v0, 0x4010
317 ; GCN-NEXT: ;;#ASMSTART
319 ; GCN-NEXT: ;;#ASMEND
320 define amdgpu_kernel void @kernel_alloca_offset_use_asm_vgpr() {
321 %alloca0 = alloca [4096 x i32], align 16, addrspace(5)
322 %alloca1 = alloca i32, addrspace(5)
323 call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca0)
324 call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca1)
; Casts a 16-byte-aligned addrspace(5) alloca to a generic pointer, advances
; it by 4 bytes, and passes the result to an indirect call through %fptr.
; The check lines expect the +4 to be applied with s_or_b32 and the result to
; be copied into v0, followed by a write to v1.
328 ; GCN-LABEL: {{^}}live_out_physreg_copy_add_fi:
329 ; GCN: s_or_b32 [[FI:s[0-9]+]], s{{[0-9]+}}, 4
330 ; GCN: v_mov_b32_e32 v0, [[FI]]
331 ; GCN: v_mov_b32_e32 v1
333 define void @live_out_physreg_copy_add_fi(ptr %fptr) #2 {
335 %alloca = alloca [4 x i32], align 16, addrspace(5)
336 %addrspacecast = addrspacecast ptr addrspace(5) %alloca to ptr
; Byte-wise GEP: offset is 4 bytes, not 4 elements.
337 %getelementptr = getelementptr i8, ptr %addrspacecast, i64 4
338 call void %fptr(ptr %getelementptr) #2
; Lifetime intrinsics for addrspace(5) pointers, called by store_fi_lifetime.
342 declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #1
343 declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #1
; Attribute groups: #0 is attached to most test functions, #1 to the lifetime
; intrinsic declarations, and #2 to live_out_physreg_copy_add_fi and its
; indirect call.
345 attributes #0 = { nounwind }
346 attributes #1 = { argmemonly nounwind }
347 attributes #2 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }