1 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
4 ; GCN-LABEL: {{^}}callee_no_stack:
7 ; GCN-NEXT: s_setpc_b64
8 define void @callee_no_stack() #0 {
12 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
15 ; GCN-NEXT: s_mov_b32 s4, s34
16 ; GCN-NEXT: s_mov_b32 s34, s32
17 ; GCN-NEXT: s_mov_b32 s34, s4
18 ; GCN-NEXT: s_setpc_b64
19 define void @callee_no_stack_no_fp_elim_all() #1 {
23 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
26 ; GCN-NEXT: s_setpc_b64
27 define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
31 ; GCN-LABEL: {{^}}callee_with_stack:
34 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
35 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
37 ; GCN-NEXT: s_setpc_b64
38 define void @callee_with_stack() #0 {
39 %alloca = alloca i32, addrspace(5)
40 store volatile i32 0, i32 addrspace(5)* %alloca
44 ; Can use free call clobbered register to preserve original FP value.
46 ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
49 ; GCN-NEXT: s_mov_b32 s4, s34
50 ; GCN-NEXT: s_mov_b32 s34, s32
51 ; GCN-NEXT: s_add_u32 s32, s32, 0x200
52 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
53 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4{{$}}
54 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200
55 ; GCN-NEXT: s_mov_b32 s34, s4
56 ; GCN-NEXT: s_waitcnt vmcnt(0)
57 ; GCN-NEXT: s_setpc_b64
58 define void @callee_with_stack_no_fp_elim_all() #1 {
59 %alloca = alloca i32, addrspace(5)
60 store volatile i32 0, i32 addrspace(5)* %alloca
64 ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
67 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
68 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
70 ; GCN-NEXT: s_setpc_b64
71 define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
72 %alloca = alloca i32, addrspace(5)
73 store volatile i32 0, i32 addrspace(5)* %alloca
77 ; GCN-LABEL: {{^}}callee_with_stack_and_call:
80 ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
81 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
82 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
83 ; GCN: v_writelane_b32 [[CSR_VGPR]], s34, 2
84 ; GCN-DAG: s_mov_b32 s34, s32
85 ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
86 ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
87 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
88 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
90 ; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34{{$}}
94 ; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]]
95 ; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]]
97 ; GCN: s_sub_u32 s32, s32, 0x400{{$}}
98 ; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], 2
99 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
100 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
101 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
102 ; GCN-NEXT: s_waitcnt vmcnt(0)
104 ; GCN-NEXT: s_setpc_b64
105 define void @callee_with_stack_and_call() #0 {
106 %alloca = alloca i32, addrspace(5)
107 store volatile i32 0, i32 addrspace(5)* %alloca
108 call void @external_void_func_void()
112 ; Should be able to copy incoming stack pointer directly to inner
113 ; call's stack pointer argument.
115 ; There is stack usage only because of the need to evict a VGPR for
116 ; spilling CSR SGPRs.
118 ; GCN-LABEL: {{^}}callee_no_stack_with_call:
120 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
121 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
122 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
123 ; GCN-DAG: s_add_u32 s32, s32, 0x400
124 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s34, [[FP_SPILL_LANE:[0-9]+]]
126 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
127 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
130 ; GCN-DAG: v_readlane_b32 s4, v32, 0
131 ; GCN-DAG: v_readlane_b32 s5, v32, 1
133 ; GCN: s_sub_u32 s32, s32, 0x400
134 ; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], [[FP_SPILL_LANE]]
135 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
136 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
137 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
138 ; GCN-NEXT: s_waitcnt vmcnt(0)
139 ; GCN-NEXT: s_setpc_b64
140 define void @callee_no_stack_with_call() #0 {
141 call void @external_void_func_void()
145 declare hidden void @external_void_func_void() #0
147 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and
148 ; restored. No FP is required.
150 ; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
151 ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
152 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
153 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
154 ; GCN: v_writelane_b32 [[CSR_VGPR]], s
155 ; GCN: v_writelane_b32 [[CSR_VGPR]], s
158 ; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
159 ; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
161 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
162 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
163 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
164 ; GCN-NEXT: s_waitcnt
165 ; GCN-NEXT: s_setpc_b64
166 define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
167 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
168 call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
169 call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
170 call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
172 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
173 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
174 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
175 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
176 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
177 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
179 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
180 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
181 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
182 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
183 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
184 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
188 ; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
189 ; enable all lanes and restore.
191 ; GCN-LABEL: {{^}}spill_only_csr_sgpr:
193 ; GCN-NEXT: v_writelane_b32 v0, s42, 0
194 ; GCN-NEXT: ;;#ASMSTART
195 ; GCN-NEXT: ; clobber s42
196 ; GCN-NEXT: ;;#ASMEND
197 ; GCN-NEXT: v_readlane_b32 s42, v0, 0
198 ; GCN-NEXT: s_setpc_b64
199 define void @spill_only_csr_sgpr() {
200 call void asm sideeffect "; clobber s42", "~{s42}"()
204 ; TODO: Can the SP inc/dec be removed?
205 ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_csr_vgpr:
207 ; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s34
208 ; GCN-NEXT: s_mov_b32 s34, s32
209 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
210 ; GCN-DAG: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
211 ; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:8
214 ; GCN-NEXT: ; clobber v33
215 ; GCN-NEXT: ;;#ASMEND
217 ; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload
218 ; GCN: s_add_u32 s32, s32, 0x300
219 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300
220 ; GCN-NEXT: s_mov_b32 s34, s4
221 ; GCN-NEXT: s_waitcnt vmcnt(0)
222 ; GCN-NEXT: s_setpc_b64
223 define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
224 %alloca = alloca i32, addrspace(5)
225 store volatile i32 0, i32 addrspace(5)* %alloca
226 call void asm sideeffect "; clobber v33", "~{v33}"()
230 ; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
231 ; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
233 ; GCN-NEXT: v_writelane_b32 v1, s34, 63
234 ; GCN-NEXT: s_mov_b32 s34, s32
235 ; GCN: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
236 ; GCN-COUNT-63: v_writelane_b32 v1
237 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:8
239 ; GCN-COUNT-63: v_readlane_b32 s{{[0-9]+}}, v1
241 ; GCN: s_add_u32 s32, s32, 0x300
242 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300
243 ; GCN-NEXT: v_readlane_b32 s34, v1, 63
244 ; GCN-NEXT: s_waitcnt vmcnt(0)
245 ; GCN-NEXT: s_setpc_b64
246 define void @last_lane_vgpr_for_fp_csr() #1 {
247 %alloca = alloca i32, addrspace(5)
248 store volatile i32 0, i32 addrspace(5)* %alloca
249 call void asm sideeffect "; clobber v33", "~{v33}"()
250 call void asm sideeffect "",
251 "~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
252 ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
253 ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
254 ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
255 ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
256 ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
257 ,~{s100},~{s101},~{s102}"() #1
262 ; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
263 ; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
265 ; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s34
266 ; GCN-NEXT: s_mov_b32 s34, s32
267 ; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
268 ; GCN-COUNT-64: v_writelane_b32 v1,
270 ; GCN: buffer_store_dword
272 ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
274 ; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload
275 ; GCN: s_add_u32 s32, s32, 0x300
276 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300
277 ; GCN-NEXT: s_mov_b32 s34, [[FP_COPY]]
278 ; GCN-NEXT: s_waitcnt vmcnt(0)
279 ; GCN-NEXT: s_setpc_b64
280 define void @no_new_vgpr_for_fp_csr() #1 {
281 %alloca = alloca i32, addrspace(5)
282 store volatile i32 0, i32 addrspace(5)* %alloca
283 call void asm sideeffect "; clobber v33", "~{v33}"()
284 call void asm sideeffect "",
285 "~{s39},~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
286 ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
287 ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
288 ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
289 ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
290 ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
291 ,~{s100},~{s101},~{s102}"() #1
296 ; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
298 ; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
299 ; GCN-NEXT: s_mov_b32 s4, s34
300 ; GCN-NEXT: s_and_b32 s34, [[SCRATCH]], 0xfff80000
301 ; GCN-NEXT: s_add_u32 s32, s32, 0x100000
302 ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
303 ; GCN-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s34
304 ; GCN-NEXT: s_sub_u32 s32, s32, 0x100000
305 ; GCN-NEXT: s_mov_b32 s34, s4
306 ; GCN-NEXT: s_waitcnt vmcnt(0)
307 ; GCN-NEXT: s_setpc_b64
308 define void @realign_stack_no_fp_elim() #1 {
309 %alloca = alloca i32, align 8192, addrspace(5)
310 store volatile i32 0, i32 addrspace(5)* %alloca
314 ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
316 ; GCN-NEXT: v_writelane_b32 v1, s34, 2
317 ; GCN-NEXT: v_writelane_b32 v1, s30, 0
318 ; GCN-NEXT: s_mov_b32 s34, s32
319 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
320 ; GCN: v_writelane_b32 v1, s31, 1
321 ; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:4
323 ; GCN: v_readlane_b32 s4, v1, 0
324 ; GCN-NEXT: s_add_u32 s32, s32, 0x200
325 ; GCN-NEXT: v_readlane_b32 s5, v1, 1
326 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200
327 ; GCN-NEXT: v_readlane_b32 s34, v1, 2
328 ; GCN-NEXT: s_waitcnt vmcnt(0)
329 ; GCN-NEXT: s_setpc_b64 s[4:5]
330 define void @no_unused_non_csr_sgpr_for_fp() #1 {
331 %alloca = alloca i32, addrspace(5)
332 store volatile i32 0, i32 addrspace(5)* %alloca
334 ; Use all clobberable registers, so FP has to spill to a VGPR.
335 call void asm sideeffect "",
336 "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
337 ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
338 ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
344 ; Need a new CSR VGPR to satisfy the FP spill.
345 ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
347 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
348 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
349 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
350 ; GCN-NEXT: v_writelane_b32 v32, s34, 2
351 ; GCN-NEXT: v_writelane_b32 v32, s30, 0
352 ; GCN-NEXT: s_mov_b32 s34, s32
354 ; GCN-DAG: v_writelane_b32 v32, s31, 1
355 ; GCN-DAG: buffer_store_dword
356 ; GCN: s_add_u32 s32, s32, 0x300{{$}}
360 ; GCN: v_readlane_b32 s4, v32, 0
361 ; GCN-NEXT: v_readlane_b32 s5, v32, 1
362 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300{{$}}
363 ; GCN-NEXT: v_readlane_b32 s34, v32, 2
364 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
365 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
366 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
367 ; GCN-NEXT: s_waitcnt vmcnt(0)
368 ; GCN-NEXT: s_setpc_b64
369 define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
370 %alloca = alloca i32, addrspace(5)
371 store volatile i32 0, i32 addrspace(5)* %alloca
373 ; Use all clobberable registers, so FP has to spill to a VGPR.
374 call void asm sideeffect "",
375 "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
376 ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
377 ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
380 call void asm sideeffect "; clobber nonpreserved VGPRs",
381 "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
382 ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
383 ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
389 ; The byval argument exceeds the MUBUF constant offset, so a scratch
390 ; register is needed to access the CSR VGPR slot.
391 ; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
393 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
394 ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
395 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill
396 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
397 ; GCN-NEXT: v_writelane_b32 v32, s34, 2
398 ; GCN-NEXT: v_writelane_b32 v32, s30, 0
399 ; GCN-NEXT: s_mov_b32 s34, s32
400 ; GCN-DAG: v_writelane_b32 v32, s31, 1
401 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}}
402 ; GCN-DAG: buffer_store_dword
406 ; GCN: v_readlane_b32 s4, v32, 0
407 ; GCN-NEXT: v_readlane_b32 s5, v32, 1
408 ; GCN-NEXT: s_sub_u32 s32, s32, 0x40300{{$}}
409 ; GCN-NEXT: v_readlane_b32 s34, v32, 2
410 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
411 ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
412 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Reload
413 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
414 ; GCN-NEXT: s_waitcnt vmcnt(0)
415 ; GCN-NEXT: s_setpc_b64
416 define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval align 4 %arg) #1 {
417 %alloca = alloca i32, addrspace(5)
418 store volatile i32 0, i32 addrspace(5)* %alloca
420 ; Use all clobberable registers, so FP has to spill to a VGPR.
421 call void asm sideeffect "; clobber nonpreserved SGPRs",
422 "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
423 ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
424 ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
427 ; Use all clobberable VGPRs, so a CSR spill is needed for the VGPR
428 call void asm sideeffect "; clobber nonpreserved VGPRs",
429 "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
430 ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
431 ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
437 ; GCN-LABEL: {{^}}local_empty_func:
439 ; GCN-NEXT: s_setpc_b64
440 define internal void @local_empty_func() #0 {
444 ; An FP is needed, despite not needing any spills
445 ; TODO: Could see callee does not use stack and omit FP.
446 ; GCN-LABEL: {{^}}ipra_call_with_stack:
447 ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s34
448 ; GCN: s_mov_b32 s34, s32
449 ; GCN: s_add_u32 s32, s32, 0x400
450 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}}
452 ; GCN: s_sub_u32 s32, s32, 0x400
453 ; GCN: s_mov_b32 s34, [[FP_COPY:s[0-9]+]]
454 define void @ipra_call_with_stack() #0 {
455 %alloca = alloca i32, addrspace(5)
456 store volatile i32 0, i32 addrspace(5)* %alloca
457 call void @local_empty_func()
461 attributes #0 = { nounwind }
462 attributes #1 = { nounwind "frame-pointer"="all" }
463 attributes #2 = { nounwind "frame-pointer"="non-leaf" }