; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FLATSCR %s

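; Frame pointer (s33) setup and elimination in callee functions, checked
; for both MUBUF scratch addressing and flat scratch (FLATSCR), across the
; "frame-pointer" attribute values defined at the end of the file.
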
; GCN-LABEL: {{^}}callee_no_stack:
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack() #0 {
  ret void
}

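; With "frame-pointer"="all", an FP is set up even with no stack objects;
; the incoming s33 is preserved in a free call-clobbered SGPR instead of
; being spilled.
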
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_all() #1 {
  ret void
}

; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
  ret void
}

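; A lone stack object in a leaf function is addressed directly off the
; incoming SP (s32), so no FP is set up.
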
; GCN-LABEL: {{^}}callee_with_stack:
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s32
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

; Can use a free call-clobbered register to preserve the original FP value.

; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_addk_i32 s32, 0x200
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_addk_i32 s32, 0xfe00
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_all() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

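; Note that MUBUF stack offsets are scaled by the wavefront size, so the
; 8-byte frame above is bumped by 8 * 64 lanes = 0x200, while the flat
; scratch SP is a per-thread byte offset and is bumped by just 8.
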
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s32{{$}}
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

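; The call clobbers the return address in s[30:31], so it must be saved to
; lanes of a CSR VGPR around the call, and that VGPR in turn must be
; spilled with all lanes forced active via s_or_saveexec.
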
; GCN-LABEL: {{^}}callee_with_stack_and_call:
; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], 2
; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}}
; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,

; MUBUF-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33{{$}}

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]]
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]]

; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_addk_i32 s32, 0xfc00{{$}}
; FLATSCR: s_add_i32 s32, s32, -16{{$}}
; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_with_stack_and_call() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  call void @external_void_func_void()
  ret void
}

; Should be able to copy the incoming stack pointer directly to the inner
; call's stack pointer argument.
;
; There is stack usage only because of the need to evict a VGPR for
; spilling CSR SGPRs.

; GCN-LABEL: {{^}}callee_no_stack_with_call:
; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF-DAG: s_addk_i32 s32, 0x400
; FLATSCR-DAG: s_add_i32 s32, s32, 16
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], [[FP_SPILL_LANE:[0-9]+]]

; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1

; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], [[FP_SPILL_LANE]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_no_stack_with_call() #0 {
  call void @external_void_func_void()
  ret void
}

declare hidden void @external_void_func_void() #0

; Make sure that if a CSR VGPR is used for SGPR spilling, it is saved and
; restored. No FP is required.

; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], s
; GCN: v_writelane_b32 [[CSR_VGPR]], s

; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]

; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
  call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
  call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
  call void asm sideeffect "", "~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #0

  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0

  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
  ret void
}

; Has no spilled CSR VGPRs used for SGPR spilling, so there is no need to
; enable all lanes and restore.

; GCN-LABEL: {{^}}spill_only_csr_sgpr:
; GCN-NEXT: s_xor_saveexec_b64
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec,
; GCN-NEXT: v_writelane_b32 v0, s42, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; clobber s42
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s42, v0, 0
; GCN-NEXT: s_xor_saveexec_b64
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec,
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @spill_only_csr_sgpr() {
  call void asm sideeffect "; clobber s42", "~{s42}"()
  ret void
}

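; Clobbering the CSR v41 forces it to be spilled and reloaded in the
; prologue and epilogue; "frame-pointer"="all" still requires the FP setup
; around it.
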
; TODO: Can the SP inc/dec be removed?
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_csr_vgpr:
; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33 offset:4

; GCN-NEXT: ; clobber v41
; GCN-NEXT: ;;#ASMEND

; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF: s_addk_i32 s32, 0x300
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
; MUBUF-NEXT: s_mov_b32 s33, s4
; FLATSCR: s_add_i32 s32, s32, 12
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; FLATSCR-NEXT: s_mov_b32 s33, s0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  ret void
}

; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
; GCN-NEXT: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v1
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:4
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:4

; GCN: v_writelane_b32 v1

; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; GCN: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[TMP_SGPR]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @last_lane_vgpr_for_fp_csr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  call void asm sideeffect "",
    "~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102}"() #1

  ret void
}

; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-COUNT-61: v_writelane_b32 v1,
; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v1,
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dword

; GCN: v_writelane_b32 v1,
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @no_new_vgpr_for_fp_csr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  call void asm sideeffect "",
    "~{s39},~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102}"() #1

  ret void
}

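; The 8192-byte-aligned alloca forces the FP to be realigned:
; s33 = (s32 + align - 1) & ~(align - 1). Under MUBUF the constants are
; wave-scaled (0x1fff * 64 = 0x7ffc0); under flat scratch they are
; per-thread byte values.
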
; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-NEXT: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000{{$}}
; MUBUF-NEXT: buffer_store_dword [[ZERO]], [[OFFSET]], s[0:3], s33 offen{{$}}
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x2000
; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s1
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000
; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @realign_stack_no_fp_elim() #1 {
  %alloca = alloca i32, align 8192, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

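; All non-CSR SGPRs are clobbered below, so the only remaining place for
; the FP copy that avoids a spill is vcc_lo.
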
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
; GCN-NEXT: s_mov_b32 vcc_lo, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)

; MUBUF: s_addk_i32 s32, 0x300
; FLATSCR: s_add_i32 s32, s32, 12
; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1
; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_addk_i32 s32, 0xfd00
; FLATSCR: s_add_i32 s32, s32, -12
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @no_unused_non_csr_sgpr_for_fp() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #1

  ret void
}

; Need a new CSR VGPR to satisfy the FP spill.
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; GCN-NEXT: s_mov_b32 vcc_lo, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]

; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword
; MUBUF: s_addk_i32 s32, 0x300{{$}}
; FLATSCR: s_add_i32 s32, s32, 12{{$}}

; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_addk_i32 s32, 0xfd00{{$}}
; FLATSCR: s_add_i32 s32, s32, -12{{$}}
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #1

  call void asm sideeffect "; clobber nonpreserved initial VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1

  ret void
}

; The byval argument exceeds the MUBUF constant offset (a 12-bit field,
; at most 4095), so a scratch register is needed to access the CSR VGPR
; slot.
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
; GCN-NEXT: s_mov_b32 vcc_lo, s33
; GCN-DAG: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x1004
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}}
; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}}
; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword

; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x1004
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}}
; FLATSCR: s_addk_i32 s32, 0xeff4{{$}}
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "; clobber nonpreserved SGPRs",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #1

  ; Use all clobberable VGPRs, so a CSR VGPR spill is needed.
  call void asm sideeffect "; clobber nonpreserved VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1

  ret void
}

; GCN-LABEL: {{^}}local_empty_func:
; GCN-NEXT: s_setpc_b64
define internal void @local_empty_func() #0 {
  ret void
}

; An FP is needed, despite not needing any spills.
; TODO: Could see that the callee does not use the stack and omit the FP.
; GCN-LABEL: {{^}}ipra_call_with_stack:
; GCN: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33{{$}}
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN: s_mov_b32 s33, [[TMP_SGPR]]
define void @ipra_call_with_stack() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  call void @local_empty_func()
  ret void
}

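; The #3 tests below additionally set "amdgpu-waves-per-eu"="6,6", which
; shrinks the available register budget.
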
; With no free registers, we must spill the FP to memory.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; FLATSCR: s_mov_b32 s0, s33
; GCN: s_mov_b32 s33, s32
; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], [[FP_SCRATCH_COPY]]
; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF: s_waitcnt vmcnt(0)
; MUBUF: v_readfirstlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[TMP_VGPR2]]
; MUBUF: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; FLATSCR: s_mov_b32 s33, s0

; MUBUF: ScratchSize: 8
; FLATSCR: ScratchSize: 0
define void @callee_need_to_spill_fp_to_memory() #3 {
  call void asm sideeffect "; clobber nonpreserved SGPRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()

  ret void
}

; If we have a reserved VGPR that can be used for SGPR spills, we may still
; need to spill the FP to memory if there are no free lanes in the reserved
; VGPR.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory_full_reserved_vgpr:
; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; MUBUF: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF: s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], [[FP_SCRATCH_COPY]]
; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s33 offset:[[OFF:[0-9]+]]
; GCN-NOT: v_writelane_b32 v40, s33
; GCN-NOT: v_readlane_b32 s33, v40
; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s33 offset:[[OFF]]
; MUBUF: v_readfirstlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[TMP_VGPR2]]
; MUBUF: s_xor_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF: s_mov_b64 exec, [[COPY_EXEC2]]
; MUBUF: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38}"()

  ret void
}

; When flat-scratch is enabled, we save the FP to s0. At the same time,
; the exec register is saved to s0 when saving CSRs in the function prolog.
; Make sure that the FP save happens after restoring exec from the same
; register.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_reg:
; FLATSCR: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; FLATSCR: s_mov_b32 s33, s32
; GCN-NOT: v_writelane_b32 v40, s33
; FLATSCR: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; FLATSCR: s_mov_b64 exec, [[COPY_EXEC0]]
; FLATSCR: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NOT: v_readlane_b32 s33, v40
; FLATSCR: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
define void @callee_need_to_spill_fp_to_reg() #1 {
  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()

  ret void
}

; If the size of the offset exceeds the MUBUF immediate offset field
; (12 bits, at most 4095), we need another scratch register to hold the
; offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset:
; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; MUBUF: v_mov_b32_e32 v0, [[FP_SCRATCH_COPY]]
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40200
; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR: v_mov_b32_e32 v0, 0
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1000
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) #3 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38}"()

  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind "frame-pointer"="all" }
attributes #2 = { nounwind "frame-pointer"="non-leaf" }
attributes #3 = { nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" }