1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
; External functions declared for the machine functions in this file. @callee
; is the target of the SI_CS_CHAIN_TC_W32 tail calls in the MIR bodies.
5 declare amdgpu_cs_chain void @callee()
6 declare amdgpu_gfx void @gfx_callee()
; Empty IR stubs, one per machine function in this file; each stub name matches
; the `name:` field of the corresponding MIR function. All of them use the
; amdgpu_cs_chain_preserve calling convention and contain only `ret void`.
8 define amdgpu_cs_chain_preserve void @preserve_active_lanes_above_args() {ret void}
9 define amdgpu_cs_chain_preserve void @preserve_all_lanes_wwm_above_args() {ret void}
10 define amdgpu_cs_chain_preserve void @dont_preserve_args() {ret void}
11 define amdgpu_cs_chain_preserve void @preserve_inactive_lanes_wwm_args() {ret void}
12 define amdgpu_cs_chain_preserve void @dont_preserve_if_no_chain_calls() {ret void}
13 define amdgpu_cs_chain_preserve void @dont_preserve_v0_v7() {ret void}
14 define amdgpu_cs_chain_preserve void @dont_preserve_sgpr() {ret void}
18 # NOTE: Since we don't know what the args are, we rely on the fact that we can't
19 # call llvm.amdgcn.cs.chain with more parameters than we received - so anything
20 # that is used by the SI_CS_CHAIN_TC_W32 is assumed to have been an arg and therefore
# Test: preserve_active_lanes_above_args
# The input writes $vgpr10, which is not one of the registers passed to
# SI_CS_CHAIN_TC_W32 ($sgpr0, $vgpr8, $vgpr9).
# Expected output: $vgpr10 is stored to %stack.0 before its first write and
# reloaded immediately before the chain call. No exec manipulation surrounds
# the store or the reload, and $vgpr10 is added to the live-in list.
24 name: preserve_active_lanes_above_args
25 tracksRegLiveness: true
29 stackPtrOffsetReg: '$sgpr32'
34 liveins: $sgpr0, $vgpr8, $vgpr9
36 ; GCN-LABEL: name: preserve_active_lanes_above_args
37 ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10
39 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
40 ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
41 ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
42 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
43 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
44 ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
45 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
46 renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
47 $vgpr8 = COPY renamable killed $vgpr10
48 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
49 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
50 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Test: preserve_all_lanes_wwm_above_args
# $vgpr10 is live-in, receives a lane spill of $sgpr35 through
# SI_SPILL_S32_TO_VGPR, and is later overwritten by V_MOV_B32. It is not one
# of the registers passed to SI_CS_CHAIN_TC_W32.
# Expected output: $vgpr10 is stored to %stack.0 at the top and reloaded before
# the chain call. Both the store and the reload are bracketed by
# S_OR_SAVEEXEC_B32 -1 (old exec kept in $sgpr1) and a restoring S_MOV_B32 into
# $exec_lo, so the memory access runs with exec OR'ed with -1.
55 name: preserve_all_lanes_wwm_above_args
56 tracksRegLiveness: true
60 stackPtrOffsetReg: '$sgpr32'
67 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
69 ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
70 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
72 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
73 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
74 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
75 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
76 ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
77 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
78 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
79 ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 10, implicit $exec
80 ; GCN-NEXT: $vgpr8 = COPY killed $vgpr10
81 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
82 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
83 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
84 ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
85 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
86 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
87 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
88 $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
90 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
91 $vgpr10 = V_MOV_B32_e32 10, implicit $exec
92 $vgpr8 = COPY killed $vgpr10
93 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
94 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
95 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Test: dont_preserve_args
# Apart from the implicit-defs of $vgpr0-$vgpr7 on the S_NOP, the only VGPR the
# input writes is $vgpr8, which is passed to SI_CS_CHAIN_TC_W32.
# Expected output: the same instruction sequence as the input, with no scratch
# store or reload inserted.
100 name: dont_preserve_args
101 tracksRegLiveness: true
105 stackPtrOffsetReg: '$sgpr32'
106 isChainFunction: true
110 liveins: $sgpr0, $vgpr8, $vgpr9
112 ; GCN-LABEL: name: dont_preserve_args
113 ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9
115 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
116 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
117 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
118 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
119 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
120 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
121 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
122 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
123 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
124 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Test: preserve_inactive_lanes_wwm_args
# $vgpr8, one of the registers passed to SI_CS_CHAIN_TC_W32, receives a lane
# spill of $sgpr35 through SI_SPILL_S32_TO_VGPR and is then overwritten by
# V_MOV_B32.
# Expected output: $vgpr9 is stored to %stack.0 at the top and reloaded before
# the chain call, each time under S_OR_SAVEEXEC_B32 -1 with exec restored from
# $sgpr1 afterwards. The reload carries an implicit $vgpr9 use tied to its def.
# NOTE(review): which property of this function selects $vgpr9 as the saved
# register should be confirmed against its machineFunctionInfo fields.
129 name: preserve_inactive_lanes_wwm_args
130 tracksRegLiveness: true
134 stackPtrOffsetReg: '$sgpr32'
135 isChainFunction: true
141 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
143 ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
144 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
146 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
147 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
148 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
149 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
150 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
151 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
152 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
153 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
154 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
155 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
156 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
157 ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
158 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
159 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
160 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
161 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
162 $sgpr35 = S_MOV_B32 5
163 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
164 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
165 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
166 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
167 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Test: dont_preserve_if_no_chain_calls
# The input writes $vgpr8, $vgpr9 and $vgpr10 and uses $vgpr8 as the target of
# an SGPR lane spill, but the expected output ends in S_ENDPGM 0 rather than an
# SI_CS_CHAIN_TC_W32 call.
# Expected output: no scratch store or reload of any VGPR is inserted.
172 name: dont_preserve_if_no_chain_calls
173 tracksRegLiveness: true
177 stackPtrOffsetReg: '$sgpr32'
178 isChainFunction: true
184 liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
186 ; GCN-LABEL: name: dont_preserve_if_no_chain_calls
187 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
189 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
190 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
191 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
192 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
193 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
194 ; GCN-NEXT: $vgpr9 = V_MOV_B32_e32 20, implicit $exec
195 ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 30, implicit $exec
196 ; GCN-NEXT: S_ENDPGM 0
197 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
198 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
199 $sgpr35 = S_MOV_B32 5
200 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
201 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
202 $vgpr9 = V_MOV_B32_e32 20, implicit $exec
203 $vgpr10 = V_MOV_B32_e32 30, implicit $exec
# Test: dont_preserve_v0_v7
# The input uses $vgpr0 as the target of an SGPR lane spill, then overwrites
# $vgpr0 and $vgpr7 and copies them into $vgpr8 and $vgpr9 for the chain call.
# Expected output: the same instruction sequence as the input; no scratch store
# or reload is inserted for $vgpr0 or $vgpr7.
208 name: dont_preserve_v0_v7
209 tracksRegLiveness: true
213 stackPtrOffsetReg: '$sgpr32'
214 isChainFunction: true
220 liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
222 ; GCN-LABEL: name: dont_preserve_v0_v7
223 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
225 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
226 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
227 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
228 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
229 ; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
230 ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
231 ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
232 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
233 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
234 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
235 renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
236 $sgpr35 = S_MOV_B32 5
237 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
238 renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
239 renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
240 renamable $vgpr8 = COPY killed renamable $vgpr0
241 renamable $vgpr9 = COPY killed renamable $vgpr7
242 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
243 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
244 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Test: dont_preserve_sgpr
# The input writes $sgpr1 (via S_ADD_I32) and then $sgpr0 before the chain
# call; only $sgpr0 is passed to SI_CS_CHAIN_TC_W32.
# Expected output: the same instruction sequence as the input, with no save or
# restore code inserted for the SGPRs.
248 name: dont_preserve_sgpr
249 tracksRegLiveness: true
253 stackPtrOffsetReg: '$sgpr32'
254 isChainFunction: true
260 ; GCN-LABEL: name: dont_preserve_sgpr
261 ; GCN: liveins: $sgpr0
263 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
264 ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
265 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
266 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
267 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
268 renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
269 $sgpr0 = COPY killed renamable $sgpr1
270 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
271 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
272 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0