1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
4 # We're keeping the IR around for the callees and the CCs
7 declare amdgpu_cs_chain void @callee()
8 declare amdgpu_gfx void @gfx_callee()
; Stub definitions (each body is just `ret void`); every name below matches a
; `name:` entry of one of the MIR functions in this file.
10 define amdgpu_cs_chain void @preserve_inactive_wwm() {ret void}
11 define amdgpu_cs_chain void @dont_preserve_wwm_if_no_chain_calls() {ret void}
12 define amdgpu_cs_chain void @dont_preserve_wwm_if_init_whole_wave() {ret void}
13 define amdgpu_cs_chain void @dont_preserve_non_wwm() {ret void}
14 define amdgpu_cs_chain void @dont_preserve_v0_v7() {ret void}
15 define amdgpu_cs_chain void @dont_preserve_sgpr() {ret void}
19 # Check that we preserve the inactive lanes of registers v8+ received in the
20 # MachineFunctionInfo as wwmReservedRegs.
# Expected output: $vgpr8 and $vgpr9 are stored to scratch on entry and
# reloaded right before the chain tail call, each time bracketed by an
# S_XOR_SAVEEXEC_B32 -1 / S_MOV_B32 $exec_lo pair. Both reloads carry an
# implicit tied use of their destination register.
23 name: preserve_inactive_wwm
24 tracksRegLiveness: true
28 stackPtrOffsetReg: '$sgpr32'
35 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
37 ; GCN-LABEL: name: preserve_inactive_wwm
38 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
40 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
41 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
42 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
43 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
44 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
45 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
46 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
47 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
48 ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
49 ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.1, addrspace 5)
50 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
51 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
52 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
53 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
54 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
55 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
# Expected output: no scratch save/restore of $vgpr8 is emitted. $vgpr8 is
# used as the SGPR-spill VGPR and then overwritten, and the checked output
# ends in S_ENDPGM rather than a chain tail call.
60 name: dont_preserve_wwm_if_no_chain_calls
61 tracksRegLiveness: true
65 stackPtrOffsetReg: '$sgpr32'
71 liveins: $sgpr35, $vgpr8
73 ; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls
74 ; GCN: liveins: $sgpr35, $vgpr8
76 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
77 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
78 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
79 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
80 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
81 ; GCN-NEXT: S_ENDPGM 0
82 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
84 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
85 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
86 S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
# Expected output: with hasInitWholeWave set, the body is left unchanged:
# no scratch save/restore is emitted around the chain tail call, so the
# checked live-ins and call operands are exactly those of the input.
91 name: dont_preserve_wwm_if_init_whole_wave
92 tracksRegLiveness: true
96 stackPtrOffsetReg: '$sgpr32'
101 hasInitWholeWave: true
104 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
106 ; GCN-LABEL: name: dont_preserve_wwm_if_init_whole_wave
107 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
109 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
110 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
111 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
112 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
113 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
114 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
# Expected output: identical to the input body. $vgpr8 and $vgpr16 are both
# overwritten before the chain tail call, yet no scratch save/restore is
# expected for either of them.
119 name: dont_preserve_non_wwm
120 tracksRegLiveness: true
124 stackPtrOffsetReg: '$sgpr32'
125 isChainFunction: true
129 liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
131 ; GCN-LABEL: name: dont_preserve_non_wwm
132 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
134 ; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
135 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
136 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
137 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
138 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
139 renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
140 renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
141 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
142 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
143 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
# Expected output: identical to the input body. $vgpr0 (used as the
# SGPR-spill VGPR) and $vgpr7 are overwritten and then copied into
# $vgpr8/$vgpr9 for the chain tail call; no scratch save/restore is expected.
148 name: dont_preserve_v0_v7
149 tracksRegLiveness: true
153 stackPtrOffsetReg: '$sgpr32'
154 isChainFunction: true
160 liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
162 ; GCN-LABEL: name: dont_preserve_v0_v7
163 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
165 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
166 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
167 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
168 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
169 ; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
170 ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
171 ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
172 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
173 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
174 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
175 renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
176 $sgpr35 = S_MOV_B32 5
177 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
178 renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
179 renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
180 renamable $vgpr8 = COPY killed renamable $vgpr0
181 renamable $vgpr9 = COPY killed renamable $vgpr7
182 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
183 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
184 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
# Expected output: identical to the input body. $sgpr0 is recomputed (via
# $sgpr1) before the chain tail call and no spill/restore code is expected.
189 name: dont_preserve_sgpr
190 tracksRegLiveness: true
194 stackPtrOffsetReg: '$sgpr32'
200 ; GCN-LABEL: name: dont_preserve_sgpr
201 ; GCN: liveins: $sgpr0
203 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
204 ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
205 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
206 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
207 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
208 renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
209 $sgpr0 = COPY killed renamable $sgpr1
210 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
211 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
212 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0