1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
4 # Tests to check the conservative liveness extension for the wwm registers during SGPR spill lowering.
6 # Even though the VGPR can be shared for the wwm-operand (writelane/readlane get inserted for the SGPR spills)
7 # and the regular operand (%0), they get different registers as we conservatively extend the liveness of the
10 name: test_single_block
11 tracksRegLiveness: true
15 - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
17 isEntryFunction: false
18 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
19 stackPtrOffsetReg: '$sgpr32'
20 frameOffsetReg: '$sgpr33'
24 liveins: $sgpr4, $vgpr2_vgpr3
25 ; GCN-LABEL: name: test_single_block
26 ; GCN: liveins: $sgpr4, $vgpr2_vgpr3
28 ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
29 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
31 ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
32 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
33 ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
35 SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
37 renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
38 %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
39 GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
43 # Due to the presence of the wwm-operand in the divergent flow, the regular variable (%0) must not be assigned the
44 # same register that is allocated for the wwm-operand in writelane/readlane when the SGPR spill is lowered.
48 tracksRegLiveness: true
52 - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
54 isEntryFunction: false
55 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
56 stackPtrOffsetReg: '$sgpr32'
57 frameOffsetReg: '$sgpr33'
60 ; GCN-LABEL: name: test_if_else
62 ; GCN-NEXT: successors: %bb.1(0x80000000)
63 ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
65 ; GCN-NEXT: S_BRANCH %bb.1
68 ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
69 ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
71 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
72 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
75 ; GCN-NEXT: successors: %bb.3(0x80000000)
76 ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
78 ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
79 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr63
81 ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
82 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
83 ; GCN-NEXT: S_BRANCH %bb.3
86 ; GCN-NEXT: liveins: $sgpr10_sgpr11
88 ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
89 ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
92 liveins: $sgpr6, $sgpr10_sgpr11
95 liveins: $sgpr6, $sgpr10_sgpr11
96 %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
97 S_CBRANCH_EXECZ %bb.3, implicit $exec
99 liveins: $sgpr6, $sgpr10_sgpr11
100 SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
102 renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
103 %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
106 liveins: $sgpr10_sgpr11
107 $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
108 S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
112 # The wwm-register used outside the loop should be marked as interfering with
113 # all the regular virtual registers used in the test. The divergent loop index value (%1)
114 # could actually share the same VGPR as the wwm-operand, but since we extend the liveness of
115 # the wwm-operand, an interference will always exist between them.
119 tracksRegLiveness: true
123 - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
125 isEntryFunction: false
126 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
127 stackPtrOffsetReg: '$sgpr32'
128 frameOffsetReg: '$sgpr33'
129 hasSpilledSGPRs: true
131 ; GCN-LABEL: name: test_loop
133 ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
134 ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
136 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
137 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
140 ; GCN-NEXT: successors: %bb.2(0x80000000)
141 ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
143 ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
144 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
146 ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
147 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
148 ; GCN-NEXT: S_BRANCH %bb.2
151 ; GCN-NEXT: successors: %bb.3(0x80000000)
152 ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
154 ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
155 ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
156 ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
157 ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
158 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
159 ; GCN-NEXT: S_BRANCH %bb.3
162 ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
164 ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, [[V_MOV_B32_e32_1]], implicit $exec
165 ; GCN-NEXT: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
166 ; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit $scc
169 ; GCN-NEXT: successors: %bb.3(0x80000000)
170 ; GCN-NEXT: liveins: $sgpr6_sgpr7
172 ; GCN-NEXT: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, [[V_MOV_B32_e32_1]], implicit $exec
173 ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_SUB_U32_e32_]], implicit $exec
174 ; GCN-NEXT: S_BRANCH %bb.3
177 ; GCN-NEXT: liveins: $sgpr6_sgpr7
179 ; GCN-NEXT: $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
180 ; GCN-NEXT: SI_RETURN
182 liveins: $sgpr4, $sgpr10_sgpr11
183 %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
184 S_CBRANCH_EXECZ %bb.2, implicit $exec
186 liveins: $sgpr4, $sgpr10_sgpr11
187 SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
189 renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
190 %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
193 liveins: $sgpr4, $sgpr10_sgpr11
194 S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
195 $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
196 S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
197 %1:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
198 S_CBRANCH_EXECZ %bb.3, implicit $exec
201 $vcc = V_CMP_EQ_U32_e64 0, %1:vgpr_32, implicit $exec
202 $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
203 S_CBRANCH_SCC1 %bb.5, implicit $scc
205 liveins: $sgpr6_sgpr7
206 %2:vgpr_32 = V_SUB_U32_e32 1, %1:vgpr_32, implicit $exec
207 %1:vgpr_32 = V_MOV_B32_e32 %2:vgpr_32, implicit $exec
210 liveins: $sgpr6_sgpr7
211 $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
215 # There must be one KILL instruction for the wwm-operand in every return block.
216 # Because of that, the register allocated for the wwm-operand should be different from the ones
217 # allocated for the regular virtual registers.
220 name: test_multiple_return_blocks
221 tracksRegLiveness: true
225 - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
227 isEntryFunction: false
228 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
229 stackPtrOffsetReg: '$sgpr32'
230 frameOffsetReg: '$sgpr33'
231 hasSpilledSGPRs: true
233 ; GCN-LABEL: name: test_multiple_return_blocks
235 ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
236 ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3
238 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
241 ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3
243 ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
244 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
246 ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
247 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
248 ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
249 ; GCN-NEXT: SI_RETURN
252 ; GCN-NEXT: liveins: $vgpr2_vgpr3
254 ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
255 ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
256 ; GCN-NEXT: SI_RETURN
258 liveins: $sgpr4, $vgpr2_vgpr3
259 S_CBRANCH_EXECZ %bb.2, implicit $exec
261 liveins: $sgpr4, $vgpr2_vgpr3
262 SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
264 renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
265 %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
266 GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
269 liveins: $vgpr2_vgpr3
270 %1:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
271 GLOBAL_STORE_DWORD $vgpr2_vgpr3, %1:vgpr_32, 0, 0, implicit $exec