1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
4 # Kernels may have no frame pointer (FP).
# Test case: a BUFFER_LOAD_DWORD_IDXEN addressed through a V_MOV_B32_e32 of
# %stack.0, where neither of the two SGPR operands is a declared scratch
# register:
#   * the 128-bit operand is %0, a COPY of $sgpr12_sgpr13_sgpr14_sgpr15,
#     not the declared scratchRSrcReg ($sgpr96_sgpr97_sgpr98_sgpr99);
#   * the following operand (the soffset, per the test name) is %1,
#     defined by S_MOV_B32 0.
# Expected result, per the GCN checks: the S_MOV_B32 0 is replaced by an
# immediate 0 operand, but the frame index is NOT folded; the V_MOV_B32_e32
# of %stack.0 stays and still feeds the load.
6 name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
7 tracksRegLiveness: true
12 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
15 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
16 stackPtrOffsetReg: '$sgpr32'
19 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
21 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
22 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
24 ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
25 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
26 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
27 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
28 ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
29 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
30 %1:sreg_32_xm0 = S_MOV_B32 0
31 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
32 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
34 SI_RETURN_TO_EPILOG $vgpr0
# Test case: a BUFFER_LOAD_DWORD_IDXEN addressed through a V_MOV_B32_e32 of
# %stack.0, whose 128-bit SGPR operand is %0 (a COPY of
# $sgpr12_sgpr13_sgpr14_sgpr15) rather than the declared scratchRSrcReg
# ($sgpr96_sgpr97_sgpr98_sgpr99). The operand after it is already a
# literal 0.
# Expected result, per the GCN checks: the instruction sequence is
# unchanged; the frame index stays materialized by V_MOV_B32_e32 and is
# NOT folded into the load.
39 name: kernel_no_fold_fi_non_stack_rsrc
40 tracksRegLiveness: true
45 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
48 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
49 stackPtrOffsetReg: '$sgpr32'
52 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
54 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
55 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
57 ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
58 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
59 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
60 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
61 ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
62 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
63 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
64 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
66 SI_RETURN_TO_EPILOG $vgpr0
# Test case: a BUFFER_STORE_DWORD_OFFEN / BUFFER_LOAD_DWORD_OFFEN pair
# addressed through %0, a V_MOV_B32_e32 of %stack.0. The 128-bit SGPR
# operand is the declared scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3), but
# the operand after it (the soffset, per the test name) is the virtual
# register %2, defined by S_MOV_B32 0.
# Expected result, per the GCN checks: %2 is replaced by an immediate 0 in
# both instructions, but the frame index is NOT folded; the V_MOV_B32_e32
# of %stack.0 stays and is still used as the address operand.
71 name: kernel_no_fold_fi_non_stack_soffset
72 tracksRegLiveness: true
77 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
80 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
81 stackPtrOffsetReg: '$sgpr32'
85 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
86 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
87 ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
88 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
89 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
90 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
91 ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
92 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
93 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
94 %2:sreg_32_xm0 = S_MOV_B32 0
96 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
97 %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
99 S_ENDPGM 0, implicit $vgpr0
# Test case (positive fold) in an entry function (isEntryFunction: true):
# a BUFFER_STORE_DWORD_OFFEN / BUFFER_LOAD_DWORD_OFFEN pair addressed
# through %0, a V_MOV_B32_e32 of %stack.0. The 128-bit SGPR operand is the
# declared scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) and the operand after
# it is a literal 0.
# Expected result, per the GCN checks: %stack.0 is substituted directly for
# %0 in both instructions, and the V_MOV_B32_e32 that materialized the
# frame index no longer appears in the output.
104 name: kernel_fold_fi_mubuf
105 tracksRegLiveness: true
110 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
112 isEntryFunction: true
113 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
114 stackPtrOffsetReg: '$sgpr32'
118 ; GCN-LABEL: name: kernel_fold_fi_mubuf
119 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
120 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
121 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
122 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
123 ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
124 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
125 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
127 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
128 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
130 S_ENDPGM 0, implicit $vgpr0
135 # Functions have an unswizzled stack pointer (SP) / frame pointer (FP) that is relative to the wave offset.
# Test case in a non-entry function (isEntryFunction: false, with
# frameOffsetReg and stackPtrOffsetReg both set to $sgpr32): a
# BUFFER_LOAD_DWORD_IDXEN addressed through a V_MOV_B32_e32 of %stack.0,
# where neither SGPR operand is a declared scratch register:
#   * the 128-bit operand is %0, a COPY of $sgpr12_sgpr13_sgpr14_sgpr15,
#     not the declared scratchRSrcReg ($sgpr96_sgpr97_sgpr98_sgpr99);
#   * the following operand (the soffset, per the test name) is %1,
#     defined by S_MOV_B32 0.
# Expected result, per the GCN checks: the S_MOV_B32 0 is replaced by an
# immediate 0 operand, but the frame index is NOT folded; the V_MOV_B32_e32
# of %stack.0 stays and still feeds the load.
137 name: function_no_fold_fi_non_stack_rsrc_and_soffset
138 tracksRegLiveness: true
143 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
145 isEntryFunction: false
146 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
147 frameOffsetReg: '$sgpr32'
148 stackPtrOffsetReg: '$sgpr32'
151 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
153 ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
154 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
156 ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
157 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
158 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
159 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
160 ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
161 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
162 %1:sreg_32_xm0 = S_MOV_B32 0
163 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
164 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
166 SI_RETURN_TO_EPILOG $vgpr0
# Test case in a non-entry function (isEntryFunction: false, with
# frameOffsetReg and stackPtrOffsetReg both set to $sgpr32): a
# BUFFER_LOAD_DWORD_IDXEN addressed through a V_MOV_B32_e32 of %stack.0,
# whose 128-bit SGPR operand is %0 (a COPY of $sgpr12_sgpr13_sgpr14_sgpr15)
# rather than the declared scratchRSrcReg ($sgpr96_sgpr97_sgpr98_sgpr99).
# The operand after it is already a literal 0.
# Expected result, per the GCN checks: the instruction sequence is
# unchanged; the frame index stays materialized by V_MOV_B32_e32 and is
# NOT folded into the load.
171 name: function_no_fold_fi_non_stack_rsrc
172 tracksRegLiveness: true
177 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
179 isEntryFunction: false
180 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
181 frameOffsetReg: '$sgpr32'
182 stackPtrOffsetReg: '$sgpr32'
185 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
187 ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
188 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
190 ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
191 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
192 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
193 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
194 ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
195 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
196 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
197 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
199 SI_RETURN_TO_EPILOG $vgpr0
# Test case in a non-entry function (isEntryFunction: false, with
# frameOffsetReg and stackPtrOffsetReg both set to $sgpr32): a
# BUFFER_STORE_DWORD_OFFEN / BUFFER_LOAD_DWORD_OFFEN pair addressed through
# %0, a V_MOV_B32_e32 of %stack.0. The 128-bit SGPR operand is the declared
# scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) and the operand after it is a
# literal 0.
# Expected result, per the GCN checks: %stack.0 is substituted directly for
# %0 in both instructions, and the V_MOV_B32_e32 that materialized the
# frame index no longer appears in the output.
# NOTE(review): the name says "no_fold ... non_stack_soffset", yet the input
# below passes a literal 0 (not a separate SGPR) and the checks expect the
# frame index to be folded. Confirm whether the name or the input is stale.
204 name: function_no_fold_fi_non_stack_soffset
205 tracksRegLiveness: true
210 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
212 isEntryFunction: false
213 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
214 frameOffsetReg: '$sgpr32'
215 stackPtrOffsetReg: '$sgpr32'
219 ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
220 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
221 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
222 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
223 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
224 ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
225 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
226 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
228 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
229 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
231 S_ENDPGM 0, implicit $vgpr0
# Test case (positive fold) in a non-entry function (isEntryFunction: false,
# with frameOffsetReg and stackPtrOffsetReg both set to $sgpr32): a
# BUFFER_STORE_DWORD_OFFEN / BUFFER_LOAD_DWORD_OFFEN pair addressed through
# %0, a V_MOV_B32_e32 of %stack.0. The 128-bit SGPR operand is the declared
# scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) and the operand after it is a
# literal 0.
# Expected result, per the GCN checks: %stack.0 is substituted directly for
# %0 in both instructions, and the V_MOV_B32_e32 that materialized the
# frame index no longer appears in the output.
# NOTE(review): apart from the name, the register setup and instructions
# shown here match function_fold_fi_mubuf_stack_relative. Confirm whether
# the "wave_relative" variant is meant to differ.
236 name: function_fold_fi_mubuf_wave_relative
237 tracksRegLiveness: true
242 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
244 isEntryFunction: false
245 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
246 frameOffsetReg: '$sgpr32'
247 stackPtrOffsetReg: '$sgpr32'
251 ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
252 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
253 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
254 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
255 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
256 ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
257 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
258 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
260 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
261 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
263 S_ENDPGM 0, implicit $vgpr0
# Test case (positive fold) in a non-entry function (isEntryFunction: false,
# with frameOffsetReg and stackPtrOffsetReg both set to $sgpr32): a
# BUFFER_STORE_DWORD_OFFEN / BUFFER_LOAD_DWORD_OFFEN pair addressed through
# %0, a V_MOV_B32_e32 of %stack.0. The 128-bit SGPR operand is the declared
# scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) and the operand after it is a
# literal 0.
# Expected result, per the GCN checks: %stack.0 is substituted directly for
# %0 in both instructions, and the V_MOV_B32_e32 that materialized the
# frame index no longer appears in the output.
# NOTE(review): apart from the name, the register setup and instructions
# shown here match function_fold_fi_mubuf_wave_relative. Confirm whether
# the "stack_relative" variant is meant to differ.
268 name: function_fold_fi_mubuf_stack_relative
269 tracksRegLiveness: true
274 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
276 isEntryFunction: false
277 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
278 frameOffsetReg: '$sgpr32'
279 stackPtrOffsetReg: '$sgpr32'
283 ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
284 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
285 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
286 ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
287 ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
288 ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
289 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
290 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
292 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
293 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
295 S_ENDPGM 0, implicit $vgpr0