1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
4 # Kernels may have no frame pointer (FP)
# Case: the BUFFER_LOAD_DWORD_IDXEN below takes its sgpr_128 operand from a
# copy of $sgpr12_sgpr13_sgpr14_sgpr15 rather than from the scratchRSrcReg
# declared for this function, and the operand after it from an S_MOV_B32 0.
# Expected result per the check lines: %stack.0 stays in the V_MOV_B32_e32
# (it is not folded into the load), while the register that held the
# S_MOV_B32 result is replaced by the immediate 0.
6 name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
7 tracksRegLiveness: true
12 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
15 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
16 stackPtrOffsetReg: '$sgpr32'
19 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
21 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
22 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
23 ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
24 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
25 ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
26 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
27 ; GCN: SI_RETURN_TO_EPILOG $vgpr0
28 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
29 %1:sreg_32_xm0 = S_MOV_B32 0
30 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
31 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
33 SI_RETURN_TO_EPILOG $vgpr0
# Case: the BUFFER_LOAD_DWORD_IDXEN below takes its sgpr_128 operand from a
# copy of $sgpr12_sgpr13_sgpr14_sgpr15 rather than from the scratchRSrcReg
# declared for this function; the operand after it is already the immediate 0.
# Expected result per the check lines: the instructions are left as written,
# with %stack.0 still materialized by the V_MOV_B32_e32 and not folded into
# the load.
38 name: kernel_no_fold_fi_non_stack_rsrc
39 tracksRegLiveness: true
44 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
47 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
48 stackPtrOffsetReg: '$sgpr32'
51 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
53 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
54 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
55 ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
56 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
57 ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
58 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
59 ; GCN: SI_RETURN_TO_EPILOG $vgpr0
60 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
61 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
62 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
64 SI_RETURN_TO_EPILOG $vgpr0
# Case: the OFFEN store and load below use $sgpr0_sgpr1_sgpr2_sgpr3, which is
# the scratchRSrcReg declared for this function, but the operand after it is
# the virtual register %2 defined by S_MOV_B32 0 instead of an immediate.
# Expected result per the check lines: %stack.0 stays in the V_MOV_B32_e32 and
# the memory instructions keep using that virtual register, while %2 is
# replaced by the immediate 0.
69 name: kernel_no_fold_fi_non_stack_soffset
70 tracksRegLiveness: true
75 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
78 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
79 stackPtrOffsetReg: '$sgpr32'
83 ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
84 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
85 ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
86 ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
87 ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
88 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
89 ; GCN: S_ENDPGM 0, implicit $vgpr0
90 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
91 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
92 %2:sreg_32_xm0 = S_MOV_B32 0
94 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
95 %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
97 S_ENDPGM 0, implicit $vgpr0
# Positive case for an entry function (isEntryFunction: true): the OFFEN store
# and load use $sgpr0_sgpr1_sgpr2_sgpr3, which is the declared scratchRSrcReg,
# followed by an immediate 0.
# Expected result per the check lines: %stack.0 appears directly as an operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized it is
# no longer present.
102 name: kernel_fold_fi_mubuf
103 tracksRegLiveness: true
108 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
110 isEntryFunction: true
111 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
112 stackPtrOffsetReg: '$sgpr32'
116 ; GCN-LABEL: name: kernel_fold_fi_mubuf
117 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
118 ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
119 ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
120 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
121 ; GCN: S_ENDPGM 0, implicit $vgpr0
122 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
123 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
125 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
126 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
128 S_ENDPGM 0, implicit $vgpr0
133 # Functions have an unswizzled SP/FP relative to the wave offset
# Non-entry-function variant (isEntryFunction: false, with frameOffsetReg and
# stackPtrOffsetReg both set to $sgpr32). The BUFFER_LOAD_DWORD_IDXEN takes its
# sgpr_128 operand from a copy of $sgpr12_sgpr13_sgpr14_sgpr15 rather than from
# the declared scratchRSrcReg, and the operand after it from an S_MOV_B32 0.
# Expected result per the check lines: %stack.0 stays in the V_MOV_B32_e32
# (not folded into the load), while the register that held the S_MOV_B32
# result is replaced by the immediate 0.
135 name: function_no_fold_fi_non_stack_rsrc_and_soffset
136 tracksRegLiveness: true
141 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
143 isEntryFunction: false
144 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
145 frameOffsetReg: '$sgpr32'
146 stackPtrOffsetReg: '$sgpr32'
149 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
151 ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
152 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
153 ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
154 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
155 ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
156 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
157 ; GCN: SI_RETURN_TO_EPILOG $vgpr0
158 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
159 %1:sreg_32_xm0 = S_MOV_B32 0
160 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
161 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
163 SI_RETURN_TO_EPILOG $vgpr0
# Non-entry-function variant (isEntryFunction: false). The
# BUFFER_LOAD_DWORD_IDXEN takes its sgpr_128 operand from a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 rather than from the declared scratchRSrcReg;
# the operand after it is already the immediate 0.
# Expected result per the check lines: the instructions are left as written,
# with %stack.0 still materialized by the V_MOV_B32_e32 and not folded into
# the load.
168 name: function_no_fold_fi_non_stack_rsrc
169 tracksRegLiveness: true
174 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
176 isEntryFunction: false
177 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
178 frameOffsetReg: '$sgpr32'
179 stackPtrOffsetReg: '$sgpr32'
182 liveins: $sgpr12_sgpr13_sgpr14_sgpr15
184 ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
185 ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
186 ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
187 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
188 ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
189 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
190 ; GCN: SI_RETURN_TO_EPILOG $vgpr0
191 %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
192 %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
193 %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
195 SI_RETURN_TO_EPILOG $vgpr0
# Non-entry-function variant (isEntryFunction: false). The OFFEN store and
# load use $sgpr0_sgpr1_sgpr2_sgpr3, which is the declared scratchRSrcReg,
# followed by an immediate 0.
# Expected result per the check lines: %stack.0 appears directly as an operand
# of both memory instructions and the V_MOV_B32_e32 of %stack.0 is gone.
# NOTE(review): the name says "no_fold ... non_stack_soffset", yet the input
# shown here passes an immediate 0 (no separate register) and the check lines
# expect the frame index to be folded. The instructions match those of
# function_fold_fi_mubuf_wave_relative -- confirm this is intended.
200 name: function_no_fold_fi_non_stack_soffset
201 tracksRegLiveness: true
206 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
208 isEntryFunction: false
209 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
210 frameOffsetReg: '$sgpr32'
211 stackPtrOffsetReg: '$sgpr32'
215 ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
216 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
217 ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
218 ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
219 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
220 ; GCN: S_ENDPGM 0, implicit $vgpr0
221 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
222 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
224 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
225 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
227 S_ENDPGM 0, implicit $vgpr0
# Positive case for a non-entry function (isEntryFunction: false): the OFFEN
# store and load use $sgpr0_sgpr1_sgpr2_sgpr3, which is the declared
# scratchRSrcReg, followed by an immediate 0.
# Expected result per the check lines: %stack.0 appears directly as an operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized it is
# no longer present.
232 name: function_fold_fi_mubuf_wave_relative
233 tracksRegLiveness: true
238 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
240 isEntryFunction: false
241 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
242 frameOffsetReg: '$sgpr32'
243 stackPtrOffsetReg: '$sgpr32'
247 ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
248 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
249 ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
250 ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
251 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
252 ; GCN: S_ENDPGM 0, implicit $vgpr0
253 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
254 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
256 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
257 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
259 S_ENDPGM 0, implicit $vgpr0
# Positive case for a non-entry function (isEntryFunction: false): the OFFEN
# store and load use $sgpr0_sgpr1_sgpr2_sgpr3, which is the declared
# scratchRSrcReg, followed by an immediate 0.
# Expected result per the check lines: %stack.0 appears directly as an operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized it is
# no longer present.
# NOTE(review): apart from the name, the lines shown here match
# function_fold_fi_mubuf_wave_relative; nothing stack-relative (for example
# $sgpr32 as an instruction operand) is visible -- confirm this is intended.
264 name: function_fold_fi_mubuf_stack_relative
265 tracksRegLiveness: true
270 - { id: 0, size: 4, alignment: 4, local-offset: 0 }
272 isEntryFunction: false
273 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
274 frameOffsetReg: '$sgpr32'
275 stackPtrOffsetReg: '$sgpr32'
279 ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
280 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
281 ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
282 ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
283 ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
284 ; GCN: S_ENDPGM 0, implicit $vgpr0
285 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
286 %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
288 BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
289 %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
291 S_ENDPGM 0, implicit $vgpr0