1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
5 define amdgpu_kernel void @flat_zero_waitcnt(ptr addrspace(1) %global4,
6 ptr addrspace(1) %global16,
12 define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
16 define amdgpu_kernel void @single_branch_successor_not_next_block() {
20 define amdgpu_kernel void @preexisting_waitcnt() {
24 define amdgpu_kernel void @bundle_no_waitcnt() {
28 define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
32 define amdgpu_kernel void @insert_in_bundle() {
36 define amdgpu_kernel void @exit_bundle() {
40 define amdgpu_kernel void @cross_bundle() {
44 define amdgpu_kernel void @high_register_collision() {
52 # Global loads will return in order so we should:
59 name: flat_zero_waitcnt
62 ; GCN-LABEL: name: flat_zero_waitcnt
64 ; GCN-NEXT: successors: %bb.1(0x80000000)
66 ; GCN-NEXT: S_WAITCNT 0
67 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
68 ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
69 ; GCN-NEXT: S_WAITCNT 3953
70 ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
71 ; GCN-NEXT: S_BRANCH %bb.1
74 ; GCN-NEXT: successors: %bb.2(0x80000000)
76 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
77 ; GCN-NEXT: S_WAITCNT 3952
78 ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
79 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
80 ; GCN-NEXT: S_BRANCH %bb.2
83 ; GCN-NEXT: S_WAITCNT 49279
84 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
85 ; GCN-NEXT: S_WAITCNT 3952
86 ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
87 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
88 ; GCN-NEXT: S_ENDPGM 0
91 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
92 $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
93 $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
98 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
99 $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
100 $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
104 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
105 $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
106 $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
110 # There is only a single fallthrough successor block, so there's no
111 # need to wait immediately.
114 name: single_fallthrough_successor_no_end_block_wait
117 ; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
119 ; GCN-NEXT: successors: %bb.1(0x80000000)
121 ; GCN-NEXT: S_WAITCNT 0
122 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
125 ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
126 ; GCN-NEXT: S_WAITCNT 112
127 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
128 ; GCN-NEXT: S_ENDPGM 0
131 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
134 $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
135 FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
139 # The block has a single predecessor with a single successor, but it
140 # is not the next block so it's non-obvious that the wait is not needed.
145 name: single_branch_successor_not_next_block
148 ; GCN-LABEL: name: single_branch_successor_not_next_block
150 ; GCN-NEXT: successors: %bb.2(0x80000000)
152 ; GCN-NEXT: S_WAITCNT 0
153 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
154 ; GCN-NEXT: S_BRANCH %bb.2
157 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
158 ; GCN-NEXT: S_ENDPGM 0
161 ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
162 ; GCN-NEXT: S_WAITCNT 112
163 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
164 ; GCN-NEXT: S_ENDPGM 0
167 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
171 FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
175 $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
176 FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
180 # GCN-LABEL: name: preexisting_waitcnt{{$}}
181 # GCN: FLAT_LOAD_DWORD
182 # GCN-NEXT: S_WAITCNT 0
184 name: preexisting_waitcnt
185 tracksRegLiveness: true
187 isEntryFunction: true
190 liveins: $vgpr1_vgpr2
191 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
193 FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
199 name: bundle_no_waitcnt
200 tracksRegLiveness: true
202 isEntryFunction: true
205 liveins: $vgpr1_vgpr2
206 ; GCN-LABEL: name: bundle_no_waitcnt
207 ; GCN: liveins: $vgpr1_vgpr2
209 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
214 ; GCN-NEXT: S_WAITCNT 112
215 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
216 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
221 FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
227 # See the waitcnt inside the bundle and don't insert an extra one.
228 name: preexisting_waitcnt_in_bundle
229 tracksRegLiveness: true
231 isEntryFunction: true
234 liveins: $vgpr1_vgpr2
235 ; GCN-LABEL: name: preexisting_waitcnt_in_bundle
236 ; GCN: liveins: $vgpr1_vgpr2
238 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
241 ; GCN-NEXT: S_WAITCNT 0
243 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
244 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
249 FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
255 # Def and use inside bundle
257 name: insert_in_bundle
258 tracksRegLiveness: true
260 isEntryFunction: true
263 liveins: $vgpr1_vgpr2
264 ; GCN-LABEL: name: insert_in_bundle
265 ; GCN: liveins: $vgpr1_vgpr2
267 ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
268 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
269 ; GCN-NEXT: S_WAITCNT 112
270 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
272 BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
273 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
274 FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
280 # Def is last instruction in bundle, use is outside bundle
284 tracksRegLiveness: true
286 isEntryFunction: true
289 liveins: $vgpr1_vgpr2
290 ; GCN-LABEL: name: exit_bundle
291 ; GCN: liveins: $vgpr1_vgpr2
293 ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
294 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
296 ; GCN-NEXT: S_WAITCNT 112
297 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
298 BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
299 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
302 FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
308 # Def is in bundle, use is in another bundle
312 tracksRegLiveness: true
314 isEntryFunction: true
317 liveins: $vgpr1_vgpr2
318 ; GCN-LABEL: name: cross_bundle
319 ; GCN: liveins: $vgpr1_vgpr2
321 ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
322 ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
324 ; GCN-NEXT: S_WAITCNT 112
325 ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
326 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
328 BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
329 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
331 BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
332 FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
337 # agpr should be disjoint and tracked separately from vgpr
339 name: high_register_collision
343 ; GCN-LABEL: name: high_register_collision
345 ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
346 ; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
347 ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
348 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
349 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
350 ; GCN-NEXT: S_ENDPGM 0
351 $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
352 $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
353 $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
354 FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
355 FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr