1 # RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
2 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
5 define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
6 <4 x i32> addrspace(1)* %global16,
12 define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
16 define amdgpu_kernel void @single_branch_successor_not_next_block() {
20 define amdgpu_kernel void @preexisting_waitcnt() {
24 define amdgpu_kernel void @bundle_no_waitcnt() {
28 define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
32 define amdgpu_kernel void @insert_in_bundle() {
36 define amdgpu_kernel void @exit_bundle() {
40 define amdgpu_kernel void @cross_bundle() {
47 # CHECK-LABEL: name: flat_zero_waitcnt
50 # CHECK: FLAT_LOAD_DWORD
51 # CHECK: FLAT_LOAD_DWORDX4
52 # Global loads will return in order, so we should emit:
53 # s_waitcnt vmcnt(1) lgkmcnt(1)
54 # CHECK-NEXT: S_WAITCNT 369
57 # CHECK: FLAT_LOAD_DWORD
58 # GFX89: S_WAITCNT 112
59 # CHECK: FLAT_LOAD_DWORDX4
62 # CHECK: FLAT_LOAD_DWORD
63 # GFX89: S_WAITCNT 112
64 # CHECK: FLAT_LOAD_DWORDX4
66 name: flat_zero_waitcnt
71 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
72 $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
73 $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
78 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
79 $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
80 $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
84 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
85 $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
86 $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
90 # There is only a single fallthrough successor block, so there's no
91 # need to wait immediately.
93 # CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
94 # CHECK: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2
95 # CHECK-NOT: S_WAITCNT
98 # CHECK-NEXT: V_LSHLREV_B64
99 # CHECK-NEXT: S_WAITCNT 112
100 # CHECK-NEXT: FLAT_STORE_DWORD
101 name: single_fallthrough_successor_no_end_block_wait
106 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
109 $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
110 FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
114 # The block has a single predecessor with a single successor, but it
115 # is not the next block so it's non-obvious that the wait is not needed.
118 # CHECK-LABEL: name: single_branch_successor_not_next_block
121 # CHECK-NEXT: FLAT_STORE_DWORD
122 # CHECK-NEXT: S_ENDPGM 0
125 # CHECK-NEXT: V_LSHLREV_B64
126 # CHECK-NEXT: S_WAITCNT 112
127 # CHECK-NEXT: FLAT_STORE_DWORD
128 name: single_branch_successor_not_next_block
133 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
137 FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
141 $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
142 FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
146 # CHECK-LABEL: name: preexisting_waitcnt{{$}}
147 # CHECK: FLAT_LOAD_DWORD
148 # CHECK-NEXT: S_WAITCNT 0
149 # CHECK-NOT: S_WAITCNT
150 name: preexisting_waitcnt
151 tracksRegLiveness: true
153 isEntryFunction: true
156 liveins: $vgpr1_vgpr2
157 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
159 FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
165 # CHECK-LABEL: name: bundle_no_waitcnt{{$}}
166 # CHECK: FLAT_LOAD_DWORD
171 # CHECK-NEXT: S_WAITCNT 112
172 name: bundle_no_waitcnt
173 tracksRegLiveness: true
175 isEntryFunction: true
178 liveins: $vgpr1_vgpr2
179 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
184 FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
190 # See the waitcnt inside the bundle and don't insert an extra one.
191 # CHECK-LABEL: name: preexisting_waitcnt_in_bundle{{$}}
192 # CHECK: FLAT_LOAD_DWORD
194 # CHECK-NOT: S_WAITCNT
195 name: preexisting_waitcnt_in_bundle
196 tracksRegLiveness: true
198 isEntryFunction: true
201 liveins: $vgpr1_vgpr2
202 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
207 FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
213 # Def and use inside bundle
214 # CHECK-LABEL: name: insert_in_bundle{{$}}
215 # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
216 # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
217 # CHECK-NEXT: S_WAITCNT 112
218 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
221 name: insert_in_bundle
222 tracksRegLiveness: true
224 isEntryFunction: true
227 liveins: $vgpr1_vgpr2
228 BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
229 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
230 FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
236 # Def is last instruction in bundle, use is outside bundle
238 # CHECK-LABEL: name: exit_bundle{{$}}
239 # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
240 # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
242 # CHECK-NEXT: S_WAITCNT 112
243 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
246 tracksRegLiveness: true
248 isEntryFunction: true
251 liveins: $vgpr1_vgpr2
252 BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
253 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
256 FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
262 # Def is in bundle, use is in another bundle
264 # CHECK-LABEL: name: cross_bundle{{$}}
265 # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
266 # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
268 # CHECK-NEXT: S_WAITCNT 112
269 # CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
270 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
274 tracksRegLiveness: true
276 isEntryFunction: true
279 liveins: $vgpr1_vgpr2
280 BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
281 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
283 BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
284 FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr