1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
5 name: test_waitcnt_preexisting_lgkmcnt_unmodified
10 ; GFX9-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
11 ; GFX9: liveins: $vgpr0
13 ; GFX9-NEXT: S_WAITCNT 0
14 ; GFX9-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
15 ; GFX9-NEXT: S_WAITCNT 49279
16 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
17 ; GFX9-NEXT: S_WAITCNT 112
18 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
19 ; GFX9-NEXT: S_ENDPGM 0
20 $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
22 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
23 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
28 name: test_waitcnt_preexisting_vmcnt_unmodified
33 ; GFX9-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
34 ; GFX9: liveins: $vgpr0_vgpr1
36 ; GFX9-NEXT: S_WAITCNT 0
37 ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
38 ; GFX9-NEXT: S_WAITCNT 3952
39 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
40 ; GFX9-NEXT: S_WAITCNT 112
41 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
42 ; GFX9-NEXT: S_ENDPGM 0
43 $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
45 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
46 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
50 # Respect the preexisting waitcnt and add any additionally required wait.
53 name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
58 ; GFX9-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
59 ; GFX9: liveins: $vgpr0
61 ; GFX9-NEXT: S_WAITCNT 0
62 ; GFX9-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
63 ; GFX9-NEXT: S_WAITCNT 112
64 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
65 ; GFX9-NEXT: S_WAITCNT 112
66 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
67 ; GFX9-NEXT: S_ENDPGM 0
68 $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
70 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
71 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
76 name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
81 ; GFX9-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
82 ; GFX9: liveins: $vgpr0_vgpr1
84 ; GFX9-NEXT: S_WAITCNT 0
85 ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
86 ; GFX9-NEXT: S_WAITCNT 112
87 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
88 ; GFX9-NEXT: S_WAITCNT 112
89 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
90 ; GFX9-NEXT: S_ENDPGM 0
91 $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
93 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
94 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
98 # Apply the wait for all counters from a preexisting waitcnt, regardless of
99 # the wait required by the next instruction.
102 name: test_waitcnt_preexisting_apply_all_counters
105 liveins: $vgpr0_vgpr1, $vgpr2
107 ; GFX9-LABEL: name: test_waitcnt_preexisting_apply_all_counters
108 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
110 ; GFX9-NEXT: S_WAITCNT 0
111 ; GFX9-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
112 ; GFX9-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
113 ; GFX9-NEXT: S_WAITCNT 0
114 ; GFX9-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
115 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
116 ; GFX9-NEXT: S_WAITCNT 112
117 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
118 $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
119 $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
121 $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
122 $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
123 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
127 name: test_waitcnt_preexisting_combine_waitcnt
130 liveins: $vgpr0_vgpr1
132 ; GFX9-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
133 ; GFX9: liveins: $vgpr0_vgpr1
135 ; GFX9-NEXT: S_WAITCNT 0
136 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
137 ; GFX9-NEXT: S_WAITCNT 0
138 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
139 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
146 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
150 name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
153 liveins: $vgpr0_vgpr1
155 ; GFX9-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
156 ; GFX9: liveins: $vgpr0_vgpr1
158 ; GFX9-NEXT: S_WAITCNT 0
159 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
160 ; GFX9-NEXT: S_WAITCNT 112
161 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
162 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
165 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
168 # Apply preexisting waitcnt when no wait is immediately needed.
169 # FIXME: Move waitcnt as late as possible.
172 name: test_waitcnt_preexisting_early_wait
175 liveins: $vgpr0_vgpr1
177 ; GFX9-LABEL: name: test_waitcnt_preexisting_early_wait
178 ; GFX9: liveins: $vgpr0_vgpr1
180 ; GFX9-NEXT: S_WAITCNT 0
181 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
182 ; GFX9-NEXT: S_WAITCNT 0
186 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
187 ; GFX9-NEXT: S_ENDPGM 0
188 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
193 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
198 name: test_waitcnt_preexisting_ignore_kill
201 liveins: $vgpr0_vgpr1
203 ; GFX9-LABEL: name: test_waitcnt_preexisting_ignore_kill
204 ; GFX9: liveins: $vgpr0_vgpr1
206 ; GFX9-NEXT: S_WAITCNT 0
207 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
208 ; GFX9-NEXT: S_WAITCNT 3952
209 ; GFX9-NEXT: KILL $vgpr0
210 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
215 # Combine a preexisting waitcnt with the wait inserted at the start of a non-entry function.
218 name: test_waitcnt_preexisting_func_start
221 ; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
223 ; GFX9-NEXT: S_ENDPGM 0
228 # Verify that no extra waitcnt is added after buffer invalidate instructions.
231 name: test_waitcnt_preexisting_buffer_inv
234 ; GFX9-LABEL: name: test_waitcnt_preexisting_buffer_inv
236 ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
237 ; GFX9-NEXT: S_WAITCNT 3952
238 ; GFX9-NEXT: BUFFER_INVL2 implicit $exec
239 ; GFX9-NEXT: BUFFER_WBINVL1_VOL implicit $exec
240 ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
241 ; GFX9-NEXT: S_WAITCNT 112
242 ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
243 ; GFX9-NEXT: S_ENDPGM 0
244 $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
246 BUFFER_INVL2 implicit $exec
247 BUFFER_WBINVL1_VOL implicit $exec
248 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
249 FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr