1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s
3 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s
5 # There shall be no S_WAITCNT between two stores.
12 liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
14 ; GFX9-LABEL: name: spill_vgpr_tuple
15 ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
17 ; GFX9-NEXT: S_WAITCNT 0
18 ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
19 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
20 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
21 ; GFX9-NEXT: S_ENDPGM 0
23 ; GFX12-LABEL: name: spill_vgpr_tuple
24 ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
26 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
27 ; GFX12-NEXT: S_WAIT_EXPCNT 0
28 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
29 ; GFX12-NEXT: S_WAIT_BVHCNT 0
30 ; GFX12-NEXT: S_WAIT_KMCNT 0
31 ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
32 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
33 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
34 ; GFX12-NEXT: S_ENDPGM 0
35 $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
36 BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
37 BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
41 # Make sure that while ignoring implicit operands we will not ignore implicit $vcc on VALU
48 liveins: $vgpr0, $sgpr10_sgpr11
50 ; GFX9-LABEL: name: load_vcc_wait
51 ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11
53 ; GFX9-NEXT: S_WAITCNT 0
54 ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
55 ; GFX9-NEXT: S_WAITCNT 49279
56 ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
57 ; GFX9-NEXT: S_ENDPGM 0
59 ; GFX12-LABEL: name: load_vcc_wait
60 ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11
62 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
63 ; GFX12-NEXT: S_WAIT_EXPCNT 0
64 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
65 ; GFX12-NEXT: S_WAIT_BVHCNT 0
66 ; GFX12-NEXT: S_WAIT_KMCNT 0
67 ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
68 ; GFX12-NEXT: S_WAIT_KMCNT 0
69 ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
70 ; GFX12-NEXT: S_ENDPGM 0
71 $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
72 $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
76 # Make sure that while ignoring implicit operands we will not ignore implicit $flat_scr on FLAT
79 name: load_flat_scr_lo_flat_load_wait
83 liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
85 ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait
86 ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
88 ; GFX9-NEXT: S_WAITCNT 0
89 ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
90 ; GFX9-NEXT: S_WAITCNT 49279
91 ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
92 ; GFX9-NEXT: S_ENDPGM 0
94 ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait
95 ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
97 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
98 ; GFX12-NEXT: S_WAIT_EXPCNT 0
99 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
100 ; GFX12-NEXT: S_WAIT_BVHCNT 0
101 ; GFX12-NEXT: S_WAIT_KMCNT 0
102 ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
103 ; GFX12-NEXT: S_WAIT_KMCNT 0
104 ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
105 ; GFX12-NEXT: S_ENDPGM 0
106 $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
107 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
112 name: load_flat_scr_lo_scratch_store_wait
116 liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
118 ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait
119 ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
121 ; GFX9-NEXT: S_WAITCNT 0
122 ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123 ; GFX9-NEXT: S_WAITCNT 49279
124 ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
125 ; GFX9-NEXT: S_ENDPGM 0
127 ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait
128 ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
130 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
131 ; GFX12-NEXT: S_WAIT_EXPCNT 0
132 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
133 ; GFX12-NEXT: S_WAIT_BVHCNT 0
134 ; GFX12-NEXT: S_WAIT_KMCNT 0
135 ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136 ; GFX12-NEXT: S_WAIT_KMCNT 0
137 ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
138 ; GFX12-NEXT: S_ENDPGM 0
139 $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
140 SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
144 # Check that implicit spill defs do not force wait to zero on the first store
147 name: spill_load_store
151 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
153 ; GFX9-LABEL: name: spill_load_store
154 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
156 ; GFX9-NEXT: S_WAITCNT 0
157 ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
158 ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
159 ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
160 ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
161 ; GFX9-NEXT: S_WAITCNT 3955
162 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
163 ; GFX9-NEXT: S_WAITCNT 3955
164 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
165 ; GFX9-NEXT: S_WAITCNT 3955
166 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
167 ; GFX9-NEXT: S_WAITCNT 3955
168 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
169 ; GFX9-NEXT: S_ENDPGM 0
171 ; GFX12-LABEL: name: spill_load_store
172 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
174 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
175 ; GFX12-NEXT: S_WAIT_EXPCNT 0
176 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
177 ; GFX12-NEXT: S_WAIT_BVHCNT 0
178 ; GFX12-NEXT: S_WAIT_KMCNT 0
179 ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
180 ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
181 ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
182 ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
183 ; GFX12-NEXT: S_WAIT_LOADCNT 3
184 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
185 ; GFX12-NEXT: S_WAIT_LOADCNT 2
186 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
187 ; GFX12-NEXT: S_WAIT_LOADCNT 1
188 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
189 ; GFX12-NEXT: S_WAIT_LOADCNT 0
190 ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
191 ; GFX12-NEXT: S_ENDPGM 0
192 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
193 $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
194 $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
195 $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
196 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
197 BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
198 BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
199 BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
203 # Make sure we have a wait to mitigate WAW on gfx12
206 name: scratch_load_waw
209 liveins: $vgpr0, $sgpr0
211 ; GFX9-LABEL: name: scratch_load_waw
212 ; GFX9: liveins: $vgpr0, $sgpr0
214 ; GFX9-NEXT: S_WAITCNT 0
215 ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
216 ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
217 ; GFX9-NEXT: S_ENDPGM 0
219 ; GFX12-LABEL: name: scratch_load_waw
220 ; GFX12: liveins: $vgpr0, $sgpr0
222 ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
223 ; GFX12-NEXT: S_WAIT_EXPCNT 0
224 ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
225 ; GFX12-NEXT: S_WAIT_BVHCNT 0
226 ; GFX12-NEXT: S_WAIT_KMCNT 0
227 ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
228 ; GFX12-NEXT: S_WAIT_LOADCNT 0
229 ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
230 ; GFX12-NEXT: S_ENDPGM 0
231 $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
232 $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr