[clang] Fix crashes when passing VLA to va_arg (#119563)
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / spill-wait.mir
blob6983a2742a41c08c82f142cd1f86980df0192ae1
1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s
3 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s
5 # There shall be no S_WAITCNT between two stores.
7 ---
# spill_vgpr_tuple: a V_MOV_B64 defines $vgpr64_vgpr65, then two
# BUFFER_STORE_DWORD_OFFSET instructions store $vgpr64 and $vgpr65, each
# carrying implicit operands on the whole $vgpr64_vgpr65 tuple. The expected
# output for both run lines contains waits only at function entry and none
# between the two stores.
8 name: spill_vgpr_tuple
10 body: |
11   bb.0:
12     liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
14     ; GFX9-LABEL: name: spill_vgpr_tuple
15     ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
16     ; GFX9-NEXT: {{  $}}
17     ; GFX9-NEXT: S_WAITCNT 0
18     ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
19     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
20     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
21     ; GFX9-NEXT: S_ENDPGM 0
22     ;
23     ; GFX12-LABEL: name: spill_vgpr_tuple
24     ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
25     ; GFX12-NEXT: {{  $}}
26     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
27     ; GFX12-NEXT: S_WAIT_EXPCNT 0
28     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
29     ; GFX12-NEXT: S_WAIT_BVHCNT 0
30     ; GFX12-NEXT: S_WAIT_KMCNT 0
31     ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
32     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
33     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
34     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
35     $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
36     BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
37     BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
38     S_ENDPGM 0
39 ...
41 # Make sure that while ignoring implicit operands we will not ignore implicit $vcc on VALU
43 ---
# load_vcc_wait: S_LOAD_DWORD_IMM defines $vcc_lo and the following
# V_ADDC_U32_e32 reads $vcc only through an implicit operand. The expected
# output for both run lines places a wait between the scalar load and the
# V_ADDC.
# NOTE(review): the GFX12 check lines print the V_ADDC implicit operands as
# $vcc_lo while the input writes $vcc - presumably rewritten for the gfx1200
# target; confirm.
44 name: load_vcc_wait
46 body: |
47   bb.0:
48     liveins: $vgpr0, $sgpr10_sgpr11
50     ; GFX9-LABEL: name: load_vcc_wait
51     ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11
52     ; GFX9-NEXT: {{  $}}
53     ; GFX9-NEXT: S_WAITCNT 0
54     ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
55     ; GFX9-NEXT: S_WAITCNT 49279
56     ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
57     ; GFX9-NEXT: S_ENDPGM 0
58     ;
59     ; GFX12-LABEL: name: load_vcc_wait
60     ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11
61     ; GFX12-NEXT: {{  $}}
62     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
63     ; GFX12-NEXT: S_WAIT_EXPCNT 0
64     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
65     ; GFX12-NEXT: S_WAIT_BVHCNT 0
66     ; GFX12-NEXT: S_WAIT_KMCNT 0
67     ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
68     ; GFX12-NEXT: S_WAIT_KMCNT 0
69     ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
70     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
71     $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
72     $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
73     S_ENDPGM 0
74 ...
76 # Make sure that while ignoring implicit operands we will not ignore implicit $flat_scr on FLAT
78 ---
# load_flat_scr_lo_flat_load_wait: S_LOAD_DWORD_IMM defines $flat_scr_lo and
# the following FLAT_LOAD_DWORD reads $flat_scr only through an implicit
# operand. The expected output for both run lines places a wait between the
# scalar load and the FLAT load.
79 name: load_flat_scr_lo_flat_load_wait
81 body: |
82   bb.0:
83     liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
85     ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait
86     ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
87     ; GFX9-NEXT: {{  $}}
88     ; GFX9-NEXT: S_WAITCNT 0
89     ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
90     ; GFX9-NEXT: S_WAITCNT 49279
91     ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
92     ; GFX9-NEXT: S_ENDPGM 0
93     ;
94     ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait
95     ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
96     ; GFX12-NEXT: {{  $}}
97     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
98     ; GFX12-NEXT: S_WAIT_EXPCNT 0
99     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
100     ; GFX12-NEXT: S_WAIT_BVHCNT 0
101     ; GFX12-NEXT: S_WAIT_KMCNT 0
102     ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
103     ; GFX12-NEXT: S_WAIT_KMCNT 0
104     ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
105     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
106     $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
107     $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
108     S_ENDPGM 0
112 name: load_flat_scr_lo_scratch_store_wait
# S_LOAD_DWORD_IMM defines $flat_scr_hi and the following
# SCRATCH_STORE_DWORD_SADDR reads $flat_scr only through an implicit operand.
# The expected output for both run lines places a wait between the scalar
# load and the scratch store.
# NOTE(review): the function name says flat_scr_lo but the load below defines
# $flat_scr_hi - confirm whether the name or the register is the intended one.
114 body: |
115   bb.0:
116     liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
118     ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait
119     ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
120     ; GFX9-NEXT: {{  $}}
121     ; GFX9-NEXT: S_WAITCNT 0
122     ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123     ; GFX9-NEXT: S_WAITCNT 49279
124     ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
125     ; GFX9-NEXT: S_ENDPGM 0
126     ;
127     ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait
128     ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
129     ; GFX12-NEXT: {{  $}}
130     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
131     ; GFX12-NEXT: S_WAIT_EXPCNT 0
132     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
133     ; GFX12-NEXT: S_WAIT_BVHCNT 0
134     ; GFX12-NEXT: S_WAIT_KMCNT 0
135     ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136     ; GFX12-NEXT: S_WAIT_KMCNT 0
137     ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
138     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
139     $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
140     SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
141     S_ENDPGM 0
144 # Check that implicit spill defs do not force wait to zero on the first store
147 name: spill_load_store
# Four BUFFER_LOAD_DWORD_OFFSET instructions define $vgpr0..$vgpr3 (the first
# and last also carry an implicit-def of $vgpr0_vgpr1_vgpr2_vgpr3), then four
# BUFFER_STORE_DWORD_OFFSET instructions store the same registers. The
# expected output has one wait before each store: S_WAITCNT 3955 every time
# for the gfx90a run, and S_WAIT_LOADCNT 3, 2, 1, 0 for the gfx1200 run, so
# the wait before the first store is not a wait to zero.
149 body: |
150   bb.0:
151     liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
153     ; GFX9-LABEL: name: spill_load_store
154     ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
155     ; GFX9-NEXT: {{  $}}
156     ; GFX9-NEXT: S_WAITCNT 0
157     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
158     ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
159     ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
160     ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
161     ; GFX9-NEXT: S_WAITCNT 3955
162     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
163     ; GFX9-NEXT: S_WAITCNT 3955
164     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
165     ; GFX9-NEXT: S_WAITCNT 3955
166     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
167     ; GFX9-NEXT: S_WAITCNT 3955
168     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
169     ; GFX9-NEXT: S_ENDPGM 0
170     ;
171     ; GFX12-LABEL: name: spill_load_store
172     ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
173     ; GFX12-NEXT: {{  $}}
174     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
175     ; GFX12-NEXT: S_WAIT_EXPCNT 0
176     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
177     ; GFX12-NEXT: S_WAIT_BVHCNT 0
178     ; GFX12-NEXT: S_WAIT_KMCNT 0
179     ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
180     ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
181     ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
182     ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
183     ; GFX12-NEXT: S_WAIT_LOADCNT 3
184     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
185     ; GFX12-NEXT: S_WAIT_LOADCNT 2
186     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
187     ; GFX12-NEXT: S_WAIT_LOADCNT 1
188     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
189     ; GFX12-NEXT: S_WAIT_LOADCNT 0
190     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
191     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
192     $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
193     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
194     $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
195     $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
196     BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
197     BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
198     BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
199     BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
200     S_ENDPGM 0
203 # Make sure we have wait to mitigate WAW on gfx12
206 name: scratch_load_waw
# Two scratch loads write $vgpr2 back to back: SCRATCH_LOAD_DWORD defines it,
# and SCRATCH_LOAD_SHORT_D16_HI_SADDR both takes $vgpr2 as an operand and
# redefines it. The gfx1200 expected output has S_WAIT_LOADCNT 0 between the
# two loads; the gfx90a expected output has no wait between them.
207 body:             |
208   bb.0.entry:
209     liveins: $vgpr0, $sgpr0
211     ; GFX9-LABEL: name: scratch_load_waw
212     ; GFX9: liveins: $vgpr0, $sgpr0
213     ; GFX9-NEXT: {{  $}}
214     ; GFX9-NEXT: S_WAITCNT 0
215     ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
216     ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
217     ; GFX9-NEXT: S_ENDPGM 0
218     ;
219     ; GFX12-LABEL: name: scratch_load_waw
220     ; GFX12: liveins: $vgpr0, $sgpr0
221     ; GFX12-NEXT: {{  $}}
222     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
223     ; GFX12-NEXT: S_WAIT_EXPCNT 0
224     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
225     ; GFX12-NEXT: S_WAIT_BVHCNT 0
226     ; GFX12-NEXT: S_WAIT_KMCNT 0
227     ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
228     ; GFX12-NEXT: S_WAIT_LOADCNT 0
229     ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
230     ; GFX12-NEXT: S_ENDPGM 0
    ; Input to the pass; the check lines above are the autogenerated expected output.
231     $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
232     $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
233     S_ENDPGM 0