1 # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
2 # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
4 # Trivial clause at beginning of program
5 name: trivial_clause_load_flat4_x1
9 ; GCN-LABEL: name: trivial_clause_load_flat4_x1
10 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
13 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
17 # Trivial clause at beginning of program
18 name: trivial_clause_load_flat4_x2
22 ; GCN-LABEL: name: trivial_clause_load_flat4_x2
23 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
24 ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
27 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
28 %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
32 # Trivial clause at beginning of program
33 name: trivial_clause_load_flat4_x3
37 ; GCN-LABEL: name: trivial_clause_load_flat4_x3
38 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
39 ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
40 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
43 %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
44 %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
45 %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
49 # Trivial clause at beginning of program
50 name: trivial_clause_load_flat4_x4
54 ; GCN-LABEL: name: trivial_clause_load_flat4_x4
55 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
56 ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
57 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
58 ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
61 %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
62 %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
63 %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
64 %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
68 # Reuse of same input pointer is OK
70 name: trivial_clause_load_flat4_x2_sameptr
73 ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
74 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
75 ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
78 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
79 %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
83 # 32-bit load partially clobbers its own ptr reg
84 name: flat_load4_overwrite_ptr_lo
88 ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
89 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
92 %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
96 # 32-bit load partially clobbers its own ptr reg
97 name: flat_load4_overwrite_ptr_hi
101 ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
102 ; GCN: %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
105 %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
109 # 64-bit load clobbers its own ptr reg
110 name: flat_load8_overwrite_ptr
114 ; GCN-LABEL: name: flat_load8_overwrite_ptr
115 ; GCN: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
118 %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
122 # vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
126 name: break_clause_at_max_clause_size_flat_load4
130 ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
131 ; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
132 ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
133 ; GCN-NEXT: %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
134 ; GCN-NEXT: %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
135 ; GCN-NEXT: %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
136 ; GCN-NEXT: %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
137 ; GCN-NEXT: %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
138 ; GCN-NEXT: %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
139 ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
140 ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
141 ; GCN-NEXT: %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
142 ; GCN-NEXT: %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
143 ; GCN-NEXT: %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
144 ; GCN-NEXT: %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
145 ; GCN-NEXT: %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
146 ; GCN-NEXT: %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
147 ; XNACK-NEXT: S_NOP 0
148 ; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
149 ; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
152 %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
153 %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
154 %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
155 %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
157 %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
158 %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
159 %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
160 %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
162 %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
163 %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
164 %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
165 %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
167 %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
168 %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
169 %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
170 %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
172 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
173 %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
178 name: break_clause_simple_load_flat4_lo_ptr
182 ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
183 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
184 ; XNACK-NEXT: S_NOP 0
185 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
188 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
189 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
194 name: break_clause_simple_load_flat4_hi_ptr
198 ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
199 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
200 ; XNACK-NEXT: S_NOP 0
201 ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
204 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
205 %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
210 name: break_clause_simple_load_flat8_ptr
214 ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
215 ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
216 ; XNACK-NEXT: S_NOP 0
217 ; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
220 %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
221 %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
227 name: break_clause_simple_load_flat16_ptr
231 ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
232 ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
233 ; XNACK-NEXT: S_NOP 0
234 ; GCN-NEXT: %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
236 %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
237 %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
242 # The clause is broken by the waitcnt inserted at the end of the
243 # block, so no nop is needed.
246 name: break_clause_block_boundary_load_flat8_ptr
249 ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
251 ; GCN-NEXT: successors: %bb.1(0x80000000)
252 ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
254 ; XNACK-NEXT: S_NOP 0
255 ; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
259 %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
262 %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
266 # The load clobbers the pointer of the store, so it needs to break.
268 name: break_clause_store_load_into_ptr_flat4
272 ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
273 ; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
274 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
277 FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
278 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
282 # The load clobbers the data of the store, so it needs to break.
283 # FIXME: Would it be better to s_nop and wait later?
285 name: break_clause_store_load_into_data_flat4
289 ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
290 ; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
291 ; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
294 FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
295 %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
299 # Regular VALU instruction breaks clause, no nop needed
301 name: valu_inst_breaks_clause
305 ; GCN-LABEL: name: valu_inst_breaks_clause
306 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
307 ; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec
308 ; XNACK-NEXT: S_NOP 0
309 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
312 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
313 %vgpr8 = V_MOV_B32_e32 0, implicit %exec
314 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
318 # Regular SALU instruction breaks clause, no nop needed
320 name: salu_inst_breaks_clause
324 ; GCN-LABEL: name: salu_inst_breaks_clause
325 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
326 ; GCN-NEXT: %sgpr8 = S_MOV_B32 0
327 ; XNACK-NEXT: S_NOP 0
328 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
331 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
333 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
338 name: ds_inst_breaks_clause
342 ; GCN-LABEL: name: ds_inst_breaks_clause
343 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
344 ; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
345 ; XNACK-NEXT: S_NOP 0
346 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
349 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
350 %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
351 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
356 name: smrd_inst_breaks_clause
360 ; GCN-LABEL: name: smrd_inst_breaks_clause
361 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
362 ; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
363 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
366 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
367 %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
368 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
372 # FIXME: Should this be handled?
373 name: implicit_use_breaks_clause
377 ; GCN-LABEL: name: implicit_use_breaks_clause
378 ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
379 ; XNACK-NEXT: S_NOP 0
380 ; GCN-NEXT: %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
383 %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
384 %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
388 name: trivial_clause_load_mubuf4_x2
392 ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
393 ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
394 ; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
397 %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
398 %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
402 name: break_clause_simple_load_mubuf_offen_ptr
406 ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
407 ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
408 ; XNACK-NEXT: S_NOP 0
409 ; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
412 %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
413 %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
417 # BUFFER instructions overwriting their own inputs is supposedly OK.
419 name: mubuf_load4_overwrite_ptr
423 ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
424 ; GCN: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
425 ; GCN-NEXT: %vgpr1 = V_MOV_B32_e32 0, implicit %exec
426 ; GCN-NEXT: %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
428 %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
429 %vgpr1 = V_MOV_B32_e32 0, implicit %exec
430 %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
434 # Break a clause from interference between mubuf and flat instructions
436 name: break_clause_flat_load_mubuf_load
440 ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
441 ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
442 ; XNACK-NEXT: S_NOP 0
443 ; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
446 %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
447 %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
450 # Break a clause from interference between mubuf and flat instructions
452 # GCN-LABEL: name: break_clause_mubuf_load_flat_load
454 # GCN-NEXT: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4
455 # XNACK-NEXT: S_NOP 0
456 # GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3
458 name: break_clause_mubuf_load_flat_load
462 %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
463 %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
469 name: break_clause_atomic_rtn_into_ptr_flat4
473 ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
474 ; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
475 ; XNACK-NEXT: S_NOP 0
476 ; GCN-NEXT: %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
479 %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
480 %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
484 name: break_clause_atomic_nortn_ptr_load_flat4
488 ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
489 ; GCN: FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
490 ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
493 FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
494 %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
499 name: break_clause_atomic_rtn_into_ptr_mubuf4
503 ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
504 ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
505 ; XNACK-NEXT: S_NOP 0
506 ; GCN-NEXT: %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
509 %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
510 %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
515 name: break_clause_atomic_nortn_ptr_load_mubuf4
519 ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
520 ; GCN: BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
521 ; GCN-NEXT: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
524 BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
525 %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
529 # Make sure there is no assert on mubuf instructions which do not have
530 # vaddr, and don't add register to track.
531 name: no_break_clause_mubuf_load_novaddr
535 ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
536 ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
537 ; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
539 %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
540 %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
544 # Loads and stores using different addresses theoretically does not
546 name: mix_load_store_clause
549 ; GCN-LABEL: name: mix_load_store_clause
550 ; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
551 ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
552 ; XNACK-NEXT: S_NOP 0
553 ; GCN-NEXT: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
554 ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
556 FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
557 %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
558 FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
559 %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
563 # Loads and stores using the same address needs a nop.
565 name: mix_load_store_clause_same_address
568 ; GCN-LABEL: name: mix_load_store_clause_same_address
569 ; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
570 ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
571 ; XNACK-NEXT: S_NOP 0
572 ; GCN-NEXT: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
573 ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
575 FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
576 %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
577 FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
578 %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr