1 # RUN: llc -mtriple=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
2 # RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
5 # Trivial clause at beginning of program
# A single scalar load: the checks expect it to be followed directly by
# S_ENDPGM, i.e. no S_NOP is inserted for either RUN line.
6 name: trivial_smem_clause_load_smrd4_x1
10 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
11 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
12 ; GCN-NEXT: S_ENDPGM 0
13 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
17 # Trivial clause at beginning of program
# Two back-to-back scalar loads whose destinations ($sgpr0, $sgpr1) are not
# part of either pointer pair. The checks expect the loads to stay adjacent
# with no S_NOP for either RUN line.
18 name: trivial_smem_clause_load_smrd4_x2
22 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
23 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
24 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
25 ; GCN-NEXT: S_ENDPGM 0
26 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
27 $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
31 # Trivial clause at beginning of program
# Three back-to-back scalar loads into $sgpr0..$sgpr2 from three different
# pointer pairs; no destination overlaps a pointer. The checks expect the
# sequence unchanged (no S_NOP) for either RUN line.
32 name: trivial_smem_clause_load_smrd4_x3
36 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
37 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
38 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
39 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
40 ; GCN-NEXT: S_ENDPGM 0
41 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
42 $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
43 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
47 # Trivial clause at beginning of program
# Four back-to-back scalar loads into $sgpr0..$sgpr3 from four different
# pointer pairs; no destination overlaps a pointer. The checks expect the
# sequence unchanged (no S_NOP) for either RUN line.
48 name: trivial_smem_clause_load_smrd4_x4
52 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
53 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
54 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
55 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
56 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
57 ; GCN-NEXT: S_ENDPGM 0
58 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
59 $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
60 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
61 $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
65 # Reuse of same input pointer is OK
# Both loads read $sgpr10_sgpr11 and write $sgpr12 / $sgpr13, which are not
# part of that pointer. The checks expect no S_NOP between them for either
# RUN line.
66 name: trivial_smem_clause_load_smrd4_x2_sameptr
69 ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
70 ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
71 ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
72 ; GCN-NEXT: S_ENDPGM 0
73 $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
74 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
78 # 32-bit load partially clobbers its own ptr reg
# The destination $sgpr10 is the low half of the pointer $sgpr10_sgpr11.
# With only this one load, the checks expect it to be followed directly by
# S_ENDPGM (no S_NOP) for either RUN line.
79 name: smrd_load4_overwrite_ptr_lo
83 ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
84 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
85 ; GCN-NEXT: S_ENDPGM 0
86 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
90 # 32-bit load partially clobbers its own ptr reg
# The destination $sgpr11 is the high half of the pointer $sgpr10_sgpr11.
# With only this one load, the checks expect it to be followed directly by
# S_ENDPGM (no S_NOP) for either RUN line.
91 name: smrd_load4_overwrite_ptr_hi
95 ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
96 ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
97 ; GCN-NEXT: S_ENDPGM 0
98 $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
102 # 64-bit load clobbers its own ptr reg
# Destination and pointer are the same pair, $sgpr10_sgpr11. With only this
# one load, the checks expect it to be followed directly by S_ENDPGM
# (no S_NOP) for either RUN line.
103 name: smrd_load8_overwrite_ptr
107 ; GCN-LABEL: name: smrd_load8_overwrite_ptr
108 ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
109 ; GCN-NEXT: S_ENDPGM 0
110 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
114 # lgkmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# Sixteen scalar loads from $sgpr10_sgpr11 into $sgpr13..$sgpr28, then a
# seventeenth load from $sgpr30_sgpr31 into $sgpr0, then an S_MOV_B32 that
# implicitly reads all sixteen earlier results. No destination overlaps a
# pointer pair, and the checks expect the output to match the input exactly
# (no S_NOP) for either RUN line.
117 name: break_smem_clause_at_max_smem_clause_size_smrd_load4
121 ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
122 ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123 ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
124 ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
125 ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
126 ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
127 ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
128 ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
129 ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
130 ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
131 ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
132 ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
133 ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
134 ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
135 ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136 ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
137 ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
138 ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
139 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
140 ; GCN-NEXT: S_ENDPGM 0
141 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
142 $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
143 $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
144 $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
146 $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
147 $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
148 $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
149 $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
151 $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
152 $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
153 $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
154 $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
156 $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
157 $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
158 $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
159 $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
161 $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
162 $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
# Two adjacent loads, each overwriting the low half of its own pointer pair
# ($sgpr10 from $sgpr10_sgpr11, $sgpr12 from $sgpr12_sgpr13).
# Under the XNACK prefix (carrizo RUN line) an S_NOP 0 is expected between
# the loads; the fiji -xnack RUN line expects them back to back.
167 name: break_smem_clause_simple_load_smrd4_lo_ptr
171 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
172 ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
173 ; XNACK-NEXT: S_NOP 0
174 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
175 ; GCN-NEXT: S_ENDPGM 0
176 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
177 $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
# Two adjacent loads into $sgpr0 and $sgpr3; neither destination is part of
# either pointer pair ($sgpr10_sgpr11, $sgpr12_sgpr13).
# NOTE(review): despite the `break_..._hi_ptr` name, the checks expect no
# S_NOP between the loads for either RUN line - confirm this is intended.
182 name: break_smem_clause_simple_load_smrd4_hi_ptr
186 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
187 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
188 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
189 ; GCN-NEXT: S_ENDPGM 0
190 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
191 $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
# The second load writes $sgpr10_sgpr11, which is the pointer read by the
# first load. Under the XNACK prefix (carrizo RUN line) an S_NOP 0 is
# expected between the loads; the fiji -xnack RUN line expects them back to
# back.
196 name: break_smem_clause_simple_load_smrd8_ptr
200 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
201 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
202 ; XNACK-NEXT: S_NOP 0
203 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
204 ; GCN-NEXT: S_ENDPGM 0
205 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
206 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
# A 64-bit load into $sgpr0_sgpr1 followed by a 128-bit load into
# $sgpr12..$sgpr15; neither destination overlaps a pointer pair used here
# ($sgpr10_sgpr11, $sgpr6_sgpr7).
# NOTE(review): despite the `break_` name, the checks expect no S_NOP
# between the loads for either RUN line - confirm this is intended.
211 name: break_smem_clause_simple_load_smrd16_ptr
215 ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
216 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
217 ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
218 ; GCN-NEXT: S_ENDPGM 0
219 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
220 $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
# The checks match a `successors: %bb.1(...)` line ahead of the first load,
# so presumably the two loads sit in different basic blocks (TODO confirm
# against the bb labels). The second load writes $sgpr10_sgpr11, the pointer
# read by the first load; under the XNACK prefix (carrizo RUN line) an
# S_NOP 0 is expected immediately before the second load.
225 name: break_smem_clause_block_boundary_load_smrd8_ptr
228 ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
230 ; GCN: successors: %bb.1(0x80000000)
231 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
233 ; XNACK-NEXT: S_NOP 0
234 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
235 ; GCN-NEXT: S_ENDPGM 0
237 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
240 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
244 # The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): as written, the load's destination is $sgpr12 while the
# store's pointer is $sgpr10_sgpr11, and the checks expect the store and the
# load back to back with no S_NOP for either RUN line - confirm the comment
# above still describes this test.
246 name: break_smem_clause_store_load_into_ptr_smrd4
250 ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
251 ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
252 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
253 ; GCN-NEXT: S_ENDPGM 0
254 S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
255 $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
259 # The load clobbers the data of the store, so it needs to break.
260 # FIXME: Would it be better to s_nop and wait later?
# The load writes $sgpr8, the data register of the preceding store. The
# checks currently expect the store and the load back to back, with no S_NOP
# for either RUN line.
262 name: break_smem_clause_store_load_into_data_smrd4
266 ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
267 ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
268 ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
269 ; GCN-NEXT: S_ENDPGM 0
270 S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
271 $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
275 # Regular VALU instruction breaks clause, no nop needed
# A V_MOV_B32_e32 sits between the two scalar loads; the checks expect the
# three instructions unchanged, with no S_NOP for either RUN line.
276 name: valu_inst_breaks_smem_clause
280 ; GCN-LABEL: name: valu_inst_breaks_smem_clause
281 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
282 ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
283 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
284 ; GCN-NEXT: S_ENDPGM 0
285 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
286 $vgpr8 = V_MOV_B32_e32 0, implicit $exec
287 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
291 # Regular SALU instruction breaks clause, no nop needed
# The checks expect `$sgpr8 = S_MOV_B32 0` between the two scalar loads and
# no S_NOP for either RUN line.
# NOTE(review): confirm the input body actually has that S_MOV_B32 between
# the two loads; without it the GCN-NEXT check for it cannot match.
292 name: salu_inst_breaks_smem_clause
296 ; GCN-LABEL: name: salu_inst_breaks_smem_clause
297 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
298 ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
299 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
300 ; GCN-NEXT: S_ENDPGM 0
301 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
303 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
# A DS_READ_B32 sits between the two scalar loads; the checks expect the
# three instructions unchanged, with no S_NOP for either RUN line.
307 name: ds_inst_breaks_smem_clause
311 ; GCN-LABEL: name: ds_inst_breaks_smem_clause
312 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
313 ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
314 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
315 ; GCN-NEXT: S_ENDPGM 0
316 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
317 $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
318 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
# A FLAT_LOAD_DWORD sits between the two scalar loads; the checks expect the
# three instructions unchanged, with no S_NOP for either RUN line.
323 name: flat_inst_breaks_smem_clause
327 ; GCN-LABEL: name: flat_inst_breaks_smem_clause
328 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
329 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
330 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
331 ; GCN-NEXT: S_ENDPGM 0
332 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
333 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
334 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
338 # FIXME: Should this be handled?
# The first load carries an extra `implicit $sgpr12_sgpr13` operand, and the
# second load writes $sgpr12_sgpr13. Under the XNACK prefix (carrizo RUN
# line) an S_NOP 0 is expected between the loads; the fiji -xnack RUN line
# expects them back to back.
339 name: implicit_use_breaks_smem_clause
343 ; GCN-LABEL: name: implicit_use_breaks_smem_clause
344 ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
345 ; XNACK-NEXT: S_NOP 0
346 ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
347 ; GCN-NEXT: S_ENDPGM 0
348 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
349 $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0