Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / break-smem-soft-clauses.mir
bloba10cc7f6a693b1b812e838b871373046868d630e
1 # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass  post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
2 # RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs -run-pass  post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
4 ---
5 # Trivial clause at beginning of program
# A lone S_LOAD_DWORD_IMM followed by S_ENDPGM. The GCN checks, which apply to
# both RUN lines, expect the instruction stream to come out unchanged.
6 name: trivial_smem_clause_load_smrd4_x1
8 body: |
9   bb.0:
10     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
11     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
12     ; GCN-NEXT: S_ENDPGM 0
13     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
14     S_ENDPGM 0
15 ...
16 ---
17 # Trivial clause at beginning of program
# Two back-to-back loads. The destinations ($sgpr0, $sgpr1) do not overlap
# either pointer pair ($sgpr10_sgpr11, $sgpr12_sgpr13), and the checks expect
# the two loads to stay adjacent with nothing inserted between them.
18 name: trivial_smem_clause_load_smrd4_x2
20 body: |
21   bb.0:
22     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
23     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
24     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
25     ; GCN-NEXT: S_ENDPGM 0
26     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
27     $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
28     S_ENDPGM 0
29 ...
30 ---
31 # Trivial clause at beginning of program
# Three back-to-back loads into $sgpr0..$sgpr2 through three different pointer
# pairs; no destination overlaps a pointer. The checks expect the sequence to
# be emitted unchanged.
32 name: trivial_smem_clause_load_smrd4_x3
34 body: |
35   bb.0:
36     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
37     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
38     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
39     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
40     ; GCN-NEXT: S_ENDPGM 0
41     $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
42     $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
43     $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
44     S_ENDPGM 0
45 ...
46 ---
47 # Trivial clause at beginning of program
# Four back-to-back loads into $sgpr0..$sgpr3 through four different pointer
# pairs; no destination overlaps a pointer. The checks expect the sequence to
# be emitted unchanged.
48 name: trivial_smem_clause_load_smrd4_x4
50 body: |
51   bb.0:
52     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
53     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
54     ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
55     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
56     ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
57     ; GCN-NEXT: S_ENDPGM 0
58     $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
59     $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
60     $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
61     $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
62     S_ENDPGM 0
63 ...
64 ---
65 # Reuse of same input pointer is OK
# Both loads read through $sgpr10_sgpr11 and write $sgpr12 / $sgpr13, so the
# pointer itself is never overwritten. The checks expect the loads to stay
# adjacent.
66 name: trivial_smem_clause_load_smrd4_x2_sameptr
67 body: |
68   bb.0:
69     ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
70     ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
71     ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
72     ; GCN-NEXT: S_ENDPGM 0
73     $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
74     $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
75     S_ENDPGM 0
76 ...
77 ---
78 # 32-bit load partially clobbers its own ptr reg
# The destination $sgpr10 is the low half of the pointer pair $sgpr10_sgpr11.
# This is the only load in the function, and the checks expect it to be emitted
# as-is with no extra instruction.
79 name: smrd_load4_overwrite_ptr_lo
81 body: |
82   bb.0:
83     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
84     ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
85     ; GCN-NEXT: S_ENDPGM 0
86     $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
87     S_ENDPGM 0
88 ...
89 ---
90 # 32-bit load partially clobbers its own ptr reg
# Same as the _lo variant, except that the destination $sgpr11 is the high half
# of the pointer pair $sgpr10_sgpr11. The checks expect the single load to be
# emitted as-is.
91 name: smrd_load4_overwrite_ptr_hi
93 body: |
94   bb.0:
95     ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
96     ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
97     ; GCN-NEXT: S_ENDPGM 0
98     $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
99     S_ENDPGM 0
102 # 64-bit load clobbers its own ptr reg
# The S_LOAD_DWORDX2_IMM writes the full pair $sgpr10_sgpr11 that it also uses
# as its pointer. It is the only load in the function, and the checks expect it
# to be emitted as-is.
103 name: smrd_load8_overwrite_ptr
105 body: |
106   bb.0:
107     ; GCN-LABEL: name: smrd_load8_overwrite_ptr
108     ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
109     ; GCN-NEXT: S_ENDPGM 0
110     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
111     S_ENDPGM 0
114 # vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
115 # breaks the clause.
# NOTE(review): the comment above names "vmcnt", but every load in this test is
# a scalar (S_LOAD) instruction; scalar loads are presumably tracked by lgkmcnt
# rather than vmcnt -- confirm against the ISA manual. No waitcnt appears in the
# input or in the expected output of this test either.
#
# Input: 16 loads through $sgpr10_sgpr11 into $sgpr13..$sgpr28, then a 17th load
# through $sgpr30_sgpr31 into $sgpr0, then an S_MOV_B32 that lists all 16 earlier
# results as implicit operands. The checks expect the whole sequence to be
# emitted unchanged under both RUN lines.
117 name: break_smem_clause_at_max_smem_clause_size_smrd_load4
119 body: |
120   bb.0:
121     ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
122     ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
123     ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
124     ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
125     ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
126     ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
127     ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
128     ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
129     ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
130     ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
131     ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
132     ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
133     ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
134     ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
135     ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
136     ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
137     ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
138     ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
139     ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
140     ; GCN-NEXT: S_ENDPGM 0
141     $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
142     $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
143     $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
144     $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
146     $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
147     $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
148     $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
149     $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
151     $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
152     $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
153     $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
154     $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
156     $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
157     $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
158     $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
159     $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
161     $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
162     $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
163     S_ENDPGM 0
# Two adjacent loads, each overwriting the low half of its own pointer pair
# ($sgpr10 from $sgpr10_sgpr11, $sgpr12 from $sgpr12_sgpr13).
# Expected: the RUN line with the XNACK prefix (carrizo) gets an S_NOP 0 between
# the loads; the GCN-only RUN line (fiji, -xnack) keeps them adjacent.
167 name: break_smem_clause_simple_load_smrd4_lo_ptr
169 body: |
170   bb.0:
171     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
172     ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
173     ; XNACK-NEXT: S_NOP 0
174     ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
175     ; GCN-NEXT: S_ENDPGM 0
176     $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
177     $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
178     S_ENDPGM 0
# Two adjacent loads into $sgpr0 and $sgpr3 through $sgpr10_sgpr11 and
# $sgpr12_sgpr13. The checks expect them to stay adjacent under both RUN lines.
# NOTE(review): despite the "hi_ptr" name, neither destination overlaps either
# pointer pair in this input -- confirm that this is the intended test body.
182 name: break_smem_clause_simple_load_smrd4_hi_ptr
184 body: |
185   bb.0:
186     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
187     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
188     ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
189     ; GCN-NEXT: S_ENDPGM 0
190     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
191     $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
192     S_ENDPGM 0
# The second 64-bit load writes $sgpr10_sgpr11, which is the pointer pair read by
# the first load.
# Expected: the RUN line with the XNACK prefix (carrizo) gets an S_NOP 0 between
# the loads; the GCN-only RUN line (fiji, -xnack) keeps them adjacent.
196 name: break_smem_clause_simple_load_smrd8_ptr
198 body: |
199   bb.0:
200     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
201     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
202     ; XNACK-NEXT: S_NOP 0
203     ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
204     ; GCN-NEXT: S_ENDPGM 0
205     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
206     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
207     S_ENDPGM 0
# A 64-bit load through $sgpr10_sgpr11 followed by a 128-bit load through
# $sgpr6_sgpr7 into $sgpr12..$sgpr15. Neither destination overlaps a pointer pair
# used in this function, and the checks expect the loads to stay adjacent under
# both RUN lines.
211 name: break_smem_clause_simple_load_smrd16_ptr
213 body: |
214   bb.0:
215     ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
216     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
217     ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
218     ; GCN-NEXT: S_ENDPGM 0
219     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
220     $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
221     S_ENDPGM 0
# Same register pattern as a load that overwrites the previous load's pointer
# ($sgpr10_sgpr11), but the two loads sit in different basic blocks (bb.0, bb.1).
# Expected: the RUN line with the XNACK prefix (carrizo) gets an S_NOP 0 as the
# first instruction of bb.1; the GCN-only RUN line (fiji, -xnack) does not.
225 name: break_smem_clause_block_boundary_load_smrd8_ptr
227 body: |
228   ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
229   ; GCN: bb.0:
230   ; GCN:   successors: %bb.1(0x80000000)
231   ; GCN:   $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
232   ; GCN: bb.1:
233   ; XNACK-NEXT:   S_NOP 0
234   ; GCN-NEXT:   $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
235   ; GCN-NEXT:   S_ENDPGM 0
236   bb.0:
237     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
239   bb.1:
240     $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
241     S_ENDPGM 0
244 # The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): the comment above does not match the test body. The store's
# pointer is $sgpr10_sgpr11 while the load writes $sgpr12, and the checks expect
# the store and load to stay adjacent (no S_NOP) under both RUN lines. Confirm
# whether the registers or the comment should change.
246 name: break_smem_clause_store_load_into_ptr_smrd4
248 body: |
249   bb.0:
250     ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
251     ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
252     ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
253     ; GCN-NEXT: S_ENDPGM 0
254     S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
255     $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
256     S_ENDPGM 0
259 # The load clobbers the data of the store, so it needs to break.
260 # FIXME: Would it be better to s_nop and wait later?
# The store's data operand and the load's destination are both $sgpr8.
# NOTE(review): although the comment says a break is needed, the checks currently
# expect the store and load to stay adjacent (no S_NOP) under both RUN lines.
262 name: break_smem_clause_store_load_into_data_smrd4
264 body: |
265   bb.0:
266     ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
267     ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
268     ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
269     ; GCN-NEXT: S_ENDPGM 0
270     S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
271     $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
272     S_ENDPGM 0
275 # Regular VALU instruction breaks clause, no nop needed
# A V_MOV_B32_e32 sits between the two scalar loads. The checks expect the three
# instructions to be emitted unchanged, with no S_NOP, under both RUN lines.
276 name: valu_inst_breaks_smem_clause
278 body: |
279   bb.0:
280     ; GCN-LABEL: name: valu_inst_breaks_smem_clause
281     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
282     ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
283     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
284     ; GCN-NEXT: S_ENDPGM 0
285     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
286     $vgpr8 = V_MOV_B32_e32 0, implicit $exec
287     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
288     S_ENDPGM 0
291 # Regular SALU instruction breaks clause, no nop needed
# An S_MOV_B32 sits between the two scalar loads. The checks expect the three
# instructions to be emitted unchanged, with no S_NOP, under both RUN lines.
292 name: salu_inst_breaks_smem_clause
294 body: |
295   bb.0:
296     ; GCN-LABEL: name: salu_inst_breaks_smem_clause
297     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
298     ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
299     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
300     ; GCN-NEXT: S_ENDPGM 0
301     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
302     $sgpr8 = S_MOV_B32 0
303     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
304     S_ENDPGM 0
# A DS_READ_B32 sits between the two scalar loads. The checks expect the three
# instructions to be emitted unchanged, with no S_NOP, under both RUN lines.
307 name: ds_inst_breaks_smem_clause
309 body: |
310   bb.0:
311     ; GCN-LABEL: name: ds_inst_breaks_smem_clause
312     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
313     ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
314     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
315     ; GCN-NEXT: S_ENDPGM 0
316     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
317     $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
318     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
319     S_ENDPGM 0
# A FLAT_LOAD_DWORD sits between the two scalar loads. The checks expect the
# three instructions to be emitted unchanged, with no S_NOP, under both RUN lines.
323 name: flat_inst_breaks_smem_clause
325 body: |
326   bb.0:
327     ; GCN-LABEL: name: flat_inst_breaks_smem_clause
328     ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
329     ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
330     ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
331     ; GCN-NEXT: S_ENDPGM 0
332     $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
333     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
334     $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
335     S_ENDPGM 0
338 # FIXME: Should this be handled?
# The first load only reads $sgpr12_sgpr13 as an implicit operand; the second
# load writes that same pair.
# Expected: the RUN line with the XNACK prefix (carrizo) gets an S_NOP 0 between
# the loads; the GCN-only RUN line (fiji, -xnack) keeps them adjacent.
339 name: implicit_use_breaks_smem_clause
341 body: |
342   bb.0:
343     ; GCN-LABEL: name: implicit_use_breaks_smem_clause
344     ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
345     ; XNACK-NEXT: S_NOP 0
346     ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
347     ; GCN-NEXT: S_ENDPGM 0
348     $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
349     $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
350     S_ENDPGM 0