Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / tools / llvm-mca / AMDGPU / gfx9-retireooo.s
blob706ed36f9e980abd0052dd7d2f8ce567825f7ef9
1 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2 # RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx900 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
# Input sequence analysed by llvm-mca for this test: 36 instructions, simulated
# for a single iteration with the timeline view enabled (see the command at the
# top of the file). The expectations further down were generated from exactly
# this sequence, so any change to the instructions or their order requires
# regenerating them with utils/update_mca_test_checks.py.
#
# Two scalar loads through the pointer pair in s[0:1] (offsets 0x24 and 0x2c).
# The second load writes its result back into s[0:1] itself.
4 s_load_dwordx2 s[2:3], s[0:1], 0x24
5 s_load_dwordx2 s[0:1], s[0:1], 0x2c
# Wait for lgkmcnt to reach 0 ahead of the first reads of s2/s3 (presumably so
# the scalar load results are available -- confirm against the ISA manual).
6 s_waitcnt lgkmcnt(0)
# Copy s[2:3] into v[0:1], which the flat loads below use as address operand.
7 v_mov_b32_e32 v0, s2
8 v_mov_b32_e32 v1, s3
# Four flat loads from v[0:1] at offsets 0, 8, 16 and 24 into v2..v5. The
# expectations below list each of them with a latency of 80 cycles.
9 flat_load_dword v2, v[0:1]
10 flat_load_dword v3, v[0:1] offset:8
11 flat_load_dword v4, v[0:1] offset:16
12 flat_load_dword v5, v[0:1] offset:24
# v0/v1 are rewritten here from s[0:1]; per the recorded timeline the flat loads
# that took v[0:1] as their address are still executing at this point.
13 v_mov_b32_e32 v0, s0
14 v_mov_b32_e32 v1, s1
# 24 further moves, s6..s29 -> v6..v29. Nothing earlier in this file writes
# s6..s29 and nothing later reads v6..v29.
# NOTE(review): these look like independent single-cycle filler that completes
# while the long-latency flat loads are in flight (the file name suggests the
# test targets out-of-order retirement) -- confirm with the test's author.
15 v_mov_b32_e32 v6, s6
16 v_mov_b32_e32 v7, s7
17 v_mov_b32_e32 v8, s8
18 v_mov_b32_e32 v9, s9
19 v_mov_b32_e32 v10, s10
20 v_mov_b32_e32 v11, s11
21 v_mov_b32_e32 v12, s12
22 v_mov_b32_e32 v13, s13
23 v_mov_b32_e32 v14, s14
24 v_mov_b32_e32 v15, s15
25 v_mov_b32_e32 v16, s16
26 v_mov_b32_e32 v17, s17
27 v_mov_b32_e32 v18, s18
28 v_mov_b32_e32 v19, s19
29 v_mov_b32_e32 v20, s20
30 v_mov_b32_e32 v21, s21
31 v_mov_b32_e32 v22, s22
32 v_mov_b32_e32 v23, s23
33 v_mov_b32_e32 v24, s24
34 v_mov_b32_e32 v25, s25
35 v_mov_b32_e32 v26, s26
36 v_mov_b32_e32 v27, s27
37 v_mov_b32_e32 v28, s28
38 v_mov_b32_e32 v29, s29
# Final wait on both vmcnt(0) and lgkmcnt(0). In the recorded timeline this is
# the last instruction to dispatch, well after every v_mov above has finished.
39 s_waitcnt vmcnt(0) lgkmcnt(0)
41 # CHECK: Iterations: 1
42 # CHECK-NEXT: Instructions: 36
43 # CHECK-NEXT: Total Cycles: 94
44 # CHECK-NEXT: Total uOps: 36
46 # CHECK: Dispatch Width: 1
47 # CHECK-NEXT: uOps Per Cycle: 0.38
48 # CHECK-NEXT: IPC: 0.38
49 # CHECK-NEXT: Block RThroughput: 36.0
51 # CHECK: Instruction Info:
52 # CHECK-NEXT: [1]: #uOps
53 # CHECK-NEXT: [2]: Latency
54 # CHECK-NEXT: [3]: RThroughput
55 # CHECK-NEXT: [4]: MayLoad
56 # CHECK-NEXT: [5]: MayStore
57 # CHECK-NEXT: [6]: HasSideEffects (U)
59 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
60 # CHECK-NEXT: 1 5 1.00 * s_load_dwordx2 s[2:3], s[0:1], 0x24
61 # CHECK-NEXT: 1 5 1.00 * s_load_dwordx2 s[0:1], s[0:1], 0x2c
62 # CHECK-NEXT: 1 1 1.00 U s_waitcnt lgkmcnt(0)
63 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v0, s2
64 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v1, s3
65 # CHECK-NEXT: 1 80 1.00 * U flat_load_dword v2, v[0:1]
66 # CHECK-NEXT: 1 80 1.00 * U flat_load_dword v3, v[0:1] offset:8
67 # CHECK-NEXT: 1 80 1.00 * U flat_load_dword v4, v[0:1] offset:16
68 # CHECK-NEXT: 1 80 1.00 * U flat_load_dword v5, v[0:1] offset:24
69 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v0, s0
70 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v1, s1
71 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v6, s6
72 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v7, s7
73 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v8, s8
74 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v9, s9
75 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v10, s10
76 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v11, s11
77 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v12, s12
78 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v13, s13
79 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v14, s14
80 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v15, s15
81 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v16, s16
82 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v17, s17
83 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v18, s18
84 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v19, s19
85 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v20, s20
86 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v21, s21
87 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v22, s22
88 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v23, s23
89 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v24, s24
90 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v25, s25
91 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v26, s26
92 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v27, s27
93 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v28, s28
94 # CHECK-NEXT: 1 1 1.00 U v_mov_b32_e32 v29, s29
95 # CHECK-NEXT: 1 1 1.00 U s_waitcnt vmcnt(0) lgkmcnt(0)
97 # CHECK: Resources:
98 # CHECK-NEXT: [0] - HWBranch
99 # CHECK-NEXT: [1] - HWExport
100 # CHECK-NEXT: [2] - HWLGKM
101 # CHECK-NEXT: [3] - HWSALU
102 # CHECK-NEXT: [4] - HWVALU
103 # CHECK-NEXT: [5] - HWVMEM
104 # CHECK-NEXT: [6] - HWXDL
106 # CHECK: Resource pressure per iteration:
107 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
108 # CHECK-NEXT: - - 2.00 2.00 28.00 4.00 -
110 # CHECK: Resource pressure by instruction:
111 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
112 # CHECK-NEXT: - - 1.00 - - - - s_load_dwordx2 s[2:3], s[0:1], 0x24
113 # CHECK-NEXT: - - 1.00 - - - - s_load_dwordx2 s[0:1], s[0:1], 0x2c
114 # CHECK-NEXT: - - - 1.00 - - - s_waitcnt lgkmcnt(0)
115 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v0, s2
116 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v1, s3
117 # CHECK-NEXT: - - - - - 1.00 - flat_load_dword v2, v[0:1]
118 # CHECK-NEXT: - - - - - 1.00 - flat_load_dword v3, v[0:1] offset:8
119 # CHECK-NEXT: - - - - - 1.00 - flat_load_dword v4, v[0:1] offset:16
120 # CHECK-NEXT: - - - - - 1.00 - flat_load_dword v5, v[0:1] offset:24
121 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v0, s0
122 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v1, s1
123 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v6, s6
124 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v7, s7
125 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v8, s8
126 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v9, s9
127 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v10, s10
128 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v11, s11
129 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v12, s12
130 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v13, s13
131 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v14, s14
132 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v15, s15
133 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v16, s16
134 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v17, s17
135 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v18, s18
136 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v19, s19
137 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v20, s20
138 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v21, s21
139 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v22, s22
140 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v23, s23
141 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v24, s24
142 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v25, s25
143 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v26, s26
144 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v27, s27
145 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v28, s28
146 # CHECK-NEXT: - - - - 1.00 - - v_mov_b32_e32 v29, s29
147 # CHECK-NEXT: - - - 1.00 - - - s_waitcnt vmcnt(0) lgkmcnt(0)
149 # CHECK: Timeline view:
150 # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123
151 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789
153 # CHECK: [0,0] DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[2:3], s[0:1], 0x24
154 # CHECK-NEXT: [0,1] .DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[0:1], s[0:1], 0x2c
155 # CHECK-NEXT: [0,2] . .DE . . . . . . . . . . . . . . . . . . s_waitcnt lgkmcnt(0)
156 # CHECK-NEXT: [0,3] . . DE . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s2
157 # CHECK-NEXT: [0,4] . . DE. . . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s3
158 # CHECK-NEXT: [0,5] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . flat_load_dword v2, v[0:1]
159 # CHECK-NEXT: [0,6] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v3, v[0:1] offset:8
160 # CHECK-NEXT: [0,7] . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v4, v[0:1] offset:16
161 # CHECK-NEXT: [0,8] . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. flat_load_dword v5, v[0:1] offset:24
162 # CHECK-NEXT: [0,9] . . . DE. . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s0
163 # CHECK-NEXT: [0,10] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s1
164 # CHECK-NEXT: [0,11] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v6, s6
165 # CHECK-NEXT: [0,12] . . . .DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v7, s7
166 # CHECK-NEXT: [0,13] . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v8, s8
167 # CHECK-NEXT: [0,14] . . . . DE. . . . . . . . . . . . . . . . v_mov_b32_e32 v9, s9
168 # CHECK-NEXT: [0,15] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v10, s10
169 # CHECK-NEXT: [0,16] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v11, s11
170 # CHECK-NEXT: [0,17] . . . . .DE . . . . . . . . . . . . . . . v_mov_b32_e32 v12, s12
171 # CHECK-NEXT: [0,18] . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v13, s13
172 # CHECK-NEXT: [0,19] . . . . . DE. . . . . . . . . . . . . . . v_mov_b32_e32 v14, s14
173 # CHECK-NEXT: [0,20] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v15, s15
174 # CHECK-NEXT: [0,21] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v16, s16
175 # CHECK-NEXT: [0,22] . . . . . .DE . . . . . . . . . . . . . . v_mov_b32_e32 v17, s17
176 # CHECK-NEXT: [0,23] . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v18, s18
177 # CHECK-NEXT: [0,24] . . . . . . DE. . . . . . . . . . . . . . v_mov_b32_e32 v19, s19
178 # CHECK-NEXT: [0,25] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v20, s20
179 # CHECK-NEXT: [0,26] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v21, s21
180 # CHECK-NEXT: [0,27] . . . . . . .DE . . . . . . . . . . . . . v_mov_b32_e32 v22, s22
181 # CHECK-NEXT: [0,28] . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v23, s23
182 # CHECK-NEXT: [0,29] . . . . . . . DE. . . . . . . . . . . . . v_mov_b32_e32 v24, s24
183 # CHECK-NEXT: [0,30] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v25, s25
184 # CHECK-NEXT: [0,31] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v26, s26
185 # CHECK-NEXT: [0,32] . . . . . . . .DE . . . . . . . . . . . . v_mov_b32_e32 v27, s27
186 # CHECK-NEXT: [0,33] . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v28, s28
187 # CHECK-NEXT: [0,34] . . . . . . . . DE. . . . . . . . . . . . v_mov_b32_e32 v29, s29
188 # CHECK-NEXT: [0,35] . . . . . . . . . . . . . . . . . . . DE s_waitcnt vmcnt(0) lgkmcnt(0)
190 # CHECK: Average Wait times (based on the timeline view):
191 # CHECK-NEXT: [0]: Executions
192 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
193 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
194 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
196 # CHECK: [0] [1] [2] [3]
197 # CHECK-NEXT: 0. 1 0.0 0.0 0.0 s_load_dwordx2 s[2:3], s[0:1], 0x24
198 # CHECK-NEXT: 1. 1 0.0 0.0 0.0 s_load_dwordx2 s[0:1], s[0:1], 0x2c
199 # CHECK-NEXT: 2. 1 0.0 0.0 0.0 s_waitcnt lgkmcnt(0)
200 # CHECK-NEXT: 3. 1 0.0 0.0 0.0 v_mov_b32_e32 v0, s2
201 # CHECK-NEXT: 4. 1 0.0 0.0 0.0 v_mov_b32_e32 v1, s3
202 # CHECK-NEXT: 5. 1 0.0 0.0 0.0 flat_load_dword v2, v[0:1]
203 # CHECK-NEXT: 6. 1 0.0 0.0 0.0 flat_load_dword v3, v[0:1] offset:8
204 # CHECK-NEXT: 7. 1 0.0 0.0 0.0 flat_load_dword v4, v[0:1] offset:16
205 # CHECK-NEXT: 8. 1 0.0 0.0 0.0 flat_load_dword v5, v[0:1] offset:24
206 # CHECK-NEXT: 9. 1 0.0 0.0 0.0 v_mov_b32_e32 v0, s0
207 # CHECK-NEXT: 10. 1 0.0 0.0 0.0 v_mov_b32_e32 v1, s1
208 # CHECK-NEXT: 11. 1 0.0 0.0 0.0 v_mov_b32_e32 v6, s6
209 # CHECK-NEXT: 12. 1 0.0 0.0 0.0 v_mov_b32_e32 v7, s7
210 # CHECK-NEXT: 13. 1 0.0 0.0 0.0 v_mov_b32_e32 v8, s8
211 # CHECK-NEXT: 14. 1 0.0 0.0 0.0 v_mov_b32_e32 v9, s9
212 # CHECK-NEXT: 15. 1 0.0 0.0 0.0 v_mov_b32_e32 v10, s10
213 # CHECK-NEXT: 16. 1 0.0 0.0 0.0 v_mov_b32_e32 v11, s11
214 # CHECK-NEXT: 17. 1 0.0 0.0 0.0 v_mov_b32_e32 v12, s12
215 # CHECK-NEXT: 18. 1 0.0 0.0 0.0 v_mov_b32_e32 v13, s13
216 # CHECK-NEXT: 19. 1 0.0 0.0 0.0 v_mov_b32_e32 v14, s14
217 # CHECK-NEXT: 20. 1 0.0 0.0 0.0 v_mov_b32_e32 v15, s15
218 # CHECK-NEXT: 21. 1 0.0 0.0 0.0 v_mov_b32_e32 v16, s16
219 # CHECK-NEXT: 22. 1 0.0 0.0 0.0 v_mov_b32_e32 v17, s17
220 # CHECK-NEXT: 23. 1 0.0 0.0 0.0 v_mov_b32_e32 v18, s18
221 # CHECK-NEXT: 24. 1 0.0 0.0 0.0 v_mov_b32_e32 v19, s19
222 # CHECK-NEXT: 25. 1 0.0 0.0 0.0 v_mov_b32_e32 v20, s20
223 # CHECK-NEXT: 26. 1 0.0 0.0 0.0 v_mov_b32_e32 v21, s21
224 # CHECK-NEXT: 27. 1 0.0 0.0 0.0 v_mov_b32_e32 v22, s22
225 # CHECK-NEXT: 28. 1 0.0 0.0 0.0 v_mov_b32_e32 v23, s23
226 # CHECK-NEXT: 29. 1 0.0 0.0 0.0 v_mov_b32_e32 v24, s24
227 # CHECK-NEXT: 30. 1 0.0 0.0 0.0 v_mov_b32_e32 v25, s25
228 # CHECK-NEXT: 31. 1 0.0 0.0 0.0 v_mov_b32_e32 v26, s26
229 # CHECK-NEXT: 32. 1 0.0 0.0 0.0 v_mov_b32_e32 v27, s27
230 # CHECK-NEXT: 33. 1 0.0 0.0 0.0 v_mov_b32_e32 v28, s28
231 # CHECK-NEXT: 34. 1 0.0 0.0 0.0 v_mov_b32_e32 v29, s29
232 # CHECK-NEXT: 35. 1 0.0 0.0 0.0 s_waitcnt vmcnt(0) lgkmcnt(0)
233 # CHECK-NEXT: 1 0.0 0.0 0.0 <total>