Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / tools / llvm-mca / X86 / Znver3 / one-idioms-avx-ymm.s
blobeb2bb979b7f5f7105311d623248fb3892bc09a8d
1 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -timeline-max-iterations=2 -register-file-stats -iterations=1000 < %s | FileCheck %s
#
# Test input for the run line above. The file holds four marked code regions;
# the expected output further down reports each one separately (regions 0-3).
# Per the run line, every region is simulated for 1000 iterations on znver3,
# the timeline is limited to the first 2 iterations, and register-file
# statistics are printed.
#
# All four regions have the same shape: a packed compare-for-equality of %ymm0
# with itself, followed by a packed add that reads and writes the same %ymm0.
# Only the element-width suffix of the mnemonics differs (b, w, d, q).
#
# NOTE(review): judging by the file name, this is presumably meant to cover
# the "ones idiom" (compare-equal of a register with itself) on 256-bit AVX
# registers - confirm against the Znver3 scheduling model.
#
# Region 0: byte elements.
4 # LLVM-MCA-BEGIN
5 vpcmpeqb %ymm0, %ymm0, %ymm0
6 vpaddb %ymm0, %ymm0, %ymm0
7 # LLVM-MCA-END
# Region 1: word elements.
9 # LLVM-MCA-BEGIN
10 vpcmpeqw %ymm0, %ymm0, %ymm0
11 vpaddw %ymm0, %ymm0, %ymm0
12 # LLVM-MCA-END
# Region 2: doubleword elements.
14 # LLVM-MCA-BEGIN
15 vpcmpeqd %ymm0, %ymm0, %ymm0
16 vpaddd %ymm0, %ymm0, %ymm0
17 # LLVM-MCA-END
# Region 3: quadword elements.
19 # LLVM-MCA-BEGIN
20 vpcmpeqq %ymm0, %ymm0, %ymm0
21 vpaddq %ymm0, %ymm0, %ymm0
22 # LLVM-MCA-END
24 # CHECK: [0] Code Region
26 # CHECK: Iterations: 1000
27 # CHECK-NEXT: Instructions: 2000
28 # CHECK-NEXT: Total Cycles: 504
29 # CHECK-NEXT: Total uOps: 2000
31 # CHECK: Dispatch Width: 6
32 # CHECK-NEXT: uOps Per Cycle: 3.97
33 # CHECK-NEXT: IPC: 3.97
34 # CHECK-NEXT: Block RThroughput: 0.5
36 # CHECK: Instruction Info:
37 # CHECK-NEXT: [1]: #uOps
38 # CHECK-NEXT: [2]: Latency
39 # CHECK-NEXT: [3]: RThroughput
40 # CHECK-NEXT: [4]: MayLoad
41 # CHECK-NEXT: [5]: MayStore
42 # CHECK-NEXT: [6]: HasSideEffects (U)
44 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
45 # CHECK-NEXT: 1 1 0.25 vpcmpeqb %ymm0, %ymm0, %ymm0
46 # CHECK-NEXT: 1 1 0.25 vpaddb %ymm0, %ymm0, %ymm0
48 # CHECK: Register File statistics:
49 # CHECK-NEXT: Total number of mappings created: 2000
50 # CHECK-NEXT: Max number of mappings used: 74
52 # CHECK: * Register File #1 -- Zn3FpPRF:
53 # CHECK-NEXT: Number of physical registers: 160
54 # CHECK-NEXT: Total number of mappings created: 2000
55 # CHECK-NEXT: Max number of mappings used: 74
57 # CHECK: * Register File #2 -- Zn3IntegerPRF:
58 # CHECK-NEXT: Number of physical registers: 192
59 # CHECK-NEXT: Total number of mappings created: 0
60 # CHECK-NEXT: Max number of mappings used: 0
62 # CHECK: Resources:
63 # CHECK-NEXT: [0] - Zn3AGU0
64 # CHECK-NEXT: [1] - Zn3AGU1
65 # CHECK-NEXT: [2] - Zn3AGU2
66 # CHECK-NEXT: [3] - Zn3ALU0
67 # CHECK-NEXT: [4] - Zn3ALU1
68 # CHECK-NEXT: [5] - Zn3ALU2
69 # CHECK-NEXT: [6] - Zn3ALU3
70 # CHECK-NEXT: [7] - Zn3BRU1
71 # CHECK-NEXT: [8] - Zn3FPP0
72 # CHECK-NEXT: [9] - Zn3FPP1
73 # CHECK-NEXT: [10] - Zn3FPP2
74 # CHECK-NEXT: [11] - Zn3FPP3
75 # CHECK-NEXT: [12.0] - Zn3FPP45
76 # CHECK-NEXT: [12.1] - Zn3FPP45
77 # CHECK-NEXT: [13] - Zn3FPSt
78 # CHECK-NEXT: [14.0] - Zn3LSU
79 # CHECK-NEXT: [14.1] - Zn3LSU
80 # CHECK-NEXT: [14.2] - Zn3LSU
81 # CHECK-NEXT: [15.0] - Zn3Load
82 # CHECK-NEXT: [15.1] - Zn3Load
83 # CHECK-NEXT: [15.2] - Zn3Load
84 # CHECK-NEXT: [16.0] - Zn3Store
85 # CHECK-NEXT: [16.1] - Zn3Store
87 # CHECK: Resource pressure per iteration:
88 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
89 # CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - -
91 # CHECK: Resource pressure by instruction:
92 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
93 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.50 - - - - - - - - - - - - vpcmpeqb %ymm0, %ymm0, %ymm0
94 # CHECK-NEXT: - - - - - - - - 0.25 0.25 - 0.50 - - - - - - - - - - - vpaddb %ymm0, %ymm0, %ymm0
96 # CHECK: Timeline view:
97 # CHECK-NEXT: Index 01234
99 # CHECK: [0,0] DeER. vpcmpeqb %ymm0, %ymm0, %ymm0
100 # CHECK-NEXT: [0,1] D=eER vpaddb %ymm0, %ymm0, %ymm0
101 # CHECK-NEXT: [1,0] DeE-R vpcmpeqb %ymm0, %ymm0, %ymm0
102 # CHECK-NEXT: [1,1] D=eER vpaddb %ymm0, %ymm0, %ymm0
104 # CHECK: Average Wait times (based on the timeline view):
105 # CHECK-NEXT: [0]: Executions
106 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
107 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
108 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
110 # CHECK: [0] [1] [2] [3]
111 # CHECK-NEXT: 0. 2 1.0 1.0 0.5 vpcmpeqb %ymm0, %ymm0, %ymm0
112 # CHECK-NEXT: 1. 2 2.0 0.0 0.0 vpaddb %ymm0, %ymm0, %ymm0
113 # CHECK-NEXT: 2 1.5 0.5 0.3 <total>
115 # CHECK: [1] Code Region
117 # CHECK: Iterations: 1000
118 # CHECK-NEXT: Instructions: 2000
119 # CHECK-NEXT: Total Cycles: 504
120 # CHECK-NEXT: Total uOps: 2000
122 # CHECK: Dispatch Width: 6
123 # CHECK-NEXT: uOps Per Cycle: 3.97
124 # CHECK-NEXT: IPC: 3.97
125 # CHECK-NEXT: Block RThroughput: 0.5
127 # CHECK: Instruction Info:
128 # CHECK-NEXT: [1]: #uOps
129 # CHECK-NEXT: [2]: Latency
130 # CHECK-NEXT: [3]: RThroughput
131 # CHECK-NEXT: [4]: MayLoad
132 # CHECK-NEXT: [5]: MayStore
133 # CHECK-NEXT: [6]: HasSideEffects (U)
135 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
136 # CHECK-NEXT: 1 1 0.25 vpcmpeqw %ymm0, %ymm0, %ymm0
137 # CHECK-NEXT: 1 1 0.25 vpaddw %ymm0, %ymm0, %ymm0
139 # CHECK: Register File statistics:
140 # CHECK-NEXT: Total number of mappings created: 2000
141 # CHECK-NEXT: Max number of mappings used: 74
143 # CHECK: * Register File #1 -- Zn3FpPRF:
144 # CHECK-NEXT: Number of physical registers: 160
145 # CHECK-NEXT: Total number of mappings created: 2000
146 # CHECK-NEXT: Max number of mappings used: 74
148 # CHECK: * Register File #2 -- Zn3IntegerPRF:
149 # CHECK-NEXT: Number of physical registers: 192
150 # CHECK-NEXT: Total number of mappings created: 0
151 # CHECK-NEXT: Max number of mappings used: 0
153 # CHECK: Resources:
154 # CHECK-NEXT: [0] - Zn3AGU0
155 # CHECK-NEXT: [1] - Zn3AGU1
156 # CHECK-NEXT: [2] - Zn3AGU2
157 # CHECK-NEXT: [3] - Zn3ALU0
158 # CHECK-NEXT: [4] - Zn3ALU1
159 # CHECK-NEXT: [5] - Zn3ALU2
160 # CHECK-NEXT: [6] - Zn3ALU3
161 # CHECK-NEXT: [7] - Zn3BRU1
162 # CHECK-NEXT: [8] - Zn3FPP0
163 # CHECK-NEXT: [9] - Zn3FPP1
164 # CHECK-NEXT: [10] - Zn3FPP2
165 # CHECK-NEXT: [11] - Zn3FPP3
166 # CHECK-NEXT: [12.0] - Zn3FPP45
167 # CHECK-NEXT: [12.1] - Zn3FPP45
168 # CHECK-NEXT: [13] - Zn3FPSt
169 # CHECK-NEXT: [14.0] - Zn3LSU
170 # CHECK-NEXT: [14.1] - Zn3LSU
171 # CHECK-NEXT: [14.2] - Zn3LSU
172 # CHECK-NEXT: [15.0] - Zn3Load
173 # CHECK-NEXT: [15.1] - Zn3Load
174 # CHECK-NEXT: [15.2] - Zn3Load
175 # CHECK-NEXT: [16.0] - Zn3Store
176 # CHECK-NEXT: [16.1] - Zn3Store
178 # CHECK: Resource pressure per iteration:
179 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
180 # CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - -
182 # CHECK: Resource pressure by instruction:
183 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
184 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.50 - - - - - - - - - - - - vpcmpeqw %ymm0, %ymm0, %ymm0
185 # CHECK-NEXT: - - - - - - - - 0.25 0.25 - 0.50 - - - - - - - - - - - vpaddw %ymm0, %ymm0, %ymm0
187 # CHECK: Timeline view:
188 # CHECK-NEXT: Index 01234
190 # CHECK: [0,0] DeER. vpcmpeqw %ymm0, %ymm0, %ymm0
191 # CHECK-NEXT: [0,1] D=eER vpaddw %ymm0, %ymm0, %ymm0
192 # CHECK-NEXT: [1,0] DeE-R vpcmpeqw %ymm0, %ymm0, %ymm0
193 # CHECK-NEXT: [1,1] D=eER vpaddw %ymm0, %ymm0, %ymm0
195 # CHECK: Average Wait times (based on the timeline view):
196 # CHECK-NEXT: [0]: Executions
197 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
198 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
199 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
201 # CHECK: [0] [1] [2] [3]
202 # CHECK-NEXT: 0. 2 1.0 1.0 0.5 vpcmpeqw %ymm0, %ymm0, %ymm0
203 # CHECK-NEXT: 1. 2 2.0 0.0 0.0 vpaddw %ymm0, %ymm0, %ymm0
204 # CHECK-NEXT: 2 1.5 0.5 0.3 <total>
206 # CHECK: [2] Code Region
208 # CHECK: Iterations: 1000
209 # CHECK-NEXT: Instructions: 2000
210 # CHECK-NEXT: Total Cycles: 504
211 # CHECK-NEXT: Total uOps: 2000
213 # CHECK: Dispatch Width: 6
214 # CHECK-NEXT: uOps Per Cycle: 3.97
215 # CHECK-NEXT: IPC: 3.97
216 # CHECK-NEXT: Block RThroughput: 0.5
218 # CHECK: Instruction Info:
219 # CHECK-NEXT: [1]: #uOps
220 # CHECK-NEXT: [2]: Latency
221 # CHECK-NEXT: [3]: RThroughput
222 # CHECK-NEXT: [4]: MayLoad
223 # CHECK-NEXT: [5]: MayStore
224 # CHECK-NEXT: [6]: HasSideEffects (U)
226 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
227 # CHECK-NEXT: 1 1 0.25 vpcmpeqd %ymm0, %ymm0, %ymm0
228 # CHECK-NEXT: 1 1 0.25 vpaddd %ymm0, %ymm0, %ymm0
230 # CHECK: Register File statistics:
231 # CHECK-NEXT: Total number of mappings created: 2000
232 # CHECK-NEXT: Max number of mappings used: 74
234 # CHECK: * Register File #1 -- Zn3FpPRF:
235 # CHECK-NEXT: Number of physical registers: 160
236 # CHECK-NEXT: Total number of mappings created: 2000
237 # CHECK-NEXT: Max number of mappings used: 74
239 # CHECK: * Register File #2 -- Zn3IntegerPRF:
240 # CHECK-NEXT: Number of physical registers: 192
241 # CHECK-NEXT: Total number of mappings created: 0
242 # CHECK-NEXT: Max number of mappings used: 0
244 # CHECK: Resources:
245 # CHECK-NEXT: [0] - Zn3AGU0
246 # CHECK-NEXT: [1] - Zn3AGU1
247 # CHECK-NEXT: [2] - Zn3AGU2
248 # CHECK-NEXT: [3] - Zn3ALU0
249 # CHECK-NEXT: [4] - Zn3ALU1
250 # CHECK-NEXT: [5] - Zn3ALU2
251 # CHECK-NEXT: [6] - Zn3ALU3
252 # CHECK-NEXT: [7] - Zn3BRU1
253 # CHECK-NEXT: [8] - Zn3FPP0
254 # CHECK-NEXT: [9] - Zn3FPP1
255 # CHECK-NEXT: [10] - Zn3FPP2
256 # CHECK-NEXT: [11] - Zn3FPP3
257 # CHECK-NEXT: [12.0] - Zn3FPP45
258 # CHECK-NEXT: [12.1] - Zn3FPP45
259 # CHECK-NEXT: [13] - Zn3FPSt
260 # CHECK-NEXT: [14.0] - Zn3LSU
261 # CHECK-NEXT: [14.1] - Zn3LSU
262 # CHECK-NEXT: [14.2] - Zn3LSU
263 # CHECK-NEXT: [15.0] - Zn3Load
264 # CHECK-NEXT: [15.1] - Zn3Load
265 # CHECK-NEXT: [15.2] - Zn3Load
266 # CHECK-NEXT: [16.0] - Zn3Store
267 # CHECK-NEXT: [16.1] - Zn3Store
269 # CHECK: Resource pressure per iteration:
270 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
271 # CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - -
273 # CHECK: Resource pressure by instruction:
274 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
275 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.50 - - - - - - - - - - - - vpcmpeqd %ymm0, %ymm0, %ymm0
276 # CHECK-NEXT: - - - - - - - - 0.25 0.25 - 0.50 - - - - - - - - - - - vpaddd %ymm0, %ymm0, %ymm0
278 # CHECK: Timeline view:
279 # CHECK-NEXT: Index 01234
281 # CHECK: [0,0] DeER. vpcmpeqd %ymm0, %ymm0, %ymm0
282 # CHECK-NEXT: [0,1] D=eER vpaddd %ymm0, %ymm0, %ymm0
283 # CHECK-NEXT: [1,0] DeE-R vpcmpeqd %ymm0, %ymm0, %ymm0
284 # CHECK-NEXT: [1,1] D=eER vpaddd %ymm0, %ymm0, %ymm0
286 # CHECK: Average Wait times (based on the timeline view):
287 # CHECK-NEXT: [0]: Executions
288 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
289 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
290 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
292 # CHECK: [0] [1] [2] [3]
293 # CHECK-NEXT: 0. 2 1.0 1.0 0.5 vpcmpeqd %ymm0, %ymm0, %ymm0
294 # CHECK-NEXT: 1. 2 2.0 0.0 0.0 vpaddd %ymm0, %ymm0, %ymm0
295 # CHECK-NEXT: 2 1.5 0.5 0.3 <total>
297 # CHECK: [3] Code Region
299 # CHECK: Iterations: 1000
300 # CHECK-NEXT: Instructions: 2000
301 # CHECK-NEXT: Total Cycles: 504
302 # CHECK-NEXT: Total uOps: 2000
304 # CHECK: Dispatch Width: 6
305 # CHECK-NEXT: uOps Per Cycle: 3.97
306 # CHECK-NEXT: IPC: 3.97
307 # CHECK-NEXT: Block RThroughput: 0.5
309 # CHECK: Instruction Info:
310 # CHECK-NEXT: [1]: #uOps
311 # CHECK-NEXT: [2]: Latency
312 # CHECK-NEXT: [3]: RThroughput
313 # CHECK-NEXT: [4]: MayLoad
314 # CHECK-NEXT: [5]: MayStore
315 # CHECK-NEXT: [6]: HasSideEffects (U)
317 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
318 # CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm0, %ymm0
319 # CHECK-NEXT: 1 1 0.25 vpaddq %ymm0, %ymm0, %ymm0
321 # CHECK: Register File statistics:
322 # CHECK-NEXT: Total number of mappings created: 2000
323 # CHECK-NEXT: Max number of mappings used: 73
325 # CHECK: * Register File #1 -- Zn3FpPRF:
326 # CHECK-NEXT: Number of physical registers: 160
327 # CHECK-NEXT: Total number of mappings created: 2000
328 # CHECK-NEXT: Max number of mappings used: 73
330 # CHECK: * Register File #2 -- Zn3IntegerPRF:
331 # CHECK-NEXT: Number of physical registers: 192
332 # CHECK-NEXT: Total number of mappings created: 0
333 # CHECK-NEXT: Max number of mappings used: 0
335 # CHECK: Resources:
336 # CHECK-NEXT: [0] - Zn3AGU0
337 # CHECK-NEXT: [1] - Zn3AGU1
338 # CHECK-NEXT: [2] - Zn3AGU2
339 # CHECK-NEXT: [3] - Zn3ALU0
340 # CHECK-NEXT: [4] - Zn3ALU1
341 # CHECK-NEXT: [5] - Zn3ALU2
342 # CHECK-NEXT: [6] - Zn3ALU3
343 # CHECK-NEXT: [7] - Zn3BRU1
344 # CHECK-NEXT: [8] - Zn3FPP0
345 # CHECK-NEXT: [9] - Zn3FPP1
346 # CHECK-NEXT: [10] - Zn3FPP2
347 # CHECK-NEXT: [11] - Zn3FPP3
348 # CHECK-NEXT: [12.0] - Zn3FPP45
349 # CHECK-NEXT: [12.1] - Zn3FPP45
350 # CHECK-NEXT: [13] - Zn3FPSt
351 # CHECK-NEXT: [14.0] - Zn3LSU
352 # CHECK-NEXT: [14.1] - Zn3LSU
353 # CHECK-NEXT: [14.2] - Zn3LSU
354 # CHECK-NEXT: [15.0] - Zn3Load
355 # CHECK-NEXT: [15.1] - Zn3Load
356 # CHECK-NEXT: [15.2] - Zn3Load
357 # CHECK-NEXT: [16.0] - Zn3Store
358 # CHECK-NEXT: [16.1] - Zn3Store
360 # CHECK: Resource pressure per iteration:
361 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
362 # CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - -
364 # CHECK: Resource pressure by instruction:
365 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
366 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpcmpeqq %ymm0, %ymm0, %ymm0
367 # CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vpaddq %ymm0, %ymm0, %ymm0
369 # CHECK: Timeline view:
370 # CHECK-NEXT: Index 01234
372 # CHECK: [0,0] DeER. vpcmpeqq %ymm0, %ymm0, %ymm0
373 # CHECK-NEXT: [0,1] D=eER vpaddq %ymm0, %ymm0, %ymm0
374 # CHECK-NEXT: [1,0] DeE-R vpcmpeqq %ymm0, %ymm0, %ymm0
375 # CHECK-NEXT: [1,1] D=eER vpaddq %ymm0, %ymm0, %ymm0
377 # CHECK: Average Wait times (based on the timeline view):
378 # CHECK-NEXT: [0]: Executions
379 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
380 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
381 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
383 # CHECK: [0] [1] [2] [3]
384 # CHECK-NEXT: 0. 2 1.0 1.0 0.5 vpcmpeqq %ymm0, %ymm0, %ymm0
385 # CHECK-NEXT: 1. 2 2.0 0.0 0.0 vpaddq %ymm0, %ymm0, %ymm0
386 # CHECK-NEXT: 2 1.5 0.5 0.3 <total>