[ARM] Adjust how NEON shifts are lowered
[llvm-core.git] / test / tools / llvm-mca / X86 / SkylakeClient / bottleneck-analysis.s
blob1f5e2dbf4690339fbba17b108128cb481d21eca6
1 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s
# Loop body used as the llvm-mca simulation input (x86-64, AT&T operand order:
# sources first, destination last).
#
# Per iteration, as written below:
#   - loads 16 bytes from (%rsi + 2*%rax) and splats each of its four 32-bit
#     lanes into a separate register (%xmm7, %xmm6, %xmm5, %xmm0);
#   - accumulates through one vmulps followed by seven dependent FMAs, each
#     FMA consuming the result of the previous one;
#   - stores the final 16-byte result to (%rdx + %rax);
#   - advances %rax by 16 and decrements %ecx, looping while %ecx != 0.
#
# Registers read but never written inside the loop: %xmm9-%xmm15, %rsi, %rdx,
# %rsp (and the 16-byte memory operand at -24(%rsp)).
# Registers carried from one iteration into the next: %xmm1, %xmm2, %xmm3,
# %xmm4 (rewritten near the end of the body), plus %rax and %ecx.
#
# NOTE(review): the label style suggests compiler-generated code, and the
# shape looks like a recursive filter kernel with %xmm9-%xmm15 as
# coefficients -- neither is established by this file; confirm before
# relying on it.
#
# Do not reorder or edit the instructions: the autogenerated expectations
# further down describe the simulation of exactly this sequence.
4 .LBB0_4:
5 vmovups (%rsi,%rax,2), %xmm0          # xmm0 = 16-byte unaligned load; index %rax is scaled by 2 here
6 vpermilps $255, %xmm0, %xmm7          # imm 0xFF: every lane of xmm7 = lane 3 of xmm0
7 vmulps -24(%rsp), %xmm7, %xmm8        # xmm8 = xmm7 * mem[%rsp-24]; starts the accumulation chain
8 vpermilps $170, %xmm0, %xmm6          # imm 0xAA: every lane of xmm6 = lane 2 of xmm0
9 vpermilps $85, %xmm0, %xmm5           # imm 0x55: every lane of xmm5 = lane 1 of xmm0
10 vbroadcastss %xmm0, %xmm0            # every lane of xmm0 = lane 0 of xmm0
11 vfmadd231ps %xmm9, %xmm6, %xmm8      # xmm8 = xmm6 * xmm9  + xmm8
12 vfmadd213ps %xmm8, %xmm10, %xmm5     # xmm5 = xmm10 * xmm5 + xmm8
13 vfmadd213ps %xmm5, %xmm11, %xmm0     # xmm0 = xmm11 * xmm0 + xmm5
14 vfmadd213ps %xmm0, %xmm12, %xmm4     # xmm4 = xmm12 * xmm4 + xmm0  (xmm4 comes from the previous iteration)
15 vfmadd213ps %xmm4, %xmm13, %xmm1     # xmm1 = xmm13 * xmm1 + xmm4  (xmm1 comes from the previous iteration)
16 vmovaps %xmm7, %xmm4                 # xmm4 = this iteration's lane-3 splat, kept for the next iteration
17 vfmadd213ps %xmm1, %xmm14, %xmm2     # xmm2 = xmm14 * xmm2 + xmm1  (xmm2 comes from the previous iteration)
18 vmovaps %xmm6, %xmm1                 # xmm1 = this iteration's lane-2 splat, kept for the next iteration
19 vfmadd213ps %xmm2, %xmm15, %xmm3     # xmm3 = xmm15 * xmm3 + xmm2  (xmm3 comes from the previous iteration); final result
20 vpermilps $170, %xmm3, %xmm0         # xmm0 = lane-2 splat of the result
21 vmovups %xmm3, (%rdx,%rax)           # store the 16-byte result; index %rax is unscaled here
22 vpermilps $255, %xmm3, %xmm2         # xmm2 = lane-3 splat of the result, kept for the next iteration
23 addq $16, %rax                       # advance the shared index by 16
24 decl %ecx                            # count down; sets ZF for the branch below
25 vmovaps %xmm0, %xmm3                 # xmm3 = lane-2 splat of the result, kept for the next iteration (flags untouched)
26 jne .LBB0_4                          # repeat while %ecx != 0
28 # CHECK: Iterations: 100
29 # CHECK-NEXT: Instructions: 2200
30 # CHECK-NEXT: Total Cycles: 1039
31 # CHECK-NEXT: Total uOps: 2400
33 # CHECK: Dispatch Width: 6
34 # CHECK-NEXT: uOps Per Cycle: 2.31
35 # CHECK-NEXT: IPC: 2.12
36 # CHECK-NEXT: Block RThroughput: 6.0
38 # CHECK: Cycles with backend pressure increase [ 92.69% ]
39 # CHECK-NEXT: Throughput Bottlenecks:
40 # CHECK-NEXT: Resource Pressure [ 46.78% ]
41 # CHECK-NEXT: - SKLPort0 [ 14.24% ]
42 # CHECK-NEXT: - SKLPort1 [ 14.24% ]
43 # CHECK-NEXT: - SKLPort5 [ 46.49% ]
44 # CHECK-NEXT: - SKLPort6 [ 8.66% ]
45 # CHECK-NEXT: Data Dependencies: [ 64.97% ]
46 # CHECK-NEXT: - Register Dependencies [ 64.97% ]
47 # CHECK-NEXT: - Memory Dependencies [ 0.00% ]
49 # CHECK: Critical sequence based on the simulation:
51 # CHECK: Instruction Dependency Information
52 # CHECK-NEXT: +----< 18. addq $16, %rax
53 # CHECK-NEXT: |
54 # CHECK-NEXT: | < loop carried >
55 # CHECK-NEXT: |
56 # CHECK-NEXT: +----> 0. vmovups (%rsi,%rax,2), %xmm0 ## REGISTER dependency: %rax
57 # CHECK-NEXT: | 1. vpermilps $255, %xmm0, %xmm7
58 # CHECK-NEXT: | 2. vmulps -24(%rsp), %xmm7, %xmm8
59 # CHECK-NEXT: +----> 3. vpermilps $170, %xmm0, %xmm6 ## REGISTER dependency: %xmm0
60 # CHECK-NEXT: | 4. vpermilps $85, %xmm0, %xmm5
61 # CHECK-NEXT: | 5. vbroadcastss %xmm0, %xmm0
62 # CHECK-NEXT: +----> 6. vfmadd231ps %xmm9, %xmm6, %xmm8 ## REGISTER dependency: %xmm6
63 # CHECK-NEXT: +----> 7. vfmadd213ps %xmm8, %xmm10, %xmm5 ## REGISTER dependency: %xmm8
64 # CHECK-NEXT: +----> 8. vfmadd213ps %xmm5, %xmm11, %xmm0 ## REGISTER dependency: %xmm5
65 # CHECK-NEXT: +----> 9. vfmadd213ps %xmm0, %xmm12, %xmm4 ## REGISTER dependency: %xmm0
66 # CHECK-NEXT: +----> 10. vfmadd213ps %xmm4, %xmm13, %xmm1 ## REGISTER dependency: %xmm4
67 # CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4
68 # CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1
69 # CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1
70 # CHECK-NEXT: | 14. vfmadd213ps %xmm2, %xmm15, %xmm3
71 # CHECK-NEXT: | 15. vpermilps $170, %xmm3, %xmm0
72 # CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax)
73 # CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2
74 # CHECK-NEXT: | 18. addq $16, %rax
75 # CHECK-NEXT: | 19. decl %ecx
76 # CHECK-NEXT: | 20. vmovaps %xmm0, %xmm3
77 # CHECK-NEXT: | 21. jne .LBB0_4
78 # CHECK-NEXT: |
79 # CHECK-NEXT: | < loop carried >
80 # CHECK-NEXT: |
81 # CHECK-NEXT: +----> 2. vmulps -24(%rsp), %xmm7, %xmm8 ## RESOURCE interference: SKLPort1 [ probability: 45% ]
83 # CHECK: Instruction Info:
84 # CHECK-NEXT: [1]: #uOps
85 # CHECK-NEXT: [2]: Latency
86 # CHECK-NEXT: [3]: RThroughput
87 # CHECK-NEXT: [4]: MayLoad
88 # CHECK-NEXT: [5]: MayStore
89 # CHECK-NEXT: [6]: HasSideEffects (U)
91 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
92 # CHECK-NEXT: 1 6 0.50 * vmovups (%rsi,%rax,2), %xmm0
93 # CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm0, %xmm7
94 # CHECK-NEXT: 2 10 0.50 * vmulps -24(%rsp), %xmm7, %xmm8
95 # CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm0, %xmm6
96 # CHECK-NEXT: 1 1 1.00 vpermilps $85, %xmm0, %xmm5
97 # CHECK-NEXT: 1 1 1.00 vbroadcastss %xmm0, %xmm0
98 # CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm9, %xmm6, %xmm8
99 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm8, %xmm10, %xmm5
100 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm5, %xmm11, %xmm0
101 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm12, %xmm4
102 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm4, %xmm13, %xmm1
103 # CHECK-NEXT: 1 1 0.33 vmovaps %xmm7, %xmm4
104 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm1, %xmm14, %xmm2
105 # CHECK-NEXT: 1 1 0.33 vmovaps %xmm6, %xmm1
106 # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm2, %xmm15, %xmm3
107 # CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm3, %xmm0
108 # CHECK-NEXT: 2 1 1.00 * vmovups %xmm3, (%rdx,%rax)
109 # CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm3, %xmm2
110 # CHECK-NEXT: 1 1 0.25 addq $16, %rax
111 # CHECK-NEXT: 1 1 0.25 decl %ecx
112 # CHECK-NEXT: 1 1 0.33 vmovaps %xmm0, %xmm3
113 # CHECK-NEXT: 1 1 0.50 jne .LBB0_4
115 # CHECK: Resources:
116 # CHECK-NEXT: [0] - SKLDivider
117 # CHECK-NEXT: [1] - SKLFPDivider
118 # CHECK-NEXT: [2] - SKLPort0
119 # CHECK-NEXT: [3] - SKLPort1
120 # CHECK-NEXT: [4] - SKLPort2
121 # CHECK-NEXT: [5] - SKLPort3
122 # CHECK-NEXT: [6] - SKLPort4
123 # CHECK-NEXT: [7] - SKLPort5
124 # CHECK-NEXT: [8] - SKLPort6
125 # CHECK-NEXT: [9] - SKLPort7
127 # CHECK: Resource pressure per iteration:
128 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
129 # CHECK-NEXT: - - 5.52 5.53 1.01 1.03 1.00 6.02 2.93 0.96
131 # CHECK: Resource pressure by instruction:
132 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
133 # CHECK-NEXT: - - - - 0.04 0.96 - - - - vmovups (%rsi,%rax,2), %xmm0
134 # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm0, %xmm7
135 # CHECK-NEXT: - - 0.03 0.97 0.96 0.04 - - - - vmulps -24(%rsp), %xmm7, %xmm8
136 # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm0, %xmm6
137 # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $85, %xmm0, %xmm5
138 # CHECK-NEXT: - - - - - - - 1.00 - - vbroadcastss %xmm0, %xmm0
139 # CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd231ps %xmm9, %xmm6, %xmm8
140 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm8, %xmm10, %xmm5
141 # CHECK-NEXT: - - 0.92 0.08 - - - - - - vfmadd213ps %xmm5, %xmm11, %xmm0
142 # CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd213ps %xmm0, %xmm12, %xmm4
143 # CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm4, %xmm13, %xmm1
144 # CHECK-NEXT: - - 0.52 0.48 - - - - - - vmovaps %xmm7, %xmm4
145 # CHECK-NEXT: - - 0.49 0.51 - - - - - - vfmadd213ps %xmm1, %xmm14, %xmm2
146 # CHECK-NEXT: - - 0.04 0.95 - - - 0.01 - - vmovaps %xmm6, %xmm1
147 # CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm2, %xmm15, %xmm3
148 # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm3, %xmm0
149 # CHECK-NEXT: - - - - 0.01 0.03 1.00 - - 0.96 vmovups %xmm3, (%rdx,%rax)
150 # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm3, %xmm2
151 # CHECK-NEXT: - - - - - - - - 1.00 - addq $16, %rax
152 # CHECK-NEXT: - - 0.04 0.01 - - - 0.01 0.94 - decl %ecx
153 # CHECK-NEXT: - - 0.05 0.95 - - - - - - vmovaps %xmm0, %xmm3
154 # CHECK-NEXT: - - 0.01 - - - - - 0.99 - jne .LBB0_4