Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / tools / llvm-mca / AMDGPU / gfx10-double.s
blob 00b429ef6d67d5a5ed98997af729f15a8bc02b90
1 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2 # RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx1010 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
# Input instruction stream for llvm-mca (see the RUN line above). The
# "# CHECK" lines after this section are autogenerated from the tool's
# output for exactly this sequence, so regenerate them with
# utils/update_mca_test_checks.py whenever an instruction is added,
# removed or reordered here.

# v_cvt_*: conversions with an f64 source or destination operand.
4 v_cvt_i32_f64 v0, v[0:1]
5 v_cvt_f64_i32 v[2:3], v2
6 v_cvt_f32_f64 v4, v[4:5]
7 v_cvt_f64_f32 v[6:7], v6
8 v_cvt_u32_f64 v8, v[8:9]
9 v_cvt_f64_u32 v[10:11], v10
# frexp_exp / frexp_mant / fract.
11 v_frexp_exp_i32_f64 v0, v[0:1]
12 v_frexp_mant_f64 v[2:3], v[2:3]
13 v_fract_f64 v[4:5], v[4:5]
# trunc / ceil / rndne / floor.
15 v_trunc_f64 v[0:1], v[0:1]
16 v_ceil_f64 v[2:3], v[2:3]
17 v_rndne_f64 v[4:5], v[4:5]
18 v_floor_f64 v[6:7], v[6:7]
# fma / add / mul / min / max.
20 v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
21 v_add_f64 v[2:3], v[2:3], v[2:3]
22 v_mul_f64 v[4:5], v[4:5], v[4:5]
23 v_min_f64 v[6:7], v[6:7], v[6:7]
24 v_max_f64 v[8:9], v[8:9], v[8:9]
# div_fmas, then div_fixup / ldexp.
26 v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
28 v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
29 v_ldexp_f64 v[2:3], v[2:3], v0
# v_div_scale_f64 names vcc_lo as a second destination; the expectations
# below list it as the only 2-uOp instruction and the only one with
# HWSALU pressure.
31 v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
33 v_trig_preop_f64 v[2:3], v[2:3], v0
# Compares.
# NOTE(review): the v_cmp_eq_f64 line does not spell out a vcc_lo
# destination, while the expected disassembly below prints
# "v_cmp_eq_f64_e32 vcc_lo, ..." - confirm against upstream that the
# implicit form is intentional.
35 v_cmp_eq_f64 v[0:1], v[0:1]
36 v_cmp_class_f64 vcc_lo, v[2:3], s0
# rcp / rsq / sqrt: the expectations below show these with latency 24 and
# HWTransVALU pressure, versus latency 22 for every instruction above.
38 v_rcp_f64 v[0:1], v[0:1]
39 v_rsq_f64 v[2:3], v[2:3]
40 v_sqrt_f64 v[4:5], v[4:5]
42 # CHECK: Iterations: 1
43 # CHECK-NEXT: Instructions: 28
44 # CHECK-NEXT: Total Cycles: 205
45 # CHECK-NEXT: Total uOps: 29
47 # CHECK: Dispatch Width: 1
48 # CHECK-NEXT: uOps Per Cycle: 0.14
49 # CHECK-NEXT: IPC: 0.14
50 # CHECK-NEXT: Block RThroughput: 29.0
52 # CHECK: Instruction Info:
53 # CHECK-NEXT: [1]: #uOps
54 # CHECK-NEXT: [2]: Latency
55 # CHECK-NEXT: [3]: RThroughput
56 # CHECK-NEXT: [4]: MayLoad
57 # CHECK-NEXT: [5]: MayStore
58 # CHECK-NEXT: [6]: HasSideEffects (U)
60 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
61 # CHECK-NEXT: 1 22 1.00 U v_cvt_i32_f64_e32 v0, v[0:1]
62 # CHECK-NEXT: 1 22 1.00 U v_cvt_f64_i32_e32 v[2:3], v2
63 # CHECK-NEXT: 1 22 1.00 U v_cvt_f32_f64_e32 v4, v[4:5]
64 # CHECK-NEXT: 1 22 1.00 U v_cvt_f64_f32_e32 v[6:7], v6
65 # CHECK-NEXT: 1 22 1.00 U v_cvt_u32_f64_e32 v8, v[8:9]
66 # CHECK-NEXT: 1 22 1.00 U v_cvt_f64_u32_e32 v[10:11], v10
67 # CHECK-NEXT: 1 22 1.00 U v_frexp_exp_i32_f64_e32 v0, v[0:1]
68 # CHECK-NEXT: 1 22 1.00 U v_frexp_mant_f64_e32 v[2:3], v[2:3]
69 # CHECK-NEXT: 1 22 1.00 U v_fract_f64_e32 v[4:5], v[4:5]
70 # CHECK-NEXT: 1 22 1.00 U v_trunc_f64_e32 v[0:1], v[0:1]
71 # CHECK-NEXT: 1 22 1.00 U v_ceil_f64_e32 v[2:3], v[2:3]
72 # CHECK-NEXT: 1 22 1.00 U v_rndne_f64_e32 v[4:5], v[4:5]
73 # CHECK-NEXT: 1 22 1.00 U v_floor_f64_e32 v[6:7], v[6:7]
74 # CHECK-NEXT: 1 22 1.00 U v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
75 # CHECK-NEXT: 1 22 1.00 U v_add_f64 v[2:3], v[2:3], v[2:3]
76 # CHECK-NEXT: 1 22 1.00 U v_mul_f64 v[4:5], v[4:5], v[4:5]
77 # CHECK-NEXT: 1 22 1.00 U v_min_f64 v[6:7], v[6:7], v[6:7]
78 # CHECK-NEXT: 1 22 1.00 U v_max_f64 v[8:9], v[8:9], v[8:9]
79 # CHECK-NEXT: 1 22 1.00 U v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
80 # CHECK-NEXT: 1 22 1.00 U v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
81 # CHECK-NEXT: 1 22 1.00 U v_ldexp_f64 v[2:3], v[2:3], v0
82 # CHECK-NEXT: 2 22 2.00 U v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
83 # CHECK-NEXT: 1 22 1.00 U v_trig_preop_f64 v[2:3], v[2:3], v0
84 # CHECK-NEXT: 1 22 1.00 U v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
85 # CHECK-NEXT: 1 22 1.00 U v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
86 # CHECK-NEXT: 1 24 1.00 U v_rcp_f64_e32 v[0:1], v[0:1]
87 # CHECK-NEXT: 1 24 1.00 U v_rsq_f64_e32 v[2:3], v[2:3]
88 # CHECK-NEXT: 1 24 1.00 U v_sqrt_f64_e32 v[4:5], v[4:5]
90 # CHECK: Resources:
91 # CHECK-NEXT: [0] - HWBranch
92 # CHECK-NEXT: [1] - HWExport
93 # CHECK-NEXT: [2] - HWLGKM
94 # CHECK-NEXT: [3] - HWRC
95 # CHECK-NEXT: [4] - HWSALU
96 # CHECK-NEXT: [5] - HWTransVALU
97 # CHECK-NEXT: [6] - HWVALU
98 # CHECK-NEXT: [7] - HWVMEM
100 # CHECK: Resource pressure per iteration:
101 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
102 # CHECK-NEXT: - - - 29.00 1.00 3.00 28.00 -
104 # CHECK: Resource pressure by instruction:
105 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
106 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1]
107 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2
108 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5]
109 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6
110 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9]
111 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10
112 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1]
113 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3]
114 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_fract_f64_e32 v[4:5], v[4:5]
115 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1]
116 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3]
117 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5]
118 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_floor_f64_e32 v[6:7], v[6:7]
119 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
120 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3]
121 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5]
122 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7]
123 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9]
124 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
125 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
126 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0
127 # CHECK-NEXT: - - - 2.00 1.00 - 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
128 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0
129 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
130 # CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
131 # CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
132 # CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[2:3], v[2:3]
133 # CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5]
135 # CHECK: Timeline view:
136 # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
137 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 01234
139 # CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
140 # CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
141 # CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
142 # CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
143 # CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
144 # CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
145 # CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
146 # CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
147 # CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_fract_f64_e32 v[4:5], v[4:5]
148 # CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_trunc_f64_e32 v[0:1], v[0:1]
149 # CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_ceil_f64_e32 v[2:3], v[2:3]
150 # CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_rndne_f64_e32 v[4:5], v[4:5]
151 # CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . v_floor_f64_e32 v[6:7], v[6:7]
152 # CHECK-NEXT: [0,13] . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . . v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
153 # CHECK-NEXT: [0,14] . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . v_add_f64 v[2:3], v[2:3], v[2:3]
154 # CHECK-NEXT: [0,15] . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . v_mul_f64 v[4:5], v[4:5], v[4:5]
155 # CHECK-NEXT: [0,16] . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . v_min_f64 v[6:7], v[6:7], v[6:7]
156 # CHECK-NEXT: [0,17] . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . . . . . v_max_f64 v[8:9], v[8:9], v[8:9]
157 # CHECK-NEXT: [0,18] . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . . . . . v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
158 # CHECK-NEXT: [0,19] . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . . . . . . v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
159 # CHECK-NEXT: [0,20] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . v_ldexp_f64 v[2:3], v[2:3], v0
160 # CHECK-NEXT: [0,21] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
161 # CHECK-NEXT: [0,22] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . v_trig_preop_f64 v[2:3], v[2:3], v0
162 # CHECK-NEXT: [0,23] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
163 # CHECK-NEXT: [0,24] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
164 # CHECK-NEXT: [0,25] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeE . v_rcp_f64_e32 v[0:1], v[0:1]
165 # CHECK-NEXT: [0,26] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeE. v_rsq_f64_e32 v[2:3], v[2:3]
166 # CHECK-NEXT: [0,27] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeE v_sqrt_f64_e32 v[4:5], v[4:5]
168 # CHECK: Average Wait times (based on the timeline view):
169 # CHECK-NEXT: [0]: Executions
170 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
171 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
172 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
174 # CHECK: [0] [1] [2] [3]
175 # CHECK-NEXT: 0. 1 0.0 0.0 0.0 v_cvt_i32_f64_e32 v0, v[0:1]
176 # CHECK-NEXT: 1. 1 0.0 0.0 0.0 v_cvt_f64_i32_e32 v[2:3], v2
177 # CHECK-NEXT: 2. 1 0.0 0.0 0.0 v_cvt_f32_f64_e32 v4, v[4:5]
178 # CHECK-NEXT: 3. 1 0.0 0.0 0.0 v_cvt_f64_f32_e32 v[6:7], v6
179 # CHECK-NEXT: 4. 1 0.0 0.0 0.0 v_cvt_u32_f64_e32 v8, v[8:9]
180 # CHECK-NEXT: 5. 1 0.0 0.0 0.0 v_cvt_f64_u32_e32 v[10:11], v10
181 # CHECK-NEXT: 6. 1 0.0 0.0 0.0 v_frexp_exp_i32_f64_e32 v0, v[0:1]
182 # CHECK-NEXT: 7. 1 0.0 0.0 0.0 v_frexp_mant_f64_e32 v[2:3], v[2:3]
183 # CHECK-NEXT: 8. 1 0.0 0.0 0.0 v_fract_f64_e32 v[4:5], v[4:5]
184 # CHECK-NEXT: 9. 1 0.0 0.0 0.0 v_trunc_f64_e32 v[0:1], v[0:1]
185 # CHECK-NEXT: 10. 1 0.0 0.0 0.0 v_ceil_f64_e32 v[2:3], v[2:3]
186 # CHECK-NEXT: 11. 1 0.0 0.0 0.0 v_rndne_f64_e32 v[4:5], v[4:5]
187 # CHECK-NEXT: 12. 1 0.0 0.0 0.0 v_floor_f64_e32 v[6:7], v[6:7]
188 # CHECK-NEXT: 13. 1 0.0 0.0 0.0 v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
189 # CHECK-NEXT: 14. 1 0.0 0.0 0.0 v_add_f64 v[2:3], v[2:3], v[2:3]
190 # CHECK-NEXT: 15. 1 0.0 0.0 0.0 v_mul_f64 v[4:5], v[4:5], v[4:5]
191 # CHECK-NEXT: 16. 1 0.0 0.0 0.0 v_min_f64 v[6:7], v[6:7], v[6:7]
192 # CHECK-NEXT: 17. 1 0.0 0.0 0.0 v_max_f64 v[8:9], v[8:9], v[8:9]
193 # CHECK-NEXT: 18. 1 0.0 0.0 0.0 v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
194 # CHECK-NEXT: 19. 1 0.0 0.0 0.0 v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
195 # CHECK-NEXT: 20. 1 0.0 0.0 0.0 v_ldexp_f64 v[2:3], v[2:3], v0
196 # CHECK-NEXT: 21. 1 0.0 0.0 0.0 v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
197 # CHECK-NEXT: 22. 1 0.0 0.0 0.0 v_trig_preop_f64 v[2:3], v[2:3], v0
198 # CHECK-NEXT: 23. 1 0.0 0.0 0.0 v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
199 # CHECK-NEXT: 24. 1 0.0 0.0 0.0 v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
200 # CHECK-NEXT: 25. 1 0.0 0.0 0.0 v_rcp_f64_e32 v[0:1], v[0:1]
201 # CHECK-NEXT: 26. 1 0.0 0.0 0.0 v_rsq_f64_e32 v[2:3], v[2:3]
202 # CHECK-NEXT: 27. 1 0.0 0.0 0.0 v_sqrt_f64_e32 v[4:5], v[4:5]
203 # CHECK-NEXT: 1 0.0 0.0 0.0 <total>