Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / utils / llvm-locstats / llvm-locstats.py
blob454488064106e3fdc1dc97c9238a78fa4ff78e5d
1 #!/usr/bin/env python
# This tool is a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable form.
8 from __future__ import print_function
9 import argparse
10 import os
11 import sys
12 from json import loads
13 from math import ceil
14 from collections import OrderedDict
15 from subprocess import Popen, PIPE
# This special value has been used to mark statistics that overflowed.
# parse_locstats() substitutes it for any JSON field whose value is the
# string "overflowed"; code that does arithmetic on a statistic compares
# against this marker first and propagates it instead of computing.
TAINT_VALUE = "tainted"
# Initialize the plot: set the title, the axis captions and the tick
# appearance on the pyplot-like module passed in as `plt`.
def init_plot(plt):
    title_style = {"fontweight": "bold"}
    plt.title("Debug Location Statistics", **title_style)

    plt.xlabel("location buckets")
    plt.ylabel("number of variables in the location buckets")

    # Bucket names are long, so the x tick labels are rotated and shrunk.
    x_tick_style = {"rotation": 45, "fontsize": "x-small"}
    plt.xticks(**x_tick_style)
    plt.yticks()
# Finalize the plot: add the legend and horizontal grid lines, write the
# figure to "locstats.png" in the current directory and tell the user.
def finish_plot(plt):
    plt.legend()

    # Thin grey lines on the major y ticks only.
    grid_style = {
        "color": "grey",
        "which": "major",
        "axis": "y",
        "linestyle": "-",
        "linewidth": 0.3,
    }
    plt.grid(**grid_style)

    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')
# Holds the debug location statistics.
class LocationStats:
    # Store the statistics collected for one file.
    #
    #   file_name                      name used in the plot labels
    #   variables_total                number of source variables, or None
    #                                  when only a subset was processed
    #   variables_total_locstats       number of variables the location
    #                                  statistics were computed for
    #   variables_with_loc             number of variables with a location,
    #                                  or None when only a subset was
    #                                  processed
    #   variables_scope_bytes_covered  scope bytes covered by a location
    #   variables_scope_bytes          total scope bytes
    #   variables_coverage_map         mapping bucket name -> variable count
    #
    # Any of the numeric values may be TAINT_VALUE instead of a number.
    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    # Express `part` as an integer percentage of `whole`.
    #
    # Returns TAINT_VALUE when either operand is tainted and 0 when `whole`
    # is zero or missing, so callers never divide by zero. ceil() is applied
    # to the scaled numerator before the division, hence the quotient is
    # truncated by int() rather than rounded up.
    @staticmethod
    def _percentage(part, whole):
        if part == TAINT_VALUE or whole == TAINT_VALUE:
            return TAINT_VALUE
        if not whole:
            return 0
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    #
    # Returns the covered scope bytes as an integer percentage of all scope
    # bytes, TAINT_VALUE if either counter is tainted, or 0 if there are no
    # scope bytes at all.
    def get_pc_coverage(self):
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    #
    # Returns -1 (after printing a notice) when there are no scope bytes,
    # and 0 once the table has been printed.
    def pretty_print(self):
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        # Percentage of the processed variables that fall in each bucket.
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats,
            )

        # NOTE(review): the column spacing inside these literals looks
        # collapsed -- confirm against the reference output before release.
        print(" =================================================")
        print(" Debug Location Statistics ")
        print(" =================================================")
        print(" cov% samples percentage(~) ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            # A tainted row holds the marker string, which the integer
            # format codes would reject, so it gets the untyped format.
            if (
                self.variables_coverage_map[cov_bucket] == TAINT_VALUE
                or self.variables_total_locstats == TAINT_VALUE
            ):
                print(
                    " {0:10} {1:8} {2:3}%".format(
                        cov_bucket,
                        self.variables_coverage_map[cov_bucket],
                        variables_coverage_per_map[cov_bucket],
                    )
                )
            else:
                print(
                    " {0:10} {1:8d} {2:3d}%".format(
                        cov_bucket,
                        self.variables_coverage_map[cov_bucket],
                        variables_coverage_per_map[cov_bucket],
                    )
                )
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability. variables_with_loc is tested against None so that a
        # genuine count of zero is still reported (as 0%); a zero or missing
        # variables_total leaves nothing to report.
        if self.variables_total and self.variables_with_loc is not None:
            total_availability = self._percentage(
                self.variables_with_loc, self.variables_total
            )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    # Draw a plot representing the location buckets.
    #
    # matplotlib is imported here so that it is only required when a plot
    # is actually requested.
    def draw_plot(self):
        from matplotlib import pyplot as plt

        buckets = range(len(self.variables_coverage_map))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # The dict views are materialised so that plain sequences are
        # handed to matplotlib.
        plt.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="center",
            tick_label=list(self.variables_coverage_map.keys()),
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    #
    # `locstats_to_compare` is the other LocationStats object; its bars are
    # drawn in red next to the bars of this object.
    def draw_location_diff(self, locstats_to_compare):
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = range(len(self.variables_coverage_map))
        buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # Edge alignment with widths of opposite sign puts the two bars of
        # a bucket on either side of the same tick.
        ax.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            list(locstats_to_compare.variables_coverage_map.values()),
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        # One text box per file with its PC range coverage.
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)
# Define the location buckets.
#
# Generates the bucket names in ascending order: the exact "0%" bucket,
# the open "(0%,10%)" bucket, nine half-open ten-percent buckets from
# "[10%,20%)" to "[90%,100%)", and finally the exact "100%" bucket.
def coverage_buckets():
    yield "0%"
    yield "(0%,10%)"
    lower_bound = 10
    while lower_bound <= 90:
        upper_bound = lower_bound + 10
        yield "[{0}%,{1}%)".format(lower_bound, upper_bound)
        lower_bound = upper_bound
    yield "100%"
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
#
#   opts    parsed program options; only_variables, only_formal_parameters
#           and ignore_debug_entry_values are read here
#   binary  path of the file handed to llvm-dwarfdump
#
# Exits the program with status 1 when llvm-dwarfdump cannot be started or
# when its output is not valid JSON.
def parse_locstats(opts, binary):
    # These stay None unless all the variables are being processed.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        # The tool is missing or not executable; report it instead of
        # dying with a traceback.
        print("error: Unable to run llvm-dwarfdump: {}".format(err))
        sys.exit(1)
    cmd_stdout, _ = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. Only a decoding failure is treated as
    # "no statistics"; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    # Fetch one statistic, replacing an overflowed value with TAINT_VALUE
    # (and warning about it).
    def init_field(name):
        if json_parsed[name] == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The statistic names for local variables, formal parameters and all
    # variables follow one pattern and differ only in these two fragments:
    # `kind` appears in the "#<kind> ..." counters and `sum_kind` in the
    # "sum_all_<sum_kind>(...)" byte sums.
    if opts.only_variables:
        # Read the JSON only for local variables.
        kind, sum_kind = "local vars", "local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        kind, sum_kind = "params", "params"
    else:
        # Read the JSON for both local variables and formal parameters.
        kind, sum_kind = "variables", "variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    sum_prefix = "sum_all_{}(#bytes in parent scope".format(sum_kind)

    variables_total_locstats = init_field(
        "#{} processed by location statistics".format(kind)
    )
    variables_scope_bytes_covered = init_field(
        sum_prefix + " covered by DW_AT_location)"
    )
    variables_scope_bytes = init_field(sum_prefix + ")")

    if not opts.ignore_debug_entry_values:
        bucket_name = "#{} with {} of parent scope covered by DW_AT_location"
    else:
        # Discount the bytes that are covered only through entry values,
        # unless one of the two sums is unusable.
        variables_scope_bytes_entry_values = init_field(
            sum_prefix + " covered by DW_OP_entry_value)"
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered = (
                variables_scope_bytes_covered - variables_scope_bytes_entry_values
            )
        bucket_name = (
            "#{} - entry values with {} of parent scope covered "
            "by DW_AT_location"
        )

    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            bucket_name.format(kind, cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )
# Parse the program arguments.
#
# Registers the boolean switches and the positional file list on `parser`
# and returns the namespace produced by parser.parse_args().
def parse_program_args(parser):
    # (option, help text) for every on/off switch; all default to False.
    switches = (
        (
            "--only-variables",
            "calculate the location statistics only for local variables",
        ),
        (
            "--only-formal-parameters",
            "calculate the location statistics only for formal parameters",
        ),
        (
            "--ignore-debug-entry-values",
            "ignore the location statistics on locations with entry values",
        ),
        (
            "--draw-plot",
            "show histogram of location buckets generated (requires matplotlib)",
        ),
        (
            "--compare",
            "compare the debug location coverage on two files provided, "
            "and draw a plot showing the difference (requires "
            "matplotlib)",
        ),
    )
    for option, description in switches:
        parser.add_argument(
            option, action="store_true", default=False, help=description
        )

    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    return parser.parse_args()
# Verify that the program inputs meet the requirements.
#
# Prints an error and returns False for the first violated requirement;
# returns True when every check passes.
def verify_program_inputs(opts):
    if len(sys.argv) < 2:
        print("error: Too few arguments.")
        return False

    # The two --only* filters are mutually exclusive.
    if opts.only_variables and opts.only_formal_parameters:
        print("error: Please use just one --only* option.")
        return False

    # A comparison needs exactly two files, everything else exactly one.
    file_count = len(opts.file_names)
    if opts.compare:
        if file_count != 2:
            print("error: Please specify two files to process.")
            return False
    elif file_count != 1:
        print("error: Please specify only one file to process.")
        return False

    # matplotlib is only needed by the plotting modes.
    if not (opts.draw_plot or opts.compare):
        return True
    try:
        import matplotlib  # noqa: F401 -- availability probe only
    except ImportError:
        print("error: matplotlib not found.")
        return False

    return True
# Program entry point: parse and validate the command line, collect the
# statistics of the first file, then compare, plot or print them depending
# on the selected mode.
def Main():
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
    elif opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
    elif locstats.pretty_print() == -1:
        # Pretty printing found nothing to report; that is not an error.
        sys.exit(0)


if __name__ == "__main__":
    Main()
    sys.exit(0)