llvm/utils/llvm-locstats/llvm-locstats.py

   1 #!/usr/bin/env python3
   2 #
# This tool works as a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
   6 #
   7
   8 from __future__ import print_function
   9 import argparse
  10 import os
  11 import sys
  12 from json import loads
  13 from math import ceil
  14 from collections import OrderedDict
  15 from subprocess import Popen, PIPE
  16
# Marker stored in place of a number for any statistic that llvm-dwarfdump
# reported as overflowed; values derived from it are reported as tainted too.
TAINT_VALUE = "tainted"
  19
  20 # Initialize the plot.
  21 def init_plot(plt):
  22     plt.title("Debug Location Statistics", fontweight="bold")
  23     plt.xlabel("location buckets")
  24     plt.ylabel("number of variables in the location buckets")
  25     plt.xticks(rotation=45, fontsize="x-small")
  26     plt.yticks()
  27
  28
  29 # Finalize the plot.
  30 def finish_plot(plt):
  31     plt.legend()
  32     plt.grid(color="grey", which="major", axis="y", linestyle="-", linewidth=0.3)
  33     plt.savefig("locstats.png")
  34     print('The plot was saved within "locstats.png".')
  35
  36
  37 # Holds the debug location statistics.
  38 class LocationStats:
  39     def __init__(
  40         self,
  41         file_name,
  42         variables_total,
  43         variables_total_locstats,
  44         variables_with_loc,
  45         variables_scope_bytes_covered,
  46         variables_scope_bytes,
  47         variables_coverage_map,
  48     ):
  49         self.file_name = file_name
  50         self.variables_total = variables_total
  51         self.variables_total_locstats = variables_total_locstats
  52         self.variables_with_loc = variables_with_loc
  53         self.scope_bytes_covered = variables_scope_bytes_covered
  54         self.scope_bytes = variables_scope_bytes
  55         self.variables_coverage_map = variables_coverage_map
  56
  57     # Get the PC ranges coverage.
  58     def get_pc_coverage(self):
  59         if self.scope_bytes_covered == TAINT_VALUE or self.scope_bytes == TAINT_VALUE:
  60             return TAINT_VALUE
  61         pc_ranges_covered = int(
  62             ceil(self.scope_bytes_covered * 100.0) / self.scope_bytes
  63         )
  64         return pc_ranges_covered
  65
  66     # Pretty print the debug location buckets.
  67     def pretty_print(self):
  68         if self.scope_bytes == 0:
  69             print("No scope bytes found.")
  70             return -1
  71
  72         pc_ranges_covered = self.get_pc_coverage()
  73         variables_coverage_per_map = {}
  74         for cov_bucket in coverage_buckets():
  75             variables_coverage_per_map[cov_bucket] = None
  76             if (
  77                 self.variables_coverage_map[cov_bucket] == TAINT_VALUE
  78                 or self.variables_total_locstats == TAINT_VALUE
  79             ):
  80                 variables_coverage_per_map[cov_bucket] = TAINT_VALUE
  81             else:
  82                 variables_coverage_per_map[cov_bucket] = int(
  83                     ceil(self.variables_coverage_map[cov_bucket] * 100.0)
  84                     / self.variables_total_locstats
  85                 )
  86
  87         print(" =================================================")
  88         print("            Debug Location Statistics       ")
  89         print(" =================================================")
  90         print("     cov%           samples         percentage(~)  ")
  91         print(" -------------------------------------------------")
  92         for cov_bucket in coverage_buckets():
  93             if (
  94                 self.variables_coverage_map[cov_bucket]
  95                 or self.variables_total_locstats == TAINT_VALUE
  96             ):
  97                 print(
  98                     "   {0:10}     {1:8}              {2:3}%".format(
  99                         cov_bucket,
 100                         self.variables_coverage_map[cov_bucket],
 101                         variables_coverage_per_map[cov_bucket],
 102                     )
 103                 )
 104             else:
 105                 print(
 106                     "   {0:10}     {1:8d}              {2:3d}%".format(
 107                         cov_bucket,
 108                         self.variables_coverage_map[cov_bucket],
 109                         variables_coverage_per_map[cov_bucket],
 110                     )
 111                 )
 112         print(" =================================================")
 113         print(
 114             " -the number of debug variables processed: "
 115             + str(self.variables_total_locstats)
 116         )
 117         print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")
 118
 119         # Only if we are processing all the variables output the total
 120         # availability.
 121         if self.variables_total and self.variables_with_loc:
 122             total_availability = None
 123             if (
 124                 self.variables_total == TAINT_VALUE
 125                 or self.variables_with_loc == TAINT_VALUE
 126             ):
 127                 total_availability = TAINT_VALUE
 128             else:
 129                 total_availability = int(
 130                     ceil(self.variables_with_loc * 100.0) / self.variables_total
 131                 )
 132             print(" -------------------------------------------------")
 133             print(" -total availability: " + str(total_availability) + "%")
 134         print(" =================================================")
 135
 136         return 0
 137
 138     # Draw a plot representing the location buckets.
 139     def draw_plot(self):
 140         from matplotlib import pyplot as plt
 141
 142         buckets = range(len(self.variables_coverage_map))
 143         plt.figure(figsize=(12, 8))
 144         init_plot(plt)
 145         plt.bar(
 146             buckets,
 147             self.variables_coverage_map.values(),
 148             align="center",
 149             tick_label=self.variables_coverage_map.keys(),
 150             label="variables of {}".format(self.file_name),
 151         )
 152
 153         # Place the text box with the coverage info.
 154         pc_ranges_covered = self.get_pc_coverage()
 155         props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
 156         plt.text(
 157             0.02,
 158             0.90,
 159             "PC ranges covered: {}%".format(pc_ranges_covered),
 160             transform=plt.gca().transAxes,
 161             fontsize=12,
 162             verticalalignment="top",
 163             bbox=props,
 164         )
 165
 166         finish_plot(plt)
 167
 168     # Compare the two LocationStats objects and draw a plot showing
 169     # the difference.
 170     def draw_location_diff(self, locstats_to_compare):
 171         from matplotlib import pyplot as plt
 172
 173         pc_ranges_covered = self.get_pc_coverage()
 174         pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()
 175
 176         buckets = range(len(self.variables_coverage_map))
 177         buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))
 178
 179         fig = plt.figure(figsize=(12, 8))
 180         ax = fig.add_subplot(111)
 181         init_plot(plt)
 182
 183         comparison_keys = list(coverage_buckets())
 184         ax.bar(
 185             buckets,
 186             self.variables_coverage_map.values(),
 187             align="edge",
 188             width=0.4,
 189             label="variables of {}".format(self.file_name),
 190         )
 191         ax.bar(
 192             buckets_to_compare,
 193             locstats_to_compare.variables_coverage_map.values(),
 194             color="r",
 195             align="edge",
 196             width=-0.4,
 197             label="variables of {}".format(locstats_to_compare.file_name),
 198         )
 199         ax.set_xticks(range(len(comparison_keys)))
 200         ax.set_xticklabels(comparison_keys)
 201
 202         props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
 203         plt.text(
 204             0.02,
 205             0.88,
 206             "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
 207             transform=plt.gca().transAxes,
 208             fontsize=12,
 209             verticalalignment="top",
 210             bbox=props,
 211         )
 212         plt.text(
 213             0.02,
 214             0.83,
 215             "{} PC ranges covered: {}%".format(
 216                 locstats_to_compare.file_name, pc_ranges_covered_to_compare
 217             ),
 218             transform=plt.gca().transAxes,
 219             fontsize=12,
 220             verticalalignment="top",
 221             bbox=props,
 222         )
 223
 224         finish_plot(plt)
 225
 226
 227 # Define the location buckets.
 228 def coverage_buckets():
 229     yield "0%"
 230     yield "(0%,10%)"
 231     for start in range(10, 91, 10):
 232         yield "[{0}%,{1}%)".format(start, start + 10)
 233     yield "100%"
 234
 235
 236 # Parse the JSON representing the debug statistics, and create a
 237 # LocationStats object.
 238 def parse_locstats(opts, binary):
 239     # These will be different due to different options enabled.
 240     variables_total = None
 241     variables_total_locstats = None
 242     variables_with_loc = None
 243     variables_scope_bytes_covered = None
 244     variables_scope_bytes = None
 245     variables_scope_bytes_entry_values = None
 246     variables_coverage_map = OrderedDict()
 247
 248     # Get the directory of the LLVM tools.
 249     llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
 250     # The statistics llvm-dwarfdump option.
 251     llvm_dwarfdump_stats_opt = "--statistics"
 252
 253     # Generate the stats with the llvm-dwarfdump.
 254     subproc = Popen(
 255         [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
 256         stdin=PIPE,
 257         stdout=PIPE,
 258         stderr=PIPE,
 259         universal_newlines=True,
 260     )
 261     cmd_stdout, cmd_stderr = subproc.communicate()
 262
 263     # TODO: Handle errors that are coming from llvm-dwarfdump.
 264
 265     # Get the JSON and parse it.
 266     json_parsed = None
 267
 268     try:
 269         json_parsed = loads(cmd_stdout)
 270     except:
 271         print("error: No valid llvm-dwarfdump statistics found.")
 272         sys.exit(1)
 273
 274     # TODO: Parse the statistics Version from JSON.
 275
 276     def init_field(name):
 277         if json_parsed[name] == "overflowed":
 278             print('warning: "' + name + '" field overflowed.')
 279             return TAINT_VALUE
 280         return json_parsed[name]
 281
 282     if opts.only_variables:
 283         # Read the JSON only for local variables.
 284         variables_total_locstats = init_field(
 285             "#local vars processed by location statistics"
 286         )
 287         variables_scope_bytes_covered = init_field(
 288             "sum_all_local_vars(#bytes in parent scope covered" " by DW_AT_location)"
 289         )
 290         variables_scope_bytes = init_field("sum_all_local_vars(#bytes in parent scope)")
 291         if not opts.ignore_debug_entry_values:
 292             for cov_bucket in coverage_buckets():
 293                 cov_category = (
 294                     "#local vars with {} of parent scope covered "
 295                     "by DW_AT_location".format(cov_bucket)
 296                 )
 297                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 298         else:
 299             variables_scope_bytes_entry_values = init_field(
 300                 "sum_all_local_vars(#bytes in parent scope "
 301                 "covered by DW_OP_entry_value)"
 302             )
 303             if (
 304                 variables_scope_bytes_covered != TAINT_VALUE
 305                 and variables_scope_bytes_entry_values != TAINT_VALUE
 306             ):
 307                 variables_scope_bytes_covered = (
 308                     variables_scope_bytes_covered - variables_scope_bytes_entry_values
 309                 )
 310             for cov_bucket in coverage_buckets():
 311                 cov_category = (
 312                     "#local vars - entry values with {} of parent scope "
 313                     "covered by DW_AT_location".format(cov_bucket)
 314                 )
 315                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 316     elif opts.only_formal_parameters:
 317         # Read the JSON only for formal parameters.
 318         variables_total_locstats = init_field(
 319             "#params processed by location statistics"
 320         )
 321         variables_scope_bytes_covered = init_field(
 322             "sum_all_params(#bytes in parent scope covered " "by DW_AT_location)"
 323         )
 324         variables_scope_bytes = init_field("sum_all_params(#bytes in parent scope)")
 325         if not opts.ignore_debug_entry_values:
 326             for cov_bucket in coverage_buckets():
 327                 cov_category = (
 328                     "#params with {} of parent scope covered "
 329                     "by DW_AT_location".format(cov_bucket)
 330                 )
 331                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 332         else:
 333             variables_scope_bytes_entry_values = init_field(
 334                 "sum_all_params(#bytes in parent scope covered " "by DW_OP_entry_value)"
 335             )
 336             if (
 337                 variables_scope_bytes_covered != TAINT_VALUE
 338                 and variables_scope_bytes_entry_values != TAINT_VALUE
 339             ):
 340                 variables_scope_bytes_covered = (
 341                     variables_scope_bytes_covered - variables_scope_bytes_entry_values
 342                 )
 343             for cov_bucket in coverage_buckets():
 344                 cov_category = (
 345                     "#params - entry values with {} of parent scope covered"
 346                     " by DW_AT_location".format(cov_bucket)
 347                 )
 348                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 349     else:
 350         # Read the JSON for both local variables and formal parameters.
 351         variables_total = init_field("#source variables")
 352         variables_with_loc = init_field("#source variables with location")
 353         variables_total_locstats = init_field(
 354             "#variables processed by location statistics"
 355         )
 356         variables_scope_bytes_covered = init_field(
 357             "sum_all_variables(#bytes in parent scope covered " "by DW_AT_location)"
 358         )
 359         variables_scope_bytes = init_field("sum_all_variables(#bytes in parent scope)")
 360
 361         if not opts.ignore_debug_entry_values:
 362             for cov_bucket in coverage_buckets():
 363                 cov_category = (
 364                     "#variables with {} of parent scope covered "
 365                     "by DW_AT_location".format(cov_bucket)
 366                 )
 367                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 368         else:
 369             variables_scope_bytes_entry_values = init_field(
 370                 "sum_all_variables(#bytes in parent scope covered "
 371                 "by DW_OP_entry_value)"
 372             )
 373             if (
 374                 variables_scope_bytes_covered != TAINT_VALUE
 375                 and variables_scope_bytes_entry_values != TAINT_VALUE
 376             ):
 377                 variables_scope_bytes_covered = (
 378                     variables_scope_bytes_covered - variables_scope_bytes_entry_values
 379                 )
 380             for cov_bucket in coverage_buckets():
 381                 cov_category = (
 382                     "#variables - entry values with {} of parent scope covered "
 383                     "by DW_AT_location".format(cov_bucket)
 384                 )
 385                 variables_coverage_map[cov_bucket] = init_field(cov_category)
 386
 387     return LocationStats(
 388         binary,
 389         variables_total,
 390         variables_total_locstats,
 391         variables_with_loc,
 392         variables_scope_bytes_covered,
 393         variables_scope_bytes,
 394         variables_coverage_map,
 395     )
 396
 397
 398 # Parse the program arguments.
 399 def parse_program_args(parser):
 400     parser.add_argument(
 401         "--only-variables",
 402         action="store_true",
 403         default=False,
 404         help="calculate the location statistics only for local variables",
 405     )
 406     parser.add_argument(
 407         "--only-formal-parameters",
 408         action="store_true",
 409         default=False,
 410         help="calculate the location statistics only for formal parameters",
 411     )
 412     parser.add_argument(
 413         "--ignore-debug-entry-values",
 414         action="store_true",
 415         default=False,
 416         help="ignore the location statistics on locations with " "entry values",
 417     )
 418     parser.add_argument(
 419         "--draw-plot",
 420         action="store_true",
 421         default=False,
 422         help="show histogram of location buckets generated (requires " "matplotlib)",
 423     )
 424     parser.add_argument(
 425         "--compare",
 426         action="store_true",
 427         default=False,
 428         help="compare the debug location coverage on two files provided, "
 429         "and draw a plot showing the difference  (requires "
 430         "matplotlib)",
 431     )
 432     parser.add_argument("file_names", nargs="+", type=str, help="file to process")
 433
 434     return parser.parse_args()
 435
 436
 437 # Verify that the program inputs meet the requirements.
 438 def verify_program_inputs(opts):
 439     if len(sys.argv) < 2:
 440         print("error: Too few arguments.")
 441         return False
 442
 443     if opts.only_variables and opts.only_formal_parameters:
 444         print("error: Please use just one --only* option.")
 445         return False
 446
 447     if not opts.compare and len(opts.file_names) != 1:
 448         print("error: Please specify only one file to process.")
 449         return False
 450
 451     if opts.compare and len(opts.file_names) != 2:
 452         print("error: Please specify two files to process.")
 453         return False
 454
 455     if opts.draw_plot or opts.compare:
 456         try:
 457             import matplotlib
 458         except ImportError:
 459             print("error: matplotlib not found.")
 460             return False
 461
 462     return True
 463
 464
 465 def Main():
 466     parser = argparse.ArgumentParser()
 467     opts = parse_program_args(parser)
 468
 469     if not verify_program_inputs(opts):
 470         parser.print_help()
 471         sys.exit(1)
 472
 473     binary_file = opts.file_names[0]
 474     locstats = parse_locstats(opts, binary_file)
 475
 476     if not opts.compare:
 477         if opts.draw_plot:
 478             # Draw a histogram representing the location buckets.
 479             locstats.draw_plot()
 480         else:
 481             # Pretty print collected info on the standard output.
 482             if locstats.pretty_print() == -1:
 483                 sys.exit(0)
 484     else:
 485         binary_file_to_compare = opts.file_names[1]
 486         locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
 487         # Draw a plot showing the difference in debug location coverage between
 488         # two files.
 489         locstats.draw_location_diff(locstats_to_compare)
 490
 491
# Run the tool only when this file is executed as a script, then exit with
# status 0.
if __name__ == "__main__":
    Main()
    sys.exit(0)