llvm/utils/llvm-locstats/llvm-locstats.py

   1 #!/usr/bin/env python
   2 #
   3 # This is a tool that works like debug location coverage calculator.
   4 # It parses the llvm-dwarfdump --statistics output by reporting it
   5 # in a more human readable way.
   6 #
   7
   8 from __future__ import print_function
   9 import argparse
  10 import os
  11 import sys
  12 from json import loads
  13 from math import ceil
  14 from collections import OrderedDict
  15 from subprocess import Popen, PIPE
  16
# Sentinel stored in place of a numeric statistic whose JSON value is the
# string 'overflowed' (see init_field() in parse_locstats()). Code that does
# arithmetic on the statistics compares against this value first and
# propagates it instead of computing a number.
TAINT_VALUE = "tainted"
  19
  20 # Initialize the plot.
  21 def init_plot(plt):
  22   plt.title('Debug Location Statistics', fontweight='bold')
  23   plt.xlabel('location buckets')
  24   plt.ylabel('number of variables in the location buckets')
  25   plt.xticks(rotation=45, fontsize='x-small')
  26   plt.yticks()
  27
  28 # Finalize the plot.
  29 def finish_plot(plt):
  30   plt.legend()
  31   plt.grid(color='grey', which='major', axis='y', linestyle='-', linewidth=0.3)
  32   plt.savefig('locstats.png')
  33   print('The plot was saved within "locstats.png".')
  34
  35 # Holds the debug location statistics.
  36 class LocationStats:
  37   def __init__(self, file_name, variables_total, variables_total_locstats,
  38     variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
  39     variables_coverage_map):
  40     self.file_name = file_name
  41     self.variables_total = variables_total
  42     self.variables_total_locstats = variables_total_locstats
  43     self.variables_with_loc = variables_with_loc
  44     self.scope_bytes_covered = variables_scope_bytes_covered
  45     self.scope_bytes = variables_scope_bytes
  46     self.variables_coverage_map = variables_coverage_map
  47
  48   # Get the PC ranges coverage.
  49   def get_pc_coverage(self):
  50     if self.scope_bytes_covered == TAINT_VALUE or \
  51        self.scope_bytes == TAINT_VALUE:
  52       return TAINT_VALUE
  53     pc_ranges_covered = int(ceil(self.scope_bytes_covered * 100.0) \
  54                 / self.scope_bytes)
  55     return pc_ranges_covered
  56
  57   # Pretty print the debug location buckets.
  58   def pretty_print(self):
  59     if self.scope_bytes == 0:
  60       print ('No scope bytes found.')
  61       return -1
  62
  63     pc_ranges_covered = self.get_pc_coverage()
  64     variables_coverage_per_map = {}
  65     for cov_bucket in coverage_buckets():
  66       variables_coverage_per_map[cov_bucket] = None
  67       if self.variables_coverage_map[cov_bucket] == TAINT_VALUE or \
  68          self.variables_total_locstats == TAINT_VALUE:
  69         variables_coverage_per_map[cov_bucket] = TAINT_VALUE
  70       else:
  71         variables_coverage_per_map[cov_bucket] = \
  72           int(ceil(self.variables_coverage_map[cov_bucket] * 100.0) \
  73                    / self.variables_total_locstats)
  74
  75     print (' =================================================')
  76     print ('            Debug Location Statistics       ')
  77     print (' =================================================')
  78     print ('     cov%           samples         percentage(~)  ')
  79     print (' -------------------------------------------------')
  80     for cov_bucket in coverage_buckets():
  81       if self.variables_coverage_map[cov_bucket] or \
  82          self.variables_total_locstats == TAINT_VALUE:
  83         print ('   {0:10}     {1:8}              {2:3}%'. \
  84           format(cov_bucket, self.variables_coverage_map[cov_bucket], \
  85                  variables_coverage_per_map[cov_bucket]))
  86       else:
  87         print ('   {0:10}     {1:8d}              {2:3d}%'. \
  88           format(cov_bucket, self.variables_coverage_map[cov_bucket], \
  89                  variables_coverage_per_map[cov_bucket]))
  90     print (' =================================================')
  91     print (' -the number of debug variables processed: ' \
  92       + str(self.variables_total_locstats))
  93     print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')
  94
  95     # Only if we are processing all the variables output the total
  96     # availability.
  97     if self.variables_total and self.variables_with_loc:
  98       total_availability = None
  99       if self.variables_total == TAINT_VALUE or \
 100          self.variables_with_loc == TAINT_VALUE:
 101         total_availability = TAINT_VALUE
 102       else:
 103         total_availability = int(ceil(self.variables_with_loc * 100.0) \
 104                                       / self.variables_total)
 105       print (' -------------------------------------------------')
 106       print (' -total availability: ' + str(total_availability) + '%')
 107     print (' =================================================')
 108
 109     return 0
 110
 111   # Draw a plot representing the location buckets.
 112   def draw_plot(self):
 113     from matplotlib import pyplot as plt
 114
 115     buckets = range(len(self.variables_coverage_map))
 116     plt.figure(figsize=(12, 8))
 117     init_plot(plt)
 118     plt.bar(buckets, self.variables_coverage_map.values(), align='center',
 119             tick_label=self.variables_coverage_map.keys(),
 120             label='variables of {}'.format(self.file_name))
 121
 122     # Place the text box with the coverage info.
 123     pc_ranges_covered = self.get_pc_coverage()
 124     props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
 125     plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
 126              transform=plt.gca().transAxes, fontsize=12,
 127              verticalalignment='top', bbox=props)
 128
 129     finish_plot(plt)
 130
 131   # Compare the two LocationStats objects and draw a plot showing
 132   # the difference.
 133   def draw_location_diff(self, locstats_to_compare):
 134     from matplotlib import pyplot as plt
 135
 136     pc_ranges_covered = self.get_pc_coverage()
 137     pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()
 138
 139     buckets = range(len(self.variables_coverage_map))
 140     buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))
 141
 142     fig = plt.figure(figsize=(12, 8))
 143     ax = fig.add_subplot(111)
 144     init_plot(plt)
 145
 146     comparison_keys = list(coverage_buckets())
 147     ax.bar(buckets, self.variables_coverage_map.values(), align='edge',
 148            width=0.4,
 149            label='variables of {}'.format(self.file_name))
 150     ax.bar(buckets_to_compare,
 151            locstats_to_compare.variables_coverage_map.values(),
 152            color='r', align='edge', width=-0.4,
 153            label='variables of {}'.format(locstats_to_compare.file_name))
 154     ax.set_xticks(range(len(comparison_keys)))
 155     ax.set_xticklabels(comparison_keys)
 156
 157     props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
 158     plt.text(0.02, 0.88,
 159              '{} PC ranges covered: {}%'. \
 160              format(self.file_name, pc_ranges_covered),
 161              transform=plt.gca().transAxes, fontsize=12,
 162              verticalalignment='top', bbox=props)
 163     plt.text(0.02, 0.83,
 164              '{} PC ranges covered: {}%'. \
 165              format(locstats_to_compare.file_name,
 166                     pc_ranges_covered_to_compare),
 167              transform=plt.gca().transAxes, fontsize=12,
 168              verticalalignment='top', bbox=props)
 169
 170     finish_plot(plt)
 171
 172 # Define the location buckets.
 173 def coverage_buckets():
 174   yield '0%'
 175   yield '(0%,10%)'
 176   for start in range(10, 91, 10):
 177     yield '[{0}%,{1}%)'.format(start, start + 10)
 178   yield '100%'
 179
 180 # Parse the JSON representing the debug statistics, and create a
 181 # LocationStats object.
 182 def parse_locstats(opts, binary):
 183   # These will be different due to different options enabled.
 184   variables_total = None
 185   variables_total_locstats = None
 186   variables_with_loc = None
 187   variables_scope_bytes_covered = None
 188   variables_scope_bytes = None
 189   variables_scope_bytes_entry_values = None
 190   variables_coverage_map = OrderedDict()
 191
 192   # Get the directory of the LLVM tools.
 193   llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
 194                                     "llvm-dwarfdump")
 195   # The statistics llvm-dwarfdump option.
 196   llvm_dwarfdump_stats_opt = "--statistics"
 197
 198   # Generate the stats with the llvm-dwarfdump.
 199   subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
 200                   stdin=PIPE, stdout=PIPE, stderr=PIPE, \
 201                   universal_newlines = True)
 202   cmd_stdout, cmd_stderr = subproc.communicate()
 203
 204   # TODO: Handle errors that are coming from llvm-dwarfdump.
 205
 206   # Get the JSON and parse it.
 207   json_parsed = None
 208
 209   try:
 210     json_parsed = loads(cmd_stdout)
 211   except:
 212     print ('error: No valid llvm-dwarfdump statistics found.')
 213     sys.exit(1)
 214
 215   # TODO: Parse the statistics Version from JSON.
 216
 217   def init_field(name):
 218     if json_parsed[name] == 'overflowed':
 219       print ('warning: "' + name + '" field overflowed.')
 220       return TAINT_VALUE
 221     return json_parsed[name]
 222
 223   if opts.only_variables:
 224     # Read the JSON only for local variables.
 225     variables_total_locstats = \
 226       init_field('#local vars processed by location statistics')
 227     variables_scope_bytes_covered = \
 228       init_field('sum_all_local_vars(#bytes in parent scope covered' \
 229                   ' by DW_AT_location)')
 230     variables_scope_bytes = \
 231       init_field('sum_all_local_vars(#bytes in parent scope)')
 232     if not opts.ignore_debug_entry_values:
 233       for cov_bucket in coverage_buckets():
 234         cov_category = "#local vars with {} of parent scope covered " \
 235                        "by DW_AT_location".format(cov_bucket)
 236         variables_coverage_map[cov_bucket] = init_field(cov_category)
 237     else:
 238       variables_scope_bytes_entry_values = \
 239         init_field('sum_all_local_vars(#bytes in parent scope ' \
 240                     'covered by DW_OP_entry_value)')
 241       if variables_scope_bytes_covered != TAINT_VALUE and \
 242          variables_scope_bytes_entry_values != TAINT_VALUE:
 243         variables_scope_bytes_covered = variables_scope_bytes_covered \
 244            - variables_scope_bytes_entry_values
 245       for cov_bucket in coverage_buckets():
 246         cov_category = \
 247           "#local vars - entry values with {} of parent scope " \
 248           "covered by DW_AT_location".format(cov_bucket)
 249         variables_coverage_map[cov_bucket] = init_field(cov_category)
 250   elif opts.only_formal_parameters:
 251     # Read the JSON only for formal parameters.
 252     variables_total_locstats = \
 253       init_field('#params processed by location statistics')
 254     variables_scope_bytes_covered = \
 255       init_field('sum_all_params(#bytes in parent scope covered ' \
 256                   'by DW_AT_location)')
 257     variables_scope_bytes = \
 258       init_field('sum_all_params(#bytes in parent scope)')
 259     if not opts.ignore_debug_entry_values:
 260       for cov_bucket in coverage_buckets():
 261         cov_category = "#params with {} of parent scope covered " \
 262                        "by DW_AT_location".format(cov_bucket)
 263         variables_coverage_map[cov_bucket] = init_field(cov_category)
 264     else:
 265       variables_scope_bytes_entry_values = \
 266         init_field('sum_all_params(#bytes in parent scope covered ' \
 267                     'by DW_OP_entry_value)')
 268       if variables_scope_bytes_covered != TAINT_VALUE and \
 269          variables_scope_bytes_entry_values != TAINT_VALUE:
 270         variables_scope_bytes_covered = variables_scope_bytes_covered \
 271           - variables_scope_bytes_entry_values
 272       for cov_bucket in coverage_buckets():
 273         cov_category = \
 274           "#params - entry values with {} of parent scope covered" \
 275           " by DW_AT_location".format(cov_bucket)
 276         variables_coverage_map[cov_bucket] = init_field(cov_category)
 277   else:
 278     # Read the JSON for both local variables and formal parameters.
 279     variables_total = \
 280       init_field('#source variables')
 281     variables_with_loc = init_field('#source variables with location')
 282     variables_total_locstats = \
 283       init_field('#variables processed by location statistics')
 284     variables_scope_bytes_covered = \
 285       init_field('sum_all_variables(#bytes in parent scope covered ' \
 286                   'by DW_AT_location)')
 287     variables_scope_bytes = \
 288       init_field('sum_all_variables(#bytes in parent scope)')
 289
 290     if not opts.ignore_debug_entry_values:
 291       for cov_bucket in coverage_buckets():
 292         cov_category = "#variables with {} of parent scope covered " \
 293                        "by DW_AT_location".format(cov_bucket)
 294         variables_coverage_map[cov_bucket] = init_field(cov_category)
 295     else:
 296       variables_scope_bytes_entry_values = \
 297         init_field('sum_all_variables(#bytes in parent scope covered ' \
 298                     'by DW_OP_entry_value)')
 299       if variables_scope_bytes_covered != TAINT_VALUE and \
 300          variables_scope_bytes_entry_values != TAINT_VALUE:
 301         variables_scope_bytes_covered = variables_scope_bytes_covered \
 302           - variables_scope_bytes_entry_values
 303       for cov_bucket in coverage_buckets():
 304         cov_category = \
 305           "#variables - entry values with {} of parent scope covered " \
 306           "by DW_AT_location".format(cov_bucket)
 307         variables_coverage_map[cov_bucket] = init_field(cov_category)
 308
 309   return LocationStats(binary, variables_total, variables_total_locstats,
 310                        variables_with_loc, variables_scope_bytes_covered,
 311                        variables_scope_bytes, variables_coverage_map)
 312
 313 # Parse the program arguments.
 314 def parse_program_args(parser):
 315   parser.add_argument('--only-variables', action='store_true', default=False,
 316             help='calculate the location statistics only for local variables')
 317   parser.add_argument('--only-formal-parameters', action='store_true',
 318             default=False,
 319             help='calculate the location statistics only for formal parameters')
 320   parser.add_argument('--ignore-debug-entry-values', action='store_true',
 321             default=False,
 322             help='ignore the location statistics on locations with '
 323                  'entry values')
 324   parser.add_argument('--draw-plot', action='store_true', default=False,
 325             help='show histogram of location buckets generated (requires '
 326                  'matplotlib)')
 327   parser.add_argument('--compare', action='store_true', default=False,
 328             help='compare the debug location coverage on two files provided, '
 329                  'and draw a plot showing the difference  (requires '
 330                  'matplotlib)')
 331   parser.add_argument('file_names', nargs='+', type=str, help='file to process')
 332
 333   return parser.parse_args()
 334
 335 # Verify that the program inputs meet the requirements.
 336 def verify_program_inputs(opts):
 337   if len(sys.argv) < 2:
 338     print ('error: Too few arguments.')
 339     return False
 340
 341   if opts.only_variables and opts.only_formal_parameters:
 342     print ('error: Please use just one --only* option.')
 343     return False
 344
 345   if not opts.compare and len(opts.file_names) != 1:
 346     print ('error: Please specify only one file to process.')
 347     return False
 348
 349   if opts.compare and len(opts.file_names) != 2:
 350     print ('error: Please specify two files to process.')
 351     return False
 352
 353   if opts.draw_plot or opts.compare:
 354     try:
 355       import matplotlib
 356     except ImportError:
 357       print('error: matplotlib not found.')
 358       return False
 359
 360   return True
 361
 362 def Main():
 363   parser = argparse.ArgumentParser()
 364   opts = parse_program_args(parser)
 365
 366   if not verify_program_inputs(opts):
 367     parser.print_help()
 368     sys.exit(1)
 369
 370   binary_file = opts.file_names[0]
 371   locstats = parse_locstats(opts, binary_file)
 372
 373   if not opts.compare:
 374     if opts.draw_plot:
 375       # Draw a histogram representing the location buckets.
 376       locstats.draw_plot()
 377     else:
 378       # Pretty print collected info on the standard output.
 379       if locstats.pretty_print() == -1:
 380         sys.exit(0)
 381   else:
 382     binary_file_to_compare = opts.file_names[1]
 383     locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
 384     # Draw a plot showing the difference in debug location coverage between
 385     # two files.
 386     locstats.draw_location_diff(locstats_to_compare)
 387
# Run the tool only when this file is executed as a script (not when it is
# imported), and finish with exit status 0 once Main() returns.
if __name__ == '__main__':
  Main()
  sys.exit(0)