llvm/utils/llvm-locstats/llvm-locstats.py

   1 #!/usr/bin/env python
   2 #
# This tool is a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human readable way.
   6 #
   7
   8 from __future__ import print_function
   9 import argparse
  10 import os
  11 import sys
  12 from json import loads
  13 from math import ceil
  14 from collections import OrderedDict
  15 from subprocess import Popen, PIPE
  16
  17 # Initialize the plot.
  18 def init_plot(plt):
  19   plt.title('Debug Location Statistics', fontweight='bold')
  20   plt.xlabel('location buckets')
  21   plt.ylabel('number of variables in the location buckets')
  22   plt.xticks(rotation=45, fontsize='x-small')
  23   plt.yticks()
  24
  25 # Finalize the plot.
  26 def finish_plot(plt):
  27   plt.legend()
  28   plt.grid(color='grey', which='major', axis='y', linestyle='-', linewidth=0.3)
  29   plt.savefig('locstats.png')
  30   print('The plot was saved within "locstats.png".')
  31
  32 # Holds the debug location statistics.
  33 class LocationStats:
  34   def __init__(self, file_name, variables_total, variables_total_locstats,
  35     variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
  36     variables_coverage_map):
  37     self.file_name = file_name
  38     self.variables_total = variables_total
  39     self.variables_total_locstats = variables_total_locstats
  40     self.variables_with_loc = variables_with_loc
  41     self.scope_bytes_covered = variables_scope_bytes_covered
  42     self.scope_bytes = variables_scope_bytes
  43     self.variables_coverage_map = variables_coverage_map
  44
  45   # Get the PC ranges coverage.
  46   def get_pc_coverage(self):
  47     pc_ranges_covered = int(ceil(self.scope_bytes_covered * 100.0) \
  48                 / self.scope_bytes)
  49     return pc_ranges_covered
  50
  51   # Pretty print the debug location buckets.
  52   def pretty_print(self):
  53     if self.scope_bytes == 0:
  54       print ('No scope bytes found.')
  55       return -1
  56
  57     pc_ranges_covered = self.get_pc_coverage()
  58     variables_coverage_per_map = {}
  59     for cov_bucket in coverage_buckets():
  60       variables_coverage_per_map[cov_bucket] = \
  61         int(ceil(self.variables_coverage_map[cov_bucket] * 100.0) \
  62                  / self.variables_total_locstats)
  63
  64     print (' =================================================')
  65     print ('            Debug Location Statistics       ')
  66     print (' =================================================')
  67     print ('     cov%           samples         percentage(~)  ')
  68     print (' -------------------------------------------------')
  69     for cov_bucket in coverage_buckets():
  70       print ('   {0:10}     {1:8d}              {2:3d}%'. \
  71         format(cov_bucket, self.variables_coverage_map[cov_bucket], \
  72                variables_coverage_per_map[cov_bucket]))
  73     print (' =================================================')
  74     print (' -the number of debug variables processed: ' \
  75       + str(self.variables_total_locstats))
  76     print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')
  77
  78     # Only if we are processing all the variables output the total
  79     # availability.
  80     if self.variables_total and self.variables_with_loc:
  81       total_availability = int(ceil(self.variables_with_loc * 100.0) \
  82                                     / self.variables_total)
  83       print (' -------------------------------------------------')
  84       print (' -total availability: ' + str(total_availability) + '%')
  85     print (' =================================================')
  86
  87     return 0
  88
  89   # Draw a plot representing the location buckets.
  90   def draw_plot(self):
  91     from matplotlib import pyplot as plt
  92
  93     buckets = range(len(self.variables_coverage_map))
  94     plt.figure(figsize=(12, 8))
  95     init_plot(plt)
  96     plt.bar(buckets, self.variables_coverage_map.values(), align='center',
  97             tick_label=self.variables_coverage_map.keys(),
  98             label='variables of {}'.format(self.file_name))
  99
 100     # Place the text box with the coverage info.
 101     pc_ranges_covered = self.get_pc_coverage()
 102     props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
 103     plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
 104              transform=plt.gca().transAxes, fontsize=12,
 105              verticalalignment='top', bbox=props)
 106
 107     finish_plot(plt)
 108
 109   # Compare the two LocationStats objects and draw a plot showing
 110   # the difference.
 111   def draw_location_diff(self, locstats_to_compare):
 112     from matplotlib import pyplot as plt
 113
 114     pc_ranges_covered = self.get_pc_coverage()
 115     pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()
 116
 117     buckets = range(len(self.variables_coverage_map))
 118     buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))
 119
 120     fig = plt.figure(figsize=(12, 8))
 121     ax = fig.add_subplot(111)
 122     init_plot(plt)
 123
 124     comparison_keys = list(coverage_buckets())
 125     ax.bar(buckets, self.variables_coverage_map.values(), align='edge',
 126            width=0.4,
 127            label='variables of {}'.format(self.file_name))
 128     ax.bar(buckets_to_compare,
 129            locstats_to_compare.variables_coverage_map.values(),
 130            color='r', align='edge', width=-0.4,
 131            label='variables of {}'.format(locstats_to_compare.file_name))
 132     ax.set_xticks(range(len(comparison_keys)))
 133     ax.set_xticklabels(comparison_keys)
 134
 135     props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
 136     plt.text(0.02, 0.88,
 137              '{} PC ranges covered: {}%'. \
 138              format(self.file_name, pc_ranges_covered),
 139              transform=plt.gca().transAxes, fontsize=12,
 140              verticalalignment='top', bbox=props)
 141     plt.text(0.02, 0.83,
 142              '{} PC ranges covered: {}%'. \
 143              format(locstats_to_compare.file_name,
 144                     pc_ranges_covered_to_compare),
 145              transform=plt.gca().transAxes, fontsize=12,
 146              verticalalignment='top', bbox=props)
 147
 148     finish_plot(plt)
 149
 150 # Define the location buckets.
 151 def coverage_buckets():
 152   yield '0%'
 153   yield '(0%,10%)'
 154   for start in range(10, 91, 10):
 155     yield '[{0}%,{1}%)'.format(start, start + 10)
 156   yield '100%'
 157
 158 # Parse the JSON representing the debug statistics, and create a
 159 # LocationStats object.
 160 def parse_locstats(opts, binary):
 161   # These will be different due to different options enabled.
 162   variables_total = None
 163   variables_total_locstats = None
 164   variables_with_loc = None
 165   variables_scope_bytes_covered = None
 166   variables_scope_bytes = None
 167   variables_scope_bytes_entry_values = None
 168   variables_coverage_map = OrderedDict()
 169
 170   # Get the directory of the LLVM tools.
 171   llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
 172                                     "llvm-dwarfdump")
 173   # The statistics llvm-dwarfdump option.
 174   llvm_dwarfdump_stats_opt = "--statistics"
 175
 176   # Generate the stats with the llvm-dwarfdump.
 177   subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
 178                   stdin=PIPE, stdout=PIPE, stderr=PIPE, \
 179                   universal_newlines = True)
 180   cmd_stdout, cmd_stderr = subproc.communicate()
 181
 182   # Get the JSON and parse it.
 183   json_parsed = None
 184
 185   try:
 186     json_parsed = loads(cmd_stdout)
 187   except:
 188     print ('error: No valid llvm-dwarfdump statistics found.')
 189     sys.exit(1)
 190
 191   # TODO: Parse the statistics Version from JSON.
 192
 193   if opts.only_variables:
 194     # Read the JSON only for local variables.
 195     variables_total_locstats = \
 196       json_parsed['#local vars processed by location statistics']
 197     variables_scope_bytes_covered = \
 198       json_parsed['sum_all_local_vars(#bytes in parent scope covered' \
 199                   ' by DW_AT_location)']
 200     variables_scope_bytes = \
 201       json_parsed['sum_all_local_vars(#bytes in parent scope)']
 202     if not opts.ignore_debug_entry_values:
 203       for cov_bucket in coverage_buckets():
 204         cov_category = "#local vars with {} of parent scope covered " \
 205                        "by DW_AT_location".format(cov_bucket)
 206         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 207     else:
 208       variables_scope_bytes_entry_values = \
 209         json_parsed['sum_all_local_vars(#bytes in parent scope ' \
 210                     'covered by DW_OP_entry_value)']
 211       variables_scope_bytes_covered = variables_scope_bytes_covered \
 212          - variables_scope_bytes_entry_values
 213       for cov_bucket in coverage_buckets():
 214         cov_category = \
 215           "#local vars - entry values with {} of parent scope " \
 216           "covered by DW_AT_location".format(cov_bucket)
 217         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 218   elif opts.only_formal_parameters:
 219     # Read the JSON only for formal parameters.
 220     variables_total_locstats = \
 221       json_parsed['#params processed by location statistics']
 222     variables_scope_bytes_covered = \
 223       json_parsed['sum_all_params(#bytes in parent scope covered ' \
 224                   'by DW_AT_location)']
 225     variables_scope_bytes = \
 226       json_parsed['sum_all_params(#bytes in parent scope)']
 227     if not opts.ignore_debug_entry_values:
 228       for cov_bucket in coverage_buckets():
 229         cov_category = "#params with {} of parent scope covered " \
 230                        "by DW_AT_location".format(cov_bucket)
 231         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 232     else:
 233       variables_scope_bytes_entry_values = \
 234         json_parsed['sum_all_params(#bytes in parent scope covered ' \
 235                     'by DW_OP_entry_value)']
 236       variables_scope_bytes_covered = variables_scope_bytes_covered \
 237         - variables_scope_bytes_entry_values
 238       for cov_bucket in coverage_buckets():
 239         cov_category = \
 240           "#params - entry values with {} of parent scope covered" \
 241           " by DW_AT_location".format(cov_bucket)
 242         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 243   else:
 244     # Read the JSON for both local variables and formal parameters.
 245     variables_total = \
 246       json_parsed['#source variables']
 247     variables_with_loc = json_parsed['#source variables with location']
 248     variables_total_locstats = \
 249       json_parsed['#variables processed by location statistics']
 250     variables_scope_bytes_covered = \
 251       json_parsed['sum_all_variables(#bytes in parent scope covered ' \
 252                   'by DW_AT_location)']
 253     variables_scope_bytes = \
 254       json_parsed['sum_all_variables(#bytes in parent scope)']
 255     if not opts.ignore_debug_entry_values:
 256       for cov_bucket in coverage_buckets():
 257         cov_category = "#variables with {} of parent scope covered " \
 258                        "by DW_AT_location".format(cov_bucket)
 259         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 260     else:
 261       variables_scope_bytes_entry_values = \
 262         json_parsed['sum_all_variables(#bytes in parent scope covered ' \
 263                     'by DW_OP_entry_value)']
 264       variables_scope_bytes_covered = variables_scope_bytes_covered \
 265         - variables_scope_bytes_entry_values
 266       for cov_bucket in coverage_buckets():
 267         cov_category = \
 268           "#variables - entry values with {} of parent scope covered " \
 269           "by DW_AT_location".format(cov_bucket)
 270         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 271
 272   return LocationStats(binary, variables_total, variables_total_locstats,
 273                        variables_with_loc, variables_scope_bytes_covered,
 274                        variables_scope_bytes, variables_coverage_map)
 275
 276 # Parse the program arguments.
 277 def parse_program_args(parser):
 278   parser.add_argument('--only-variables', action='store_true', default=False,
 279             help='calculate the location statistics only for local variables')
 280   parser.add_argument('--only-formal-parameters', action='store_true',
 281             default=False,
 282             help='calculate the location statistics only for formal parameters')
 283   parser.add_argument('--ignore-debug-entry-values', action='store_true',
 284             default=False,
 285             help='ignore the location statistics on locations with '
 286                  'entry values')
 287   parser.add_argument('--draw-plot', action='store_true', default=False,
 288             help='show histogram of location buckets generated (requires '
 289                  'matplotlib)')
 290   parser.add_argument('--compare', action='store_true', default=False,
 291             help='compare the debug location coverage on two files provided, '
 292                  'and draw a plot showing the difference  (requires '
 293                  'matplotlib)')
 294   parser.add_argument('file_names', nargs='+', type=str, help='file to process')
 295
 296   return parser.parse_args()
 297
 298 # Verify that the program inputs meet the requirements.
 299 def verify_program_inputs(opts):
 300   if len(sys.argv) < 2:
 301     print ('error: Too few arguments.')
 302     return False
 303
 304   if opts.only_variables and opts.only_formal_parameters:
 305     print ('error: Please use just one --only* option.')
 306     return False
 307
 308   if not opts.compare and len(opts.file_names) != 1:
 309     print ('error: Please specify only one file to process.')
 310     return False
 311
 312   if opts.compare and len(opts.file_names) != 2:
 313     print ('error: Please specify two files to process.')
 314     return False
 315
 316   if opts.draw_plot or opts.compare:
 317     try:
 318       import matplotlib
 319     except ImportError:
 320       print('error: matplotlib not found.')
 321       return False
 322
 323   return True
 324
 325 def Main():
 326   parser = argparse.ArgumentParser()
 327   opts = parse_program_args(parser)
 328
 329   if not verify_program_inputs(opts):
 330     parser.print_help()
 331     sys.exit(1)
 332
 333   binary_file = opts.file_names[0]
 334   locstats = parse_locstats(opts, binary_file)
 335
 336   if not opts.compare:
 337     if opts.draw_plot:
 338       # Draw a histogram representing the location buckets.
 339       locstats.draw_plot()
 340     else:
 341       # Pretty print collected info on the standard output.
 342       if locstats.pretty_print() == -1:
 343         sys.exit(0)
 344   else:
 345     binary_file_to_compare = opts.file_names[1]
 346     locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
 347     # Draw a plot showing the difference in debug location coverage between
 348     # two files.
 349     locstats.draw_location_diff(locstats_to_compare)
 350
 351 if __name__ == '__main__':
 352   Main()
 353   sys.exit(0)