[docs] Fix build-docs.sh
[llvm-project.git] / llvm / utils / llvm-locstats / llvm-locstats.py
blobe86cf13df8c1d0fd08040eb7e271fbfb7c78f4a7
1 #!/usr/bin/env python
# This tool works as a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
8 from __future__ import print_function
9 import argparse
10 import os
11 import sys
12 from json import loads
13 from math import ceil
14 from collections import OrderedDict
15 from subprocess import Popen, PIPE
# Sentinel stored in place of any statistic whose JSON value was reported
# as overflowed; tainted statistics are never used in arithmetic.
TAINT_VALUE = 'tainted'
# Initialize the plot.
def init_plot(plt):
  """Set the title, axis labels and tick styling used by every plot.

  Args:
    plt: the pyplot object the caller draws with.
  """
  plt.title('Debug Location Statistics', fontweight='bold')

  # Axis captions, applied in x-then-y order.
  axis_captions = (
    (plt.xlabel, 'location buckets'),
    (plt.ylabel, 'number of variables in the location buckets'),
  )
  for set_caption, caption in axis_captions:
    set_caption(caption)

  # Bucket labels are long, so the x ticks are rotated and shrunk.
  x_tick_style = {'rotation': 45, 'fontsize': 'x-small'}
  plt.xticks(**x_tick_style)
  plt.yticks()
# Finalize the plot.
def finish_plot(plt):
  """Add the legend and the y-axis grid, then save the plot to disk.

  The figure is written to 'locstats.png' in the current working directory
  and a confirmation message is printed.

  Args:
    plt: the pyplot object the caller has been drawing with.
  """
  plt.legend()

  grid_style = {
    'color': 'grey',
    'which': 'major',
    'axis': 'y',
    'linestyle': '-',
    'linewidth': 0.3,
  }
  plt.grid(**grid_style)

  plt.savefig('locstats.png')
  print('The plot was saved within "locstats.png".')
# Holds the debug location statistics.
class LocationStats:
  """Debug location statistics gathered for one file.

  Every statistic is stored exactly as handed to the constructor. A
  statistic equal to TAINT_VALUE is considered unusable: it is reported
  as such and never used in arithmetic.
  """

  def __init__(self, file_name, variables_total, variables_total_locstats,
               variables_with_loc, variables_scope_bytes_covered,
               variables_scope_bytes, variables_coverage_map):
    """Store the statistics.

    Args:
      file_name: name of the file the statistics describe (used in plot
        labels).
      variables_total: total number of source variables; when falsy,
        pretty_print() omits the total availability line.
      variables_total_locstats: number of variables processed by the
        location statistics.
      variables_with_loc: number of source variables with a location; when
        falsy, pretty_print() omits the total availability line.
      variables_scope_bytes_covered: covered bytes of the parent scopes.
      variables_scope_bytes: total bytes of the parent scopes.
      variables_coverage_map: mapping from a coverage bucket label to the
        number of variables in that bucket.
    """
    self.file_name = file_name
    self.variables_total = variables_total
    self.variables_total_locstats = variables_total_locstats
    self.variables_with_loc = variables_with_loc
    self.scope_bytes_covered = variables_scope_bytes_covered
    self.scope_bytes = variables_scope_bytes
    self.variables_coverage_map = variables_coverage_map

  @staticmethod
  def _percentage(part, whole):
    """Return `part` as an integer percentage of `whole` (0 if `whole` is 0)."""
    if whole == 0:
      # Nothing to measure against; avoid a ZeroDivisionError.
      return 0
    # NOTE(review): ceil() only wraps the numerator, which is a no-op for
    # integer inputs, so int() truncates the quotient instead of rounding it
    # up. Kept as is so that the reported percentages do not change.
    return int(ceil(part * 100.0) / whole)

  # Get the PC ranges coverage.
  def get_pc_coverage(self):
    """Return the percentage of scope bytes that are covered.

    Returns:
      TAINT_VALUE if either byte counter is tainted, 0 when there are no
      scope bytes at all, otherwise the integer percentage.
    """
    if self.scope_bytes_covered == TAINT_VALUE or \
       self.scope_bytes == TAINT_VALUE:
      return TAINT_VALUE
    return self._percentage(self.scope_bytes_covered, self.scope_bytes)

  # Pretty print the debug location buckets.
  def pretty_print(self):
    """Print the location buckets as a table on the standard output.

    Returns:
      -1 (after printing a notice) when there are no scope bytes, 0 once
      the table has been printed.
    """
    if self.scope_bytes == 0:
      print('No scope bytes found.')
      return -1

    pc_ranges_covered = self.get_pc_coverage()

    # Work out the percentage of every bucket before printing anything. A
    # bucket is tainted when its own count or the overall total is tainted.
    total_is_tainted = self.variables_total_locstats == TAINT_VALUE
    variables_coverage_per_map = {}
    for cov_bucket in coverage_buckets():
      samples = self.variables_coverage_map[cov_bucket]
      if total_is_tainted or samples == TAINT_VALUE:
        variables_coverage_per_map[cov_bucket] = TAINT_VALUE
      else:
        variables_coverage_per_map[cov_bucket] = \
          self._percentage(samples, self.variables_total_locstats)

    print(' =================================================')
    print(' Debug Location Statistics ')
    print(' =================================================')
    print(' cov% samples percentage(~) ')
    print(' -------------------------------------------------')
    for cov_bucket in coverage_buckets():
      samples = self.variables_coverage_map[cov_bucket]
      percentage = variables_coverage_per_map[cov_bucket]
      if percentage == TAINT_VALUE:
        # At least one of the two values is the taint string, so the
        # integer-only 'd' presentation cannot be used for this row.
        row_format = ' {0:10} {1:8} {2:3}%'
      else:
        row_format = ' {0:10} {1:8d} {2:3d}%'
      print(row_format.format(cov_bucket, samples, percentage))
    print(' =================================================')
    print(' -the number of debug variables processed: ' \
          + str(self.variables_total_locstats))
    print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

    # Only if we are processing all the variables output the total
    # availability.
    if self.variables_total and self.variables_with_loc:
      if self.variables_total == TAINT_VALUE or \
         self.variables_with_loc == TAINT_VALUE:
        total_availability = TAINT_VALUE
      else:
        total_availability = self._percentage(self.variables_with_loc,
                                              self.variables_total)
      print(' -------------------------------------------------')
      print(' -total availability: ' + str(total_availability) + '%')
    print(' =================================================')

    return 0

  # Draw a plot representing the location buckets.
  def draw_plot(self):
    """Draw a bar chart of the location buckets and save it to disk."""
    from matplotlib import pyplot as plt

    # Materialize the dict views so plain sequences are handed to pyplot.
    bucket_labels = list(self.variables_coverage_map.keys())
    bucket_counts = list(self.variables_coverage_map.values())
    buckets = list(range(len(bucket_counts)))

    plt.figure(figsize=(12, 8))
    init_plot(plt)
    plt.bar(buckets, bucket_counts, align='center',
            tick_label=bucket_labels,
            label='variables of {}'.format(self.file_name))

    # Place the text box with the coverage info.
    pc_ranges_covered = self.get_pc_coverage()
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)

  # Compare the two LocationStats objects and draw a plot showing
  # the difference.
  def draw_location_diff(self, locstats_to_compare):
    """Draw the buckets of this object and of another one side by side.

    Args:
      locstats_to_compare: the LocationStats object to compare against.
    """
    from matplotlib import pyplot as plt

    pc_ranges_covered = self.get_pc_coverage()
    pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

    # Materialize the dict views so plain sequences are handed to pyplot.
    bucket_counts = list(self.variables_coverage_map.values())
    bucket_counts_to_compare = \
      list(locstats_to_compare.variables_coverage_map.values())
    buckets = list(range(len(bucket_counts)))
    buckets_to_compare = list(range(len(bucket_counts_to_compare)))

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    init_plot(plt)

    comparison_keys = list(coverage_buckets())
    # Opposite-signed widths with align='edge' put the two bars of a bucket
    # on either side of the same tick.
    ax.bar(buckets, bucket_counts, align='edge', width=0.4,
           label='variables of {}'.format(self.file_name))
    ax.bar(buckets_to_compare, bucket_counts_to_compare,
           color='r', align='edge', width=-0.4,
           label='variables of {}'.format(locstats_to_compare.file_name))
    ax.set_xticks(range(len(comparison_keys)))
    ax.set_xticklabels(comparison_keys)

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.88,
             '{} PC ranges covered: {}%'. \
               format(self.file_name, pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)
    plt.text(0.02, 0.83,
             '{} PC ranges covered: {}%'. \
               format(locstats_to_compare.file_name,
                      pc_ranges_covered_to_compare),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)
# Define the location buckets.
def coverage_buckets():
  """Yield the coverage bucket labels in ascending order.

  The sequence is '0%', '(0%,10%)', then one half-open ten-point range from
  '[10%,20%)' up to '[90%,100%)', and finally '100%'.
  """
  yield '0%'
  yield '(0%,10%)'

  lower_bound = 10
  while lower_bound < 100:
    upper_bound = lower_bound + 10
    yield '[{0}%,{1}%)'.format(lower_bound, upper_bound)
    lower_bound = upper_bound

  yield '100%'
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
def parse_locstats(opts, binary):
  """Run llvm-dwarfdump --statistics on `binary` and build a LocationStats.

  Args:
    opts: the parsed program options. `only_variables` and
      `only_formal_parameters` select which family of statistics is read;
      `ignore_debug_entry_values` selects the statistics that leave out
      locations with entry values.
    binary: the file to inspect.

  Returns:
    A LocationStats object describing `binary`. Statistics reported as
    'overflowed' are stored as TAINT_VALUE.

  The program exits with status 1 when llvm-dwarfdump cannot be started,
  when its output is not valid JSON, or when an expected statistics field
  is missing.
  """
  # These stay None unless all the variables are being processed.
  variables_total = None
  variables_with_loc = None
  variables_coverage_map = OrderedDict()

  # llvm-dwarfdump is looked up in the directory of this script.
  llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                    "llvm-dwarfdump")
  # The statistics llvm-dwarfdump option.
  llvm_dwarfdump_stats_opt = "--statistics"

  # Generate the stats with the llvm-dwarfdump.
  try:
    subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    universal_newlines=True)
  except OSError as err:
    # E.g. the tool is missing or is not executable.
    print('error: Unable to run "' + llvm_dwarfdump_cmd + '": ' + str(err))
    sys.exit(1)
  cmd_stdout, cmd_stderr = subproc.communicate()

  # TODO: Handle errors that are coming from llvm-dwarfdump.

  # Get the JSON and parse it. Only a decoding failure is handled here, so
  # that e.g. KeyboardInterrupt is not turned into a bogus error message.
  try:
    json_parsed = loads(cmd_stdout)
  except ValueError:
    print('error: No valid llvm-dwarfdump statistics found.')
    sys.exit(1)

  # TODO: Parse the statistics Version from JSON.

  def init_field(name):
    """Return the statistic called `name`, or TAINT_VALUE if it overflowed."""
    try:
      value = json_parsed[name]
    except (KeyError, TypeError):
      print('error: "' + name + '" field not found in the llvm-dwarfdump '
            'statistics.')
      sys.exit(1)
    if value == 'overflowed':
      print('warning: "' + name + '" field overflowed.')
      return TAINT_VALUE
    return value

  # The three families of statistics use the same field names apart from
  # the prefix of the counters and the prefix of the byte sums.
  if opts.only_variables:
    # Read the JSON only for local variables.
    count_prefix = '#local vars'
    sum_prefix = 'sum_all_local_vars'
  elif opts.only_formal_parameters:
    # Read the JSON only for formal parameters.
    count_prefix = '#params'
    sum_prefix = 'sum_all_params'
  else:
    # Read the JSON for both local variables and formal parameters.
    count_prefix = '#variables'
    sum_prefix = 'sum_all_variables'
    variables_total = init_field('#source variables')
    variables_with_loc = init_field('#source variables with location')

  variables_total_locstats = \
    init_field(count_prefix + ' processed by location statistics')
  variables_scope_bytes_covered = \
    init_field(sum_prefix +
               '(#bytes in parent scope covered by DW_AT_location)')
  variables_scope_bytes = \
    init_field(sum_prefix + '(#bytes in parent scope)')

  if not opts.ignore_debug_entry_values:
    bucket_field = count_prefix + \
      ' with {} of parent scope covered by DW_AT_location'
  else:
    variables_scope_bytes_entry_values = \
      init_field(sum_prefix +
                 '(#bytes in parent scope covered by DW_OP_entry_value)')
    # Take the entry value bytes out of the covered bytes, unless either
    # number is unusable.
    if variables_scope_bytes_covered != TAINT_VALUE and \
       variables_scope_bytes_entry_values != TAINT_VALUE:
      variables_scope_bytes_covered = variables_scope_bytes_covered \
        - variables_scope_bytes_entry_values
    bucket_field = count_prefix + \
      ' - entry values with {} of parent scope covered by DW_AT_location'

  for cov_bucket in coverage_buckets():
    variables_coverage_map[cov_bucket] = \
      init_field(bucket_field.format(cov_bucket))

  return LocationStats(binary, variables_total, variables_total_locstats,
                       variables_with_loc, variables_scope_bytes_covered,
                       variables_scope_bytes, variables_coverage_map)
# Parse the program arguments.
def parse_program_args(parser):
  """Register the tool's options on `parser` and parse the command line.

  Args:
    parser: the argparse.ArgumentParser to populate.

  Returns:
    The namespace produced by parser.parse_args(), which reads sys.argv.
  """
  # Every option is a boolean switch that is off unless given.
  switches = (
    ('--only-variables',
     'calculate the location statistics only for local variables'),
    ('--only-formal-parameters',
     'calculate the location statistics only for formal parameters'),
    ('--ignore-debug-entry-values',
     'ignore the location statistics on locations with entry values'),
    ('--draw-plot',
     'show histogram of location buckets generated (requires matplotlib)'),
    ('--compare',
     'compare the debug location coverage on two files provided, '
     'and draw a plot showing the difference (requires matplotlib)'),
  )
  for switch, description in switches:
    parser.add_argument(switch, action='store_true', default=False,
                        help=description)

  # One or more files; how many are acceptable is checked separately.
  parser.add_argument('file_names', nargs='+', type=str,
                      help='file to process')

  return parser.parse_args()
# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
  """Check that the parsed options form a valid invocation.

  The first violated requirement is reported on the standard output.

  Args:
    opts: the parsed program options.

  Returns:
    True when the inputs are acceptable, False otherwise.
  """
  problem = None
  if len(sys.argv) < 2:
    problem = 'error: Too few arguments.'
  elif opts.only_variables and opts.only_formal_parameters:
    problem = 'error: Please use just one --only* option.'
  elif not opts.compare and len(opts.file_names) != 1:
    problem = 'error: Please specify only one file to process.'
  elif opts.compare and len(opts.file_names) != 2:
    problem = 'error: Please specify two files to process.'

  if problem is not None:
    print(problem)
    return False

  # Both plotting modes need matplotlib, so make sure it can be imported.
  needs_matplotlib = opts.draw_plot or opts.compare
  if needs_matplotlib:
    try:
      import matplotlib
    except ImportError:
      print('error: matplotlib not found.')
      return False

  return True
def Main():
  """Parse and validate the command line, then print or plot the statistics.

  The program exits with status 1 (after printing the help text) when the
  inputs are invalid, and with status 0 when pretty printing reports that
  there is nothing to show.
  """
  parser = argparse.ArgumentParser()
  opts = parse_program_args(parser)

  if not verify_program_inputs(opts):
    parser.print_help()
    sys.exit(1)

  # The first file is processed in every mode.
  locstats = parse_locstats(opts, opts.file_names[0])

  if opts.compare:
    # Draw a plot showing the difference in debug location coverage between
    # two files.
    locstats_to_compare = parse_locstats(opts, opts.file_names[1])
    locstats.draw_location_diff(locstats_to_compare)
    return

  if opts.draw_plot:
    # Draw a histogram representing the location buckets.
    locstats.draw_plot()
    return

  # Pretty print collected info on the standard output.
  if locstats.pretty_print() == -1:
    sys.exit(0)
# Run the tool only when executed as a script; once Main() returns, the
# process always exits with status 0.
if __name__ == '__main__':
  Main()
  raise SystemExit(0)