# This tool works as a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable form.
8 from __future__
import print_function
12 from json
import loads
14 from collections
import OrderedDict
15 from subprocess
import Popen
, PIPE
# This special value is used to mark statistics that overflowed.
18 TAINT_VALUE
= "tainted"
20 # Initialize the plot.
22 plt
.title('Debug Location Statistics', fontweight
='bold')
23 plt
.xlabel('location buckets')
24 plt
.ylabel('number of variables in the location buckets')
25 plt
.xticks(rotation
=45, fontsize
='x-small')
31 plt
.grid(color
='grey', which
='major', axis
='y', linestyle
='-', linewidth
=0.3)
32 plt
.savefig('locstats.png')
33 print('The plot was saved within "locstats.png".')
35 # Holds the debug location statistics.
def __init__(self, file_name, variables_total, variables_total_locstats,
             variables_with_loc, variables_scope_bytes_covered,
             variables_scope_bytes, variables_coverage_map):
    """Record the statistics collected for one file on this instance.

    Every argument is stored unchanged.  The two scope-byte arguments are
    kept under shorter attribute names (``scope_bytes_covered`` and
    ``scope_bytes``); all other arguments keep their own name.
    """
    # Name of the file the statistics belong to.
    self.file_name = file_name

    # Variable counters.
    (self.variables_total,
     self.variables_total_locstats,
     self.variables_with_loc) = (variables_total,
                                 variables_total_locstats,
                                 variables_with_loc)

    # Scope byte counters (note the shortened attribute names).
    (self.scope_bytes_covered,
     self.scope_bytes) = (variables_scope_bytes_covered,
                          variables_scope_bytes)

    # Per-bucket counts, keyed by coverage bucket.
    self.variables_coverage_map = variables_coverage_map
48 # Get the PC ranges coverage.
49 def get_pc_coverage(self
):
50 if self
.scope_bytes_covered
== TAINT_VALUE
or \
51 self
.scope_bytes
== TAINT_VALUE
:
53 pc_ranges_covered
= int(ceil(self
.scope_bytes_covered
* 100.0) \
55 return pc_ranges_covered
57 # Pretty print the debug location buckets.
58 def pretty_print(self
):
59 if self
.scope_bytes
== 0:
60 print ('No scope bytes found.')
63 pc_ranges_covered
= self
.get_pc_coverage()
64 variables_coverage_per_map
= {}
65 for cov_bucket
in coverage_buckets():
66 variables_coverage_per_map
[cov_bucket
] = None
67 if self
.variables_coverage_map
[cov_bucket
] == TAINT_VALUE
or \
68 self
.variables_total_locstats
== TAINT_VALUE
:
69 variables_coverage_per_map
[cov_bucket
] = TAINT_VALUE
71 variables_coverage_per_map
[cov_bucket
] = \
72 int(ceil(self
.variables_coverage_map
[cov_bucket
] * 100.0) \
73 / self
.variables_total_locstats
)
75 print (' =================================================')
76 print (' Debug Location Statistics ')
77 print (' =================================================')
78 print (' cov% samples percentage(~) ')
79 print (' -------------------------------------------------')
80 for cov_bucket
in coverage_buckets():
81 if self
.variables_coverage_map
[cov_bucket
] or \
82 self
.variables_total_locstats
== TAINT_VALUE
:
83 print (' {0:10} {1:8} {2:3}%'. \
84 format(cov_bucket
, self
.variables_coverage_map
[cov_bucket
], \
85 variables_coverage_per_map
[cov_bucket
]))
87 print (' {0:10} {1:8d} {2:3d}%'. \
88 format(cov_bucket
, self
.variables_coverage_map
[cov_bucket
], \
89 variables_coverage_per_map
[cov_bucket
]))
90 print (' =================================================')
91 print (' -the number of debug variables processed: ' \
92 + str(self
.variables_total_locstats
))
93 print (' -PC ranges covered: ' + str(pc_ranges_covered
) + '%')
# Output the total availability only if we are processing all the variables.
97 if self
.variables_total
and self
.variables_with_loc
:
98 total_availability
= None
99 if self
.variables_total
== TAINT_VALUE
or \
100 self
.variables_with_loc
== TAINT_VALUE
:
101 total_availability
= TAINT_VALUE
103 total_availability
= int(ceil(self
.variables_with_loc
* 100.0) \
104 / self
.variables_total
)
105 print (' -------------------------------------------------')
106 print (' -total availability: ' + str(total_availability
) + '%')
107 print (' =================================================')
111 # Draw a plot representing the location buckets.
113 from matplotlib
import pyplot
as plt
115 buckets
= range(len(self
.variables_coverage_map
))
116 plt
.figure(figsize
=(12, 8))
118 plt
.bar(buckets
, self
.variables_coverage_map
.values(), align
='center',
119 tick_label
=self
.variables_coverage_map
.keys(),
120 label
='variables of {}'.format(self
.file_name
))
122 # Place the text box with the coverage info.
123 pc_ranges_covered
= self
.get_pc_coverage()
124 props
= dict(boxstyle
='round', facecolor
='wheat', alpha
=0.5)
125 plt
.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered
),
126 transform
=plt
.gca().transAxes
, fontsize
=12,
127 verticalalignment
='top', bbox
=props
)
# Compare the two LocationStats objects and draw a plot showing
# the difference between them.
133 def draw_location_diff(self
, locstats_to_compare
):
134 from matplotlib
import pyplot
as plt
136 pc_ranges_covered
= self
.get_pc_coverage()
137 pc_ranges_covered_to_compare
= locstats_to_compare
.get_pc_coverage()
139 buckets
= range(len(self
.variables_coverage_map
))
140 buckets_to_compare
= range(len(locstats_to_compare
.variables_coverage_map
))
142 fig
= plt
.figure(figsize
=(12, 8))
143 ax
= fig
.add_subplot(111)
146 comparison_keys
= list(coverage_buckets())
147 ax
.bar(buckets
, self
.variables_coverage_map
.values(), align
='edge',
149 label
='variables of {}'.format(self
.file_name
))
150 ax
.bar(buckets_to_compare
,
151 locstats_to_compare
.variables_coverage_map
.values(),
152 color
='r', align
='edge', width
=-0.4,
153 label
='variables of {}'.format(locstats_to_compare
.file_name
))
154 ax
.set_xticks(range(len(comparison_keys
)))
155 ax
.set_xticklabels(comparison_keys
)
157 props
= dict(boxstyle
='round', facecolor
='wheat', alpha
=0.5)
159 '{} PC ranges covered: {}%'. \
160 format(self
.file_name
, pc_ranges_covered
),
161 transform
=plt
.gca().transAxes
, fontsize
=12,
162 verticalalignment
='top', bbox
=props
)
164 '{} PC ranges covered: {}%'. \
165 format(locstats_to_compare
.file_name
,
166 pc_ranges_covered_to_compare
),
167 transform
=plt
.gca().transAxes
, fontsize
=12,
168 verticalalignment
='top', bbox
=props
)
172 # Define the location buckets.
173 def coverage_buckets():
176 for start
in range(10, 91, 10):
177 yield '[{0}%,{1}%)'.format(start
, start
+ 10)
180 # Parse the JSON representing the debug statistics, and create a
181 # LocationStats object.
182 def parse_locstats(opts
, binary
):
183 # These will be different due to different options enabled.
184 variables_total
= None
185 variables_total_locstats
= None
186 variables_with_loc
= None
187 variables_scope_bytes_covered
= None
188 variables_scope_bytes
= None
189 variables_scope_bytes_entry_values
= None
190 variables_coverage_map
= OrderedDict()
192 # Get the directory of the LLVM tools.
193 llvm_dwarfdump_cmd
= os
.path
.join(os
.path
.dirname(__file__
), \
195 # The statistics llvm-dwarfdump option.
196 llvm_dwarfdump_stats_opt
= "--statistics"
198 # Generate the stats with the llvm-dwarfdump.
199 subproc
= Popen([llvm_dwarfdump_cmd
, llvm_dwarfdump_stats_opt
, binary
], \
200 stdin
=PIPE
, stdout
=PIPE
, stderr
=PIPE
, \
201 universal_newlines
= True)
202 cmd_stdout
, cmd_stderr
= subproc
.communicate()
204 # TODO: Handle errors that are coming from llvm-dwarfdump.
206 # Get the JSON and parse it.
210 json_parsed
= loads(cmd_stdout
)
212 print ('error: No valid llvm-dwarfdump statistics found.')
215 # TODO: Parse the statistics Version from JSON.
217 def init_field(name
):
218 if json_parsed
[name
] == 'overflowed':
219 print ('warning: "' + name
+ '" field overflowed.')
221 return json_parsed
[name
]
223 if opts
.only_variables
:
224 # Read the JSON only for local variables.
225 variables_total_locstats
= \
226 init_field('#local vars processed by location statistics')
227 variables_scope_bytes_covered
= \
228 init_field('sum_all_local_vars(#bytes in parent scope covered' \
229 ' by DW_AT_location)')
230 variables_scope_bytes
= \
231 init_field('sum_all_local_vars(#bytes in parent scope)')
232 if not opts
.ignore_debug_entry_values
:
233 for cov_bucket
in coverage_buckets():
234 cov_category
= "#local vars with {} of parent scope covered " \
235 "by DW_AT_location".format(cov_bucket
)
236 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
238 variables_scope_bytes_entry_values
= \
239 init_field('sum_all_local_vars(#bytes in parent scope ' \
240 'covered by DW_OP_entry_value)')
241 if variables_scope_bytes_covered
!= TAINT_VALUE
and \
242 variables_scope_bytes_entry_values
!= TAINT_VALUE
:
243 variables_scope_bytes_covered
= variables_scope_bytes_covered \
244 - variables_scope_bytes_entry_values
245 for cov_bucket
in coverage_buckets():
247 "#local vars - entry values with {} of parent scope " \
248 "covered by DW_AT_location".format(cov_bucket
)
249 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
250 elif opts
.only_formal_parameters
:
251 # Read the JSON only for formal parameters.
252 variables_total_locstats
= \
253 init_field('#params processed by location statistics')
254 variables_scope_bytes_covered
= \
255 init_field('sum_all_params(#bytes in parent scope covered ' \
256 'by DW_AT_location)')
257 variables_scope_bytes
= \
258 init_field('sum_all_params(#bytes in parent scope)')
259 if not opts
.ignore_debug_entry_values
:
260 for cov_bucket
in coverage_buckets():
261 cov_category
= "#params with {} of parent scope covered " \
262 "by DW_AT_location".format(cov_bucket
)
263 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
265 variables_scope_bytes_entry_values
= \
266 init_field('sum_all_params(#bytes in parent scope covered ' \
267 'by DW_OP_entry_value)')
268 if variables_scope_bytes_covered
!= TAINT_VALUE
and \
269 variables_scope_bytes_entry_values
!= TAINT_VALUE
:
270 variables_scope_bytes_covered
= variables_scope_bytes_covered \
271 - variables_scope_bytes_entry_values
272 for cov_bucket
in coverage_buckets():
274 "#params - entry values with {} of parent scope covered" \
275 " by DW_AT_location".format(cov_bucket
)
276 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
278 # Read the JSON for both local variables and formal parameters.
280 init_field('#source variables')
281 variables_with_loc
= init_field('#source variables with location')
282 variables_total_locstats
= \
283 init_field('#variables processed by location statistics')
284 variables_scope_bytes_covered
= \
285 init_field('sum_all_variables(#bytes in parent scope covered ' \
286 'by DW_AT_location)')
287 variables_scope_bytes
= \
288 init_field('sum_all_variables(#bytes in parent scope)')
290 if not opts
.ignore_debug_entry_values
:
291 for cov_bucket
in coverage_buckets():
292 cov_category
= "#variables with {} of parent scope covered " \
293 "by DW_AT_location".format(cov_bucket
)
294 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
296 variables_scope_bytes_entry_values
= \
297 init_field('sum_all_variables(#bytes in parent scope covered ' \
298 'by DW_OP_entry_value)')
299 if variables_scope_bytes_covered
!= TAINT_VALUE
and \
300 variables_scope_bytes_entry_values
!= TAINT_VALUE
:
301 variables_scope_bytes_covered
= variables_scope_bytes_covered \
302 - variables_scope_bytes_entry_values
303 for cov_bucket
in coverage_buckets():
305 "#variables - entry values with {} of parent scope covered " \
306 "by DW_AT_location".format(cov_bucket
)
307 variables_coverage_map
[cov_bucket
] = init_field(cov_category
)
309 return LocationStats(binary
, variables_total
, variables_total_locstats
,
310 variables_with_loc
, variables_scope_bytes_covered
,
311 variables_scope_bytes
, variables_coverage_map
)
313 # Parse the program arguments.
314 def parse_program_args(parser
):
315 parser
.add_argument('--only-variables', action
='store_true', default
=False,
316 help='calculate the location statistics only for local variables')
317 parser
.add_argument('--only-formal-parameters', action
='store_true',
319 help='calculate the location statistics only for formal parameters')
320 parser
.add_argument('--ignore-debug-entry-values', action
='store_true',
322 help='ignore the location statistics on locations with '
324 parser
.add_argument('--draw-plot', action
='store_true', default
=False,
325 help='show histogram of location buckets generated (requires '
327 parser
.add_argument('--compare', action
='store_true', default
=False,
328 help='compare the debug location coverage on two files provided, '
329 'and draw a plot showing the difference (requires '
331 parser
.add_argument('file_names', nargs
='+', type=str, help='file to process')
333 return parser
.parse_args()
335 # Verify that the program inputs meet the requirements.
336 def verify_program_inputs(opts
):
337 if len(sys
.argv
) < 2:
338 print ('error: Too few arguments.')
341 if opts
.only_variables
and opts
.only_formal_parameters
:
342 print ('error: Please use just one --only* option.')
345 if not opts
.compare
and len(opts
.file_names
) != 1:
346 print ('error: Please specify only one file to process.')
349 if opts
.compare
and len(opts
.file_names
) != 2:
350 print ('error: Please specify two files to process.')
353 if opts
.draw_plot
or opts
.compare
:
357 print('error: matplotlib not found.')
363 parser
= argparse
.ArgumentParser()
364 opts
= parse_program_args(parser
)
366 if not verify_program_inputs(opts
):
370 binary_file
= opts
.file_names
[0]
371 locstats
= parse_locstats(opts
, binary_file
)
375 # Draw a histogram representing the location buckets.
378 # Pretty print collected info on the standard output.
379 if locstats
.pretty_print() == -1:
382 binary_file_to_compare
= opts
.file_names
[1]
383 locstats_to_compare
= parse_locstats(opts
, binary_file_to_compare
)
# Draw a plot showing the difference in debug location coverage between
# the two files.
386 locstats
.draw_location_diff(locstats_to_compare
)
388 if __name__
== '__main__':