# This tool works as a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable way.
from __future__ import print_function

import argparse
import os
import sys
from collections import OrderedDict
from json import loads
from math import ceil
from subprocess import PIPE, Popen
# This special value is used to mark statistics that overflowed.
TAINT_VALUE = "tainted"
# Initialize the plot: set the title, the axis labels and the x tick style.
#
# `plt` is expected to expose the matplotlib.pyplot functions used below.
# NOTE(review): the `def` line was not legible in the reviewed copy; the
# function name and its single `plt` parameter are assumed -- confirm.
def init_plot(plt):
    plt.title("Debug Location Statistics", fontweight="bold")
    plt.xlabel("location buckets")
    plt.ylabel("number of variables in the location buckets")
    plt.xticks(rotation=45, fontsize="x-small")
# Finalize the plot: draw the horizontal grid, save the figure as
# "locstats.png" in the current directory and report where it was saved.
#
# NOTE(review): the `def` line and the legend()/show() calls were not legible
# in the reviewed copy; they are assumed here (the bar charts pass `label=`,
# and --draw-plot is documented as "show histogram") -- confirm.
def finalize_plot(plt):
    plt.legend()
    plt.grid(color="grey", which="major", axis="y", linestyle="-", linewidth=0.3)
    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')
    plt.show()
# Holds the debug location statistics.
class LocationStats:
    # file_name: the processed file; used in the plot labels.
    # variables_total / variables_with_loc: overall counters; parse_locstats
    #   leaves them as None when an --only-* option is used.
    # variables_total_locstats: number of variables the buckets refer to.
    # variables_scope_bytes_covered / variables_scope_bytes: byte counters
    #   used to compute the PC ranges coverage.
    # variables_coverage_map: mapping from bucket name to variable count.
    #
    # Any statistic may hold TAINT_VALUE instead of a number.
    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    # Get the PC ranges coverage as an integer percentage, or TAINT_VALUE
    # when either byte counter is tainted.
    def get_pc_coverage(self):
        if self.scope_bytes_covered == TAINT_VALUE or self.scope_bytes == TAINT_VALUE:
            return TAINT_VALUE
        # ceil() rounds only the scaled numerator; int() then truncates the
        # quotient.
        pc_ranges_covered = int(
            ceil(self.scope_bytes_covered * 100.0) / self.scope_bytes
        )
        return pc_ranges_covered

    # Pretty print the debug location buckets.
    # Returns -1 when there are no scope bytes to report on, 0 otherwise.
    #
    # NOTE(review): runs of spaces inside the printed literals may have been
    # collapsed in the reviewed copy; the strings are reproduced as seen --
    # confirm the column spacing against the original before merging.
    def pretty_print(self):
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        # Percentage of the processed variables that falls in each bucket.
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            if (
                self.variables_coverage_map[cov_bucket] == TAINT_VALUE
                or self.variables_total_locstats == TAINT_VALUE
            ):
                variables_coverage_per_map[cov_bucket] = TAINT_VALUE
            else:
                variables_coverage_per_map[cov_bucket] = int(
                    ceil(self.variables_coverage_map[cov_bucket] * 100.0)
                    / self.variables_total_locstats
                )

        print(" =================================================")
        print(" Debug Location Statistics ")
        print(" =================================================")
        print(" cov% samples percentage(~) ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            # A tainted row holds strings, which cannot be rendered with the
            # integer ("d") presentation type. The percentage is tainted
            # exactly when the count or the total is, so it is the one value
            # to test; integers print the same with either format.
            if variables_coverage_per_map[cov_bucket] == TAINT_VALUE:
                row_format = " {0:10} {1:8} {2:3}%"
            else:
                row_format = " {0:10} {1:8d} {2:3d}%"
            print(
                row_format.format(
                    cov_bucket,
                    self.variables_coverage_map[cov_bucket],
                    variables_coverage_per_map[cov_bucket],
                )
            )
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            if (
                self.variables_total == TAINT_VALUE
                or self.variables_with_loc == TAINT_VALUE
            ):
                total_availability = TAINT_VALUE
            else:
                total_availability = int(
                    ceil(self.variables_with_loc * 100.0) / self.variables_total
                )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    # Draw a plot representing the location buckets.
    #
    # NOTE(review): the bar alignment and the text-box position/font size
    # were not legible in the reviewed copy; the values below follow the
    # upstream LLVM script -- confirm.
    def draw_plot(self):
        from matplotlib import pyplot as plt

        buckets = range(len(self.variables_coverage_map))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # Dict views are materialized so matplotlib receives plain sequences.
        plt.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="center",
            tick_label=list(self.variables_coverage_map.keys()),
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finalize_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    #
    # NOTE(review): bar alignment/width/colour and the text-box positions
    # were not legible in the reviewed copy; the values below follow the
    # upstream LLVM script -- confirm.
    def draw_location_diff(self, locstats_to_compare):
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = range(len(self.variables_coverage_map))
        buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # The two series share the bucket positions; opposite-signed widths
        # with edge alignment put them side by side.
        ax.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            list(locstats_to_compare.variables_coverage_map.values()),
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finalize_plot(plt)
# Define the location buckets.
#
# Yields the bucket names in ascending coverage order. The names are used as
# keys of the coverage map and are substituted into the statistics names
# looked up in the llvm-dwarfdump JSON, so they must match its spelling.
#
# NOTE(review): only the [N%,N+10%) loop was legible in the reviewed copy,
# with two statements missing before it and one after it; the "0%",
# "(0%,10%)" and "100%" buckets are restored from the llvm-dwarfdump
# statistics naming -- confirm.
def coverage_buckets():
    yield "0%"
    yield "(0%,10%)"
    for start in range(10, 91, 10):
        yield "[{0}%,{1}%)".format(start, start + 10)
    yield "100%"
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
#
# opts:   the parsed program options; only_variables, only_formal_parameters
#         and ignore_debug_entry_values are read here.
# binary: the file handed to llvm-dwarfdump.
#
# Runs the llvm-dwarfdump located next to this script with --statistics.
# Exits the program with status 1 when its output is not valid JSON.
def parse_locstats(opts, binary):
    # These stay None unless both locals and parameters are processed.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    # NOTE(review): the stdin/stdout/stderr arguments were not legible in the
    # reviewed copy; piping all three is assumed -- confirm.
    subproc = Popen(
        [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
        universal_newlines=True,
    )
    cmd_stdout, cmd_stderr = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. Malformed JSON raises ValueError (or its
    # subclass json.JSONDecodeError); catching only that keeps
    # KeyboardInterrupt and SystemExit from being swallowed.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    # Read one statistic, mapping the "overflowed" marker to TAINT_VALUE.
    def init_field(name):
        if json_parsed[name] == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The three modes read statistics whose names differ only in the noun
    # used for the counted entities ("#<noun> ..." and "sum_all_<noun>(...)").
    if opts.only_variables:
        # Read the JSON only for local variables.
        count_noun, sum_noun = "local vars", "local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        count_noun, sum_noun = "params", "params"
    else:
        # Read the JSON for both local variables and formal parameters.
        count_noun, sum_noun = "variables", "variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        "#" + count_noun + " processed by location statistics"
    )
    variables_scope_bytes_covered = init_field(
        "sum_all_" + sum_noun + "(#bytes in parent scope covered by DW_AT_location)"
    )
    variables_scope_bytes = init_field(
        "sum_all_" + sum_noun + "(#bytes in parent scope)"
    )

    if not opts.ignore_debug_entry_values:
        bucket_stat = (
            "#" + count_noun + " with {} of parent scope covered by DW_AT_location"
        )
    else:
        # Discount the bytes that are covered only through entry values.
        variables_scope_bytes_entry_values = init_field(
            "sum_all_"
            + sum_noun
            + "(#bytes in parent scope covered by DW_OP_entry_value)"
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered = (
                variables_scope_bytes_covered - variables_scope_bytes_entry_values
            )
        bucket_stat = (
            "#"
            + count_noun
            + " - entry values with {} of parent scope covered by DW_AT_location"
        )

    for cov_bucket in coverage_buckets():
        cov_category = bucket_stat.format(cov_bucket)
        variables_coverage_map[cov_bucket] = init_field(cov_category)

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )
# Parse the program arguments.
#
# Registers the tool's options on `parser` and returns the namespace produced
# by parser.parse_args(), i.e. the options are read from sys.argv.
#
# NOTE(review): the add_argument() scaffolding was not legible in the reviewed
# copy. The --only-variables, --draw-plot and --compare spellings are derived
# from the opts.* attributes read elsewhere, and every flag is assumed to be
# a store_true switch defaulting to False -- confirm.
def parse_program_args(parser):
    parser.add_argument(
        "--only-variables",
        action="store_true",
        default=False,
        help="calculate the location statistics only for local variables",
    )
    parser.add_argument(
        "--only-formal-parameters",
        action="store_true",
        default=False,
        help="calculate the location statistics only for formal parameters",
    )
    parser.add_argument(
        "--ignore-debug-entry-values",
        action="store_true",
        default=False,
        help="ignore the location statistics on locations with entry values",
    )
    parser.add_argument(
        "--draw-plot",
        action="store_true",
        default=False,
        help="show histogram of location buckets generated (requires matplotlib)",
    )
    parser.add_argument(
        "--compare",
        action="store_true",
        default=False,
        help="compare the debug location coverage on two files provided, "
        "and draw a plot showing the difference (requires "
        "matplotlib)",
    )
    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    return parser.parse_args()
# Verify that the program inputs meet the requirements.
#
# Prints an "error: ..." line and returns False on the first violated rule;
# returns True when every check passes.
#
# NOTE(review): the return statements and the matplotlib import probe were
# not legible in the reviewed copy; True/False results are inferred from the
# `if not verify_program_inputs(opts)` call site -- confirm.
def verify_program_inputs(opts):
    if len(sys.argv) < 2:
        print("error: Too few arguments.")
        return False

    if opts.only_variables and opts.only_formal_parameters:
        print("error: Please use just one --only* option.")
        return False

    if not opts.compare and len(opts.file_names) != 1:
        print("error: Please specify only one file to process.")
        return False

    if opts.compare and len(opts.file_names) != 2:
        print("error: Please specify two files to process.")
        return False

    if opts.draw_plot or opts.compare:
        # Both plotting modes need matplotlib; fail early if it is missing.
        try:
            import matplotlib  # noqa: F401 -- imported only to probe availability
        except ImportError:
            print("error: matplotlib not found.")
            return False

    return True
# Program entry point: parse and verify the options, collect the statistics
# of the first file and then either print them, plot them, or plot the
# difference against the second file.
#
# NOTE(review): the `def` line, the option branching and the exit calls were
# not legible in the reviewed copy; the function name, the branch layout and
# the exit statuses below are assumed -- confirm.
def Main():
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    binary_file = opts.file_names[0]
    locstats = parse_locstats(opts, binary_file)

    if not opts.compare:
        if opts.draw_plot:
            # Draw a histogram representing the location buckets.
            locstats.draw_plot()
        else:
            # Pretty print collected info on the standard output.
            if locstats.pretty_print() == -1:
                sys.exit(0)
    else:
        binary_file_to_compare = opts.file_names[1]
        locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
        # Draw a plot showing the difference in debug location coverage between
        # two files.
        locstats.draw_location_diff(locstats_to_compare)


if __name__ == "__main__":
    Main()
    sys.exit(0)