build/android/emma_coverage_stats.py

   1 #!/usr/bin/python
   2 # Copyright 2015 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Generates incremental code coverage reports for Java code in Chromium.
   7
   8 Usage:
   9
  10   build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
  11     <EMMA file directory> --lines-for-coverage-file
  12     <path to file containing lines for coverage>
  13
  14   Creates a JSON representation of the overall and file coverage stats and saves
  15   this information to the specified output file.
  16 """
  17
  18 import argparse
  19 import collections
  20 import json
  21 import logging
  22 import os
  23 import re
  24 import sys
  25 from xml.etree import ElementTree
  26
  27 from devil.utils import run_tests_helper
  28
  29 NOT_EXECUTABLE = -1
  30 NOT_COVERED = 0
  31 COVERED = 1
  32 PARTIALLY_COVERED = 2
  33
  34 # Coverage information about a single line of code.
  35 LineCoverage = collections.namedtuple(
  36     'LineCoverage',
  37     ['lineno', 'source', 'covered_status', 'fractional_line_coverage'])
  38
  39
  40 class _EmmaHtmlParser(object):
  41   """Encapsulates HTML file parsing operations.
  42
  43   This class contains all operations related to parsing HTML files that were
  44   produced using the EMMA code coverage tool.
  45
  46   Example HTML:
  47
  48   Package links:
  49     <a href="_files/1.html">org.chromium.chrome</a>
  50     This is returned by the selector |XPATH_SELECT_PACKAGE_ELEMENTS|.
  51
  52   Class links:
  53     <a href="1e.html">DoActivity.java</a>
  54     This is returned by the selector |XPATH_SELECT_CLASS_ELEMENTS|.
  55
  56   Line coverage data:
  57     <tr class="p">
  58        <td class="l" title="78% line coverage (7 out of 9)">108</td>
  59        <td title="78% line coverage (7 out of 9 instructions)">
  60          if (index < 0 || index = mSelectors.size()) index = 0;</td>
  61     </tr>
  62     <tr>
  63        <td class="l">109</td>
  64        <td> </td>
  65     </tr>
  66     <tr class="c">
  67        <td class="l">110</td>
  68        <td>        if (mSelectors.get(index) != null) {</td>
  69     </tr>
  70     <tr class="z">
  71        <td class="l">111</td>
  72        <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
  73     </tr>
  74     Each <tr> element is returned by the selector |XPATH_SELECT_LOC|.
  75
  76     We can parse this to get:
  77       1. Line number
  78       2. Line of source code
  79       3. Coverage status (c, z, or p)
  80       4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  81   """
  82   # Selector to match all <a> elements within the rows that are in the table
  83   # that displays all of the different packages.
  84   _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'
  85
  86   # Selector to match all <a> elements within the rows that are in the table
  87   # that displays all of the different classes within a package.
  88   _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'
  89
  90   # Selector to match all <tr> elements within the table containing Java source
  91   # code in an EMMA HTML file.
  92   _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'
  93
  94   # Children of HTML elements are represented as a list in ElementTree. These
  95   # constants represent list indices corresponding to relevant child elements.
  96
  97   # Child 1 contains percentage covered for a line.
  98   _ELEMENT_PERCENT_COVERED = 1
  99
 100   # Child 1 contains the original line of source code.
 101   _ELEMENT_CONTAINING_SOURCE_CODE = 1
 102
 103   # Child 0 contains the line number.
 104   _ELEMENT_CONTAINING_LINENO = 0
 105
 106   # Maps CSS class names to corresponding coverage constants.
 107   _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}
 108
 109   # UTF-8 no break space.
 110   _NO_BREAK_SPACE = '\xc2\xa0'
 111
 112   def __init__(self, emma_file_base_dir):
 113     """Initializes _EmmaHtmlParser.
 114
 115     Args:
 116       emma_file_base_dir: Path to the location where EMMA report files are
 117         stored. Should be where index.html is stored.
 118     """
 119     self._base_dir = emma_file_base_dir
 120     self._emma_files_path = os.path.join(self._base_dir, '_files')
 121     self._index_path = os.path.join(self._base_dir, 'index.html')
 122
 123   def GetLineCoverage(self, emma_file_path):
 124     """Returns a list of LineCoverage objects for the given EMMA HTML file.
 125
 126     Args:
 127       emma_file_path: String representing the path to the EMMA HTML file.
 128
 129     Returns:
 130       A list of LineCoverage objects.
 131     """
 132     line_tr_elements = self._FindElements(
 133         emma_file_path, self._XPATH_SELECT_LOC)
 134     line_coverage = []
 135     for tr in line_tr_elements:
 136       # Get the coverage status.
 137       coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
 138       # Get the fractional coverage value.
 139       if coverage_status == PARTIALLY_COVERED:
 140         title_attribute = (tr[self._ELEMENT_PERCENT_COVERED].get('TITLE'))
 141         # Parse string that contains percent covered: "83% line coverage ...".
 142         percent_covered = title_attribute.split('%')[0]
 143         fractional_coverage = int(percent_covered) / 100.0
 144       else:
 145         fractional_coverage = 1.0
 146
 147       # Get the line number.
 148       lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
 149       # Handles oddly formatted HTML (where there is an extra <a> tag).
 150       lineno = int(lineno_element.text or
 151                    lineno_element[self._ELEMENT_CONTAINING_LINENO].text)
 152       # Get the original line of Java source code.
 153       raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text
 154       utf8_source = raw_source.encode('UTF-8')
 155       source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')
 156
 157       line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
 158       line_coverage.append(line)
 159
 160     return line_coverage
 161
 162   def GetPackageNameToEmmaFileDict(self):
 163     """Returns a dict mapping Java packages to EMMA HTML coverage files.
 164
 165     Parses the EMMA index.html file to get a list of packages, then parses each
 166     package HTML file to get a list of classes for that package, and creates
 167     a dict with this info.
 168
 169     Returns:
 170       A dict mapping string representation of Java packages (with class
 171         names appended) to the corresponding file paths of EMMA HTML files.
 172     """
 173     # These <a> elements contain each package name and the path of the file
 174     # where all classes within said package are listed.
 175     package_link_elements = self._FindElements(
 176         self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
 177     # Maps file path of package directory (EMMA generated) to package name.
 178     # Example: emma_dir/f.html: org.chromium.chrome.
 179     package_links = {
 180       os.path.join(self._base_dir, link.attrib['HREF']): link.text
 181       for link in package_link_elements if 'HREF' in link.attrib
 182     }
 183
 184     package_to_emma = {}
 185     for package_emma_file_path, package_name in package_links.iteritems():
 186       # These <a> elements contain each class name in the current package and
 187       # the path of the file where the coverage info is stored for each class.
 188       coverage_file_link_elements = self._FindElements(
 189           package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)
 190
 191       for class_name_element in coverage_file_link_elements:
 192         emma_coverage_file_path = os.path.join(
 193             self._emma_files_path, class_name_element.attrib['HREF'])
 194         full_package_name = '%s.%s' % (package_name, class_name_element.text)
 195         package_to_emma[full_package_name] = emma_coverage_file_path
 196
 197     return package_to_emma
 198
 199   # pylint: disable=no-self-use
 200   def _FindElements(self, file_path, xpath_selector):
 201     """Reads a HTML file and performs an XPath match.
 202
 203     Args:
 204       file_path: String representing the path to the HTML file.
 205       xpath_selector: String representing xpath search pattern.
 206
 207     Returns:
 208       A list of ElementTree.Elements matching the given XPath selector.
 209         Returns an empty list if there is no match.
 210     """
 211     with open(file_path) as f:
 212       file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
 213       root = ElementTree.fromstring(file_contents)
 214       return root.findall(xpath_selector)
 215
 216
 217 class _EmmaCoverageStats(object):
 218   """Computes code coverage stats for Java code using the coverage tool EMMA.
 219
 220   This class provides an API that allows users to capture absolute code coverage
 221   and code coverage on a subset of lines for each Java source file. Coverage
 222   reports are generated in JSON format.
 223   """
 224   # Regular expression to get package name from Java package statement.
 225   RE_PACKAGE_MATCH_GROUP = 'package'
 226   RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)
 227
 228   def __init__(self, emma_file_base_dir, files_for_coverage):
 229     """Initialize _EmmaCoverageStats.
 230
 231     Args:
 232       emma_file_base_dir: String representing the path to the base directory
 233         where EMMA HTML coverage files are stored, i.e. parent of index.html.
 234       files_for_coverage: A list of Java source code file paths to get EMMA
 235         coverage for.
 236     """
 237     self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
 238     self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)
 239
 240   def GetCoverageDict(self, lines_for_coverage):
 241     """Returns a dict containing detailed coverage information.
 242
 243     Gets detailed coverage stats for each file specified in the
 244     |lines_for_coverage| dict and the total incremental number of lines covered
 245     and executable for all files in |lines_for_coverage|.
 246
 247     Args:
 248       lines_for_coverage: A dict mapping Java source file paths to lists of line
 249         numbers.
 250
 251     Returns:
 252       A dict containing coverage stats for the given dict of files and lines.
 253         Contains absolute coverage stats for each file, coverage stats for each
 254         file's lines specified in |lines_for_coverage|, line by line coverage
 255         for each file, and overall coverage stats for the lines specified in
 256         |lines_for_coverage|.
 257     """
 258     file_coverage = {}
 259     for file_path, line_numbers in lines_for_coverage.iteritems():
 260       file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
 261       if file_coverage_dict:
 262         file_coverage[file_path] = file_coverage_dict
 263       else:
 264         logging.warning(
 265             'No code coverage data for %s, skipping.', file_path)
 266
 267     covered_statuses = [s['incremental'] for s in file_coverage.itervalues()]
 268     num_covered_lines = sum(s['covered'] for s in covered_statuses)
 269     num_total_lines = sum(s['total'] for s in covered_statuses)
 270     return {
 271       'files': file_coverage,
 272       'patch': {
 273         'incremental': {
 274           'covered': num_covered_lines,
 275           'total': num_total_lines
 276         }
 277       }
 278     }
 279
 280   def GetCoverageDictForFile(self, file_path, line_numbers):
 281     """Returns a dict containing detailed coverage info for the given file.
 282
 283     Args:
 284       file_path: The path to the Java source file that we want to create the
 285         coverage dict for.
 286       line_numbers: A list of integer line numbers to retrieve additional stats
 287         for.
 288
 289     Returns:
 290       A dict containing absolute, incremental, and line by line coverage for
 291         a file.
 292     """
 293     if file_path not in self._source_to_emma:
 294       return None
 295     emma_file = self._source_to_emma[file_path]
 296     total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)
 297     incremental_line_coverage = [line for line in total_line_coverage
 298                                  if line.lineno in line_numbers]
 299     line_by_line_coverage = [
 300       {
 301         'line': line.source,
 302         'coverage': line.covered_status,
 303         'changed': line.lineno in line_numbers,
 304         'fractional_coverage': line.fractional_line_coverage,
 305       }
 306       for line in total_line_coverage
 307     ]
 308     total_covered_lines, total_lines = (
 309         self.GetSummaryStatsForLines(total_line_coverage))
 310     incremental_covered_lines, incremental_total_lines = (
 311         self.GetSummaryStatsForLines(incremental_line_coverage))
 312
 313     file_coverage_stats = {
 314       'absolute': {
 315         'covered': total_covered_lines,
 316         'total': total_lines
 317       },
 318       'incremental': {
 319         'covered': incremental_covered_lines,
 320         'total': incremental_total_lines
 321       },
 322       'source': line_by_line_coverage,
 323     }
 324     return file_coverage_stats
 325
 326   # pylint: disable=no-self-use
 327   def GetSummaryStatsForLines(self, line_coverage):
 328     """Gets summary stats for a given list of LineCoverage objects.
 329
 330     Args:
 331       line_coverage: A list of LineCoverage objects.
 332
 333     Returns:
 334       A tuple containing the number of lines that are covered and the total
 335         number of lines that are executable, respectively
 336     """
 337     partially_covered_sum = 0
 338     covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
 339     for line in line_coverage:
 340       status = line.covered_status
 341       if status == NOT_EXECUTABLE:
 342         continue
 343       covered_status_totals[status] += 1
 344       if status == PARTIALLY_COVERED:
 345         partially_covered_sum += line.fractional_line_coverage
 346
 347     total_covered = covered_status_totals[COVERED] + partially_covered_sum
 348     total_lines = sum(covered_status_totals.values())
 349     return total_covered, total_lines
 350
 351   def _GetSourceFileToEmmaFileDict(self, files):
 352     """Gets a dict used to correlate Java source files with EMMA HTML files.
 353
 354     This method gathers the information needed to correlate EMMA HTML
 355     files with Java source files. EMMA XML and plain text reports do not provide
 356     line by line coverage data, so HTML reports must be used instead.
 357     Unfortunately, the HTML files that are created are given garbage names
 358     (i.e 1.html) so we need to manually correlate EMMA HTML files
 359     with the original Java source files.
 360
 361     Args:
 362       files: A list of file names for which coverage information is desired.
 363
 364     Returns:
 365       A dict mapping Java source file paths to EMMA HTML file paths.
 366     """
 367     # Maps Java source file paths to package names.
 368     # Example: /usr/code/file.java -> org.chromium.file.java.
 369     source_to_package = {}
 370     for file_path in files:
 371       package = self.GetPackageNameFromFile(file_path)
 372       if package:
 373         source_to_package[file_path] = package
 374       else:
 375         logging.warning("Skipping %s because it doesn\'t have a package "
 376                         "statement.", file_path)
 377
 378     # Maps package names to EMMA report HTML files.
 379     # Example: org.chromium.file.java -> out/coverage/1a.html.
 380     package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
 381     # Finally, we have a dict mapping Java file paths to EMMA report files.
 382     # Example: /usr/code/file.java -> out/coverage/1a.html.
 383     source_to_emma = {source: package_to_emma[package]
 384                       for source, package in source_to_package.iteritems()
 385                       if package in package_to_emma}
 386     return source_to_emma
 387
 388   @staticmethod
 389   def NeedsCoverage(file_path):
 390     """Checks to see if the file needs to be analyzed for code coverage.
 391
 392     Args:
 393       file_path: A string representing path to the file.
 394
 395     Returns:
 396       True for Java files that exist, False for all others.
 397     """
 398     if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
 399       return True
 400     else:
 401       logging.info('Skipping file %s, cannot compute code coverage.', file_path)
 402       return False
 403
 404   @staticmethod
 405   def GetPackageNameFromFile(file_path):
 406     """Gets the full package name including the file name for a given file path.
 407
 408     Args:
 409       file_path: String representing the path to the Java source file.
 410
 411     Returns:
 412       A string representing the full package name with file name appended or
 413         None if there is no package statement in the file.
 414     """
 415     with open(file_path) as f:
 416       file_content = f.read()
 417       package_match = re.search(_EmmaCoverageStats.RE_PACKAGE, file_content)
 418       if package_match:
 419         package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
 420         file_name = os.path.basename(file_path)
 421         return '%s.%s' % (package, file_name)
 422       else:
 423         return None
 424
 425
 426 def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
 427   """Generates a coverage report for a given set of lines.
 428
 429   Writes the results of the coverage analysis to the file specified by
 430   |out_file_path|.
 431
 432   Args:
 433     line_coverage_file: The path to a file which contains a dict mapping file
 434       names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
 435       that we should compute coverage information on lines 1 - 3 for file1.
 436     out_file_path: A string representing the location to write the JSON report.
 437     coverage_dir: A string representing the file path where the EMMA
 438       HTML coverage files are located (i.e. folder where index.html is located).
 439   """
 440   with open(line_coverage_file) as f:
 441     potential_files_for_coverage = json.load(f)
 442
 443   files_for_coverage = {f: lines
 444                         for f, lines in potential_files_for_coverage.iteritems()
 445                         if _EmmaCoverageStats.NeedsCoverage(f)}
 446
 447   coverage_results = {}
 448   if files_for_coverage:
 449     code_coverage = _EmmaCoverageStats(coverage_dir, files_for_coverage.keys())
 450     coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
 451   else:
 452     logging.info('No Java files requiring coverage were included in %s.',
 453                  line_coverage_file)
 454
 455   with open(out_file_path, 'w+') as out_status_file:
 456     json.dump(coverage_results, out_status_file)
 457
 458
 459 def main():
 460   argparser = argparse.ArgumentParser()
 461   argparser.add_argument('--out', required=True, type=str,
 462                          help='Report output file path.')
 463   argparser.add_argument('--emma-dir', required=True, type=str,
 464                          help='EMMA HTML report directory.')
 465   argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
 466                          help='File containing a JSON object. Should contain a '
 467                          'dict mapping file names to lists of line numbers of '
 468                          'code for which coverage information is desired.')
 469   argparser.add_argument('-v', '--verbose', action='count',
 470                          help='Print verbose log information.')
 471   args = argparser.parse_args()
 472   run_tests_helper.SetLogLevel(args.verbose)
 473   GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)
 474
 475
 476 if __name__ == '__main__':
 477   sys.exit(main())