build/android/emma_coverage_stats.py

   1 #!/usr/bin/python
   2 # Copyright 2015 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Generates incremental code coverage reports for Java code in Chromium.
   7
   8 Usage:
   9
  10   build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
  11     <EMMA file directory> --lines-for-coverage-file
  12     <path to file containing lines for coverage>
  13
  14   Creates a JSON representation of the overall and file coverage stats and saves
  15   this information to the specified output file.
  16 """
  17
  18 import argparse
  19 import collections
  20 import json
  21 import logging
  22 import os
  23 import re
  24 import sys
  25 from xml.etree import ElementTree
  26
  27 from pylib.utils import run_tests_helper
  28
  29 NOT_EXECUTABLE = -1
  30 NOT_COVERED = 0
  31 COVERED = 1
  32 PARTIALLY_COVERED = 2
  33
  34 # Coverage information about a single line of code.
  35 LineCoverage = collections.namedtuple(
  36     'LineCoverage',
  37     ['lineno', 'source', 'covered_status', 'fractional_line_coverage'])
  38
  39
  40 class _EmmaHtmlParser(object):
  41   """Encapsulates HTML file parsing operations.
  42
  43   This class contains all operations related to parsing HTML files that were
  44   produced using the EMMA code coverage tool.
  45
  46   Example HTML:
  47
  48   Package links:
  49     <a href="_files/1.html">org.chromium.chrome</a>
  50     This is returned by the selector |XPATH_SELECT_PACKAGE_ELEMENTS|.
  51
  52   Class links:
  53     <a href="1e.html">DoActivity.java</a>
  54     This is returned by the selector |XPATH_SELECT_CLASS_ELEMENTS|.
  55
  56   Line coverage data:
  57     <tr class="p">
  58        <td class="l" title="78% line coverage (7 out of 9)">108</td>
  59        <td title="78% line coverage (7 out of 9 instructions)">
  60          if (index < 0 || index = mSelectors.size()) index = 0;</td>
  61     </tr>
  62     <tr>
  63        <td class="l">109</td>
  64        <td> </td>
  65     </tr>
  66     <tr class="c">
  67        <td class="l">110</td>
  68        <td>        if (mSelectors.get(index) != null) {</td>
  69     </tr>
  70     <tr class="z">
  71        <td class="l">111</td>
  72        <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
  73     </tr>
  74     Each <tr> element is returned by the selector |XPATH_SELECT_LOC|.
  75
  76     We can parse this to get:
  77       1. Line number
  78       2. Line of source code
  79       3. Coverage status (c, z, or p)
  80       4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  81   """
  82   # Selector to match all <a> elements within the rows that are in the table
  83   # that displays all of the different packages.
  84   _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'
  85
  86   # Selector to match all <a> elements within the rows that are in the table
  87   # that displays all of the different classes within a package.
  88   _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'
  89
  90   # Selector to match all <tr> elements within the table containing Java source
  91   # code in an EMMA HTML file.
  92   _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'
  93
  94   # Children of HTML elements are represented as a list in ElementTree. These
  95   # constants represent list indices corresponding to relevant child elements.
  96
  97   # Child 1 contains percentage covered for a line.
  98   _ELEMENT_PERCENT_COVERED = 1
  99
 100   # Child 1 contains the original line of source code.
 101   _ELEMENT_CONTAINING_SOURCE_CODE = 1
 102
 103   # Child 0 contains the line number.
 104   _ELEMENT_CONTAINING_LINENO = 0
 105
 106   # Maps CSS class names to corresponding coverage constants.
 107   _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}
 108
 109   # UTF-8 no break space.
 110   _NO_BREAK_SPACE = '\xc2\xa0'
 111
 112   def __init__(self, emma_file_base_dir):
 113     """Initializes _EmmaHtmlParser.
 114
 115     Args:
 116       emma_file_base_dir: Path to the location where EMMA report files are
 117         stored. Should be where index.html is stored.
 118     """
 119     self._base_dir = emma_file_base_dir
 120     self._emma_files_path = os.path.join(self._base_dir, '_files')
 121     self._index_path = os.path.join(self._base_dir, 'index.html')
 122
 123   def GetLineCoverage(self, emma_file_path):
 124     """Returns a list of LineCoverage objects for the given EMMA HTML file.
 125
 126     Args:
 127       emma_file_path: String representing the path to the EMMA HTML file.
 128
 129     Returns:
 130       A list of LineCoverage objects.
 131     """
 132     line_tr_elements = self._FindElements(
 133         emma_file_path, self._XPATH_SELECT_LOC)
 134     line_coverage = []
 135     for tr in line_tr_elements:
 136       # Get the coverage status.
 137       coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
 138       # Get the fractional coverage value.
 139       if coverage_status == PARTIALLY_COVERED:
 140         title_attribute = (tr[self._ELEMENT_PERCENT_COVERED].get('TITLE'))
 141         # Parse string that contains percent covered: "83% line coverage ...".
 142         percent_covered = title_attribute.split('%')[0]
 143         fractional_coverage = int(percent_covered) / 100.0
 144       else:
 145         fractional_coverage = 1.0
 146
 147       # Get the line number.
 148       lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
 149       # Handles oddly formatted HTML (where there is an extra <a> tag).
 150       lineno = int(lineno_element.text or
 151                    lineno_element[self._ELEMENT_CONTAINING_LINENO].text)
 152       # Get the original line of Java source code.
 153       raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text
 154       utf8_source = raw_source.encode('UTF-8')
 155       source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')
 156
 157       line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
 158       line_coverage.append(line)
 159
 160     return line_coverage
 161
 162   def GetPackageNameToEmmaFileDict(self):
 163     """Returns a dict mapping Java packages to EMMA HTML coverage files.
 164
 165     Parses the EMMA index.html file to get a list of packages, then parses each
 166     package HTML file to get a list of classes for that package, and creates
 167     a dict with this info.
 168
 169     Returns:
 170       A dict mapping string representation of Java packages (with class
 171         names appended) to the corresponding file paths of EMMA HTML files.
 172     """
 173     # These <a> elements contain each package name and the path of the file
 174     # where all classes within said package are listed.
 175     package_link_elements = self._FindElements(
 176         self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
 177     # Maps file path of package directory (EMMA generated) to package name.
 178     # Example: emma_dir/f.html: org.chromium.chrome.
 179     package_links = {
 180       os.path.join(self._base_dir, link.attrib['HREF']): link.text
 181       for link in package_link_elements if 'HREF' in link.attrib
 182     }
 183
 184     package_to_emma = {}
 185     for package_emma_file_path, package_name in package_links.iteritems():
 186       # These <a> elements contain each class name in the current package and
 187       # the path of the file where the coverage info is stored for each class.
 188       coverage_file_link_elements = self._FindElements(
 189           package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)
 190
 191       for class_name_element in coverage_file_link_elements:
 192         emma_coverage_file_path = os.path.join(
 193             self._emma_files_path, class_name_element.attrib['HREF'])
 194         full_package_name = '%s.%s' % (package_name, class_name_element.text)
 195         package_to_emma[full_package_name] = emma_coverage_file_path
 196
 197     return package_to_emma
 198
 199   def _FindElements(self, file_path, xpath_selector):
 200     """Reads a HTML file and performs an XPath match.
 201
 202     Args:
 203       file_path: String representing the path to the HTML file.
 204       xpath_selector: String representing xpath search pattern.
 205
 206     Returns:
 207       A list of ElementTree.Elements matching the given XPath selector.
 208         Returns an empty list if there is no match.
 209     """
 210     with open(file_path) as f:
 211       file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
 212       root = ElementTree.fromstring(file_contents)
 213       return root.findall(xpath_selector)
 214
 215
 216 class _EmmaCoverageStats(object):
 217   """Computes code coverage stats for Java code using the coverage tool EMMA.
 218
 219   This class provides an API that allows users to capture absolute code coverage
 220   and code coverage on a subset of lines for each Java source file. Coverage
 221   reports are generated in JSON format.
 222   """
 223   # Regular expression to get package name from Java package statement.
 224   RE_PACKAGE_MATCH_GROUP = 'package'
 225   RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)
 226
 227   def __init__(self, emma_file_base_dir, files_for_coverage):
 228     """Initialize _EmmaCoverageStats.
 229
 230     Args:
 231       emma_file_base_dir: String representing the path to the base directory
 232         where EMMA HTML coverage files are stored, i.e. parent of index.html.
 233       files_for_coverage: A list of Java source code file paths to get EMMA
 234         coverage for.
 235     """
 236     self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
 237     self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)
 238
 239   def GetCoverageDict(self, lines_for_coverage):
 240     """Returns a dict containing detailed coverage information.
 241
 242     Gets detailed coverage stats for each file specified in the
 243     |lines_for_coverage| dict and the total incremental number of lines covered
 244     and executable for all files in |lines_for_coverage|.
 245
 246     Args:
 247       lines_for_coverage: A dict mapping Java source file paths to lists of line
 248         numbers.
 249
 250     Returns:
 251       A dict containing coverage stats for the given dict of files and lines.
 252         Contains absolute coverage stats for each file, coverage stats for each
 253         file's lines specified in |lines_for_coverage|, line by line coverage
 254         for each file, and overall coverage stats for the lines specified in
 255         |lines_for_coverage|.
 256     """
 257     file_coverage = {}
 258     for file_path, line_numbers in lines_for_coverage.iteritems():
 259       file_coverage[file_path] = self.GetCoverageDictForFile(
 260           file_path, line_numbers)
 261
 262     covered_statuses = [s['incremental'] for s in file_coverage.itervalues()]
 263     num_covered_lines = sum(s['covered'] for s in covered_statuses)
 264     num_total_lines = sum(s['total'] for s in covered_statuses)
 265     return {
 266       'files': file_coverage,
 267       'patch': {
 268         'incremental': {
 269           'covered': num_covered_lines,
 270           'total': num_total_lines
 271         }
 272       }
 273     }
 274
 275   def GetCoverageDictForFile(self, file_path, line_numbers):
 276     """Returns a dict containing detailed coverage info for the given file.
 277
 278     Args:
 279       file_path: The path to the Java source file that we want to create the
 280         coverage dict for.
 281       line_numbers: A list of integer line numbers to retrieve additional stats
 282         for.
 283
 284     Returns:
 285       A dict containing absolute, incremental, and line by line coverage for
 286         a file.
 287     """
 288     total_line_coverage = self._GetLineCoverageForFile(file_path)
 289     incremental_line_coverage = [line for line in total_line_coverage
 290                                  if line.lineno in line_numbers]
 291     line_by_line_coverage = [
 292       {
 293         'line': line.source,
 294         'coverage': line.covered_status,
 295         'changed': line.lineno in line_numbers,
 296       }
 297       for line in total_line_coverage
 298     ]
 299     total_covered_lines, total_lines = (
 300         self.GetSummaryStatsForLines(total_line_coverage))
 301     incremental_covered_lines, incremental_total_lines = (
 302         self.GetSummaryStatsForLines(incremental_line_coverage))
 303
 304     file_coverage_stats = {
 305       'absolute': {
 306         'covered': total_covered_lines,
 307         'total': total_lines
 308       },
 309       'incremental': {
 310         'covered': incremental_covered_lines,
 311         'total': incremental_total_lines
 312       },
 313       'source': line_by_line_coverage,
 314     }
 315     return file_coverage_stats
 316
 317   def GetSummaryStatsForLines(self, line_coverage):
 318     """Gets summary stats for a given list of LineCoverage objects.
 319
 320     Args:
 321       line_coverage: A list of LineCoverage objects.
 322
 323     Returns:
 324       A tuple containing the number of lines that are covered and the total
 325         number of lines that are executable, respectively
 326     """
 327     partially_covered_sum = 0
 328     covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
 329     for line in line_coverage:
 330       status = line.covered_status
 331       if status == NOT_EXECUTABLE:
 332         continue
 333       covered_status_totals[status] += 1
 334       if status == PARTIALLY_COVERED:
 335         partially_covered_sum += line.fractional_line_coverage
 336
 337     total_covered = covered_status_totals[COVERED] + partially_covered_sum
 338     total_lines = sum(covered_status_totals.values())
 339     return total_covered, total_lines
 340
 341   def _GetLineCoverageForFile(self, file_path):
 342     """Gets a list of LineCoverage objects corresponding to the given file path.
 343
 344     Args:
 345       file_path: String representing the path to the Java source file.
 346
 347     Returns:
 348       A list of LineCoverage objects, or None if there is no EMMA file
 349         for the given Java source file.
 350     """
 351     if file_path in self._source_to_emma:
 352       emma_file = self._source_to_emma[file_path]
 353       return self._emma_parser.GetLineCoverage(emma_file)
 354     else:
 355       logging.warning(
 356           'No code coverage data for %s, skipping.', file_path)
 357       return None
 358
 359   def _GetSourceFileToEmmaFileDict(self, files):
 360     """Gets a dict used to correlate Java source files with EMMA HTML files.
 361
 362     This method gathers the information needed to correlate EMMA HTML
 363     files with Java source files. EMMA XML and plain text reports do not provide
 364     line by line coverage data, so HTML reports must be used instead.
 365     Unfortunately, the HTML files that are created are given garbage names
 366     (i.e 1.html) so we need to manually correlate EMMA HTML files
 367     with the original Java source files.
 368
 369     Args:
 370       files: A list of file names for which coverage information is desired.
 371
 372     Returns:
 373       A dict mapping Java source file paths to EMMA HTML file paths.
 374     """
 375     # Maps Java source file paths to package names.
 376     # Example: /usr/code/file.java -> org.chromium.file.java.
 377     source_to_package = {}
 378     for file_path in files:
 379       package = self.GetPackageNameFromFile(file_path)
 380       if package:
 381         source_to_package[file_path] = package
 382       else:
 383         logging.warning("Skipping %s because it doesn\'t have a package "
 384                         "statement.", file_path)
 385
 386     # Maps package names to EMMA report HTML files.
 387     # Example: org.chromium.file.java -> out/coverage/1a.html.
 388     package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
 389     # Finally, we have a dict mapping Java file paths to EMMA report files.
 390     # Example: /usr/code/file.java -> out/coverage/1a.html.
 391     source_to_emma = {source: package_to_emma.get(package)
 392                       for source, package in source_to_package.iteritems()}
 393     return source_to_emma
 394
 395   @staticmethod
 396   def NeedsCoverage(file_path):
 397     """Checks to see if the file needs to be analyzed for code coverage.
 398
 399     Args:
 400       file_path: A string representing path to the file.
 401
 402     Returns:
 403       True for Java files that exist, False for all others.
 404     """
 405     if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
 406       return True
 407     else:
 408       logging.debug(
 409           'Skipping file %s, cannot compute code coverage.', file_path)
 410       return False
 411
 412   @staticmethod
 413   def GetPackageNameFromFile(file_path):
 414     """Gets the full package name including the file name for a given file path.
 415
 416     Args:
 417       file_path: String representing the path to the Java source file.
 418
 419     Returns:
 420       A string representing the full package name with file name appended or
 421         None if there is no package statement in the file.
 422     """
 423     with open(file_path) as f:
 424       file_content = f.read()
 425       package_match = re.search(_EmmaCoverageStats.RE_PACKAGE, file_content)
 426       if package_match:
 427         package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
 428         file_name = os.path.basename(file_path)
 429         return '%s.%s' % (package, file_name)
 430       else:
 431         return None
 432
 433
 434 def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
 435   """Generates a coverage report for a given set of lines.
 436
 437   Writes the results of the coverage analysis to the file specified by
 438   |out_file_path|.
 439
 440   Args:
 441     line_coverage_file: The path to a file which contains a dict mapping file
 442       names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
 443       that we should compute coverage information on lines 1 - 3 for file1.
 444     out_file_path: A string representing the location to write the JSON report.
 445     coverage_dir: A string representing the file path where the EMMA
 446       HTML coverage files are located (i.e. folder where index.html is located).
 447   """
 448   with open(line_coverage_file) as f:
 449     potential_files_for_coverage = json.load(f)
 450   files_for_coverage = {f: lines
 451                         for f, lines in potential_files_for_coverage.iteritems()
 452                         if _EmmaCoverageStats.NeedsCoverage(f)}
 453   if not files_for_coverage:
 454     logging.info('No Java files requiring coverage were included in %s.',
 455                  line_coverage_file)
 456     return
 457
 458   code_coverage = _EmmaCoverageStats(coverage_dir, files_for_coverage.keys())
 459   coverage_results = code_coverage.GetCoverageDict(
 460       files_for_coverage)
 461
 462   with open(out_file_path, 'w+') as out_status_file:
 463     json.dump(coverage_results, out_status_file)
 464
 465
 466 def main():
 467   argparser = argparse.ArgumentParser()
 468   argparser.add_argument('--out', required=True, type=str,
 469                          help='Report output file path.')
 470   argparser.add_argument('--emma-dir', required=True, type=str,
 471                          help='EMMA HTML report directory.')
 472   argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
 473                          help='File containing a JSON object. Should contain a '
 474                          'dict mapping file names to lists of line numbers of '
 475                          'code for which coverage information is desired.')
 476   argparser.add_argument('-v', '--verbose', action='count',
 477                          help='Print verbose log information.')
 478   args = argparser.parse_args()
 479   run_tests_helper.SetLogLevel(args.verbose)
 480   GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)
 481
 482
 483 if __name__ == '__main__':
 484   sys.exit(main())