Add ICU message format support
[chromium-blink-merge.git] / build / android / emma_coverage_stats.py
blob8500890703498679c76431b0ced124cd2101bc84
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Generates incremental code coverage reports for Java code in Chromium.
8 Usage:
10 build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
11 <EMMA file directory> --lines-for-coverage-file
12 <path to file containing lines for coverage>
14 Creates a JSON representation of the overall and file coverage stats and saves
15 this information to the specified output file.
16 """
18 import argparse
19 import collections
20 import json
21 import logging
22 import os
23 import re
24 import sys
25 from xml.etree import ElementTree
27 from pylib.utils import run_tests_helper
# Coverage states that can be assigned to a single line of source code.
NOT_EXECUTABLE = -1
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Record describing one line of code: its line number, its source text, one of
# the status constants above, and the fraction of the line that was covered.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    'lineno source covered_status fractional_line_coverage')
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
      <td class="l" title="78% line coverage (7 out of 9)">108</td>
      <td title="78% line coverage (7 out of 9 instructions)">
        if (index &lt; 0 || index &gt;= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
      <td class="l">109</td>
      <td> </td>
    </tr>
    <tr class="c">
      <td class="l">110</td>
      <td> if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
      <td class="l">111</td>
      <td> for (int i = 0; i &lt; mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # No-break space (U+00A0) as it appears in parsed element text.
  _NO_BREAK_SPACE = u'\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one for each <tr> element matched by
      |_XPATH_SELECT_LOC|.
    """
    line_tr_elements = self._FindElements(
        emma_file_path, self._XPATH_SELECT_LOC)
    line_coverage = []
    for tr in line_tr_elements:
      # Rows whose CSS class is not c/p/z are treated as not executable.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)

      # Get the fractional coverage value.
      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      # Get the line number.
      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag).
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

      # Get the original line of Java source code. An element without any text
      # yields None, which is treated as an empty source line.
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or u''
      # Replace at the text level so this works on both Python 2 and 3.
      source = raw_source.replace(self._NO_BREAK_SPACE, u' ')
      if not isinstance(source, str):
        # Python 2 only: keep returning UTF-8 encoded byte strings.
        source = source.encode('UTF-8')

      line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
      line_coverage.append(line)

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['HREF']): link.text
        for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    # items() is available on both Python 2 and Python 3 dicts.
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, class_name_element.attrib['HREF'])
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
      Returns an empty list if there is no match.
    """
    # Read raw bytes so that the explicit ISO-8859-1 decode works on Python 3
    # as well, where a text-mode read already returns decoded text.
    with open(file_path, 'rb') as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)
class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are generated in JSON format.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|. Files for which no
    EMMA coverage data exists are left out of the result.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, line by line coverage
      for each file, and overall coverage stats for the lines specified in
      |lines_for_coverage|.
    """
    file_coverage = {}
    # items() is available on both Python 2 and Python 3 dicts.
    for file_path, line_numbers in lines_for_coverage.items():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      # None means there is no EMMA data for this file; skip it rather than
      # storing an entry that would break the summation below.
      if file_coverage_dict is not None:
        file_coverage[file_path] = file_coverage_dict

    covered_statuses = [s['incremental'] for s in file_coverage.values()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
        'files': file_coverage,
        'patch': {
            'incremental': {
                'covered': num_covered_lines,
                'total': num_total_lines
            }
        }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file, or None if there is no EMMA coverage data for the file.
    """
    total_line_coverage = self._GetLineCoverageForFile(file_path)
    if total_line_coverage is None:
      return None

    # A set makes each "was this line changed" lookup constant time.
    changed_lines = set(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
        }
        for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    file_coverage_stats = {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }
    return file_coverage_stats

  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
      number of lines that are executable, respectively. Partially covered
      lines contribute their fractional coverage to the covered count.
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetLineCoverageForFile(self, file_path):
    """Gets a list of LineCoverage objects corresponding to the given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A list of LineCoverage objects, or None if there is no EMMA file
      for the given Java source file.
    """
    # get() also covers entries that are present but hold no EMMA file path.
    emma_file = self._source_to_emma.get(file_path)
    if not emma_file:
      logging.warning(
          'No code coverage data for %s, skipping.', file_path)
      return None
    return self._emma_parser.GetLineCoverage(emma_file)

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. Source
      files without a matching EMMA HTML file are omitted.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn't have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {}
    for source, package in source_to_package.items():
      emma_file = package_to_emma.get(package)
      # Leave out sources that EMMA produced no report for, so that lookups
      # never hand a None path to the HTML parser.
      if emma_file:
        source_to_emma[source] = emma_file
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
      return True
    logging.debug(
        'Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
      None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    package_match = re.search(_EmmaCoverageStats.RE_PACKAGE, file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. Nothing is written if none of the listed files needs
  coverage.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  # items() is available on both Python 2 and Python 3 dicts.
  files_for_coverage = {
      path: lines
      for path, lines in potential_files_for_coverage.items()
      if _EmmaCoverageStats.NeedsCoverage(path)
  }
  if not files_for_coverage:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)
    return

  code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
  coverage_results = code_coverage.GetCoverageDict(files_for_coverage)

  # The report is only ever written here, so plain write mode is sufficient.
  with open(out_file_path, 'w') as out_status_file:
    json.dump(coverage_results, out_status_file)
def main():
  """Parses the command line flags and generates the coverage report."""
  lines_file_help = ('File containing a JSON object. Should contain a '
                     'dict mapping file names to lists of line numbers of '
                     'code for which coverage information is desired.')

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--out', required=True, type=str, help='Report output file path.')
  parser.add_argument(
      '--emma-dir', required=True, type=str,
      help='EMMA HTML report directory.')
  parser.add_argument(
      '--lines-for-coverage-file', required=True, type=str,
      help=lines_file_help)
  parser.add_argument(
      '-v', '--verbose', action='count',
      help='Print verbose log information.')
  options = parser.parse_args()

  # The number of -v flags given selects the log level.
  run_tests_helper.SetLogLevel(options.verbose)
  GenerateCoverageReport(
      options.lines_for_coverage_file, options.out, options.emma_dir)


if __name__ == '__main__':
  sys.exit(main())