2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
"""Generates incremental code coverage reports for Java code in Chromium.

Usage:

  build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
    <EMMA file directory> --lines-for-coverage-file
    <path to file containing lines for coverage>

  Creates a JSON representation of the overall and file coverage stats and saves
  this information to the specified output file.
"""
25 from xml
.etree
import ElementTree
27 from devil
.utils
import run_tests_helper
# Coverage information about a single line of code:
#   lineno: line number within the Java source file.
#   source: text of that source line.
#   covered_status: one of the module-level coverage status constants.
#   fractional_line_coverage: fraction of the line that was covered.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    ['lineno', 'source', 'covered_status', 'fractional_line_coverage'])
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
       <td class="l" title="78% line coverage (7 out of 9)">108</td>
       <td title="78% line coverage (7 out of 9 instructions)">
         if (index < 0 || index = mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
       <td class="l">109</td>
       <td> </td>
    </tr>
    <tr class="c">
       <td class="l">110</td>
       <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
       <td class="l">111</td>
       <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # Unicode no-break space (U+00A0). Kept as text rather than as its UTF-8
  # byte pair so the replacement below works on both Python 2 and Python 3.
  _NO_BREAK_SPACE = u'\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects.
    """
    line_tr_elements = self._FindElements(
        emma_file_path, self._XPATH_SELECT_LOC)
    return [self._ParseLineCoverage(tr) for tr in line_tr_elements]

  def _ParseLineCoverage(self, tr):
    """Converts one <tr> source row of an EMMA report into a LineCoverage.

    Args:
      tr: ElementTree.Element for a row matched by |_XPATH_SELECT_LOC|.

    Returns:
      A LineCoverage describing the row.
    """
    # Get the coverage status. Rows without a recognised CSS class are treated
    # as not executable.
    coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)

    # Get the fractional coverage value.
    if coverage_status == PARTIALLY_COVERED:
      title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
      # Parse string that contains percent covered: "83% line coverage ...".
      percent_covered = title_attribute.split('%')[0]
      fractional_coverage = int(percent_covered) / 100.0
    else:
      fractional_coverage = 1.0

    # Get the line number.
    lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
    # Handles oddly formatted HTML (where there is an extra <a> tag).
    lineno = int(lineno_element.text or
                 lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

    # Get the original line of Java source code. An empty cell has no text
    # (None); treat it as an empty source line instead of failing.
    raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or u''
    source = raw_source.replace(self._NO_BREAK_SPACE, u' ')
    if not isinstance(source, str):
      # Python 2: |source| is a unicode object here. Keep returning UTF-8
      # encoded byte strings as this method always has on Python 2.
      source = source.encode('UTF-8')

    return LineCoverage(lineno, source, coverage_status, fractional_coverage)

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
        names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
      os.path.join(self._base_dir, link.attrib['HREF']): link.text
      for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    # items() rather than iteritems() so this runs on Python 2 and Python 3.
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, class_name_element.attrib['HREF'])
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  # pylint: disable=no-self-use
  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
        Returns an empty list if there is no match.
    """
    # Read raw bytes so the explicit ISO-8859-1 -> UTF-8 transcoding below
    # works on Python 3 as well, where text-mode reads have no decode().
    with open(file_path, 'rb') as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)
class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are generated in JSON format.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
        Contains absolute coverage stats for each file, coverage stats for each
        file's lines specified in |lines_for_coverage|, line by line coverage
        for each file, and overall coverage stats for the lines specified in
        |lines_for_coverage|.
    """
    file_coverage = {}
    # items()/values() rather than iteritems()/itervalues() so this runs on
    # both Python 2 and Python 3.
    for file_path, line_numbers in lines_for_coverage.items():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      if file_coverage_dict:
        file_coverage[file_path] = file_coverage_dict
      else:
        logging.warning(
            'No code coverage data for %s, skipping.', file_path)

    covered_statuses = [s['incremental'] for s in file_coverage.values()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
      'files': file_coverage,
      'patch': {
        'incremental': {
          'covered': num_covered_lines,
          'total': num_total_lines
        }
      }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
        a file, or None if no EMMA report is known for |file_path|.
    """
    if file_path not in self._source_to_emma:
      return None
    emma_file = self._source_to_emma[file_path]
    total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)
    # Build the lookup once; membership is tested for every line in the file.
    changed_lines = set(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
      {
        'line': line.source,
        'coverage': line.covered_status,
        'changed': line.lineno in changed_lines,
        'fractional_coverage': line.fractional_line_coverage,
      }
      for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    file_coverage_stats = {
      'absolute': {
        'covered': total_covered_lines,
        'total': total_lines
      },
      'incremental': {
        'covered': incremental_covered_lines,
        'total': incremental_total_lines
      },
      'source': line_by_line_coverage,
    }
    return file_coverage_stats

  # pylint: disable=no-self-use
  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
        number of lines that are executable, respectively
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        # Non-executable lines count towards neither total.
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        # A partially covered line contributes only its covered fraction.
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn\'t have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma[package]
                      for source, package in source_to_package.items()
                      if package in package_to_emma}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
      return True
    logging.info('Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
        None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    package_match = re.search(_EmmaCoverageStats.RE_PACKAGE, file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  # Keep only the entries that can actually be analyzed. items() rather than
  # iteritems() so this runs on both Python 2 and Python 3.
  files_for_coverage = {
      path: lines
      for path, lines in potential_files_for_coverage.items()
      if _EmmaCoverageStats.NeedsCoverage(path)}

  coverage_results = {}
  if files_for_coverage:
    code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
    coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
  else:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)

  # An empty JSON object is written when nothing needed coverage.
  with open(out_file_path, 'w') as out_status_file:
    json.dump(coverage_results, out_status_file)
def main():
  """Parses the command-line flags and generates the coverage report."""
  argparser = argparse.ArgumentParser()
  argparser.add_argument('--out', required=True, type=str,
                         help='Report output file path.')
  argparser.add_argument('--emma-dir', required=True, type=str,
                         help='EMMA HTML report directory.')
  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
                         help='File containing a JSON object. Should contain a '
                         'dict mapping file names to lists of line numbers of '
                         'code for which coverage information is desired.')
  argparser.add_argument('-v', '--verbose', action='count',
                         help='Print verbose log information.')
  args = argparser.parse_args()
  # The -v count is handed to the devil helper to configure logging.
  run_tests_helper.SetLogLevel(args.verbose)
  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)
476 if __name__
== '__main__':