# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to clean lcov files and convert them to HTML.

TODO(niranjan): Add usage information here
"""

import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import urllib2

# These are source files that were generated during compile time. We want to
# remove references to these files from the lcov file otherwise genhtml will
# fail to convert the lcov data to HTML.
win32_srcs_exclude = ['parse.y',
                      'csspropertynames.gperf']

# Number of lines of a new coverage data set
# to send at a time to the dashboard.
POST_CHUNK_SIZE = 50

# Number of post request failures to allow before exiting.
MAX_FAILURES = 5

def CleanPathNames(dir):
  """Clean the pathnames of the HTML generated by genhtml.

  This method is required only for code coverage on Win32. Due to a known issue
  with reading from CIFS shares mounted on Linux, genhtml appends a ^M to every
  file name it reads from the Windows share, causing corrupt filenames in
  genhtml's output folder.

  Args:
    dir: Output folder of the genhtml output.
  """
  # Strip off the ^M characters that get appended to the file name
  for dirpath, dirname, filenames in os.walk(dir):
    for file in filenames:
      file_clean = file.replace('\r', '')
      if file_clean != file:
        # Rename relative to the directory being walked, not the current dir.
        os.rename(os.path.join(dirpath, file),
                  os.path.join(dirpath, file_clean))

def GenerateHtml(lcov_path, dash_root):
  """Runs genhtml to convert lcov data to human readable HTML.

  This script expects the LCOV file name to be in the format:
  chrome_<platform>_<revision#>.lcov.
  This method parses the file name, sets up the correct folder hierarchy for
  the coverage data, and then runs genhtml to get the actual HTML formatted
  coverage data.

  Args:
    lcov_path: Path of the lcov data file.
    dash_root: Root location of the dashboard.

  Returns:
    Code coverage percentage on success.
    None on failure.
  """
  # Parse the LCOV file name.
  filename = os.path.basename(lcov_path).split('.')[0]
  buffer = filename.split('_')
  dash_root = dash_root.rstrip('/')  # Remove trailing '/'

  # Set up correct folder hierarchy in the dashboard root
  # TODO(niranjan): Check the formatting using a regexp
  if len(buffer) >= 3:  # Check if filename has right formatting
    platform = buffer[len(buffer) - 2]
    revision = buffer[len(buffer) - 1]
    if os.path.exists(os.path.join(dash_root, platform)) == False:
      os.mkdir(os.path.join(dash_root, platform))
    output_dir = os.path.join(dash_root, platform, revision)
  else:
    # TODO(niranjan): Add failure logging here.
    return None  # File not formatted correctly

  # Run genhtml to generate the HTML report into the output directory.
  os.system('/usr/bin/genhtml -o %s %s' % (output_dir, lcov_path))
  # TODO(niranjan): Check the exit status of the genhtml command.
  # TODO(niranjan): Parse the stdout and return coverage percentage.
  CleanPathNames(output_dir)
  return 'dummy'  # TODO(niranjan): Return actual percentage.

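
# A minimal illustration of the file-name convention GenerateHtml expects
# (the path and revision below are hypothetical, not real data):
#   lcov_path = '/coverage/chrome_win32_12345.lcov'
#   filename  = 'chrome_win32_12345'  ->  buffer = ['chrome', 'win32', '12345']
#   platform  = 'win32', revision = '12345'
#   output_dir = <dash_root>/win32/12345, which genhtml then populates.
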
def CleanWin32Lcov(lcov_path, src_root):
  """Cleanup the lcov data generated on Windows.

  This method fixes up the paths inside the lcov file from the Win32 specific
  paths to the actual paths of the mounted CIFS share. The lcov files generated
  on Windows have the following format:

  SF:c:\chrome_src\src\skia\sgl\skscan_antihair.cpp
  DA:<line number>,<execution count>
  ...
  end_of_record

  This method changes the source-file (SF) lines to a format compatible with
  genhtml on Linux by fixing paths. This method also removes references to
  certain dynamically generated files to be excluded from the code coverage.

  Args:
    lcov_path: Path of the Win32 lcov file to be cleaned.
    src_root: Location of the source and symbols dir.
  """
  strip_flag = False
  lcov = open(lcov_path, 'r')
  loc_csv_file = open(lcov_path + '.csv', 'w')
  (tmpfile_id, tmpfile_name) = tempfile.mkstemp()
  tmpfile = open(tmpfile_name, 'w')
  src_root = src_root.rstrip('/')  # Remove trailing '/'
  for line in lcov:
    if line.startswith('SF'):
      # We want to exclude certain auto-generated files otherwise genhtml will
      # fail to convert lcov to HTML.
      for exp in win32_srcs_exclude:
        if line.rfind(exp) != -1:
          strip_flag = True  # Indicates that we want to remove this section

      # Now we normalize the paths
      # e.g. Change SF:c:\foo\src\... to SF:/chrome_src/...
      parse_buffer = line.split(':')
      buffer = '%s:%s%s' % (parse_buffer[0],
                            src_root,
                            parse_buffer[2])
      buffer = buffer.replace('\\', '/')
      line = buffer.replace('\r', '')
      # We want an accurate count of the lines of code in a given file so that
      # we can estimate the code coverage percentage accurately. We use a
      # third party script cloc.pl which gives that count and then just parse
      # its command line output to filter out the other unnecessary data.
      # TODO(niranjan): Find out a better way of doing this.
      buffer = buffer.lstrip('SF:')
      file_for_loc = buffer.replace('\r\n', '')
      # TODO(niranjan): Add a check to see if cloc is present on the machine.
      command = ['perl',
                 'cloc.pl',
                 file_for_loc]
      output = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT).communicate()[0]
      # Only record a line count when cloc did not report an error.
      if output.rfind('error:') == -1:
        tmp_buf1 = output.split('=')
        tmp_buf2 = tmp_buf1[len(tmp_buf1) - 2].split('x')[0].split(' ')
        loc = tmp_buf2[len(tmp_buf2) - 2]
        loc_csv_file.write('%s,%s\r\n' % (file_for_loc, loc))
    # Write to the temp file if the section to write is valid
    if strip_flag == False:
      # Also write this to the 'clean' LCOV file
      tmpfile.write('%s' % (line))

    # Reset the strip flag
    if line.endswith('end_of_record'):
      strip_flag = False

  # Close the files and replace the lcov file by the 'clean' tmpfile
  tmpfile.close()
  lcov.close()
  loc_csv_file.close()
  shutil.move(tmpfile_name, lcov_path)

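
# A small worked example of the path rewrite above (hypothetical values,
# assuming the Windows drive c: is mounted on Linux at src_root='/mnt'):
#   in : SF:c:\chrome_src\src\skia\sgl\skscan_antihair.cpp
#   out: SF:/mnt/chrome_src/src/skia/sgl/skscan_antihair.cpp
# and the matching line written to <lcov_path>.csv would look like:
#   /mnt/chrome_src/src/skia/sgl/skscan_antihair.cpp,<cloc line count>
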
def ParseCoverageDataForDashboard(lcov_path):
  """Parse code coverage data into coverage results per source node.

  Use lcov and linecount data to create a map of source nodes to
  corresponding total and tested line counts.

  Args:
    lcov_path: File path to lcov coverage data.

  Returns:
    List of strings with comma separated source node and coverage.
  """
  results = {}
  linecount_path = lcov_path + '.csv'
  assert os.path.exists(linecount_path), (
      'linecount csv does not exist at: %s' % linecount_path)
  csv_file = open(linecount_path, 'r')
  linecounts = csv_file.readlines()
  csv_file.close()
  lcov_file = open(lcov_path, 'r')
  srcfile_index = 0
  for line in lcov_file:
    line = line.strip()
    # Set the current srcfile name for a new src file declaration.
    if line[:len('SF:')] == 'SF:':
      instrumented_set = {}
      executed_set = {}
      srcfile_name = line[len('SF:'):]
    # Mark coverage data points hashlist style for the current src file.
    if line[:len('DA:')] == 'DA:':
      line_info = line[len('DA:'):].split(',')
      assert len(line_info) == 2, 'DA: line format unexpected - %s' % line
      (line_num, line_was_executed) = line_info
      instrumented_set[line_num] = True
      # line_was_executed is '0' or '1'
      if int(line_was_executed):
        executed_set[line_num] = True
    # Update results for the current src file at record end.
    if line == 'end_of_record':
      instrumented = len(instrumented_set.keys())
      executed = len(executed_set.keys())
      parent_directory = srcfile_name[:srcfile_name.rfind('/') + 1]
      linecount_point = linecounts[srcfile_index].strip().split(',')
      assert len(linecount_point) == 2, (
          'linecount format unexpected - %s' % linecounts[srcfile_index])
      (linecount_path, linecount_count) = linecount_point
      srcfile_index += 1

      # Sanity check that path names in the lcov and linecount are lined up.
      if linecount_path[-10:] != srcfile_name[-10:]:
        print 'NAME MISMATCH: %s :: %s' % (srcfile_name, linecount_path)
      if instrumented > int(linecount_count):
        linecount_count = instrumented

      # Keep counts the same way that it is done in the genhtml utility.
      # Count the coverage of a file towards the file,
      # the parent directory, and the source root.
      AddResults(results, srcfile_name, int(linecount_count), executed)
      AddResults(results, parent_directory, int(linecount_count), executed)
      AddResults(results, '/', instrumented, executed)
  lcov_file.close()
  keys = results.keys()
  keys.sort()
  # The first key (sorted) will be the base directory '/'
  # but its full path may be '/mnt/chrome_src/src/'
  # using this offset will ignore the part '/mnt/chrome_src/src'.
  # Offset is the last '/' that isn't the last character for the
  # first directory name in results (position 1 in keys).
  offset = len(keys[1][:keys[1][:-1].rfind('/')])
  lines = []
  for key in keys:
    if len(key) > offset:
      node_path = key[offset:]
    else:
      node_path = key
    (total, covered) = results[key]
    percent = float(covered) * 100 / total
    lines.append('%s,%.2f' % (node_path, percent))
  return lines

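
# The returned list is a series of 'node,percent' strings, one per source
# node, e.g. (illustrative values only):
#   ['/,72.30', '/skia/,64.51', '/skia/sgl/skscan_antihair.cpp,83.33']
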
def AddResults(results, location, lines_total, lines_executed):
  """Add resulting line tallies to a location's total.

  Args:
    results: Map of node location to corresponding coverage data.
    location: Source node string.
    lines_total: Number of lines to add to the total count for this node.
    lines_executed: Number of lines to add to the executed count for this node.
  """
  if results.has_key(location):
    (i, e) = results[location]
    results[location] = (i + lines_total, e + lines_executed)
  else:
    results[location] = (lines_total, lines_executed)

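
# Sketch of how AddResults accumulates tallies (hypothetical numbers):
#   results = {}
#   AddResults(results, '/skia/', 100, 40)
#   AddResults(results, '/skia/', 50, 30)
#   # results['/skia/'] is now (150, 70): 150 total lines, 70 executed.
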
def PostResultsToDashboard(lcov_path, results, post_url):
  """Post coverage results to coverage dashboard.

  Args:
    lcov_path: File path for lcov data in the expected format:
      <project>_<platform>_<cl#>.coverage.lcov
    results: string list in the appropriate posting format.
    post_url: Base URL of the coverage dashboard.
  """
  project_platform_cl = lcov_path.split('.')[0].split('_')
  assert len(project_platform_cl) == 3, (
      'lcov_path not in expected format: %s' % lcov_path)
  (project, platform, cl_string) = project_platform_cl
  project_name = '%s-%s' % (project, platform)
  url = '%s/newdata.do?project=%s&cl=%s' % (post_url, project_name, cl_string)

  # Send POSTs of POST_CHUNK_SIZE lines of the result set until
  # there is no more data and last_loop is set to True.
  cur_line = 0
  last_loop = False
  while not last_loop:
    body = '\n'.join(results[cur_line:cur_line + POST_CHUNK_SIZE])
    cur_line += POST_CHUNK_SIZE
    last_loop = (cur_line >= len(results))
    req = urllib2.Request('%s&last=%s' % (url, str(last_loop)), body)
    req.add_header('Content-Type', 'text/plain')
    SendPost(req)

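
# For example (hypothetical values), an lcov file named
# 'chrome_win32_12345.coverage.lcov' posted with post_url='http://dashboard'
# produces requests against:
#   http://dashboard/newdata.do?project=chrome-win32&cl=12345&last=False
# each carrying up to POST_CHUNK_SIZE result lines in the body, with last=True
# on the final chunk.
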
# Global counter for the current number of request failures.
num_fails = 0

def SendPost(req):
  """Execute a post request and retry for up to MAX_FAILURES.

  Args:
    req: A urllib2 request object.

  Raises:
    URLError: If urlopen throws after too many retries.
    HTTPError: If urlopen throws after too many retries.
  """
  global num_fails
  try:
    urllib2.urlopen(req)
    # Reset failure count.
    num_fails = 0
  except (urllib2.URLError, urllib2.HTTPError):
    num_fails += 1
    if num_fails < MAX_FAILURES:
      print 'fail, retrying (%d)' % num_fails
      SendPost(req)
    else:
      print 'POST request exceeded allowed retries.'
      raise

def main():
  if not sys.platform.startswith('linux'):
    print 'This script is supported only on Linux'
    return 1
  # Command line parsing
  parser = optparse.OptionParser()
  parser.add_option('-p',
                    dest='platform',
                    default=None,
                    help=('Platform that the lcov file was generated on. Must'
                          ' be one of {win32, linux2, linux3, macosx}'))
  parser.add_option('-s',
                    dest='src_dir',
                    default=None,
                    help='Path to the source code and symbols')
  parser.add_option('-d',
                    dest='dash_root',
                    default=None,
                    help='Root directory for the dashboard')
  parser.add_option('-l',
                    dest='lcov_path',
                    default=None,
                    help='Location of the LCOV file to process')
  parser.add_option('-u',
                    dest='post_url',
                    default=None,
                    help='Base URL of the coverage dashboard')
  (options, args) = parser.parse_args()
  if options.platform == None:
    parser.error('Platform not specified')
  if options.lcov_path == None:
    parser.error('lcov file path not specified')
  if options.src_dir == None:
    parser.error('Source directory not specified')
  if options.dash_root == None:
    parser.error('Dashboard root not specified')
  if options.post_url == None:
    parser.error('Post URL not specified')
  if options.platform == 'win32':
    CleanWin32Lcov(options.lcov_path, options.src_dir)
    percent = GenerateHtml(options.lcov_path, options.dash_root)
    if percent == None:
      # TODO(niranjan): Add logging.
      print 'Failed to generate code coverage'
      return 1
    else:
      # TODO(niranjan): Do something with the code coverage numbers
      pass
  else:
    print 'Unsupported platform'
    return 1
  # Prep coverage results for dashboard and post new set.
  parsed_data = ParseCoverageDataForDashboard(options.lcov_path)
  PostResultsToDashboard(options.lcov_path, parsed_data, options.post_url)
  return 0


if __name__ == '__main__':
  sys.exit(main())