# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Snapshot Build Bisect Tool

This script bisects a snapshot archive using binary search. It starts at
a bad revision (it will try to guess HEAD) and asks for a last known-good
revision. It will then binary search across this revision range by downloading,
unzipping, and opening Chromium for you. After testing the specific revision,
it will ask you whether it is good or bad before continuing the search.
"""

# The base URL for stored build archives.
CHROMIUM_BASE_URL = ('http://commondatastorage.googleapis.com'
                     '/chromium-browser-snapshots')
WEBKIT_BASE_URL = ('http://commondatastorage.googleapis.com'
                   '/chromium-webkit-snapshots')
ASAN_BASE_URL = ('http://commondatastorage.googleapis.com'
                 '/chromium-browser-asan')

GS_BUCKET_NAME = 'chrome-unsigned/desktop-W15K3Y'

# Base URL for downloading official builds.
GOOGLE_APIS_URL = 'commondatastorage.googleapis.com'

# The base URL for official builds.
OFFICIAL_BASE_URL = 'http://%s/%s' % (GOOGLE_APIS_URL, GS_BUCKET_NAME)

# URL template for viewing changelogs between revisions.
CHANGELOG_URL = ('http://build.chromium.org'
                 '/f/chromium/perf/dashboard/ui/changelog.html'
                 '?url=/trunk/src&range=%d%%3A%d')

# URL template for viewing changelogs between official versions.
OFFICIAL_CHANGELOG_URL = ('http://omahaproxy.appspot.com/changelog'
                          '?old_version=%s&new_version=%s')

DEPS_FILE = 'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d'

# Blink changelogs URL.
BLINK_CHANGELOG_URL = ('http://build.chromium.org'
                       '/f/chromium/perf/dashboard/ui/changelog_blink.html'
                       '?url=/trunk&range=%d%%3A%d')
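
# Illustrative note (added comment, not from the original script): the
# changelog templates above are filled in at report time; for example
# CHANGELOG_URL % (123000, 123100) produces
#   http://build.chromium.org/f/chromium/perf/dashboard/ui/changelog.html?url=/trunk/src&range=123000%3A123100
# ('%%' collapses to a single '%', so '%%3A' becomes the URL-encoded ':').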

DONE_MESSAGE_GOOD_MIN = ('You are probably looking for a change made after %s ('
                         'known good), but no later than %s (first known bad).')
DONE_MESSAGE_GOOD_MAX = ('You are probably looking for a change made after %s ('
                         'known bad), but no later than %s (first known good).')

CHROMIUM_GITHASH_TO_SVN_URL = (
    'https://chromium.googlesource.com/chromium/src/+/%s?format=json')

BLINK_GITHASH_TO_SVN_URL = (
    'https://chromium.googlesource.com/chromium/blink/+/%s?format=json')

GITHASH_TO_SVN_URL = {
    'chromium': CHROMIUM_GITHASH_TO_SVN_URL,
    'blink': BLINK_GITHASH_TO_SVN_URL,
}

# Search pattern to be matched in the JSON output from
# CHROMIUM_GITHASH_TO_SVN_URL to get the chromium revision (svn revision).
CHROMIUM_SEARCH_PATTERN = (
    r'.*git-svn-id: svn://svn.chromium.org/chrome/trunk/src@(\d+) ')

# Search pattern to be matched in the JSON output from
# BLINK_GITHASH_TO_SVN_URL to get the blink revision (svn revision).
BLINK_SEARCH_PATTERN = (
    r'.*git-svn-id: svn://svn.chromium.org/blink/trunk@(\d+) ')

SEARCH_PATTERN = {
    'chromium': CHROMIUM_SEARCH_PATTERN,
    'blink': BLINK_SEARCH_PATTERN,
}
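
# Illustrative example (added comment): the commit messages returned by the
# gitiles JSON URLs above typically end with a git-svn-id line such as
#   git-svn-id: svn://svn.chromium.org/chrome/trunk/src@123456 <uuid>
# (hypothetical revision); group(1) of the matching pattern captures the
# numeric svn revision, here '123456'.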

CREDENTIAL_ERROR_MESSAGE = ('You are attempting to access protected data with '
                            'no configured credentials')

###############################################################################

import httplib
import json
import optparse
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
import threading
import urllib
import zipfile

from distutils.version import LooseVersion
from xml.etree import ElementTree


class PathContext(object):
  """A PathContext is used to carry the information used to construct URLs and
  paths when dealing with the storage server and archives."""

  def __init__(self, base_url, platform, good_revision, bad_revision,
               is_official, is_asan, use_local_repo, flash_path=None,
               pdf_path=None):
    super(PathContext, self).__init__()
    # Store off the input parameters.
    self.base_url = base_url
    self.platform = platform  # What's passed in to the '-a/--archive' option.
    self.good_revision = good_revision
    self.bad_revision = bad_revision
    self.is_official = is_official
    self.is_asan = is_asan
    self.build_type = 'release'
    self.flash_path = flash_path
    # Dictionary which stores svn revision number as key and its
    # corresponding git hash as value. This data is populated in
    # _FetchAndParse and used later in GetDownloadURL while downloading
    # the build.
    self.githash_svn_dict = {}
    self.pdf_path = pdf_path

    # The name of the ZIP file in a revision directory on the server.
    self.archive_name = None

    # If the script is run from a local Chromium checkout,
    # "--use-local-repo" option can be used to make the script run faster.
    # It uses "git svn find-rev <SHA1>" command to convert git hash to svn
    # revision number.
    self.use_local_repo = use_local_repo

    # Set some internal members:
    #   _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
    #   _archive_extract_dir = Uncompressed directory in the archive_name file.
    #   _binary_name = The name of the executable to run.
    if self.platform in ('linux', 'linux64', 'linux-arm'):
      self._binary_name = 'chrome'
    elif self.platform in ('mac', 'mac64'):
      self.archive_name = 'chrome-mac.zip'
      self._archive_extract_dir = 'chrome-mac'
    elif self.platform in ('win', 'win64'):
      self.archive_name = 'chrome-win32.zip'
      self._archive_extract_dir = 'chrome-win32'
      self._binary_name = 'chrome.exe'
    else:
      raise Exception('Invalid platform: %s' % self.platform)

    if is_official:
      if self.platform == 'linux':
        self._listing_platform_dir = 'precise32/'
        self.archive_name = 'chrome-precise32.zip'
        self._archive_extract_dir = 'chrome-precise32'
      elif self.platform == 'linux64':
        self._listing_platform_dir = 'precise64/'
        self.archive_name = 'chrome-precise64.zip'
        self._archive_extract_dir = 'chrome-precise64'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'mac/'
        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
      elif self.platform == 'mac64':
        self._listing_platform_dir = 'mac64/'
        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
      elif self.platform == 'win':
        self._listing_platform_dir = 'win/'
        self.archive_name = 'chrome-win.zip'
        self._archive_extract_dir = 'chrome-win'
      elif self.platform == 'win64':
        self._listing_platform_dir = 'win64/'
        self.archive_name = 'chrome-win64.zip'
        self._archive_extract_dir = 'chrome-win64'
    else:
      if self.platform in ('linux', 'linux64', 'linux-arm'):
        self.archive_name = 'chrome-linux.zip'
        self._archive_extract_dir = 'chrome-linux'
        if self.platform == 'linux':
          self._listing_platform_dir = 'Linux/'
        elif self.platform == 'linux64':
          self._listing_platform_dir = 'Linux_x64/'
        elif self.platform == 'linux-arm':
          self._listing_platform_dir = 'Linux_ARM_Cross-Compile/'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'Mac/'
        self._binary_name = 'Chromium.app/Contents/MacOS/Chromium'
      elif self.platform == 'win':
        self._listing_platform_dir = 'Win/'

  def GetASANPlatformDir(self):
    """ASAN builds are in directories like "linux-release", or have filenames
    like "asan-win32-release-277079.zip". This aligns to our platform names
    except in the case of Windows where they use "win32" instead of "win"."""
    if self.platform == 'win':
      return 'win32'
    else:
      return self.platform

  def GetListingURL(self, marker=None):
    """Returns the URL for a directory listing, with an optional marker."""
    marker_param = ''
    if marker:
      marker_param = '&marker=' + str(marker)
    if self.is_asan:
      prefix = '%s-%s' % (self.GetASANPlatformDir(), self.build_type)
      return self.base_url + '/?delimiter=&prefix=' + prefix + marker_param
    else:
      return (self.base_url + '/?delimiter=/&prefix=' +
              self._listing_platform_dir + marker_param)
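
  # Illustrative example (added comment): for a non-ASAN linux64 bisect the
  # listing URL built above looks like
  #   http://commondatastorage.googleapis.com/chromium-browser-snapshots/?delimiter=/&prefix=Linux_x64/
  # with '&marker=<last key>' appended when continuing a truncated listing.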

  def GetDownloadURL(self, revision):
    """Gets the download URL for a build archive of a specific revision."""
    if self.is_asan:
      return '%s/%s-%s/%s-%d.zip' % (
          ASAN_BASE_URL, self.GetASANPlatformDir(), self.build_type,
          self.GetASANBaseName(), revision)
    if self.is_official:
      return '%s/%s/%s%s' % (
          OFFICIAL_BASE_URL, revision, self._listing_platform_dir,
          self.archive_name)
    else:
      if str(revision) in self.githash_svn_dict:
        revision = self.githash_svn_dict[str(revision)]
      return '%s/%s%s/%s' % (self.base_url, self._listing_platform_dir,
                             revision, self.archive_name)
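
  # Illustrative example (added comment, hypothetical revision number): for a
  # linux64 snapshot, the non-official URL assembled above has the form
  #   http://commondatastorage.googleapis.com/chromium-browser-snapshots/Linux_x64/123456/chrome-linux.zip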

  def GetLastChangeURL(self):
    """Returns a URL to the LAST_CHANGE file."""
    return self.base_url + '/' + self._listing_platform_dir + 'LAST_CHANGE'

  def GetASANBaseName(self):
    """Returns the base name of the ASAN zip file."""
    if 'linux' in self.platform:
      return 'asan-symbolized-%s-%s' % (self.GetASANPlatformDir(),
                                        self.build_type)
    else:
      return 'asan-%s-%s' % (self.GetASANPlatformDir(), self.build_type)

  def GetLaunchPath(self, revision):
    """Returns a relative path (presumably from the archive extraction location)
    that is used to run the executable."""
    if self.is_asan:
      extract_dir = '%s-%d' % (self.GetASANBaseName(), revision)
    else:
      extract_dir = self._archive_extract_dir
    return os.path.join(extract_dir, self._binary_name)

  def ParseDirectoryIndex(self):
    """Parses the Google Storage directory listing into a list of revision
    numbers."""

    def _FetchAndParse(url):
      """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
      next-marker is not None, then the listing is a partial listing and another
      fetch should be performed with next-marker being the marker= GET
      parameter."""
      handle = urllib.urlopen(url)
      document = ElementTree.parse(handle)

      # All nodes in the tree are namespaced. Get the root's tag name to extract
      # the namespace. Etree does namespaces as |{namespace}tag|.
      root_tag = document.getroot().tag
      end_ns_pos = root_tag.find('}')
      if end_ns_pos == -1:
        raise Exception('Could not locate end namespace for directory index')
      namespace = root_tag[:end_ns_pos + 1]

      # Find the prefix (_listing_platform_dir) and whether or not the list is
      # truncated.
      prefix_len = len(document.find(namespace + 'Prefix').text)
      next_marker = None
      is_truncated = document.find(namespace + 'IsTruncated')
      if is_truncated is not None and is_truncated.text.lower() == 'true':
        next_marker = document.find(namespace + 'NextMarker').text
      # Get a list of all the revisions.
      revisions = []
      githash_svn_dict = {}
      if self.is_asan:
        asan_regex = re.compile(r'.*%s-(\d+)\.zip$' % (self.GetASANBaseName()))
        # Non ASAN builds are in a <revision> directory. The ASAN builds are
        # flat, so the revision is parsed out of the zip file name.
        all_prefixes = document.findall(namespace + 'Contents/' +
                                        namespace + 'Key')
        for prefix in all_prefixes:
          m = asan_regex.match(prefix.text)
          if m:
            revisions.append(int(m.group(1)))
      else:
        all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
                                        namespace + 'Prefix')
        # The <Prefix> nodes have content of the form of
        # |_listing_platform_dir/revision/|. Strip off the platform dir and the
        # trailing slash to just have a number.
        for prefix in all_prefixes:
          revnum = prefix.text[prefix_len:-1]
          if not revnum.isdigit():
            # The directory name is a git hash; convert it to an svn revision.
            git_hash = revnum
            revnum = self.GetSVNRevisionFromGitHash(git_hash)
            githash_svn_dict[revnum] = git_hash
          if revnum is not None:
            revisions.append(revnum)
      return (revisions, next_marker, githash_svn_dict)

    # Fetch the first list of revisions.
    (revisions, next_marker, self.githash_svn_dict) = _FetchAndParse(
        self.GetListingURL())
    # If the result list was truncated, refetch with the next marker. Do this
    # until an entire directory listing is done.
    while next_marker:
      next_url = self.GetListingURL(next_marker)
      (new_revisions, next_marker, new_dict) = _FetchAndParse(next_url)
      revisions.extend(new_revisions)
      self.githash_svn_dict.update(new_dict)
    return revisions
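
  # Sketch (added comment) of the Google Storage listing consumed by
  # _FetchAndParse above; element names are the standard bucket-listing XML:
  #   <ListBucketResult xmlns="...">
  #     <Prefix>Linux_x64/</Prefix>
  #     <IsTruncated>true</IsTruncated>
  #     <NextMarker>...</NextMarker>
  #     <CommonPrefixes><Prefix>Linux_x64/123456/</Prefix></CommonPrefixes>
  #   </ListBucketResult>
  # which is why the code strips the root namespace and then reads the
  # Prefix, IsTruncated, NextMarker, CommonPrefixes and Contents elements.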

  def _GetSVNRevisionFromGitHashWithoutGitCheckout(self, git_sha1, depot):
    json_url = GITHASH_TO_SVN_URL[depot] % git_sha1
    response = urllib.urlopen(json_url)
    if response.getcode() == 200:
      try:
        data = json.loads(response.read()[4:])
      except ValueError:
        print 'ValueError for JSON URL: %s' % json_url
        return None
    else:
      return None
    if 'message' in data:
      message = data['message'].split('\n')
      message = [line for line in message if line.strip()]
      search_pattern = re.compile(SEARCH_PATTERN[depot])
      result = search_pattern.search(message[len(message)-1])
      if result:
        return result.group(1)
    print 'Failed to get svn revision number for %s' % git_sha1
    return None
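
  # Note (added comment): gitiles JSON responses begin with an anti-XSSI
  # prefix (")]}'"), which is why response.read()[4:] above skips the first
  # four characters before handing the rest to json.loads().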

  def _GetSVNRevisionFromGitHashFromGitCheckout(self, git_sha1, depot):
    def _RunGit(command, path):
      command = ['git'] + command
      if path:
        original_path = os.getcwd()
        os.chdir(path)
      shell = sys.platform.startswith('win')
      proc = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
      (output, _) = proc.communicate()
      if path:
        os.chdir(original_path)
      return (output, proc.returncode)

    path = None
    if depot == 'blink':
      path = os.path.join(os.getcwd(), 'third_party', 'WebKit')
    if os.path.basename(os.getcwd()) == 'src':
      command = ['svn', 'find-rev', git_sha1]
      (git_output, return_code) = _RunGit(command, path)
      if not return_code:
        return git_output.strip('\n')
      return None
    else:
      print ('Script should be run from src folder. ' +
             'Eg: python tools/bisect-builds.py -g 280588 -b 280590 ' +
             '--archive linux64 --use-local-repo')
      sys.exit(1)
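
  # Illustrative example (added comment, hypothetical SHA1): with
  # --use-local-repo the conversion above is roughly equivalent to running
  #   cd src && git svn find-rev 1a2b3c4d5e6f
  # which prints the svn revision number (e.g. 280588) for that commit.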

  def GetSVNRevisionFromGitHash(self, git_sha1, depot='chromium'):
    if not self.use_local_repo:
      return self._GetSVNRevisionFromGitHashWithoutGitCheckout(git_sha1, depot)
    else:
      return self._GetSVNRevisionFromGitHashFromGitCheckout(git_sha1, depot)

  def GetRevList(self):
    """Gets the list of revision numbers between self.good_revision and
    self.bad_revision."""
    # Download the revlist and filter for just the range between good and bad.
    minrev = min(self.good_revision, self.bad_revision)
    maxrev = max(self.good_revision, self.bad_revision)
    revlist_all = map(int, self.ParseDirectoryIndex())

    revlist = [x for x in revlist_all if x >= int(minrev) and x <= int(maxrev)]
    revlist.sort()

    # Set good and bad revisions to be legit revisions.
    if revlist:
      if self.good_revision < self.bad_revision:
        self.good_revision = revlist[0]
        self.bad_revision = revlist[-1]
      else:
        self.bad_revision = revlist[0]
        self.good_revision = revlist[-1]

      # Fix chromium rev so that the deps blink revision matches REVISIONS file.
      if self.base_url == WEBKIT_BASE_URL:
        revlist_all.sort()
        self.good_revision = FixChromiumRevForBlink(revlist, revlist_all, self,
                                                    self.good_revision)
        self.bad_revision = FixChromiumRevForBlink(revlist, revlist_all, self,
                                                   self.bad_revision)
    return revlist

  def GetOfficialBuildsList(self):
    """Gets the list of official build numbers between self.good_revision and
    self.bad_revision."""

    def CheckDepotToolsInPath():
      delimiter = ';' if sys.platform.startswith('win') else ':'
      path_list = os.environ['PATH'].split(delimiter)
      for path in path_list:
        if path.find('depot_tools') != -1:
          return path
      return None

    def RunGsutilCommand(args):
      gsutil_path = CheckDepotToolsInPath()
      if gsutil_path is None:
        print ('Follow the instructions in this document '
               'http://dev.chromium.org/developers/how-tos/install-depot-tools'
               ' to install depot_tools and then try again.')
        sys.exit(1)
      gsutil_path = os.path.join(gsutil_path, 'third_party', 'gsutil', 'gsutil')
      gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                env=None)
      stdout, stderr = gsutil.communicate()
      if gsutil.returncode:
        if (re.findall(r'status[ |=]40[1|3]', stderr) or
            stderr.startswith(CREDENTIAL_ERROR_MESSAGE)):
          print ('Follow these steps to configure your credentials and try'
                 ' running the bisect-builds.py again:\n'
                 '  1. Run "python %s config" and follow its instructions.\n'
                 '  2. If you have a @google.com account, use that account.\n'
                 '  3. For the project-id, just enter 0.' % gsutil_path)
          sys.exit(1)
        raise Exception('Error running the gsutil command: %s' % stderr)
      return stdout

    def GsutilList(bucket):
      query = 'gs://%s/' % bucket
      stdout = RunGsutilCommand(['ls', query])
      return [url[len(query):].strip('/') for url in stdout.splitlines()]

    # Download the revlist and filter for just the range between good and bad.
    minrev = min(self.good_revision, self.bad_revision)
    maxrev = max(self.good_revision, self.bad_revision)
    build_numbers = GsutilList(GS_BUCKET_NAME)
    revision_re = re.compile(r'(\d\d\.\d\.\d{4}\.\d+)')
    build_numbers = filter(lambda b: revision_re.search(b), build_numbers)
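
    # Illustrative note (added comment): bucket entries are version strings
    # such as '38.0.2125.101' (hypothetical); the regex above keeps only names
    # of that MAJOR.MINOR.BUILD.PATCH form before LooseVersion parsing.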
    final_list = []
    parsed_build_numbers = [LooseVersion(x) for x in build_numbers]
    connection = httplib.HTTPConnection(GOOGLE_APIS_URL)
    for build_number in sorted(parsed_build_numbers):
      if build_number > maxrev:
        break
      if build_number < minrev:
        continue
      path = ('/' + GS_BUCKET_NAME + '/' + str(build_number) + '/' +
              self._listing_platform_dir + self.archive_name)
      connection.request('HEAD', path)
      response = connection.getresponse()
      if response.status == 200:
        final_list.append(str(build_number))
      # Drain the response so the keep-alive connection can be reused for the
      # next HEAD request.
      response.read()
    return final_list


def UnzipFilenameToDir(filename, directory):
  """Unzip |filename| to |directory|."""
  cwd = os.getcwd()
  if not os.path.isabs(filename):
    filename = os.path.join(cwd, filename)
  zf = zipfile.ZipFile(filename)
  # Make base.
  if not os.path.isdir(directory):
    os.mkdir(directory)
  os.chdir(directory)
  # Extract files.
  for info in zf.infolist():
    name = info.filename
    if name.endswith('/'):  # dir
      if not os.path.isdir(name):
        os.makedirs(name)
    else:  # file
      directory = os.path.dirname(name)
      if not os.path.isdir(directory):
        os.makedirs(directory)
      out = open(name, 'wb')
      out.write(zf.read(name))
      out.close()
    # Set permissions. Permission info in external_attr is shifted 16 bits.
    os.chmod(name, info.external_attr >> 16L)
  os.chdir(cwd)
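
# Note (added comment): for zip archives created on Unix, the upper 16 bits of
# ZipInfo.external_attr hold the POSIX mode bits, so 'external_attr >> 16'
# restores permissions such as 0755 and keeps the chrome binary executable.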


def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
  """Downloads and unzips revision |rev|.
  @param context A PathContext instance.
  @param rev The Chromium revision number/tag to download.
  @param filename The destination for the downloaded file.
  @param quit_event A threading.Event which will be set by the master thread to
                    indicate that the download should be aborted.
  @param progress_event A threading.Event which will be set by the master thread
                        to indicate that the progress of the download should be
                        displayed.
  """
  def ReportHook(blocknum, blocksize, totalsize):
    if quit_event and quit_event.isSet():
      raise RuntimeError('Aborting download of revision %s' % str(rev))
    if progress_event and progress_event.isSet():
      size = blocknum * blocksize
      if totalsize == -1:  # Total size not known.
        progress = 'Received %d bytes' % size
      else:
        size = min(totalsize, size)
        progress = 'Received %d of %d bytes, %.2f%%' % (
            size, totalsize, 100.0 * size / totalsize)
      # Send a \r to let all progress messages use just one line of output.
      sys.stdout.write('\r' + progress)
      sys.stdout.flush()

  download_url = context.GetDownloadURL(rev)
  try:
    urllib.urlretrieve(download_url, filename, ReportHook)
    if progress_event and progress_event.isSet():
      print
  except RuntimeError:
    pass
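
# Note (added comment): urllib.urlretrieve invokes the hook as
# ReportHook(block_count, block_size, total_size) after each chunk and passes
# total_size == -1 when the server did not send a Content-Length header,
# which is the case handled above.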


def RunRevision(context, revision, zip_file, profile, num_runs, command, args):
  """Given a zipped revision, unzip it and run the test."""
  print 'Trying revision %s...' % str(revision)

  # Create a temp directory and unzip the revision into it.
  cwd = os.getcwd()
  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
  UnzipFilenameToDir(zip_file, tempdir)
  os.chdir(tempdir)

  # Run the build as many times as specified.
  testargs = ['--user-data-dir=%s' % profile] + args
  # The sandbox must be run as root on Official Chrome, so bypass it.
  if ((context.is_official or context.flash_path or context.pdf_path) and
      context.platform.startswith('linux')):
    testargs.append('--no-sandbox')
  if context.flash_path:
    testargs.append('--ppapi-flash-path=%s' % context.flash_path)
    # We have to pass a large enough Flash version, which currently needs not
    # be correct. Instead of requiring the user of the script to figure out and
    # pass the correct version we just spoof it.
    testargs.append('--ppapi-flash-version=99.9.999.999')

  # TODO(vitalybuka): Remove in the future. See crbug.com/395687.
  if context.pdf_path:
    shutil.copy(context.pdf_path,
                os.path.dirname(context.GetLaunchPath(revision)))
    testargs.append('--enable-print-preview')

  runcommand = []
  for token in shlex.split(command):
    if token == '%a':
      runcommand.extend(testargs)
    else:
      runcommand.append(
          token.replace('%p', os.path.abspath(context.GetLaunchPath(revision))).
          replace('%s', ' '.join(testargs)))

  results = []
  for _ in range(num_runs):
    subproc = subprocess.Popen(runcommand,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    (stdout, stderr) = subproc.communicate()
    results.append((subproc.returncode, stdout, stderr))

  os.chdir(cwd)
  shutil.rmtree(tempdir, True)

  # Return the first failing run, or the first run if all succeeded.
  for (returncode, stdout, stderr) in results:
    if returncode:
      return (returncode, stdout, stderr)
  return results[0]
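
# Illustrative example (added comment, hypothetical paths): with
# --command='%p --incognito %a', '%p' expands to the absolute path of the
# extracted binary and '%a' to the extra test arguments, so the command run
# is roughly
#   /tmp/bisect_tmpXXXX/chrome-linux/chrome --incognito --user-data-dir=profile ...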


# The arguments official_builds, status, stdout and stderr are unused.
# They are present here because this function is passed to Bisect which then
# calls it with 5 arguments.
# pylint: disable=W0613
def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
  """Asks the user whether build |rev| is good or bad."""
  # Loop until we get a response that we can parse.
  while True:
    response = raw_input('Revision %s is '
                         '[(g)ood/(b)ad/(r)etry/(u)nknown/(q)uit]: ' %
                         str(rev))
    if response and response in ('g', 'b', 'r', 'u'):
      return response
    if response and response == 'q':
      raise SystemExit()


def IsGoodASANBuild(rev, official_builds, status, stdout, stderr):
  """Determine if an ASAN build |rev| is good or bad.

  Will examine stderr looking for the error message emitted by ASAN. If not
  found then will fall back to asking the user."""
  if stderr:
    bad_count = 0
    for line in stderr.splitlines():
      if line.find('ERROR: AddressSanitizer:') != -1:
        bad_count += 1
    if bad_count > 0:
      print 'Revision %d determined to be bad.' % rev
      return 'b'
  return AskIsGoodBuild(rev, official_builds, status, stdout, stderr)


class DownloadJob(object):
  """DownloadJob represents a task to download a given Chromium revision."""

  def __init__(self, context, name, rev, zip_file):
    super(DownloadJob, self).__init__()
    # Store off the input parameters.
    self.context = context
    self.name = name
    self.rev = rev
    self.zip_file = zip_file
    self.quit_event = threading.Event()
    self.progress_event = threading.Event()
    self.thread = None

  def Start(self):
    """Starts the download."""
    fetchargs = (self.context,
                 self.rev,
                 self.zip_file,
                 self.quit_event,
                 self.progress_event)
    self.thread = threading.Thread(target=FetchRevision,
                                   name=self.name,
                                   args=fetchargs)
    self.thread.start()

  def Stop(self):
    """Stops the download which must have been started previously."""
    assert self.thread, 'DownloadJob must be started before Stop is called.'
    self.quit_event.set()
    self.thread.join()
    os.unlink(self.zip_file)

  def WaitFor(self):
    """Prints a message and waits for the download to complete. The download
    must have been started previously."""
    assert self.thread, 'DownloadJob must be started before WaitFor is called.'
    print 'Downloading revision %s...' % str(self.rev)
    self.progress_event.set()  # Display progress of download.
    self.thread.join()
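
# Illustrative usage (added comment): Bisect() below drives these jobs roughly
# as follows (sketch, not a verbatim excerpt):
#   fetch = DownloadJob(context, 'initial_fetch', rev, zip_file)
#   fetch.Start()
#   fetch.WaitFor()   # block until the pivot build has been downloaded
#   ...
#   down_fetch.Stop()  # abandon and delete a prefetch that is no longer needed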


def Bisect(context,
           num_runs,
           command,
           try_args,
           profile,
           interactive,
           evaluate=AskIsGoodBuild):
  """Given known good and known bad revisions, run a binary search on all
  archived revisions to determine the last known good revision.

  @param context PathContext object initialized with user provided parameters.
  @param num_runs Number of times to run each build for asking good/bad.
  @param try_args A tuple of arguments to pass to the test application.
  @param profile The name of the user profile to run with.
  @param interactive If it is false, use command exit code for good or bad
                     judgment of the argument build.
  @param evaluate A function which returns 'g' if the argument build is good,
                  'b' if it's bad or 'u' if unknown.

  Threading is used to fetch Chromium revisions in the background, speeding up
  the user's experience. For example, suppose the bounds of the search are
  good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
  whether revision 50 is good or bad, the next revision to check will be either
  25 or 75. So, while revision 50 is being checked, the script will download
  revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
  known:

    - If rev 50 is good, the download of rev 25 is cancelled, and the next test
      is run on rev 75.

    - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
      is run on rev 25.
  """

  good_rev = context.good_revision
  bad_rev = context.bad_revision
  cwd = os.getcwd()

  print 'Downloading list of known revisions...',
  if not context.use_local_repo and not context.is_official:
    print '(use --use-local-repo for speed if you have a local checkout)'
  else:
    print
  _GetDownloadPath = lambda rev: os.path.join(cwd,
      '%s-%s' % (str(rev), context.archive_name))
  if context.is_official:
    revlist = context.GetOfficialBuildsList()
  else:
    revlist = context.GetRevList()

  # Get a list of revisions to bisect across.
  if len(revlist) < 2:  # Don't have enough builds to bisect.
    msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
    raise RuntimeError(msg)

  # Figure out our bookends and first pivot point; fetch the pivot revision.
  minrev = 0
  maxrev = len(revlist) - 1
  pivot = maxrev / 2
  rev = revlist[pivot]
  zip_file = _GetDownloadPath(rev)
  fetch = DownloadJob(context, 'initial_fetch', rev, zip_file)
  fetch.Start()
  fetch.WaitFor()

  # Binary search time!
  while fetch and fetch.zip_file and maxrev - minrev > 1:
    if bad_rev < good_rev:
      min_str, max_str = 'bad', 'good'
    else:
      min_str, max_str = 'good', 'bad'
    print 'Bisecting range [%s (%s), %s (%s)].' % (revlist[minrev], min_str,
                                                   revlist[maxrev], max_str)

    # Pre-fetch next two possible pivots
    #   - down_pivot is the next revision to check if the current revision turns
    #     out to be bad.
    #   - up_pivot is the next revision to check if the current revision turns
    #     out to be good.
    down_pivot = int((pivot - minrev) / 2) + minrev
    down_fetch = None
    if down_pivot != pivot and down_pivot != minrev:
      down_rev = revlist[down_pivot]
      down_fetch = DownloadJob(context, 'down_fetch', down_rev,
                               _GetDownloadPath(down_rev))
      down_fetch.Start()

    up_pivot = int((maxrev - pivot) / 2) + pivot
    up_fetch = None
    if up_pivot != pivot and up_pivot != maxrev:
      up_rev = revlist[up_pivot]
      up_fetch = DownloadJob(context, 'up_fetch', up_rev,
                             _GetDownloadPath(up_rev))
      up_fetch.Start()

    # Run test on the pivot revision.
    status = None
    stdout = None
    stderr = None
    try:
      (status, stdout, stderr) = RunRevision(context,
                                             rev,
                                             fetch.zip_file,
                                             profile,
                                             num_runs,
                                             command,
                                             try_args)
    except Exception, e:
      print >> sys.stderr, e

    # Call the evaluate function to see if the current revision is good or bad.
    # On that basis, kill one of the background downloads and complete the
    # other, as described in the comments above.
    try:
      if not interactive:
        if status:
          answer = 'b'
          print 'Bad revision: %s' % rev
        else:
          answer = 'g'
          print 'Good revision: %s' % rev
      else:
        answer = evaluate(rev, context.is_official, status, stdout, stderr)
      if ((answer == 'g' and good_rev < bad_rev)
          or (answer == 'b' and bad_rev < good_rev)):
        fetch.Stop()
        minrev = pivot
        if down_fetch:
          down_fetch.Stop()  # Kill the download of the older revision.
          fetch = None
        if up_fetch:
          up_fetch.WaitFor()
          pivot = up_pivot
          fetch = up_fetch
      elif ((answer == 'b' and good_rev < bad_rev)
            or (answer == 'g' and bad_rev < good_rev)):
        fetch.Stop()
        maxrev = pivot
        if up_fetch:
          up_fetch.Stop()  # Kill the download of the newer revision.
          fetch = None
        if down_fetch:
          down_fetch.WaitFor()
          pivot = down_pivot
          fetch = down_fetch
      elif answer == 'r':
        pass  # Retry requires no changes.
      elif answer == 'u':
        # Nuke the revision from the revlist and choose a new pivot.
        fetch.Stop()
        revlist.pop(pivot)
        maxrev -= 1  # Assumes maxrev >= pivot.

        if maxrev - minrev > 1:
          # Alternate between using down_pivot or up_pivot for the new pivot
          # point, without affecting the range. Do this instead of setting the
          # pivot to the midpoint of the new range because adjacent revisions
          # are likely affected by the same issue that caused the (u)nknown
          # response.
          if up_fetch and down_fetch:
            fetch = [up_fetch, down_fetch][len(revlist) % 2]
          elif up_fetch:
            fetch = up_fetch
          else:
            fetch = down_fetch
          fetch.WaitFor()
          if fetch == up_fetch:
            pivot = up_pivot - 1  # Subtracts 1 because revlist was resized.
          else:
            pivot = down_pivot
          zip_file = fetch.zip_file

        if down_fetch and fetch != down_fetch:
          down_fetch.Stop()
        if up_fetch and fetch != up_fetch:
          up_fetch.Stop()
      else:
        assert False, 'Unexpected return value from evaluate(): ' + answer
    except SystemExit:
      print 'Cleaning up...'
      for f in [_GetDownloadPath(revlist[down_pivot]),
                _GetDownloadPath(revlist[up_pivot])]:
        try:
          os.unlink(f)
        except OSError:
          pass
      sys.exit(0)

    rev = revlist[pivot]

  return (revlist[minrev], revlist[maxrev], context)
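
# Worked example (added comment, hypothetical indices): with minrev=0,
# maxrev=100 and pivot=50, down_pivot = (50 - 0) / 2 + 0 = 25 and
# up_pivot = (100 - 50) / 2 + 50 = 75, matching the prefetch scheme described
# in the Bisect docstring.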


def GetBlinkDEPSRevisionForChromiumRevision(rev):
  """Returns the blink revision that was in the DEPS file at
  chromium revision |rev|."""
  # . doesn't match newlines without re.DOTALL, so this is safe.
  blink_re = re.compile(r'webkit_revision\D*(\d+)')
  url = urllib.urlopen(DEPS_FILE % rev)
  m = blink_re.search(url.read())
  url.close()
  if m:
    return int(m.group(1))
  else:
    raise Exception('Could not get Blink revision for Chromium rev %d' % rev)


def GetBlinkRevisionForChromiumRevision(context, rev):
  """Returns the blink revision that was in REVISIONS file at
  chromium revision |rev|."""
  def _IsRevisionNumber(revision):
    if isinstance(revision, int):
      return True
    else:
      return revision.isdigit()
  if str(rev) in context.githash_svn_dict:
    rev = context.githash_svn_dict[str(rev)]
  file_url = '%s/%s%s/REVISIONS' % (context.base_url,
                                    context._listing_platform_dir, rev)
  url = urllib.urlopen(file_url)
  if url.getcode() == 200:
    try:
      data = json.loads(url.read())
    except ValueError:
      print 'ValueError for JSON URL: %s' % file_url
      return None
  else:
    return None
  url.close()
  if 'webkit_revision' in data:
    blink_rev = data['webkit_revision']
    if not _IsRevisionNumber(blink_rev):
      blink_rev = int(context.GetSVNRevisionFromGitHash(blink_rev, 'blink'))
    return blink_rev
  else:
    raise Exception('Could not get blink revision for cr rev %d' % rev)


def FixChromiumRevForBlink(revisions_final, revisions, self, rev):
  """Returns the chromium revision that has the correct blink revision
  for blink bisect; DEPS and REVISIONS file might not match since
  blink snapshots point to tip of tree blink.
  Note: The revisions_final variable might get modified to include
  additional revisions."""
  blink_deps_rev = GetBlinkDEPSRevisionForChromiumRevision(rev)

  while (GetBlinkRevisionForChromiumRevision(self, rev) > blink_deps_rev):
    idx = revisions.index(rev)
    if idx > 0:
      rev = revisions[idx - 1]
      if rev not in revisions_final:
        revisions_final.insert(0, rev)

  revisions_final.sort()
  return rev


def GetChromiumRevision(context, url):
  """Returns the chromium revision read from given URL."""
  try:
    # Location of the latest build revision number
    latest_revision = urllib.urlopen(url).read()
    if latest_revision.isdigit():
      return int(latest_revision)
    return context.GetSVNRevisionFromGitHash(latest_revision)
  except Exception:
    print 'Could not determine latest revision. This could be bad...'
    # Fall back to a very large revision number so the bisect still has an
    # upper bound.
    return 999999999


def main():
  usage = ('%prog [options] [-- chromium-options]\n'
           'Perform binary search on the snapshot builds to find a minimal\n'
           'range of revisions where a behavior change happened. The\n'
           'behaviors are described as "good" and "bad".\n'
           'It is NOT assumed that the behavior of the later revision is\n'
           'the bad one.\n'
           '\n'
           'Revision numbers should use\n'
           '  Official versions (e.g. 1.0.1000.0) for official builds. (-o)\n'
           '  SVN revisions (e.g. 123456) for chromium builds, from trunk.\n'
           '    Use base_trunk_revision from http://omahaproxy.appspot.com/\n'
           '    for earlier revs.\n'
           '    Chrome\'s about: build number and omahaproxy branch_revision\n'
           '    are incorrect, they are from branches.\n'
           '\n'
           'Tip: add "-- --no-first-run" to bypass the first run prompts.')
  parser = optparse.OptionParser(usage=usage)
  # Strangely, the default help output doesn't include the choice list.
  choices = ['mac', 'mac64', 'win', 'win64', 'linux', 'linux64', 'linux-arm']
  # linux-chromiumos lacks a continuous archive http://crbug.com/78158
  parser.add_option('-a', '--archive',
                    choices=choices,
                    help='The buildbot archive to bisect [%s].' %
                         '|'.join(choices))
  parser.add_option('-o',
                    action='store_true',
                    dest='official_builds',
                    help='Bisect across official Chrome builds (internal '
                         'only) instead of Chromium archives.')
  parser.add_option('-b', '--bad',
                    help='A bad revision to start bisection. '
                         'May be earlier or later than the good revision. '
                         'Default is HEAD.')
  parser.add_option('-f', '--flash_path',
                    help='Absolute path to a recent Adobe Pepper Flash '
                         'binary to be used in this bisection (e.g. '
                         'on Windows C:\...\pepflashplayer.dll and on Linux '
                         '/opt/google/chrome/PepperFlash/'
                         'libpepflashplayer.so).')
  parser.add_option('-d', '--pdf_path',
                    help='Absolute path to a recent PDF plugin '
                         'binary to be used in this bisection (e.g. '
                         'on Windows C:\...\pdf.dll and on Linux '
                         '/opt/google/chrome/libpdf.so). Option also enables '
                         'print preview.')
  parser.add_option('-g', '--good',
                    help='A good revision to start bisection. ' +
                         'May be earlier or later than the bad revision. ' +
                         'Default is 0.')
  parser.add_option('-p', '--profile', '--user-data-dir',
                    default='profile',
                    help='Profile to use; this will not reset every run. '
                         'Defaults to a clean profile.')
  parser.add_option('-t', '--times',
                    type='int',
                    default=1,
                    help='Number of times to run each build before asking '
                         'if it\'s good or bad. Temporary profiles are reused.')
  parser.add_option('-c', '--command',
                    default='%p %a',
                    help='Command to execute. %p and %a refer to Chrome '
                         'executable and specified extra arguments '
                         'respectively. Use %s to specify all extra arguments '
                         'as one string. Defaults to "%p %a". Note that any '
                         'extra paths specified should be absolute.')
  parser.add_option('-l', '--blink',
                    action='store_true',
                    help='Use Blink bisect instead of Chromium.')
  parser.add_option('', '--not-interactive',
                    action='store_true',
                    default=False,
                    help='Use command exit code to tell good/bad revision.')
  parser.add_option('--asan',
                    dest='asan',
                    action='store_true',
                    default=False,
                    help='Allow the script to bisect ASAN builds')
  parser.add_option('--use-local-repo',
                    dest='use_local_repo',
                    action='store_true',
                    default=False,
                    help='Allow the script to convert git SHA1 to SVN '
                         'revision using "git svn find-rev <SHA1>" '
                         'command from a Chromium checkout.')
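
  # Illustrative invocation (added comment, hypothetical revisions):
  #   python tools/bisect-builds.py -a linux64 -g 280588 -b 280590 -- --no-first-run
  # where everything after '--' is passed to the Chromium build under test.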

  (opts, args) = parser.parse_args()

  if opts.archive is None:
    print 'Error: missing required parameter: --archive'
    print
    parser.print_help()
    return 1

  if opts.asan:
    supported_platforms = ['linux', 'mac', 'win']
    if opts.archive not in supported_platforms:
      print 'Error: ASAN bisecting only supported on these platforms: [%s].' % (
          '|'.join(supported_platforms))
      return 1
    if opts.official_builds:
      print 'Error: Do not yet support bisecting official ASAN builds.'
      return 1

  if opts.asan:
    base_url = ASAN_BASE_URL
  elif opts.blink:
    base_url = WEBKIT_BASE_URL
  else:
    base_url = CHROMIUM_BASE_URL

  # Create the context. Initialize 0 for the revisions as they are set below.
  context = PathContext(base_url, opts.archive, opts.good, opts.bad,
                        opts.official_builds, opts.asan, opts.use_local_repo,
                        opts.flash_path, opts.pdf_path)
  # Pick a starting point, try to get HEAD for this.
  if not opts.bad:
    context.bad_revision = '999.0.0.0'
    if not opts.official_builds:
      context.bad_revision = GetChromiumRevision(
          context, context.GetLastChangeURL())

  # Find out when we were good.
  if not opts.good:
    context.good_revision = '0.0.0.0' if opts.official_builds else 0

  if opts.flash_path:
    msg = 'Could not find Flash binary at %s' % opts.flash_path
    assert os.path.exists(opts.flash_path), msg

  if opts.pdf_path:
    msg = 'Could not find PDF binary at %s' % opts.pdf_path
    assert os.path.exists(opts.pdf_path), msg

  if opts.official_builds:
    context.good_revision = LooseVersion(context.good_revision)
    context.bad_revision = LooseVersion(context.bad_revision)
  else:
    context.good_revision = int(context.good_revision)
    context.bad_revision = int(context.bad_revision)

  if opts.times < 1:
    print('Number of times to run (%d) must be greater than or equal to 1.' %
          opts.times)
    parser.print_help()
    return 1

  if opts.asan:
    evaluator = IsGoodASANBuild
  else:
    evaluator = AskIsGoodBuild

  # Save these revision numbers to compare when showing the changelog URL
  # after the bisect.
  good_rev = context.good_revision
  bad_rev = context.bad_revision

  (min_chromium_rev, max_chromium_rev, context) = Bisect(
      context, opts.times, opts.command, args, opts.profile,
      not opts.not_interactive, evaluator)

  # Get corresponding blink revisions.
  try:
    min_blink_rev = GetBlinkRevisionForChromiumRevision(context,
                                                        min_chromium_rev)
    max_blink_rev = GetBlinkRevisionForChromiumRevision(context,
                                                        max_chromium_rev)
  except Exception:
    # Silently ignore the failure.
    min_blink_rev, max_blink_rev = 0, 0

  if opts.blink:
    # We're done. Let the user know the results in an official manner.
    if good_rev > bad_rev:
      print DONE_MESSAGE_GOOD_MAX % (str(min_blink_rev), str(max_blink_rev))
    else:
      print DONE_MESSAGE_GOOD_MIN % (str(min_blink_rev), str(max_blink_rev))

    print 'BLINK CHANGELOG URL:'
    print '  ' + BLINK_CHANGELOG_URL % (max_blink_rev, min_blink_rev)
  else:
    # We're done. Let the user know the results in an official manner.
    if good_rev > bad_rev:
      print DONE_MESSAGE_GOOD_MAX % (str(min_chromium_rev),
                                     str(max_chromium_rev))
    else:
      print DONE_MESSAGE_GOOD_MIN % (str(min_chromium_rev),
                                     str(max_chromium_rev))
    if min_blink_rev != max_blink_rev:
      print ('NOTE: There is a Blink roll in the range, '
             'you might also want to do a Blink bisect.')

    print 'CHANGELOG URL:'
    if opts.official_builds:
      print OFFICIAL_CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)
    else:
      print '  ' + CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)


if __name__ == '__main__':
  sys.exit(main())