# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Snapshot Build Bisect Tool

This script bisects a snapshot archive using binary search. It starts at
a bad revision (it will try to guess HEAD) and asks for a last known-good
revision. It will then binary search across this revision range by downloading,
unzipping, and opening Chromium for you. After testing the specific revision,
it will ask you whether it is good or bad before continuing the search.
"""
# The root URL for storage.
BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'

# The root URL for official builds.
OFFICIAL_BASE_URL = 'http://master.chrome.corp.google.com/official_builds'

# Changelog URL for Chromium continuous builds (takes two SVN revisions).
CHANGELOG_URL = 'http://build.chromium.org/f/chromium/' \
                'perf/dashboard/ui/changelog.html?url=/trunk/src&range=%d%%3A%d'

# Official Changelogs URL (takes two version strings).
OFFICIAL_CHANGELOG_URL = 'http://omahaproxy.appspot.com/'\
                         'changelog?old_version=%s&new_version=%s'

# DEPS file URL (takes one SVN revision).
DEPS_FILE = 'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d'

# WebKit Changelogs URL (takes a newer and an older WebKit revision).
WEBKIT_CHANGELOG_URL = 'http://trac.webkit.org/log/' \
                       'trunk/?rev=%d&stop_rev=%d&verbose=on&limit=10000'

# Final messages; which one is used depends on the direction of the search.
DONE_MESSAGE_GOOD_MIN = 'You are probably looking for a change made after %s ' \
                        '(known good), but no later than %s (first known bad).'
DONE_MESSAGE_GOOD_MAX = 'You are probably looking for a change made after %s ' \
                        '(known bad), but no later than %s (first known good).'
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import urllib
import zipfile
from distutils.version import LooseVersion
from xml.etree import ElementTree
58 class PathContext(object):
59 """A PathContext is used to carry the information used to construct URLs and
60 paths when dealing with the storage server and archives."""
61 def __init__(self
, platform
, good_revision
, bad_revision
, is_official
):
62 super(PathContext
, self
).__init
__()
63 # Store off the input parameters.
64 self
.platform
= platform
# What's passed in to the '-a/--archive' option.
65 self
.good_revision
= good_revision
66 self
.bad_revision
= bad_revision
67 self
.is_official
= is_official
69 # The name of the ZIP file in a revision directory on the server.
70 self
.archive_name
= None
72 # Set some internal members:
73 # _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
74 # _archive_extract_dir = Uncompressed directory in the archive_name file.
75 # _binary_name = The name of the executable to run.
76 if self
.platform
== 'linux' or self
.platform
== 'linux64':
77 self
._binary
_name
= 'chrome'
78 elif self
.platform
== 'mac':
79 self
.archive_name
= 'chrome-mac.zip'
80 self
._archive
_extract
_dir
= 'chrome-mac'
81 elif self
.platform
== 'win':
82 self
.archive_name
= 'chrome-win32.zip'
83 self
._archive
_extract
_dir
= 'chrome-win32'
84 self
._binary
_name
= 'chrome.exe'
86 raise Exception('Invalid platform: %s' % self
.platform
)
89 if self
.platform
== 'linux':
90 self
._listing
_platform
_dir
= 'lucid32bit/'
91 self
.archive_name
= 'chrome-lucid32bit.zip'
92 self
._archive
_extract
_dir
= 'chrome-lucid32bit'
93 elif self
.platform
== 'linux64':
94 self
._listing
_platform
_dir
= 'lucid64bit/'
95 self
.archive_name
= 'chrome-lucid64bit.zip'
96 self
._archive
_extract
_dir
= 'chrome-lucid64bit'
97 elif self
.platform
== 'mac':
98 self
._listing
_platform
_dir
= 'mac/'
99 self
._binary
_name
= 'Google Chrome.app/Contents/MacOS/Google Chrome'
100 elif self
.platform
== 'win':
101 self
._listing
_platform
_dir
= 'win/'
103 if self
.platform
== 'linux' or self
.platform
== 'linux64':
104 self
.archive_name
= 'chrome-linux.zip'
105 self
._archive
_extract
_dir
= 'chrome-linux'
106 if self
.platform
== 'linux':
107 self
._listing
_platform
_dir
= 'Linux/'
108 elif self
.platform
== 'linux64':
109 self
._listing
_platform
_dir
= 'Linux_x64/'
110 elif self
.platform
== 'mac':
111 self
._listing
_platform
_dir
= 'Mac/'
112 self
._binary
_name
= 'Chromium.app/Contents/MacOS/Chromium'
113 elif self
.platform
== 'win':
114 self
._listing
_platform
_dir
= 'Win/'
116 def GetListingURL(self
, marker
=None):
117 """Returns the URL for a directory listing, with an optional marker."""
120 marker_param
= '&marker=' + str(marker
)
121 return BASE_URL
+ '/?delimiter=/&prefix=' + self
._listing
_platform
_dir
+ \
124 def GetDownloadURL(self
, revision
):
125 """Gets the download URL for a build archive of a specific revision."""
127 return "%s/%s/%s%s" % (
128 OFFICIAL_BASE_URL
, revision
, self
._listing
_platform
_dir
,
131 return "%s/%s%s/%s" % (
132 BASE_URL
, self
._listing
_platform
_dir
, revision
, self
.archive_name
)
134 def GetLastChangeURL(self
):
135 """Returns a URL to the LAST_CHANGE file."""
136 return BASE_URL
+ '/' + self
._listing
_platform
_dir
+ 'LAST_CHANGE'
138 def GetLaunchPath(self
):
139 """Returns a relative path (presumably from the archive extraction location)
140 that is used to run the executable."""
141 return os
.path
.join(self
._archive
_extract
_dir
, self
._binary
_name
)
143 def ParseDirectoryIndex(self
):
144 """Parses the Google Storage directory listing into a list of revision
147 def _FetchAndParse(url
):
148 """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
149 next-marker is not None, then the listing is a partial listing and another
150 fetch should be performed with next-marker being the marker= GET
152 handle
= urllib
.urlopen(url
)
153 document
= ElementTree
.parse(handle
)
155 # All nodes in the tree are namespaced. Get the root's tag name to extract
156 # the namespace. Etree does namespaces as |{namespace}tag|.
157 root_tag
= document
.getroot().tag
158 end_ns_pos
= root_tag
.find('}')
160 raise Exception("Could not locate end namespace for directory index")
161 namespace
= root_tag
[:end_ns_pos
+ 1]
163 # Find the prefix (_listing_platform_dir) and whether or not the list is
165 prefix_len
= len(document
.find(namespace
+ 'Prefix').text
)
167 is_truncated
= document
.find(namespace
+ 'IsTruncated')
168 if is_truncated
is not None and is_truncated
.text
.lower() == 'true':
169 next_marker
= document
.find(namespace
+ 'NextMarker').text
171 # Get a list of all the revisions.
172 all_prefixes
= document
.findall(namespace
+ 'CommonPrefixes/' +
173 namespace
+ 'Prefix')
174 # The <Prefix> nodes have content of the form of
175 # |_listing_platform_dir/revision/|. Strip off the platform dir and the
176 # trailing slash to just have a number.
178 for prefix
in all_prefixes
:
179 revnum
= prefix
.text
[prefix_len
:-1]
182 revisions
.append(revnum
)
185 return (revisions
, next_marker
)
187 # Fetch the first list of revisions.
188 (revisions
, next_marker
) = _FetchAndParse(self
.GetListingURL())
190 # If the result list was truncated, refetch with the next marker. Do this
191 # until an entire directory listing is done.
193 next_url
= self
.GetListingURL(next_marker
)
194 (new_revisions
, next_marker
) = _FetchAndParse(next_url
)
195 revisions
.extend(new_revisions
)
198 def GetRevList(self
):
199 """Gets the list of revision numbers between self.good_revision and
200 self.bad_revision."""
201 # Download the revlist and filter for just the range between good and bad.
202 minrev
= min(self
.good_revision
, self
.bad_revision
)
203 maxrev
= max(self
.good_revision
, self
.bad_revision
)
204 revlist
= map(int, self
.ParseDirectoryIndex())
205 revlist
= [x
for x
in revlist
if x
>= int(minrev
) and x
<= int(maxrev
)]
209 def GetOfficialBuildsList(self
):
210 """Gets the list of official build numbers between self.good_revision and
211 self.bad_revision."""
212 # Download the revlist and filter for just the range between good and bad.
213 minrev
= min(self
.good_revision
, self
.bad_revision
)
214 maxrev
= max(self
.good_revision
, self
.bad_revision
)
215 handle
= urllib
.urlopen(OFFICIAL_BASE_URL
)
216 dirindex
= handle
.read()
218 build_numbers
= re
.findall(r
'<a href="([0-9][0-9].*)/">', dirindex
)
221 parsed_build_numbers
= [LooseVersion(x
) for x
in build_numbers
]
222 for build_number
in sorted(parsed_build_numbers
):
223 path
= OFFICIAL_BASE_URL
+ '/' + str(build_number
) + '/' + \
224 self
._listing
_platform
_dir
+ self
.archive_name
227 connection
= urllib
.urlopen(path
)
229 if build_number
> maxrev
:
231 if build_number
>= minrev
:
232 final_list
.append(str(build_number
))
233 except urllib
.HTTPError
, e
:
237 def UnzipFilenameToDir(filename
, dir):
238 """Unzip |filename| to directory |dir|."""
240 if not os
.path
.isabs(filename
):
241 filename
= os
.path
.join(cwd
, filename
)
242 zf
= zipfile
.ZipFile(filename
)
244 if not os
.path
.isdir(dir):
248 for info
in zf
.infolist():
250 if name
.endswith('/'): # dir
251 if not os
.path
.isdir(name
):
254 dir = os
.path
.dirname(name
)
255 if not os
.path
.isdir(dir):
257 out
= open(name
, 'wb')
258 out
.write(zf
.read(name
))
260 # Set permissions. Permission info in external_attr is shifted 16 bits.
261 os
.chmod(name
, info
.external_attr
>> 16L)
265 def FetchRevision(context
, rev
, filename
, quit_event
=None, progress_event
=None):
266 """Downloads and unzips revision |rev|.
267 @param context A PathContext instance.
268 @param rev The Chromium revision number/tag to download.
269 @param filename The destination for the downloaded file.
270 @param quit_event A threading.Event which will be set by the master thread to
271 indicate that the download should be aborted.
272 @param progress_event A threading.Event which will be set by the master thread
273 to indicate that the progress of the download should be
276 def ReportHook(blocknum
, blocksize
, totalsize
):
277 if quit_event
and quit_event
.isSet():
278 raise RuntimeError("Aborting download of revision %s" % str(rev
))
279 if progress_event
and progress_event
.isSet():
280 size
= blocknum
* blocksize
281 if totalsize
== -1: # Total size not known.
282 progress
= "Received %d bytes" % size
284 size
= min(totalsize
, size
)
285 progress
= "Received %d of %d bytes, %.2f%%" % (
286 size
, totalsize
, 100.0 * size
/ totalsize
)
287 # Send a \r to let all progress messages use just one line of output.
288 sys
.stdout
.write("\r" + progress
)
291 download_url
= context
.GetDownloadURL(rev
)
293 urllib
.urlretrieve(download_url
, filename
, ReportHook
)
294 if progress_event
and progress_event
.isSet():
296 except RuntimeError, e
:
300 def RunRevision(context
, revision
, zipfile
, profile
, num_runs
, args
):
301 """Given a zipped revision, unzip it and run the test."""
302 print "Trying revision %s..." % str(revision
)
304 # Create a temp directory and unzip the revision into it.
306 tempdir
= tempfile
.mkdtemp(prefix
='bisect_tmp')
307 UnzipFilenameToDir(zipfile
, tempdir
)
310 # Run the build as many times as specified.
311 testargs
= [context
.GetLaunchPath(), '--user-data-dir=%s' % profile
] + args
312 # The sandbox must be run as root on Official Chrome, so bypass it.
313 if context
.is_official
and (context
.platform
== 'linux' or
314 context
.platform
== 'linux64'):
315 testargs
.append('--no-sandbox')
317 for i
in range(0, num_runs
):
318 subproc
= subprocess
.Popen(testargs
,
320 stdout
=subprocess
.PIPE
,
321 stderr
=subprocess
.PIPE
)
322 (stdout
, stderr
) = subproc
.communicate()
326 shutil
.rmtree(tempdir
, True)
330 return (subproc
.returncode
, stdout
, stderr
)
def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
  """Ask the user whether build |rev| is good or bad.

  @param rev The revision that was just tested.
  @param official_builds Part of the evaluate() callback contract (unused here).
  @param status Exit status of the test run (unused here).
  @param stdout Captured stdout of the test run (unused here).
  @param stderr Captured stderr of the test run (unused here).
  @return 'g', 'b' or 'u'.
  @raise SystemExit when the user chooses (q)uit.
  """
  # Loop until we get a response that we can parse.
  while True:
    response = raw_input('Revision %s is [(g)ood/(b)ad/(u)nknown/(q)uit]: ' %
                         str(rev))
    if response and response in ('g', 'b', 'u'):
      return response
    if response and response == 'q':
      # NOTE(review): reconstructed -- SystemExit is caught by Bisect() for
      # cleanup of in-flight downloads; verify against the original behavior.
      raise SystemExit()
345 class DownloadJob(object):
346 """DownloadJob represents a task to download a given Chromium revision."""
347 def __init__(self
, context
, name
, rev
, zipfile
):
348 super(DownloadJob
, self
).__init
__()
349 # Store off the input parameters.
350 self
.context
= context
353 self
.zipfile
= zipfile
354 self
.quit_event
= threading
.Event()
355 self
.progress_event
= threading
.Event()
358 """Starts the download."""
359 fetchargs
= (self
.context
,
364 self
.thread
= threading
.Thread(target
=FetchRevision
,
370 """Stops the download which must have been started previously."""
371 self
.quit_event
.set()
373 os
.unlink(self
.zipfile
)
376 """Prints a message and waits for the download to complete. The download
377 must have been started previously."""
378 print "Downloading revision %s..." % str(self
.rev
)
379 self
.progress_event
.set() # Display progress of download.
390 evaluate
=AskIsGoodBuild
):
391 """Given known good and known bad revisions, run a binary search on all
392 archived revisions to determine the last known good revision.
394 @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).
395 @param official_builds Specify build type (Chromium or Official build).
396 @param good_rev Number/tag of the known good revision.
397 @param bad_rev Number/tag of the known bad revision.
398 @param num_runs Number of times to run each build for asking good/bad.
399 @param try_args A tuple of arguments to pass to the test application.
400 @param profile The name of the user profile to run with.
401 @param evaluate A function which returns 'g' if the argument build is good,
402 'b' if it's bad or 'u' if unknown.
404 Threading is used to fetch Chromium revisions in the background, speeding up
405 the user's experience. For example, suppose the bounds of the search are
406 good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
407 whether revision 50 is good or bad, the next revision to check will be either
408 25 or 75. So, while revision 50 is being checked, the script will download
409 revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
412 - If rev 50 is good, the download of rev 25 is cancelled, and the next test
415 - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
422 context
= PathContext(platform
, good_rev
, bad_rev
, official_builds
)
427 print "Downloading list of known revisions..."
428 _GetDownloadPath
= lambda rev
: os
.path
.join(cwd
,
429 '%s-%s' % (str(rev
), context
.archive_name
))
431 revlist
= context
.GetOfficialBuildsList()
433 revlist
= context
.GetRevList()
435 # Get a list of revisions to bisect across.
436 if len(revlist
) < 2: # Don't have enough builds to bisect.
437 msg
= 'We don\'t have enough builds to bisect. revlist: %s' % revlist
438 raise RuntimeError(msg
)
440 # Figure out our bookends and first pivot point; fetch the pivot revision.
442 maxrev
= len(revlist
) - 1
445 zipfile
= _GetDownloadPath(rev
)
446 fetch
= DownloadJob(context
, 'initial_fetch', rev
, zipfile
)
450 # Binary search time!
451 while fetch
and fetch
.zipfile
and maxrev
- minrev
> 1:
452 if bad_rev
< good_rev
:
453 min_str
, max_str
= "bad", "good"
455 min_str
, max_str
= "good", "bad"
456 print 'Bisecting range [%s (%s), %s (%s)].' % (revlist
[minrev
], min_str
, \
457 revlist
[maxrev
], max_str
)
459 # Pre-fetch next two possible pivots
460 # - down_pivot is the next revision to check if the current revision turns
462 # - up_pivot is the next revision to check if the current revision turns
464 down_pivot
= int((pivot
- minrev
) / 2) + minrev
466 if down_pivot
!= pivot
and down_pivot
!= minrev
:
467 down_rev
= revlist
[down_pivot
]
468 down_fetch
= DownloadJob(context
, 'down_fetch', down_rev
,
469 _GetDownloadPath(down_rev
))
472 up_pivot
= int((maxrev
- pivot
) / 2) + pivot
474 if up_pivot
!= pivot
and up_pivot
!= maxrev
:
475 up_rev
= revlist
[up_pivot
]
476 up_fetch
= DownloadJob(context
, 'up_fetch', up_rev
,
477 _GetDownloadPath(up_rev
))
480 # Run test on the pivot revision.
485 (status
, stdout
, stderr
) = RunRevision(context
,
492 print >>sys
.stderr
, e
496 # Call the evaluate function to see if the current revision is good or bad.
497 # On that basis, kill one of the background downloads and complete the
498 # other, as described in the comments above.
500 answer
= evaluate(rev
, official_builds
, status
, stdout
, stderr
)
501 if answer
== 'g' and good_rev
< bad_rev
or \
502 answer
== 'b' and bad_rev
< good_rev
:
505 down_fetch
.Stop() # Kill the download of the older revision.
510 elif answer
== 'b' and good_rev
< bad_rev
or \
511 answer
== 'g' and bad_rev
< good_rev
:
514 up_fetch
.Stop() # Kill the download of the newer revision.
520 # Nuke the revision from the revlist and choose a new pivot.
522 maxrev
-= 1 # Assumes maxrev >= pivot.
524 if maxrev
- minrev
> 1:
525 # Alternate between using down_pivot or up_pivot for the new pivot
526 # point, without affecting the range. Do this instead of setting the
527 # pivot to the midpoint of the new range because adjacent revisions
528 # are likely affected by the same issue that caused the (u)nknown
530 if up_fetch
and down_fetch
:
531 fetch
= [up_fetch
, down_fetch
][len(revlist
) % 2]
537 if fetch
== up_fetch
:
538 pivot
= up_pivot
- 1 # Subtracts 1 because revlist was resized.
541 zipfile
= fetch
.zipfile
543 if down_fetch
and fetch
!= down_fetch
:
545 if up_fetch
and fetch
!= up_fetch
:
548 assert False, "Unexpected return value from evaluate(): " + answer
550 print "Cleaning up..."
551 for f
in [_GetDownloadPath(revlist
[down_pivot
]),
552 _GetDownloadPath(revlist
[up_pivot
])]:
561 return (revlist
[minrev
], revlist
[maxrev
])
def GetWebKitRevisionForChromiumRevision(rev):
  """Returns the webkit revision that was in chromium's DEPS file at
  chromium revision |rev|.

  @raise Exception when the DEPS file does not contain a webkit_revision entry.
  """
  # . doesn't match newlines without re.DOTALL, so this is safe.
  webkit_re = re.compile(r'webkit_revision.:\D*(\d+)')
  url = urllib.urlopen(DEPS_FILE % rev)
  m = webkit_re.search(url.read())
  url.close()
  if m:
    return int(m.group(1))
  else:
    raise Exception('Could not get webkit revision for cr rev %d' % rev)
578 def GetChromiumRevision(url
):
579 """Returns the chromium revision read from given URL."""
581 # Location of the latest build revision number
582 return int(urllib
.urlopen(url
).read())
584 print('Could not determine latest revision. This could be bad...')
589 usage
= ('%prog [options] [-- chromium-options]\n'
590 'Perform binary search on the snapshot builds to find a minimal\n'
591 'range of revisions where a behavior change happened. The\n'
592 'behaviors are described as "good" and "bad".\n'
593 'It is NOT assumed that the behavior of the later revision is\n'
596 'Revision numbers should use\n'
597 ' Official versions (e.g. 1.0.1000.0) for official builds. (-o)\n'
598 ' SVN revisions (e.g. 123456) for chromium builds, from trunk.\n'
599 ' Use base_trunk_revision from http://omahaproxy.appspot.com/\n'
600 ' for earlier revs.\n'
601 ' Chrome\'s about: build number and omahaproxy branch_revision\n'
602 ' are incorrect, they are from branches.\n'
604 'Tip: add "-- --no-first-run" to bypass the first run prompts.')
605 parser
= optparse
.OptionParser(usage
=usage
)
606 # Strangely, the default help output doesn't include the choice list.
607 choices
= ['mac', 'win', 'linux', 'linux64']
608 # linux-chromiumos lacks a continuous archive http://crbug.com/78158
609 parser
.add_option('-a', '--archive',
611 help = 'The buildbot archive to bisect [%s].' %
613 parser
.add_option('-o', action
="store_true", dest
='official_builds',
614 help = 'Bisect across official ' +
615 'Chrome builds (internal only) instead of ' +
616 'Chromium archives.')
617 parser
.add_option('-b', '--bad', type = 'str',
618 help = 'A bad revision to start bisection. ' +
619 'May be earlier or later than the good revision. ' +
621 parser
.add_option('-g', '--good', type = 'str',
622 help = 'A good revision to start bisection. ' +
623 'May be earlier or later than the bad revision. ' +
625 parser
.add_option('-p', '--profile', '--user-data-dir', type = 'str',
626 help = 'Profile to use; this will not reset every run. ' +
627 'Defaults to a clean profile.', default
= 'profile')
628 parser
.add_option('-t', '--times', type = 'int',
629 help = 'Number of times to run each build before asking ' +
630 'if it\'s good or bad. Temporary profiles are reused.',
632 (opts
, args
) = parser
.parse_args()
634 if opts
.archive
is None:
635 print 'Error: missing required parameter: --archive'
640 # Create the context. Initialize 0 for the revisions as they are set below.
641 context
= PathContext(opts
.archive
, 0, 0, opts
.official_builds
)
642 # Pick a starting point, try to get HEAD for this.
646 bad_rev
= '999.0.0.0'
647 if not opts
.official_builds
:
648 bad_rev
= GetChromiumRevision(context
.GetLastChangeURL())
650 # Find out when we were good.
654 good_rev
= '0.0.0.0' if opts
.official_builds
else 0
656 if opts
.official_builds
:
657 good_rev
= LooseVersion(good_rev
)
658 bad_rev
= LooseVersion(bad_rev
)
660 good_rev
= int(good_rev
)
661 bad_rev
= int(bad_rev
)
664 print('Number of times to run (%d) must be greater than or equal to 1.' %
669 (min_chromium_rev
, max_chromium_rev
) = Bisect(
670 opts
.archive
, opts
.official_builds
, good_rev
, bad_rev
, opts
.times
, args
,
673 # Get corresponding webkit revisions.
675 min_webkit_rev
= GetWebKitRevisionForChromiumRevision(min_chromium_rev
)
676 max_webkit_rev
= GetWebKitRevisionForChromiumRevision(max_chromium_rev
)
678 # Silently ignore the failure.
679 min_webkit_rev
, max_webkit_rev
= 0, 0
681 # We're done. Let the user know the results in an official manner.
682 if good_rev
> bad_rev
:
683 print DONE_MESSAGE_GOOD_MAX
% (str(min_chromium_rev
), str(max_chromium_rev
))
685 print DONE_MESSAGE_GOOD_MIN
% (str(min_chromium_rev
), str(max_chromium_rev
))
687 if min_webkit_rev
!= max_webkit_rev
:
688 print 'WEBKIT CHANGELOG URL:'
689 print ' ' + WEBKIT_CHANGELOG_URL
% (max_webkit_rev
, min_webkit_rev
)
690 print 'CHANGELOG URL:'
691 if opts
.official_builds
:
692 print OFFICIAL_CHANGELOG_URL
% (min_chromium_rev
, max_chromium_rev
)
694 print ' ' + CHANGELOG_URL
% (min_chromium_rev
, max_chromium_rev
)
696 if __name__
== '__main__':