# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Snapshot Build Bisect Tool

This script bisects a snapshot archive using binary search. It starts at
a bad revision (it will try to guess HEAD) and asks for a last known-good
revision. It will then binary search across this revision range by downloading,
unzipping, and opening Chromium for you. After testing the specific revision,
it will ask you whether it is good or bad before continuing the search.
"""
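# Example invocation (illustrative only; the revision numbers are made up --
# see the option help in main() below for the real flags):
#
#   python <this script> -a linux64 -g 200000 -b 210000 -- --no-first-run
#
# -g/-b give the known-good and known-bad bounds, -a picks the archive to
# bisect, and everything after "--" is passed straight to the browser.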
# The root URL for storage.
BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'

# The root URL for official builds.
OFFICIAL_BASE_URL = 'http://master.chrome.corp.google.com/official_builds'

CHANGELOG_URL = 'http://build.chromium.org/f/chromium/' \
                'perf/dashboard/ui/changelog.html?' \
                'url=/trunk/src&range=%d%%3A%d'

# Official Changelogs URL.
OFFICIAL_CHANGELOG_URL = 'http://omahaproxy.appspot.com/' \
                         'changelog?old_version=%s&new_version=%s'

DEPS_FILE = 'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d'
# Blink Changelogs URL.
BLINK_CHANGELOG_URL = 'http://build.chromium.org/f/chromium/' \
                      'perf/dashboard/ui/changelog_blink.html?' \
                      'url=/trunk&range=%d%%3A%d'

DONE_MESSAGE_GOOD_MIN = 'You are probably looking for a change made after %s ' \
                        '(known good), but no later than %s (first known bad).'
DONE_MESSAGE_GOOD_MAX = 'You are probably looking for a change made after %s ' \
                        '(known bad), but no later than %s (first known good).'
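# For reference, the URL templates above are filled with Python %-formatting,
# and '%%' collapses to a literal '%'. A sketch with made-up revision numbers:
#
#   >>> CHANGELOG_URL % (1000, 2000)
#   'http://build.chromium.org/f/chromium/perf/dashboard/ui/changelog.html?url=/trunk/src&range=1000%3A2000'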
###############################################################################

import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import urllib
from distutils.version import LooseVersion
from xml.etree import ElementTree
import zipfile
class PathContext(object):
  """A PathContext is used to carry the information used to construct URLs and
  paths when dealing with the storage server and archives."""
  def __init__(self, platform, good_revision, bad_revision, is_official):
    super(PathContext, self).__init__()
    # Store off the input parameters.
    self.platform = platform  # What's passed in to the '-a/--archive' option.
    self.good_revision = good_revision
    self.bad_revision = bad_revision
    self.is_official = is_official

    # The name of the ZIP file in a revision directory on the server.
    self.archive_name = None

    # Set some internal members:
    #   _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
    #   _archive_extract_dir = Uncompressed directory in the archive_name file.
    #   _binary_name = The name of the executable to run.
    if self.platform in ('linux', 'linux64', 'linux-arm'):
      self._binary_name = 'chrome'
    elif self.platform == 'mac':
      self.archive_name = 'chrome-mac.zip'
      self._archive_extract_dir = 'chrome-mac'
    elif self.platform == 'win':
      self.archive_name = 'chrome-win32.zip'
      self._archive_extract_dir = 'chrome-win32'
      self._binary_name = 'chrome.exe'
    else:
      raise Exception('Invalid platform: %s' % self.platform)
    # Official builds and snapshot builds use different directory layouts.
    if self.is_official:
      if self.platform == 'linux':
        self._listing_platform_dir = 'lucid32bit/'
        self.archive_name = 'chrome-lucid32bit.zip'
        self._archive_extract_dir = 'chrome-lucid32bit'
      elif self.platform == 'linux64':
        self._listing_platform_dir = 'lucid64bit/'
        self.archive_name = 'chrome-lucid64bit.zip'
        self._archive_extract_dir = 'chrome-lucid64bit'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'mac/'
        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
      elif self.platform == 'win':
        self._listing_platform_dir = 'win/'
    else:
      if self.platform in ('linux', 'linux64', 'linux-arm'):
        self.archive_name = 'chrome-linux.zip'
        self._archive_extract_dir = 'chrome-linux'
        if self.platform == 'linux':
          self._listing_platform_dir = 'Linux/'
        elif self.platform == 'linux64':
          self._listing_platform_dir = 'Linux_x64/'
        elif self.platform == 'linux-arm':
          self._listing_platform_dir = 'Linux_ARM_Cross-Compile/'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'Mac/'
        self._binary_name = 'Chromium.app/Contents/MacOS/Chromium'
      elif self.platform == 'win':
        self._listing_platform_dir = 'Win/'
  def GetListingURL(self, marker=None):
    """Returns the URL for a directory listing, with an optional marker."""
    marker_param = ''
    if marker:
      marker_param = '&marker=' + str(marker)
    return BASE_URL + '/?delimiter=/&prefix=' + self._listing_platform_dir + \
           marker_param
  def GetDownloadURL(self, revision):
    """Gets the download URL for a build archive of a specific revision."""
    if self.is_official:
      return "%s/%s/%s%s" % (
          OFFICIAL_BASE_URL, revision, self._listing_platform_dir,
          self.archive_name)
    else:
      return "%s/%s%s/%s" % (
          BASE_URL, self._listing_platform_dir, revision, self.archive_name)
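  # Sketch of the URL shapes GetDownloadURL() produces (the revision and
  # version numbers here are hypothetical, not real builds):
  #
  #   snapshot, linux64:
  #     http://commondatastorage.googleapis.com/chromium-browser-snapshots/Linux_x64/200000/chrome-linux.zip
  #   official, linux64:
  #     http://master.chrome.corp.google.com/official_builds/30.0.1599.0/lucid64bit/chrome-lucid64bit.zip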
  def GetLastChangeURL(self):
    """Returns a URL to the LAST_CHANGE file."""
    return BASE_URL + '/' + self._listing_platform_dir + 'LAST_CHANGE'

  def GetLaunchPath(self):
    """Returns a relative path (presumably from the archive extraction location)
    that is used to run the executable."""
    return os.path.join(self._archive_extract_dir, self._binary_name)
  def ParseDirectoryIndex(self):
    """Parses the Google Storage directory listing into a list of revision
    numbers."""

    def _FetchAndParse(url):
      """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
      next-marker is not None, then the listing is a partial listing and another
      fetch should be performed with next-marker being the marker= GET
      parameter."""
      handle = urllib.urlopen(url)
      document = ElementTree.parse(handle)

      # All nodes in the tree are namespaced. Get the root's tag name to extract
      # the namespace. Etree does namespaces as |{namespace}tag|.
      root_tag = document.getroot().tag
      end_ns_pos = root_tag.find('}')
      if end_ns_pos == -1:
        raise Exception("Could not locate end namespace for directory index")
      namespace = root_tag[:end_ns_pos + 1]
      # Find the prefix (_listing_platform_dir) and whether or not the list is
      # truncated.
      prefix_len = len(document.find(namespace + 'Prefix').text)
      next_marker = None
      is_truncated = document.find(namespace + 'IsTruncated')
      if is_truncated is not None and is_truncated.text.lower() == 'true':
        next_marker = document.find(namespace + 'NextMarker').text

      # Get a list of all the revisions.
      all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
                                      namespace + 'Prefix')
      # The <Prefix> nodes have content of the form of
      # |_listing_platform_dir/revision/|. Strip off the platform dir and the
      # trailing slash to just have a number.
      revisions = []
      for prefix in all_prefixes:
        revnum = prefix.text[prefix_len:-1]
        revisions.append(revnum)
      return (revisions, next_marker)
    # Fetch the first list of revisions.
    (revisions, next_marker) = _FetchAndParse(self.GetListingURL())

    # If the result list was truncated, refetch with the next marker. Do this
    # until an entire directory listing is done.
    while next_marker:
      next_url = self.GetListingURL(next_marker)
      (new_revisions, next_marker) = _FetchAndParse(next_url)
      revisions.extend(new_revisions)
    return revisions
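  # For orientation, the listing XML that _FetchAndParse() walks looks roughly
  # like the following (abridged; the element namespace and revision numbers
  # are illustrative):
  #
  #   <ListBucketResult xmlns="...">
  #     <Prefix>Linux_x64/</Prefix>
  #     <IsTruncated>true</IsTruncated>
  #     <NextMarker>Linux_x64/200123/</NextMarker>
  #     <CommonPrefixes><Prefix>Linux_x64/200000/</Prefix></CommonPrefixes>
  #     <CommonPrefixes><Prefix>Linux_x64/200001/</Prefix></CommonPrefixes>
  #   </ListBucketResult>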
  def GetRevList(self):
    """Gets the list of revision numbers between self.good_revision and
    self.bad_revision."""
    # Download the revlist and filter for just the range between good and bad.
    minrev = min(self.good_revision, self.bad_revision)
    maxrev = max(self.good_revision, self.bad_revision)
    revlist = map(int, self.ParseDirectoryIndex())
    revlist = [x for x in revlist if x >= int(minrev) and x <= int(maxrev)]
    revlist.sort()
    return revlist
  def GetOfficialBuildsList(self):
    """Gets the list of official build numbers between self.good_revision and
    self.bad_revision."""
    # Download the revlist and filter for just the range between good and bad.
    minrev = min(self.good_revision, self.bad_revision)
    maxrev = max(self.good_revision, self.bad_revision)
    handle = urllib.urlopen(OFFICIAL_BASE_URL)
    dirindex = handle.read()
    handle.close()
    build_numbers = re.findall(r'<a href="([0-9][0-9].*)/">', dirindex)
    final_list = []
    parsed_build_numbers = [LooseVersion(x) for x in build_numbers]
    for build_number in sorted(parsed_build_numbers):
      path = OFFICIAL_BASE_URL + '/' + str(build_number) + '/' + \
             self._listing_platform_dir + self.archive_name
      try:
        # Check that an archive actually exists for this platform.
        connection = urllib.urlopen(path)
        connection.close()
        if build_number > maxrev:
          break
        if build_number >= minrev:
          final_list.append(str(build_number))
      except urllib.HTTPError, e:
        pass
    return final_list
def UnzipFilenameToDir(filename, dir):
  """Unzip |filename| to directory |dir|."""
  cwd = os.getcwd()
  if not os.path.isabs(filename):
    filename = os.path.join(cwd, filename)
  zf = zipfile.ZipFile(filename)
  # Make the base directory and extract inside it.
  if not os.path.isdir(dir):
    os.mkdir(dir)
  os.chdir(dir)
  # Extract files.
  for info in zf.infolist():
    name = info.filename
    if name.endswith('/'):  # dir
      if not os.path.isdir(name):
        os.makedirs(name)
    else:  # file
      dir = os.path.dirname(name)
      if not os.path.isdir(dir):
        os.makedirs(dir)
      out = open(name, 'wb')
      out.write(zf.read(name))
      out.close()
    # Set permissions. Permission info in external_attr is shifted 16 bits.
    os.chmod(name, info.external_attr >> 16L)
  os.chdir(cwd)
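# A quick note on the permission handling above: for zips created on Unix, the
# file mode lives in the upper 16 bits of external_attr, so shifting right by
# 16 recovers it. For example (hypothetical zip entry):
#
#   >>> external_attr = 0755 << 16
#   >>> oct(external_attr >> 16)
#   '0755'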
def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
  """Downloads and unzips revision |rev|.
  @param context A PathContext instance.
  @param rev The Chromium revision number/tag to download.
  @param filename The destination for the downloaded file.
  @param quit_event A threading.Event which will be set by the master thread to
                    indicate that the download should be aborted.
  @param progress_event A threading.Event which will be set by the master thread
                    to indicate that the progress of the download should be
                    displayed.
  """
  def ReportHook(blocknum, blocksize, totalsize):
    if quit_event and quit_event.isSet():
      raise RuntimeError("Aborting download of revision %s" % str(rev))
    if progress_event and progress_event.isSet():
      size = blocknum * blocksize
      if totalsize == -1:  # Total size not known.
        progress = "Received %d bytes" % size
      else:
        size = min(totalsize, size)
        progress = "Received %d of %d bytes, %.2f%%" % (
            size, totalsize, 100.0 * size / totalsize)
      # Send a \r to let all progress messages use just one line of output.
      sys.stdout.write("\r" + progress)
      sys.stdout.flush()
  download_url = context.GetDownloadURL(rev)
  try:
    urllib.urlretrieve(download_url, filename, ReportHook)
    if progress_event and progress_event.isSet():
      print  # Terminate the single-line progress output with a newline.
  except RuntimeError, e:
    pass
def RunRevision(context, revision, zipfile, profile, num_runs, args):
  """Given a zipped revision, unzip it and run the test."""
  print "Trying revision %s..." % str(revision)

  # Create a temp directory and unzip the revision into it.
  cwd = os.getcwd()
  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
  UnzipFilenameToDir(zipfile, tempdir)
  os.chdir(tempdir)

  # Run the build as many times as specified.
  testargs = [context.GetLaunchPath(), '--user-data-dir=%s' % profile] + args
  # The sandbox must be run as root on Official Chrome, so bypass it.
  if context.is_official and context.platform.startswith('linux'):
    testargs.append('--no-sandbox')

  for i in range(0, num_runs):
    subproc = subprocess.Popen(testargs,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    (stdout, stderr) = subproc.communicate()

  os.chdir(cwd)
  shutil.rmtree(tempdir, True)

  return (subproc.returncode, stdout, stderr)
def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
  """Ask the user whether build |rev| is good or bad."""
  # Loop until we get a response that we can parse.
  while True:
    response = raw_input('Revision %s is [(g)ood/(b)ad/(u)nknown/(q)uit]: ' %
                         str(rev))
    if response and response in ('g', 'b', 'u'):
      return response
    if response and response == 'q':
      raise SystemExit()
class DownloadJob(object):
  """DownloadJob represents a task to download a given Chromium revision."""
  def __init__(self, context, name, rev, zipfile):
    super(DownloadJob, self).__init__()
    # Store off the input parameters.
    self.context = context
    self.name = name
    self.rev = rev
    self.zipfile = zipfile
    self.quit_event = threading.Event()
    self.progress_event = threading.Event()
  def Start(self):
    """Starts the download."""
    fetchargs = (self.context,
                 self.rev,
                 self.zipfile,
                 self.quit_event,
                 self.progress_event)
    self.thread = threading.Thread(target=FetchRevision,
                                   name=self.name,
                                   args=fetchargs)
    self.thread.start()

  def Stop(self):
    """Stops the download which must have been started previously."""
    self.quit_event.set()
    self.thread.join()
    os.unlink(self.zipfile)

  def WaitFor(self):
    """Prints a message and waits for the download to complete. The download
    must have been started previously."""
    print "Downloading revision %s..." % str(self.rev)
    self.progress_event.set()  # Display progress of download.
    self.thread.join()
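  # Typical life cycle of a DownloadJob, as used by Bisect() below (the
  # revision and file name are made up; Start/WaitFor/Stop as reconstructed
  # above):
  #
  #   >>> job = DownloadJob(context, 'example_fetch', 200000,
  #   ...                   '200000-chrome-linux.zip')
  #   >>> job.Start()     # download begins on a background thread
  #   >>> job.WaitFor()   # block, showing progress, until it finishes
  #   >>> # ...or job.Stop() to abort and delete the partial zip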
def Bisect(platform,
           official_builds,
           good_rev,
           bad_rev,
           num_runs,
           try_args=(),
           profile=None,
           evaluate=AskIsGoodBuild):
  """Given known good and known bad revisions, run a binary search on all
  archived revisions to determine the last known good revision.

  @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).
  @param official_builds Specify build type (Chromium or Official build).
  @param good_rev Number/tag of the known good revision.
  @param bad_rev Number/tag of the known bad revision.
  @param num_runs Number of times to run each build for asking good/bad.
  @param try_args A tuple of arguments to pass to the test application.
  @param profile The name of the user profile to run with.
  @param evaluate A function which returns 'g' if the argument build is good,
                  'b' if it's bad or 'u' if unknown.

  Threading is used to fetch Chromium revisions in the background, speeding up
  the user's experience. For example, suppose the bounds of the search are
  good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
  whether revision 50 is good or bad, the next revision to check will be either
  25 or 75. So, while revision 50 is being checked, the script will download
  revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
  known:

    - If rev 50 is good, the download of rev 25 is cancelled, and the next test
      is run on rev 75.

    - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
      is run on rev 25.
  """
  if not profile:
    profile = 'profile'

  context = PathContext(platform, good_rev, bad_rev, official_builds)
  cwd = os.getcwd()

  print "Downloading list of known revisions..."
  _GetDownloadPath = lambda rev: os.path.join(cwd,
      '%s-%s' % (str(rev), context.archive_name))
  if official_builds:
    revlist = context.GetOfficialBuildsList()
  else:
    revlist = context.GetRevList()

  # Get a list of revisions to bisect across.
  if len(revlist) < 2:  # Don't have enough builds to bisect.
    msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
    raise RuntimeError(msg)

  # Figure out our bookends and first pivot point; fetch the pivot revision.
  minrev = 0
  maxrev = len(revlist) - 1
  pivot = maxrev / 2
  rev = revlist[pivot]
  zipfile = _GetDownloadPath(rev)
  fetch = DownloadJob(context, 'initial_fetch', rev, zipfile)
  fetch.Start()
  fetch.WaitFor()
  # Binary search time!
  while fetch and fetch.zipfile and maxrev - minrev > 1:
    if bad_rev < good_rev:
      min_str, max_str = "bad", "good"
    else:
      min_str, max_str = "good", "bad"
    print 'Bisecting range [%s (%s), %s (%s)].' % (revlist[minrev], min_str,
                                                   revlist[maxrev], max_str)
    # Pre-fetch next two possible pivots
    #   - down_pivot is the next revision to check if the current revision turns
    #     out to be bad.
    #   - up_pivot is the next revision to check if the current revision turns
    #     out to be good.
    down_pivot = int((pivot - minrev) / 2) + minrev
    down_fetch = None
    if down_pivot != pivot and down_pivot != minrev:
      down_rev = revlist[down_pivot]
      down_fetch = DownloadJob(context, 'down_fetch', down_rev,
                               _GetDownloadPath(down_rev))
      down_fetch.Start()

    up_pivot = int((maxrev - pivot) / 2) + pivot
    up_fetch = None
    if up_pivot != pivot and up_pivot != maxrev:
      up_rev = revlist[up_pivot]
      up_fetch = DownloadJob(context, 'up_fetch', up_rev,
                             _GetDownloadPath(up_rev))
      up_fetch.Start()
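    # To make the arithmetic above concrete: with minrev=0, maxrev=100 and
    # pivot=50 (the example from the docstring), down_pivot works out to 25
    # and up_pivot to 75, so those two builds are fetched while rev 50 runs.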
    # Run test on the pivot revision.
    status = None
    stdout = None
    stderr = None
    try:
      (status, stdout, stderr) = RunRevision(context,
                                             rev,
                                             fetch.zipfile,
                                             profile,
                                             num_runs,
                                             try_args)
    except Exception, e:
      print >>sys.stderr, e
    fetch.Stop()
    fetch = None
    # Call the evaluate function to see if the current revision is good or bad.
    # On that basis, kill one of the background downloads and complete the
    # other, as described in the comments above.
    try:
      answer = evaluate(rev, official_builds, status, stdout, stderr)
      if answer == 'g' and good_rev < bad_rev or \
          answer == 'b' and bad_rev < good_rev:
        minrev = pivot
        if down_fetch:
          down_fetch.Stop()  # Kill the download of the older revision.
        if up_fetch:
          up_fetch.WaitFor()
          pivot = up_pivot
          fetch = up_fetch
      elif answer == 'b' and good_rev < bad_rev or \
          answer == 'g' and bad_rev < good_rev:
        maxrev = pivot
        if up_fetch:
          up_fetch.Stop()  # Kill the download of the newer revision.
        if down_fetch:
          down_fetch.WaitFor()
          pivot = down_pivot
          fetch = down_fetch
      elif answer == 'u':
        # Nuke the revision from the revlist and choose a new pivot.
        revlist.pop(pivot)
        maxrev -= 1  # Assumes maxrev >= pivot.

        if maxrev - minrev > 1:
          # Alternate between using down_pivot or up_pivot for the new pivot
          # point, without affecting the range. Do this instead of setting the
          # pivot to the midpoint of the new range because adjacent revisions
          # are likely affected by the same issue that caused the (u)nknown
          # response.
          if up_fetch and down_fetch:
            fetch = [up_fetch, down_fetch][len(revlist) % 2]
          elif up_fetch:
            fetch = up_fetch
          else:
            fetch = down_fetch
          fetch.WaitFor()
          if fetch == up_fetch:
            pivot = up_pivot - 1  # Subtracts 1 because revlist was resized.
          else:
            pivot = down_pivot
          zipfile = fetch.zipfile

        if down_fetch and fetch != down_fetch:
          down_fetch.Stop()
        if up_fetch and fetch != up_fetch:
          up_fetch.Stop()
      else:
        assert False, "Unexpected return value from evaluate(): " + answer
    except SystemExit:
      print "Cleaning up..."
      for f in [_GetDownloadPath(revlist[down_pivot]),
                _GetDownloadPath(revlist[up_pivot])]:
        try:
          os.unlink(f)
        except OSError:
          pass
      sys.exit(0)

    rev = revlist[pivot]

  return (revlist[minrev], revlist[maxrev])
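# Sketch of driving Bisect() directly (normally main() below does this); the
# bounds and the result shown are hypothetical:
#
#   >>> Bisect('linux64', False, 200000, 210000, 1, ['--no-first-run'], 'profile')
#   (204515, 204516)   # (last known good, first known bad)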
def GetBlinkRevisionForChromiumRevision(rev):
  """Returns the blink revision that was in chromium's DEPS file at
  chromium revision |rev|."""
  # . doesn't match newlines without re.DOTALL, so this is safe.
  blink_re = re.compile(r'webkit_revision.:\D*(\d+)')
  url = urllib.urlopen(DEPS_FILE % rev)
  m = blink_re.search(url.read())
  url.close()
  if m:
    return int(m.group(1))
  else:
    raise Exception('Could not get blink revision for cr rev %d' % rev)
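# The regex above is meant to pick the revision out of the DEPS entry, which
# looks roughly like this (value is illustrative):
#
#   "webkit_revision": "151000",
#
# so blink_re.search(...) would yield '151000' as group(1).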
def GetChromiumRevision(url):
  """Returns the chromium revision read from given URL."""
  try:
    # Location of the latest build revision number
    return int(urllib.urlopen(url).read())
  except Exception, e:
    print('Could not determine latest revision. This could be bad...')
    return 999999999  # Fall back to a very large revision number.
def main():
  usage = ('%prog [options] [-- chromium-options]\n'
           'Perform binary search on the snapshot builds to find a minimal\n'
           'range of revisions where a behavior change happened. The\n'
           'behaviors are described as "good" and "bad".\n'
           'It is NOT assumed that the behavior of the later revision is\n'
           'the bad one.\n'
           '\n'
           'Revision numbers should use\n'
           '  Official versions (e.g. 1.0.1000.0) for official builds. (-o)\n'
           '  SVN revisions (e.g. 123456) for chromium builds, from trunk.\n'
           '    Use base_trunk_revision from http://omahaproxy.appspot.com/\n'
           '    for earlier revs.\n'
           '    Chrome\'s about: build number and omahaproxy branch_revision\n'
           '    are incorrect, they are from branches.\n'
           '\n'
           'Tip: add "-- --no-first-run" to bypass the first run prompts.')
  parser = optparse.OptionParser(usage=usage)
  # Strangely, the default help output doesn't include the choice list.
  choices = ['mac', 'win', 'linux', 'linux64', 'linux-arm']
  # linux-chromiumos lacks a continuous archive http://crbug.com/78158
  parser.add_option('-a', '--archive',
                    choices = choices,
                    help = 'The buildbot archive to bisect [%s].' %
                           '|'.join(choices))
  parser.add_option('-o', action="store_true", dest='official_builds',
                    help = 'Bisect across official ' +
                           'Chrome builds (internal only) instead of ' +
                           'Chromium archives.')
  parser.add_option('-b', '--bad', type = 'str',
                    help = 'A bad revision to start bisection. ' +
                           'May be earlier or later than the good revision. ' +
                           'Default is HEAD.')
  parser.add_option('-g', '--good', type = 'str',
                    help = 'A good revision to start bisection. ' +
                           'May be earlier or later than the bad revision. ' +
                           'Default is 0.')
  parser.add_option('-p', '--profile', '--user-data-dir', type = 'str',
                    help = 'Profile to use; this will not reset every run. ' +
                           'Defaults to a clean profile.', default = 'profile')
  parser.add_option('-t', '--times', type = 'int',
                    help = 'Number of times to run each build before asking ' +
                           'if it\'s good or bad. Temporary profiles are reused.',
                    default = 1)
  (opts, args) = parser.parse_args()
  if opts.archive is None:
    print 'Error: missing required parameter: --archive'
    print
    parser.print_help()
    return 1

  # Create the context. Initialize 0 for the revisions as they are set below.
  context = PathContext(opts.archive, 0, 0, opts.official_builds)
  # Pick a starting point, try to get HEAD for this.
  if opts.bad:
    bad_rev = opts.bad
  else:
    bad_rev = '999.0.0.0'
    if not opts.official_builds:
      bad_rev = GetChromiumRevision(context.GetLastChangeURL())

  # Find out when we were good.
  if opts.good:
    good_rev = opts.good
  else:
    good_rev = '0.0.0.0' if opts.official_builds else 0

  if opts.official_builds:
    good_rev = LooseVersion(good_rev)
    bad_rev = LooseVersion(bad_rev)
  else:
    good_rev = int(good_rev)
    bad_rev = int(bad_rev)

  if opts.times < 1:
    print('Number of times to run (%d) must be greater than or equal to 1.' %
          opts.times)
    parser.print_help()
    return 1
  (min_chromium_rev, max_chromium_rev) = Bisect(
      opts.archive, opts.official_builds, good_rev, bad_rev, opts.times, args,
      opts.profile)
  # Get corresponding blink revisions.
  try:
    min_blink_rev = GetBlinkRevisionForChromiumRevision(min_chromium_rev)
    max_blink_rev = GetBlinkRevisionForChromiumRevision(max_chromium_rev)
  except Exception, e:
    # Silently ignore the failure.
    min_blink_rev, max_blink_rev = 0, 0
  # We're done. Let the user know the results in an official manner.
  if good_rev > bad_rev:
    print DONE_MESSAGE_GOOD_MAX % (str(min_chromium_rev), str(max_chromium_rev))
  else:
    print DONE_MESSAGE_GOOD_MIN % (str(min_chromium_rev), str(max_chromium_rev))

  if min_blink_rev != max_blink_rev:
    print 'BLINK CHANGELOG URL:'
    print '  ' + BLINK_CHANGELOG_URL % (max_blink_rev, min_blink_rev)

  print 'CHANGELOG URL:'
  if opts.official_builds:
    print OFFICIAL_CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)
  else:
    print '  ' + CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)
if __name__ == '__main__':
  sys.exit(main())