1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import itertools
import time

from profile_creators import profile_extender
from telemetry.core import exceptions
from telemetry.core import util
11 class FastNavigationProfileExtender(profile_extender
.ProfileExtender
):
12 """Extends a Chrome profile.
14 This class creates or extends an existing profile by performing a set of tab
15 navigations in large batches. This is accomplished by opening a large number
16 of tabs, simultaneously navigating all the tabs, and then waiting for all the
17 tabs to load. This provides two benefits:
18 - Takes advantage of the high number of logical cores on modern CPUs.
19 - The total time spent waiting for navigations to time out scales linearly
20 with the number of batches, but does not scale with the size of the
def __init__(self, finder_options, maximum_batch_size):
  """Initializer.

  Args:
    finder_options: Forwarded unchanged to the superclass initializer.
    maximum_batch_size: A positive integer indicating the number of tabs to
        simultaneously perform navigations.
  """
  super(FastNavigationProfileExtender, self).__init__(finder_options)

  # The instance keeps a list of Tabs that can be navigated successfully.
  # This means that the Tab is not crashed, and is still processing
  # JavaScript.
  self._navigation_tabs = []

  # The number of tabs to use.
  self._NUM_TABS = maximum_batch_size

  # The amount of additional time to wait for a batch of pages to finish
  # loading for each page in the batch.
  self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20

  # The amount of time to wait for a page to quiesce. Some pages will never
  # quiesce.
  self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10
49 """Superclass override."""
52 self
._PerformNavigations
()
54 self
.TearDownBrowser()
56 # When there hasn't been an exception, verify that the profile was
58 # TODO(erikchen): I've intentionally omitted my implementation of
59 # VerifyProfileWasExtended() in small_profile_extender, since the profile
60 # is not being correctly extended. http://crbug.com/484833
61 # http://crbug.com/484880
62 self
.VerifyProfileWasExtended()
def VerifyProfileWasExtended(self):
  """Verifies that the profile was correctly extended.

  Can be overridden by subclasses.
  """
  # NOTE(review): no statements are visible for this hook in the damaged
  # source; it is treated as a no-op default -- confirm.
  pass
def GetUrlIterator(self):
  """Gets URLs for the browser to navigate to.

  Intended for subclass override.

  Returns:
    An iterator whose elements are urls to be navigated to.

  Raises:
    NotImplementedError: Always, in this base implementation.
  """
  raise NotImplementedError()
def ShouldExitAfterBatchNavigation(self):
  """Returns a boolean indicating whether profile extension is finished.

  Intended for subclass override.

  Raises:
    NotImplementedError: Always, in this base implementation.
  """
  raise NotImplementedError()
def CleanUpAfterBatchNavigation(self):
  """A hook for subclasses to perform cleanup after each batch of
  navigations.

  Can be overridden by subclasses.
  """
  # NOTE(review): no statements are visible for this hook in the damaged
  # source; it is treated as a no-op default -- confirm.
  pass
def _RefreshNavigationTabs(self):
  """Updates the member self._navigation_tabs to contain self._NUM_TABS
  elements, each of which is not crashed. The crashed tabs are intentionally
  leaked, since Telemetry doesn't have a good way of killing crashed tabs.

  It is also possible for a tab to be stalled in an infinite JavaScript loop.
  These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
  There is no way to kill these tabs, so they are also leaked. This method is
  careful to only use tabs in self._navigation_tabs, or newly created tabs.
  """
  # Keep only the tabs that still report themselves as alive.
  self._navigation_tabs = [
      tab for tab in self._navigation_tabs if tab.IsAlive()]

  # Top the list back up to the configured size with freshly opened tabs.
  while len(self._navigation_tabs) < self._NUM_TABS:
    self._navigation_tabs.append(self._browser.tabs.New())
def _RemoveNavigationTab(self, tab):
  """Removes a tab which is no longer in a useable state from
  self._navigation_tabs. The tab is not removed from self.browser.tabs,
  since there is no guarantee that the tab can be safely removed.

  Args:
    tab: The tab to drop; must currently be in self._navigation_tabs.

  Raises:
    ValueError: If |tab| is not in self._navigation_tabs (from list.remove).
  """
  self._navigation_tabs.remove(tab)
def _RetrieveTabUrl(self, tab, timeout):
  """Retrieves the URL of the tab.

  Args:
    tab: The tab to query.
    timeout: Passed through as the second argument of
        tab.EvaluateJavaScript().

  Returns:
    The result of evaluating 'document.URL' in the tab.
  """
  # TODO(erikchen): Use tab.url instead, which talks to the browser process
  # instead of the renderer process. http://crbug.com/486119
  return tab.EvaluateJavaScript('document.URL', timeout)
def _WaitForUrlToChange(self, tab, initial_url, end_time):
  """Waits for the tab to navigate away from its initial url.

  If time.time() is larger than end_time, the function does nothing.
  Otherwise, the function tries to return no later than end_time.

  Args:
    tab: The tab being polled.
    initial_url: The url the tab had before the navigation was requested.
    end_time: Deadline, on the same clock as time.time().
  """
  # NOTE(review): the loop header, the bodies of both `if` statements and the
  # sleep call are missing from the damaged source. They are reconstructed
  # from the docstring and the surviving comments -- confirm against the
  # original file.
  while True:
    seconds_to_wait = end_time - time.time()
    if seconds_to_wait <= 0:
      break

    current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
    if current_url != initial_url and current_url != "":
      break

    # Retrieving the current url is a non-trivial operation. Add a small
    # sleep here to prevent this method from contending with the actual
    # navigation.
    # NOTE(review): the sleep duration is not visible in the source; 0.01s
    # is a placeholder -- confirm.
    time.sleep(0.01)
def _WaitForTabToBeReady(self, tab, end_time):
  """Waits for the tab to be ready.

  If time.time() is larger than end_time, the function does nothing.
  Otherwise, the function tries to return no later than end_time.

  Args:
    tab: The tab to wait on.
    end_time: Deadline, on the same clock as time.time().
  """
  seconds_to_wait = end_time - time.time()
  if seconds_to_wait <= 0:
    # NOTE(review): this branch body is missing from the damaged source; an
    # early return matches the docstring ("does nothing") -- confirm.
    return
  tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)

  # Wait up to self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS for the page
  # to quiesce, but never past end_time. If the page hasn't quiesced by then,
  # it will probably never quiesce. The visible code only clamped the wait to
  # the remaining batch budget, so the documented cap was never applied.
  seconds_to_wait = max(0, end_time - time.time())
  seconds_to_wait = min(
      seconds_to_wait, self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS)
  try:
    util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait)
  except exceptions.TimeoutException:
    # NOTE(review): the handler body is missing from the damaged source;
    # ignoring the timeout is assumed, since a page that never quiesces is
    # described as expected -- confirm.
    pass
def _BatchNavigateTabs(self, batch):
  """Performs a batch of tab navigations with minimal delay.

  Args:
    batch: A list of tuples (tab, url).

  Returns:
    A list of tuples (tab, initial_url). |initial_url| is the url of the
    |tab| prior to a navigation command being sent to it.
  """
  # Attempting to pass in a timeout of 0 seconds results in a synchronous
  # socket error from the websocket library. Pass in a very small timeout
  # instead so that the websocket library raises a Timeout exception. This
  # prevents the logic from accidentally catching different socket
  # exceptions.
  timeout_in_seconds = 0.01

  # NOTE(review): the initialisation of queued_tabs, the `try:` line, the
  # handler body and the final return are missing from the damaged source.
  # They are reconstructed from the docstring and the surviving
  # `except`/`append` lines -- confirm.
  queued_tabs = []
  for tab, url in batch:
    initial_url = self._RetrieveTabUrl(tab, 20)

    try:
      tab.Navigate(url, None, timeout_in_seconds)
    except exceptions.TimeoutException:
      # We expect to receive a timeout exception, since we're not waiting for
      # the navigation to complete.
      pass

    queued_tabs.append((tab, initial_url))
  return queued_tabs
def _WaitForQueuedTabsToLoad(self, queued_tabs):
  """Waits for all the batch navigated tabs to finish loading.

  Args:
    queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
        to have already been sent a navigation command.
  """
  # One shared deadline for the whole batch: the per-page allowance times the
  # number of queued tabs.
  total_batch_timeout = (len(queued_tabs) *
                         self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS)
  end_time = time.time() + total_batch_timeout

  for tab, initial_url in queued_tabs:
    # Since we didn't wait any time for the tab url navigation to commit, it's
    # possible that the tab hasn't started navigating yet.
    self._WaitForUrlToChange(tab, initial_url, end_time)
    self._WaitForTabToBeReady(tab, end_time)
def _GetUrlsToNavigate(self, url_iterator):
  """Returns an array of urls to navigate to, given a url_iterator.

  Args:
    url_iterator: An iterator of urls; it is advanced by however many urls
        are returned.

  Returns:
    A list of at most self._NUM_TABS urls. The list is shorter (possibly
    empty) when the iterator runs out first.
  """
  # islice stops at whichever comes first: self._NUM_TABS items or exhaustion
  # of the iterator. This replaces the xrange()/.next() loop, which only runs
  # on Python 2.
  return list(itertools.islice(url_iterator, self._NUM_TABS))
219 def _PerformNavigations(self
):
220 """Repeatedly fetches a batch of urls, and navigates to those urls. This
221 will run until an empty batch is returned, or
222 ShouldExitAfterBatchNavigation() returns True.
224 url_iterator
= self
.GetUrlIterator()
226 self
._RefreshNavigationTabs
()
227 urls
= self
._GetUrlsToNavigate
(url_iterator
)
233 for i
in range(len(urls
)):
235 tab
= self
._navigation
_tabs
[i
]
236 batch
.append((tab
, url
))
238 queued_tabs
= self
._BatchNavigateTabs
(batch
)
239 self
._WaitForQueuedTabsToLoad
(queued_tabs
)
241 self
.CleanUpAfterBatchNavigation()
243 if self
.ShouldExitAfterBatchNavigation():