1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import itertools
import time

from profile_creators import profile_extender
from telemetry.core import exceptions
class FastNavigationProfileExtender(profile_extender.ProfileExtender):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.

  Subclasses must implement GetUrlIterator() and
  ShouldExitAfterBatchNavigation(); CleanUpAfterBatchNavigation() is an
  optional hook.
  """

  def __init__(self, finder_options, maximum_batch_size):
    """Initializer.

    Args:
      finder_options: Forwarded unchanged to the superclass initializer.
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__(finder_options)

    # The instance keeps a list of Tabs that can be navigated successfully:
    # tabs that have not crashed and that still respond to JavaScript
    # evaluation.
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of time to wait for a batch of pages to finish loading.
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10

    # The default amount of time to wait for the retrieval of the URL of a tab.
    self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1

  # NOTE(review): the header of this method is not present in the reviewed
  # text; the name `Run` is an assumption - confirm it against the superclass.
  def Run(self):
    """Superclass override.

    Performs the batched navigations and tears the browser down afterwards,
    including when a navigation step raises.
    """
    try:
      # NOTE(review): a setup call is assumed as the counterpart of
      # TearDownBrowser() below - confirm the superclass method name.
      self.SetUpBrowser()
      self._PerformNavigations()
    finally:
      self.TearDownBrowser()

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.
    """
    raise NotImplementedError()

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses.
    """
    pass

  @staticmethod
  def _SecondsUntil(end_time):
    """Returns the seconds left before |end_time|, never less than zero."""
    return max(0, end_time - time.time())

  # NOTE(review): the header of this method is not present in the reviewed
  # text; the name `_AddNewTab` is an assumption. It is only called from
  # _RefreshNavigationTabs() in this class.
  def _AddNewTab(self):
    """Adds a new tab to the browser.

    Raises:
      exceptions.Error: if creating the tab still fails on the last attempt.
    """
    # Adding a new tab requires making a request over devtools. This can fail
    # for a variety of reasons. Retry 3 times.
    retry_count = 3
    for i in range(retry_count):
      try:
        self._navigation_tabs.append(self._browser.tabs.New())
      except exceptions.Error:
        # Out of attempts: let the failure reach the caller.
        if i == retry_count - 1:
          raise
      else:
        # The tab was created; stop so that only one tab is added per call.
        break

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()]
    self._navigation_tabs = live_tabs

    # Top the list back up to the configured batch size.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._AddNewTab()

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a useable state from
    self._navigation_tabs. The tab is not removed from self.browser.tabs,
    since there is no guarantee that the tab can be safely removed."""
    self._navigation_tabs.remove(tab)

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab.

    Args:
      tab: The tab to query.
      timeout: Passed to the tab's JavaScript evaluation call as its timeout.

    Returns:
      The result of evaluating document.URL in the tab, or None when the
      evaluation raises exceptions.Error.
    """
    try:
      return tab.EvaluateJavaScript('document.URL', timeout)
    except exceptions.Error:
      return None

  def _WaitForUrlToChange(self, tab, initial_url, timeout):
    """Waits for the tab to navigate away from its initial url.

    Returns once the retrieved url differs from |initial_url| (a failed
    retrieval yields None, which also counts as different from a non-None
    |initial_url|) or once |timeout| seconds have elapsed.
    """
    end_time = time.time() + timeout
    while True:
      seconds_to_wait = self._SecondsUntil(end_time)
      if seconds_to_wait <= 0:
        break

      current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
      if current_url != initial_url:
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      # NOTE(review): the sleep duration is not present in the reviewed text;
      # 10 ms is an assumption - confirm.
      time.sleep(0.01)

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it.
    """
    # Do not wait for the navigation to commit; the waiting is done for the
    # whole batch afterwards.
    timeout_in_seconds = 0

    queued_tabs = []
    for tab, url in batch:
      initial_url = self._RetrieveTabUrl(
          tab, self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS)

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except exceptions.Error:
        # We expect a time out. It's possible for other problems to arise, but
        # this method is not responsible for dealing with them. Ignore all
        # of them here.
        pass

      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
          to have already been sent a navigation command.
    """
    # One deadline is shared by the whole batch.
    end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS
    for tab, initial_url in queued_tabs:
      seconds_to_wait = self._SecondsUntil(end_time)
      if seconds_to_wait <= 0:
        break

      # Since we don't wait any time for the tab url navigation to commit, it's
      # possible that the tab hasn't started navigating yet.
      self._WaitForUrlToChange(tab, initial_url, seconds_to_wait)

      seconds_to_wait = self._SecondsUntil(end_time)
      try:
        tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)
      except exceptions.TimeoutException:
        # A page that is merely slow keeps its tab.
        pass
      except exceptions.Error:
        # If any error occurs, remove the tab. It's probably in an
        # unrecoverable state.
        self._RemoveNavigationTab(tab)

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns an array of urls to navigate to, given a url_iterator.

    At most self._NUM_TABS urls are taken; fewer are returned when the
    iterator runs out first.
    """
    return list(itertools.islice(url_iterator, self._NUM_TABS))

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    # iter() leaves a real iterator untouched and keeps a position across
    # batches if a subclass hands back a plain sequence.
    url_iterator = iter(self.GetUrlIterator())
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair each url with one usable tab.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break