1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import itertools
import time

from profile_creators import profile_extender
from telemetry.core import browser_finder
from telemetry.core import browser_finder_exceptions
from telemetry.core import exceptions


class FastNavigationProfileExtender(profile_extender.ProfileExtender):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.

  Subclasses must override GetUrlIterator() and
  ShouldExitAfterBatchNavigation(); CleanUpAfterBatchNavigation() is an
  optional hook.
  """

  def __init__(self, maximum_batch_size):
    """Initializer.

    Args:
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__()

    # The instance keeps a list of Tabs that can be navigated successfully.
    # This means that the Tab is not crashed, and is processing JavaScript in a
    # timely fashion.
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of time to wait for a batch of pages to finish loading.
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10

    # The default amount of time to wait for the retrieval of the URL of a tab.
    self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1

  def Run(self, finder_options):
    """Extends the profile.

    Args:
      finder_options: An instance of BrowserFinderOptions that contains the
          directory of the input profile, the directory to place the output
          profile, and sufficient information to choose a specific browser
          binary.
    """
    try:
      self.SetUp(finder_options)
      self._PerformNavigations()
    finally:
      # NOTE(review): the try/finally + TearDown() pairing was reconstructed;
      # confirm that ProfileExtender provides TearDown().
      self.TearDown()

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.
    """
    raise NotImplementedError()

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  @property
  def profile_path(self):
    """The value of self._profile_path."""
    return self._profile_path

  @staticmethod
  def _SecondsRemaining(end_time):
    """Returns the number of seconds until |end_time|, clamped at 0.

    Args:
      end_time: A deadline expressed on the time.time() clock.
    """
    return max(0, end_time - time.time())

  def _AddNewTab(self):
    """Adds a new tab to the browser.

    Raises:
      exceptions.Error: If the tab could not be created on the final attempt.
    """
    # Adding a new tab requires making a request over devtools. This can fail
    # for a variety of reasons. Retry 3 times.
    retry_count = 3
    for attempt in range(retry_count):
      try:
        new_tab = self._browser.tabs.New()
      except exceptions.Error:
        # Only the last failure is propagated; earlier ones are retried.
        if attempt == retry_count - 1:
          raise
        continue
      self._navigation_tabs.append(new_tab)
      return

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    self._navigation_tabs = [
        tab for tab in self._navigation_tabs if tab.IsAlive()]

    # Top the pool back up to the configured batch size.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._AddNewTab()

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a usable state from
    self._navigation_tabs. The tab is not removed from self.browser.tabs,
    since there is no guarantee that the tab can be safely removed.

    Args:
      tab: The tab to drop; it must currently be in self._navigation_tabs.
    """
    self._navigation_tabs.remove(tab)

  def _GetPossibleBrowser(self, finder_options):
    """Returns a possible_browser with the given options.

    As a side effect, finder_options.browser_options.browser_type is set to
    the browser type of the browser that was found.

    Args:
      finder_options: The options passed to browser_finder.FindBrowser().

    Raises:
      browser_finder_exceptions.BrowserFinderException: If no browser matches.
    """
    possible_browser = browser_finder.FindBrowser(finder_options)
    if not possible_browser:
      raise browser_finder_exceptions.BrowserFinderException(
          'No browser found.\n\nAvailable browsers:\n%s\n' %
          '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options)))
    finder_options.browser_options.browser_type = (
        possible_browser.browser_type)

    return possible_browser

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab.

    Args:
      tab: The tab to query.
      timeout: The timeout passed to tab.EvaluateJavaScript().

    Returns:
      The result of evaluating 'document.URL' in the tab, or None if the
      evaluation raised exceptions.Error.
    """
    try:
      return tab.EvaluateJavaScript('document.URL', timeout)
    except exceptions.Error:
      return None

  def _WaitForUrlToChange(self, tab, initial_url, timeout):
    """Waits for the tab to navigate away from its initial url.

    Returns as soon as the retrieved url differs from |initial_url| (a failed
    retrieval yields None, which also counts as different from a non-None
    |initial_url|), or once |timeout| seconds have elapsed.

    Args:
      tab: The tab to poll.
      initial_url: The url of the tab before the navigation was requested.
      timeout: The maximum number of seconds to wait.
    """
    end_time = time.time() + timeout
    while True:
      seconds_to_wait = self._SecondsRemaining(end_time)
      if seconds_to_wait == 0:
        break

      if self._RetrieveTabUrl(tab, seconds_to_wait) != initial_url:
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      # NOTE(review): the sleep duration was reconstructed; confirm the value.
      time.sleep(0.01)

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it.
    """
    timeout_in_seconds = 0

    queued_tabs = []
    for tab, url in batch:
      initial_url = self._RetrieveTabUrl(
          tab, self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS)

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except exceptions.Error:
        # We expect a time out. It's possible for other problems to arise, but
        # this method is not responsible for dealing with them. Ignore all
        # exceptions.
        pass

      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    All tabs share a single deadline of
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS; once it has passed, the remaining
    tabs are not waited on.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
          to have already been sent a navigation command.
    """
    end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS
    for tab, initial_url in queued_tabs:
      seconds_to_wait = self._SecondsRemaining(end_time)
      if seconds_to_wait == 0:
        break

      # Since we don't wait any time for the tab url navigation to commit, it's
      # possible that the tab hasn't started navigating yet.
      self._WaitForUrlToChange(tab, initial_url, seconds_to_wait)

      # Recompute: waiting for the url change consumed part of the budget.
      seconds_to_wait = self._SecondsRemaining(end_time)

      try:
        tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)
      except exceptions.TimeoutException:
        # Ignore time outs.
        pass
      except exceptions.Error:
        # If any error occurs, remove the tab. it's probably in an
        # unrecoverable state.
        self._RemoveNavigationTab(tab)

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns a list of urls to navigate to, given a url_iterator.

    At most self._NUM_TABS urls are consumed from |url_iterator|; fewer are
    returned if the iterator is exhausted first.
    """
    # islice() stops at exhaustion, replacing the Python-2-only
    # xrange()/.next() loop with a form that also works on Python 3.
    return list(itertools.islice(url_iterator, self._NUM_TABS))

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    url_iterator = self.GetUrlIterator()
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair the i-th url with the i-th usable tab. After the refresh above
      # there are at least self._NUM_TABS tabs, and at most that many urls.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break