1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import time

from telemetry.core import browser_finder
from telemetry.core import browser_finder_exceptions
from telemetry.core import exceptions
from telemetry.core import platform
from telemetry.core.backends.chrome_inspector import devtools_http
class FastNavigationProfileExtender(object):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.
  """

  def __init__(self, maximum_batch_size):
    """Initializer.

    Args:
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__()

    # The path of the profile that the browser will use while it's running.
    # This member is initialized during SetUp().
    self._profile_path = None

    # A reference to the browser that will be performing all of the tab
    # navigations.
    # This member is initialized during SetUp().
    self._browser = None

    # The instance keeps a list of Tabs that can be navigated successfully.
    # This means that the Tab is not crashed, and is processing JavaScript in a
    # timely fashion.
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of time to wait for a batch of pages to finish loading.
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10

    # The default amount of time to wait for the retrieval of the URL of a tab.
    self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1

  def Run(self, finder_options):
    """Extends the profile.

    Args:
      finder_options: An instance of BrowserFinderOptions that contains the
          directory of the input profile, the directory to place the output
          profile, and sufficient information to choose a specific browser
          binary.
    """
    # The teardown hook is documented as guaranteed to run, so it lives in a
    # finally clause: it executes even when SetUp() or a navigation raises.
    try:
      self.SetUp(finder_options)
      self._PerformNavigations()
    finally:
      self.TearDown()

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.
    """
    raise NotImplementedError()

  def SetUp(self, finder_options):
    """Finds the browser, starts the browser, and opens the requisite number of
    tabs.

    Can be overridden by subclasses. Subclasses must call the super class
    implementation.

    Args:
      finder_options: The options used to locate and create the browser. Its
          output_profile_path attribute is recorded as the profile path.
    """
    self._profile_path = finder_options.output_profile_path
    possible_browser = self._GetPossibleBrowser(finder_options)

    assert possible_browser.supports_tab_control
    assert (platform.GetHostPlatform().GetOSName() in
            ["win", "mac", "linux"])
    self._browser = possible_browser.Create(finder_options)

  # NOTE(review): the method name is inferred from the SetUp()/teardown
  # pairing described in the docstrings - confirm against callers.
  def TearDown(self):
    """Teardown that is guaranteed to be executed before the instance is
    destroyed.

    Can be overridden by subclasses. Subclasses must call the super class
    implementation.
    """
    # SetUp() may have failed before a browser was created; in that case there
    # is nothing to close, and raising here would mask the original error.
    if self._browser:
      self._browser.Close()
      self._browser = None

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses.
    """
    pass

  @property
  def profile_path(self):
    """The profile path recorded by SetUp(), or None before SetUp() runs."""
    return self._profile_path

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self._browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()]
    self._navigation_tabs = live_tabs

    # Top the list back up with fresh tabs.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._navigation_tabs.append(self._browser.tabs.New())

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a useable state from
    self._navigation_tabs. The tab is not removed from self._browser.tabs,
    since there is no guarantee that the tab can be safely removed."""
    self._navigation_tabs.remove(tab)

  def _GetPossibleBrowser(self, finder_options):
    """Return a possible_browser with the given options.

    Raises:
      browser_finder_exceptions.BrowserFinderException: No browser was found.
    """
    possible_browser = browser_finder.FindBrowser(finder_options)
    if not possible_browser:
      raise browser_finder_exceptions.BrowserFinderException(
          'No browser found.\n\nAvailable browsers:\n%s\n' %
          '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options)))
    finder_options.browser_options.browser_type = (
        possible_browser.browser_type)

    return possible_browser

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab.

    Args:
      tab: The tab to query.
      timeout: Passed through to the tab's JavaScript evaluation.

    Returns:
      The value of document.URL in the tab, or None when the devtools
      connection to the tab fails.
    """
    try:
      return tab.EvaluateJavaScript('document.URL', timeout)
    except (exceptions.DevtoolsTargetCrashException,
            devtools_http.DevToolsClientConnectionError,
            devtools_http.DevToolsClientUrlError):
      # NOTE(review): fallback value restored from context - confirm that
      # callers expect None for an unreachable tab.
      return None

  def _WaitForUrlToChange(self, tab, initial_url, timeout):
    """Waits for the tab to navigate away from its initial url.

    Args:
      tab: The tab to poll.
      initial_url: The url the tab had before the navigation was requested.
      timeout: The maximum number of seconds to keep polling.
    """
    end_time = time.time() + timeout
    while True:
      seconds_to_wait = end_time - time.time()
      seconds_to_wait = max(0, seconds_to_wait)

      if seconds_to_wait == 0:
        break

      current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
      if current_url != initial_url:
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      # NOTE(review): the sleep interval is a reconstructed value - confirm.
      time.sleep(0.01)

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it.
    """
    timeout_in_seconds = 0

    queued_tabs = []
    for tab, url in batch:
      initial_url = self._RetrieveTabUrl(
          tab, self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS)

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except (exceptions.DevtoolsTargetCrashException,
              devtools_http.DevToolsClientConnectionError,
              devtools_http.DevToolsClientUrlError):
        # We expect a time out. It's possible for other problems to arise, but
        # this method is not responsible for dealing with them. Ignore all
        # exceptions.
        pass

      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
          to have already been sent a navigation command.
    """
    # A single deadline is shared by the whole batch.
    end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS
    for tab, initial_url in queued_tabs:
      seconds_to_wait = end_time - time.time()
      seconds_to_wait = max(0, seconds_to_wait)

      if seconds_to_wait == 0:
        break

      # Since we don't wait any time for the tab url navigation to commit, it's
      # possible that the tab hasn't started navigating yet.
      self._WaitForUrlToChange(tab, initial_url, seconds_to_wait)

      seconds_to_wait = end_time - time.time()
      seconds_to_wait = max(0, seconds_to_wait)

      try:
        tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)
      except exceptions.TimeoutException:
        # Ignore time outs.
        pass
      except (exceptions.DevtoolsTargetCrashException,
              devtools_http.DevToolsClientConnectionError,
              devtools_http.DevToolsClientUrlError):
        # If any error occurs, remove the tab. it's probably in an
        # unrecoverable state.
        self._RemoveNavigationTab(tab)

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns an array of urls to navigate to, given a url_iterator.

    At most self._NUM_TABS urls are taken; fewer are returned when the
    iterator runs out first.
    """
    urls = []
    # range() and the builtin next() work on both Python 2 and Python 3,
    # unlike xrange() and the iterator's .next() method.
    for _ in range(self._NUM_TABS):
      try:
        urls.append(next(url_iterator))
      except StopIteration:
        break
    return urls

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    url_iterator = self.GetUrlIterator()
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair the i-th navigation tab with the i-th url. The refresh above
      # leaves self._NUM_TABS tabs, and at most that many urls were fetched.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break