1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import time

from profile_creators import profile_extender
from telemetry.core import browser_finder
from telemetry.core import browser_finder_exceptions
from telemetry.core import exceptions
class FastNavigationProfileExtender(profile_extender.ProfileExtender):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.

  Subclasses must override GetUrlIterator() and
  ShouldExitAfterBatchNavigation(); CleanUpAfterBatchNavigation() is an
  optional hook.
  """

  # Pause between successive URL polls in _WaitForUrlToChange(), so that
  # polling does not contend with the navigation it is waiting on.
  # NOTE(review): the exact value is a small, arbitrary delay - tune if needed.
  _URL_POLL_INTERVAL_IN_SECONDS = 0.01

  def __init__(self, maximum_batch_size):
    """Initializer.

    Args:
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__()

    # Tabs that can still be navigated successfully, i.e. tabs that have not
    # crashed and are not stalled in JavaScript. Kept up to date by
    # _RefreshNavigationTabs() and _RemoveNavigationTab().
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of time to wait for a batch of pages to finish loading.
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10

    # The default amount of time to wait for the retrieval of the URL of a tab.
    self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1

  def Run(self, finder_options):
    """Extends the profile.

    Args:
      finder_options: An instance of BrowserFinderOptions that contains the
          directory of the input profile, the directory to place the output
          profile, and sufficient information to choose a specific browser
          binary.
    """
    try:
      self.SetUp(finder_options)
      self._PerformNavigations()
    finally:
      # Release whatever SetUp() acquired even if navigation raised.
      # NOTE(review): TearDown() is assumed to be provided by ProfileExtender
      # as the counterpart of SetUp() - confirm against the base class.
      self.TearDown()

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.
    """
    raise NotImplementedError()

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  @staticmethod
  def _SecondsRemaining(end_time):
    """Returns the number of seconds left until |end_time|, never negative.

    Args:
      end_time: A deadline expressed on the time.time() clock.
    """
    return max(0, end_time - time.time())

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    self._navigation_tabs = [
        tab for tab in self._navigation_tabs if tab.IsAlive()]

    # Top the pool back up with fresh tabs to replace any that were dropped.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._navigation_tabs.append(self.browser.tabs.New())

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a useable state from
    self._navigation_tabs. The tab is not removed from self.browser.tabs,
    since there is no guarantee that the tab can be safely removed."""
    self._navigation_tabs.remove(tab)

  def _GetPossibleBrowser(self, finder_options):
    """Return a possible_browser with the given options.

    Also records the type of the browser that was found in
    finder_options.browser_options.browser_type.

    Raises:
      browser_finder_exceptions.BrowserFinderException: No browser matched
          |finder_options|.
    """
    possible_browser = browser_finder.FindBrowser(finder_options)
    if not possible_browser:
      raise browser_finder_exceptions.BrowserFinderException(
          'No browser found.\n\nAvailable browsers:\n%s\n' %
          '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options)))
    finder_options.browser_options.browser_type = (
        possible_browser.browser_type)

    return possible_browser

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab.

    Args:
      tab: The tab to query.
      timeout: Timeout passed through to tab.EvaluateJavaScript().

    Returns:
      The value of document.URL in the tab, or None if evaluating it raised
      an exceptions.Error.
    """
    try:
      return tab.EvaluateJavaScript('document.URL', timeout)
    except exceptions.Error:
      return None

  def _WaitForUrlToChange(self, tab, initial_url, timeout):
    """Waits for the tab to navigate away from its initial url.

    Returns once the tab reports a URL different from |initial_url| (a failed
    retrieval, which yields None, counts as different from a non-None
    |initial_url|), or once |timeout| seconds have elapsed.
    """
    end_time = time.time() + timeout
    while True:
      seconds_to_wait = self._SecondsRemaining(end_time)
      if seconds_to_wait <= 0:
        break

      current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
      if current_url != initial_url:
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      time.sleep(self._URL_POLL_INTERVAL_IN_SECONDS)

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it.
    """
    # Do not wait for the navigation to commit; just issue the command.
    timeout_in_seconds = 0

    queued_tabs = []
    for tab, url in batch:
      initial_url = self._RetrieveTabUrl(
          tab, self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS)

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except exceptions.Error:
        # We expect a time out. It's possible for other problems to arise, but
        # this method is not responsible for dealing with them. Ignore all
        # exceptions.
        pass

      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    All tabs share a single deadline of
    self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS from the time this method is
    called. Tabs that raise a non-timeout error are removed from
    self._navigation_tabs.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
          to have already been sent a navigation command.
    """
    end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS
    for tab, initial_url in queued_tabs:
      seconds_to_wait = self._SecondsRemaining(end_time)
      if seconds_to_wait <= 0:
        break

      # Since we don't wait any time for the tab url navigation to commit, it's
      # possible that the tab hasn't started navigating yet.
      self._WaitForUrlToChange(tab, initial_url, seconds_to_wait)

      # Waiting for the URL change consumed part of the budget; recompute it.
      seconds_to_wait = self._SecondsRemaining(end_time)

      try:
        tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)
      except exceptions.TimeoutException:
        # Ignore time outs.
        pass
      except exceptions.Error:
        # If any error occurs, remove the tab. it's probably in an
        # unrecoverable state.
        self._RemoveNavigationTab(tab)

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns an array of urls to navigate to, given a url_iterator.

    At most self._NUM_TABS urls are taken from |url_iterator|; fewer are
    returned if the iterator is exhausted first.
    """
    urls = []
    for _ in range(self._NUM_TABS):
      try:
        # The builtin next() works for both Python 2 and Python 3 iterators.
        urls.append(next(url_iterator))
      except StopIteration:
        break
    return urls

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    url_iterator = self.GetUrlIterator()
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair each url with a tab. There are never more urls than tabs, since
      # both are bounded by self._NUM_TABS, so zip() drops only surplus tabs.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break