# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Does scraping for all currently-known versions of Chrome."""
import types

import pywintypes

from drivers import keyboard
from drivers import mouse
from drivers import windowing
# TODO: this has moved, use some logic to find it. For now, fall back to
# this hard-coded location whenever the caller does not supply a path.
DEFAULT_PATH = r"k:\chrome.exe"
def InvokeBrowser(path):
    """Invoke the Chrome browser.

    Args:
      path: full path to browser

    Returns:
      A tuple of (main window, process handle, address bar, render pane).
      The process handle is None when an already-open browser window was
      reused, because no new process was started in that case.
    """
    # Reuse an existing instance of the browser if we can find one. This
    # may not work correctly, especially if the window is behind other windows.

    # TODO(jhaas): make this work with Vista
    wnds = windowing.FindChildWindows(0, "Chrome_XPFrame")
    if wnds:
        # NOTE(review): the reuse branch is reconstructed from the comment
        # above; confirm that taking the first match is the intended choice.
        wnd = wnds[0]
        proc = None
    else:
        # No existing frame was found, so launch a fresh browser.
        (proc, wnd) = windowing.InvokeAndWait(path)

    # Get windows we'll need
    address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit")
    # NOTE(review): GetChromeRenderPane is not defined in the visible part of
    # this file; presumably it is supplied by another module -- confirm.
    render_pane = GetChromeRenderPane(wnd)

    return (wnd, proc, address_bar, render_pane)
def Scrape(urls, outdir, size, pos, timeout, kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape (a single URL string is also accepted)
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. Recognised keys:
        "path": browser executable to launch (defaults to DEFAULT_PATH)
        "filename": output file name, or a callable mapping a URL to one

    Returns:
      None if success, else an error string
    """
    # An absent or empty "path" entry falls back to the default location.
    path = kwargs.get("path") or DEFAULT_PATH

    (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

    # Resize and reposition the frame
    windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

    # Visit each URL we're given; a bare string is treated as a one-item list
    if isinstance(urls, types.StringTypes):
        urls = [urls]

    timedout = False

    for url in urls:
        # Double-click in the address bar, type the name, and press Enter
        mouse.ClickInWindow(address_bar)
        keyboard.TypeString(url, 0.1)
        keyboard.TypeString("\n")

        # Wait for the page to finish loading; a negative result is treated
        # as a timeout.
        load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
        timedout = load_time < 0

        if timedout:
            # Stop at the first page that fails to load.
            break

        # Scrape the page
        image = windowing.ScrapeWindow(render_pane)

        # Work out where to save it: an explicit name, a name computed from
        # the URL by the caller, or one derived from the URL and outdir.
        if "filename" in kwargs:
            if callable(kwargs["filename"]):
                filename = kwargs["filename"](url)
            else:
                filename = kwargs["filename"]
        else:
            filename = windowing.URLtoFilename(url, outdir, ".bmp")

        # NOTE(review): assumes the object returned by ScrapeWindow exposes
        # save(filename) -- confirm against the windowing driver.
        image.save(filename)

    # Only close the browser when a process handle is available.
    if proc:
        windowing.SetForegroundWindow(wnd)

        # Send Alt-F4, then wait for process to end
        keyboard.TypeString(r"{\4}", use_modifiers=True)
        if not windowing.WaitForProcessExit(proc, timeout):
            windowing.EndProcess(proc)
            # NOTE(review): error strings are reconstructed from the
            # docstrings in this file ("crashed" / "timeout") -- confirm.
            return "crashed"

    if timedout:
        return "timeout"

    return None
def Time(urls, size, timeout, kwargs):
    """Measure how long it takes to load each of a series of URLs

    Args:
      urls: list of URLs to time (a single URL string is also accepted)
      size: size of browser window to use
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. Recognised keys:
        "path": browser executable to launch (defaults to DEFAULT_PATH)

    Returns:
      A list of tuples (url, time). "time" can be "crashed" or "timeout"
    """
    # An absent or empty "path" entry falls back to the default location.
    path = kwargs.get("path") or DEFAULT_PATH

    # Visit each URL we're given; a bare string is treated as a one-item list
    if isinstance(urls, types.StringTypes):
        urls = [urls]

    # proc is reset to None whenever the browser has been closed or is
    # believed to have crashed, so the next URL starts a fresh one.
    # NOTE(review): the loop/try structure below is reconstructed from the
    # surviving statements and comments -- confirm against the original.
    proc = None
    ret = []

    for url in urls:
        try:
            # Invoke the browser if necessary
            if not proc:
                (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

                # Resize and reposition the frame
                windowing.MoveAndSizeWindow(wnd, (0, 0), size, render_pane)

            # Double-click in the address bar, type the name, and press Enter
            mouse.ClickInWindow(address_bar)
            keyboard.TypeString(url, 0.1)
            keyboard.TypeString("\n")

            # Wait for the page to finish loading; a negative result is
            # treated as a timeout.
            load_time = windowing.WaitForThrobber(
                wnd, (20, 16, 36, 32), timeout)

            if load_time < 0:
                load_time = "timeout"

                # Send an alt-F4 to make the browser close; if this times out,
                # we've probably got a crash
                windowing.SetForegroundWindow(wnd)

                keyboard.TypeString(r"{\4}", use_modifiers=True)
                if not windowing.WaitForProcessExit(proc, timeout):
                    windowing.EndProcess(proc)
                    load_time = "crashed"
                proc = None
        except pywintypes.error:
            # A Win32 error while driving the browser is reported as a crash.
            proc = None
            load_time = "crashed"

        ret.append((url, load_time))

    # Close the browser if one is still running after the last URL.
    if proc:
        windowing.SetForegroundWindow(wnd)
        keyboard.TypeString(r"{\4}", use_modifiers=True)
        if not windowing.WaitForProcessExit(proc, timeout):
            windowing.EndProcess(proc)

    return ret
def main():
    """Run a small manual self-test: scrape three sites into a fixed folder.

    Returns:
      0 once the scrape call has returned.
    """
    # We're being invoked rather than imported, so run some tests
    path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
    windowing.PreparePath(path)

    # Scrape requires a timeout and a kwargs mapping in addition to the
    # window geometry; omitting them raises TypeError.
    # NOTE(review): the timeout value is a placeholder chosen for this
    # self-test, in whatever units Scrape's timeout argument uses -- adjust.
    timeout = 60

    # Scrape three sites and save the results
    Scrape(
        [
            "http://www.microsoft.com",
            "http://www.google.com",
            "http://www.sun.com",
        ],
        path, (1024, 768), (0, 0), timeout, {})
    return 0


if __name__ == "__main__":
    main()