2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Does scraping for all known versions of IE."""
12 from drivers
import keyboard
13 from drivers
import mouse
14 from drivers
import windowing
# Version string for this scraper module.
# NOTE(review): presumably the IE build this scraper was written against;
# nothing in the visible code reads it - confirm with callers.
version = "7.0.5730.1"

# Browser executable used when the caller does not supply a path of its own.
DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"
22 """Invoke the IE browser and return the process, frame, and content window.
25 path: full path to browser
28 A tuple of (process handle, render pane)
30 if not path
: path
= DEFAULT_PATH
32 (iewnd
, ieproc
, address_bar
, render_pane
, tab_window
) = InvokeBrowser(path
)
33 return (ieproc
, iewnd
, render_pane
)
def InvokeBrowser(path):
  """Invoke the IE browser.

  Passes `path` to windowing.InvokeAndWait, then looks up three child windows
  of the resulting main window by their window-class paths.

  Args:
    path: full path to browser

  Returns:
    A tuple of (main window, process handle, address bar,
                render_pane, tab_window)
  """
  # The result is unpacked as (process, window); the tuple returned at the
  # end of this function lists them in the opposite order (window, process).
  (ieproc, iewnd) = windowing.InvokeAndWait(path)

  # Get windows we'll need
  # NOTE(review): only the three lookups are visible inside this loop in the
  # text under review; no exit or retry handling is shown and `tries` is never
  # read. Confirm against the complete file before changing this loop.
  for tries in xrange(10):
    # Edit control of the address bar.
    address_bar = windowing.FindChildWindow(
      iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
      "Address Band Root/ComboBoxEx32/ComboBox/Edit")
    # Pane returned to callers as render_pane.
    render_pane = windowing.FindChildWindow(
      iewnd, "TabWindowClass/Shell DocObject View")
    # Window in the tab band, returned to callers as tab_window.
    tab_window = windowing.FindChildWindow(
      iewnd, "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")

  return (iewnd, ieproc, address_bar, render_pane, tab_window)
67 def Scrape(urls
, outdir
, size
, pos
, timeout
=20, **kwargs
):
68 """Invoke a browser, send it to a series of URLs, and save its output.
71 urls: list of URLs to scrape
72 outdir: directory to place output
73 size: size of browser window to use
74 pos: position of browser window
75 timeout: amount of time to wait for page to load
76 kwargs: miscellaneous keyword args
79 None if success, else an error string
81 path
= r
"c:\program files\internet explorer\iexplore.exe"
83 if "path" in kwargs
and kwargs
["path"]: path
= kwargs
["path"]
85 (iewnd
, ieproc
, address_bar
, render_pane
, tab_window
) = (
88 # Resize and reposition the frame
89 windowing
.MoveAndSizeWindow(iewnd
, pos
, size
, render_pane
)
91 # Visit each URL we're given
92 if type(urls
) in types
.StringTypes
: urls
= [urls
]
98 # Double-click in the address bar, type the name, and press Enter
99 mouse
.DoubleClickInWindow(address_bar
)
100 keyboard
.TypeString(url
)
101 keyboard
.TypeString("\n")
103 # Wait for the page to finish loading
104 load_time
= windowing
.WaitForThrobber(
105 tab_window
, (6, 8, 22, 24), timeout
)
106 timedout
= load_time
< 0
112 image
= windowing
.ScrapeWindow(render_pane
)
115 if "filename" in kwargs
:
116 if callable(kwargs
["filename"]):
117 filename
= kwargs
["filename"](url
)
119 filename
= kwargs
["filename"]
121 filename
= windowing
.URLtoFilename(url
, outdir
, ".bmp")
124 windowing
.EndProcess(ieproc
)
130 def Time(urls
, size
, timeout
, **kwargs
):
131 """Measure how long it takes to load each of a series of URLs
134 urls: list of URLs to time
135 size: size of browser window to use
136 timeout: amount of time to wait for page to load
137 kwargs: miscellaneous keyword args
140 A list of tuples (url, time). "time" can be "crashed" or "timeout"
142 if "path" in kwargs
and kwargs
["path"]: path
= kwargs
["path"]
143 else: path
= DEFAULT_PATH
146 # Visit each URL we're given
147 if type(urls
) in types
.StringTypes
: urls
= [urls
]
152 # Invoke the browser if necessary
154 (wnd
, proc
, address_bar
, render_pane
, tab_window
) = InvokeBrowser(path
)
156 # Resize and reposition the frame
157 windowing
.MoveAndSizeWindow(wnd
, (0,0), size
, render_pane
)
159 # Double-click in the address bar, type the name, and press Enter
160 mouse
.DoubleClickInWindow(address_bar
)
161 keyboard
.TypeString(url
)
162 keyboard
.TypeString("\n")
164 # Wait for the page to finish loading
165 load_time
= windowing
.WaitForThrobber(
166 tab_window
, (6, 8, 22, 24), timeout
)
167 timedout
= load_time
< 0
170 load_time
= "timeout"
172 # Send an alt-F4 to make the browser close; if this times out,
173 # we've probably got a crash
174 keyboard
.TypeString(r
"{\4}", use_modifiers
=True)
175 if not windowing
.WaitForProcessExit(proc
, timeout
):
176 windowing
.EndProcess(proc
)
177 load_time
= "crashed"
179 except pywintypes
.error
:
180 load_time
= "crashed"
183 ret
.append( (url
, load_time
) )
185 # Send an alt-F4 to make the browser close; if this times out,
186 # we've probably got a crash
188 keyboard
.TypeString(r
"{\4}", use_modifiers
=True)
189 if not windowing
.WaitForProcessExit(proc
, timeout
):
190 windowing
.EndProcess(proc
)
196 # We're being invoked rather than imported, so run some tests
197 path
= r
"c:\sitecompare\scrapes\ie7\7.0.5380.11"
198 windowing
.PreparePath(path
)
200 # Scrape three sites and save the results
202 ["http://www.microsoft.com",
203 "http://www.google.com",
204 "http://www.sun.com"],
205 path
, (1024, 768), (0, 0))
209 if __name__
== "__main__":