Give names to all utility processes.
[chromium-blink-merge.git] / tools / site_compare / scrapers / firefox / firefox2.py
blob2181f588f818b5fd4ab73d1da9ad1db8fe5f74fd
1 #!/usr/bin/env python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Does scraping for Firefox 2.0."""
8 import pywintypes
9 import time
10 import types
12 from drivers import keyboard
13 from drivers import mouse
14 from drivers import windowing
16 # Default version
17 version = "2.0.0.6"
19 DEFAULT_PATH = r"c:\program files\mozilla firefox\firefox.exe"
21 # TODO(jhaas): the Firefox scraper is a bit rickety at the moment. Known
22 # issues: 1) won't work if the default profile puts toolbars in different
23 # locations, 2) uses sleep() statements rather than more robust checks,
24 # 3) fails badly if an existing Firefox window is open when the scrape
25 # is invoked. This needs to be fortified at some point.
def GetBrowser(path):
  """Invoke the Firefox browser and return the process and its windows.

  Args:
    path: full path to browser; DEFAULT_PATH is used when this is empty
      or None

  Returns:
    A tuple of (process handle, main window, render pane)
  """
  if not path:
    path = DEFAULT_PATH

  # Invoke Firefox
  (proc, wnd) = windowing.InvokeAndWait(path)

  # Get the content pane
  render_pane = windowing.FindChildWindow(
      wnd,
      "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")

  return (proc, wnd, render_pane)
def InvokeBrowser(path):
  """Attach to a running Firefox window, or start Firefox if none exists.

  Args:
    path: full path to browser; only used when a new instance is launched

  Returns:
    A tuple of (main window, process handle, render pane). The process
    handle is None when an already-open window was reused.
  """
  # Prefer an already-open browser window. This may not work correctly,
  # especially if that window is hidden behind other windows.
  open_windows = windowing.FindChildWindows(0, "MozillaUIWindowClass")
  if len(open_windows) == 0:
    # Nothing to reuse: launch Firefox ourselves
    (process, main_window) = windowing.InvokeAndWait(path)
  else:
    main_window = open_windows[0]
    process = None

  # Locate the pane that displays the page content
  content_pane = windowing.FindChildWindow(
      main_window,
      "MozillaWindowClass/MozillaWindowClass/MozillaWindowClass")

  return (main_window, process, content_pane)
def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
  """Invoke a browser, send it to a series of URLs, and save its output.

  Args:
    urls: list of URLs to scrape; a single URL string is also accepted
    outdir: directory to place output
    size: size of browser window to use
    pos: position of browser window
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args. Keys read here:
      path: full path to browser (DEFAULT_PATH when missing or empty)
      filename: output filename, or a callable taking the URL and
        returning the filename; when absent the name is derived from
        the URL and outdir

  Returns:
    None if success, else an error string ("timeout")
  """
  path = kwargs.get("path") or DEFAULT_PATH

  (wnd, proc, render_pane) = InvokeBrowser(path)

  # Resize and reposition the frame
  windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

  time.sleep(3)

  # Firefox is a bit of a pain: it doesn't use standard edit controls,
  # and it doesn't display a throbber when there's no tab. Let's make
  # sure there's at least one tab, then select the first one

  mouse.ClickInWindow(wnd)
  keyboard.TypeString("[t]", True)
  mouse.ClickInWindow(wnd, (30, 115))
  time.sleep(2)

  timedout = False

  # Visit each URL we're given. isinstance (rather than an exact type
  # match) also wraps str/unicode subclasses, which would otherwise be
  # iterated one character at a time.
  if isinstance(urls, types.StringTypes):
    urls = [urls]

  for url in urls:

    # Use keyboard shortcuts
    keyboard.TypeString("{d}", True)
    keyboard.TypeString(url)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading; a negative result is treated
    # as a timeout
    load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
    timedout = load_time < 0

    if timedout:
      break

    # Scrape the page
    image = windowing.ScrapeWindow(render_pane)

    # Save to disk
    if "filename" in kwargs:
      if callable(kwargs["filename"]):
        filename = kwargs["filename"](url)
      else:
        filename = kwargs["filename"]
    else:
      filename = windowing.URLtoFilename(url, outdir, ".bmp")
    image.save(filename)

  # Close all the tabs, cheesily
  mouse.ClickInWindow(wnd)

  # NOTE(review): unlike the equivalent loop in Time(), this one has no
  # retry cap and keeps going for as long as a browser window exists.
  while len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
    keyboard.TypeString("[w]", True)
    time.sleep(1)

  if timedout:
    return "timeout"
def Time(urls, size, timeout, **kwargs):
  """Measure how long it takes to load each of a series of URLs

  Args:
    urls: list of URLs to time; a single URL string is also accepted
    size: size of browser window to use
    timeout: amount of time to wait for page to load
    kwargs: miscellaneous keyword args. Keys read here:
      path: full path to browser (DEFAULT_PATH when missing or empty)

  Returns:
    A list of tuples (url, time). "time" can be "crashed" or "timeout"
  """
  path = kwargs.get("path") or DEFAULT_PATH
  proc = None

  # Visit each URL we're given. isinstance (rather than an exact type
  # match) also wraps str/unicode subclasses, which would otherwise be
  # iterated one character at a time.
  if isinstance(urls, types.StringTypes):
    urls = [urls]

  ret = []
  for url in urls:
    try:
      # Invoke the browser if necessary
      if not proc:
        (wnd, proc, render_pane) = InvokeBrowser(path)

        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)

        time.sleep(3)

        # Firefox is a bit of a pain: it doesn't use standard edit controls,
        # and it doesn't display a throbber when there's no tab. Let's make
        # sure there's at least one tab, then select the first one

        mouse.ClickInWindow(wnd)
        keyboard.TypeString("[t]", True)
        mouse.ClickInWindow(wnd, (30, 115))
        time.sleep(2)

      # Use keyboard shortcuts
      keyboard.TypeString("{d}", True)
      keyboard.TypeString(url)
      keyboard.TypeString("\n")

      # Wait for the page to finish loading; a negative result is treated
      # as a timeout
      load_time = windowing.WaitForThrobber(wnd, (10, 96, 26, 112), timeout)
      timedout = load_time < 0

      if timedout:
        load_time = "timeout"

      # Try to close the browser; if this fails it's probably a crash
      mouse.ClickInWindow(wnd)

      # Give the window at most five close attempts, one second apart
      count = 0
      while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
             and count < 5):
        keyboard.TypeString("[w]", True)
        time.sleep(1)
        count = count + 1

      # Still open after the retries: kill it and record a crash. This
      # overrides a "timeout" result recorded above.
      # NOTE(review): proc is None when InvokeBrowser reused an existing
      # window -- confirm windowing.EndProcess tolerates that.
      if len(windowing.FindChildWindows(0, "MozillaUIWindowClass")):
        windowing.EndProcess(proc)
        load_time = "crashed"

      # Force a fresh browser for the next URL
      proc = None
    except pywintypes.error:
      proc = None
      load_time = "crashed"

    ret.append( (url, load_time) )

  # Final cleanup of any browser still tracked.
  # NOTE(review): proc is reset to None on every path through the loop,
  # so this looks unreachable -- confirm before removing.
  if proc:
    count = 0
    while (len(windowing.FindChildWindows(0, "MozillaUIWindowClass"))
           and count < 5):
      keyboard.TypeString("[w]", True)
      time.sleep(1)
      count = count + 1
  return ret
def main():
  """Run a small self-test when the module is executed as a script.

  Prepares a fixed output directory, scrapes three well-known sites into
  it with a 1024x768 window positioned at the origin, and reports success.

  Returns:
    0, intended for use as the process exit code
  """
  output_dir = r"c:\sitecompare\scrapes\Firefox\2.0.0.6"
  windowing.PreparePath(output_dir)

  # Sites to capture for the self-test
  sites = [
      "http://www.microsoft.com",
      "http://www.google.com",
      "http://www.sun.com",
  ]
  window_size = (1024, 768)
  window_pos = (0, 0)

  Scrape(sites, output_dir, window_size, window_pos)
  return 0
if __name__ == "__main__":
  # sys is not imported at module level, so import it here; without it
  # the exit call below fails with NameError once main() has returned.
  import sys
  sys.exit(main())