Remove old about scheme URL constants.
[chromium-blink-merge.git] / chrome / tools / webforms_aggregator_unittests.py
blob 00ea2bde1c3ed7ae15ae2b5948dc36a0120948ab
1 #!/usr/bin/env python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 import logging
7 import os
8 import subprocess
9 import tempfile
10 import unittest
12 # Same name as the aggregator module name.
13 import webforms_aggregator
# Attach a stream handler to the logger registered under the aggregator
# module's name, so that module's log records are shown while the tests run.
logger = logging.getLogger(webforms_aggregator.__name__)

# Commenting out the following line will set logger level to default: WARNING
logger.setLevel(logging.INFO)

console = logging.StreamHandler()
logger.addHandler(console)
23 class WebformsAggregatorTest(unittest.TestCase):
24 """Unit tests for the webforms_aggregator module."""
25 PORT1 = 8002
26 PORT2 = 8003
28 HOME_CONTENT = """
29 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" \
30 "http://www.w3.org/TR/html4/loose.dtd">
31 <html>
32 <head>
33 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
34 <title>%s</title>
35 </head>
36 <body>
37 <h1>%s</h1>
38 <p>This is a mock site. Its mere purpose is to contribute towards testing \
39 the aggregator crawler.</p>
40 <ul>
41 <li><a href="%s">page1</a></li>
42 <li><a href="%s">page2</a></li>
43 <li><a href="%s">page3</a></li>
44 </ul>
45 <hr>
46 <p>
47 <a href="%s">sign in</a>
48 </p>
49 </body>
50 </html>
51 """
53 SIMPLE_PAGE_CONTENT = """
54 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" \
55 "http://www.w3.org/TR/html4/loose.dtd">
56 <html>
57 <head>
58 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
59 <title>%s</title>
60 </head>
61 <body>
62 <h1>%s</h1>
63 <p>%s</p>
64 <ul>
65 <li><a href="%s">%s</a></li>
66 <li><a href="%s">%s</a></li>
67 </ul>
68 <hr>
69 <p>
70 <a href="%s">return to home page</a>
71 </p>
72 </body>
73 </html>
74 """
76 SIGNIN_CONTENT = """
77 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" \
78 "http://www.w3.org/TR/html4/loose.dtd">
79 <html>
80 <head>
81 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
82 <title>%s</title>
83 </head>
84 <body>
85 <h1>Sign in!</h1>
86 <h3>%s</h3>
87 <form>
88 <label>User name: </label><input type="text"><br><br>
89 <label>password: </label><input type="password"><br><br>
90 <input type="submit" value="Sign in">
91 </form>
92 <hr>
93 <p><a href="%s">return to home page</a></p>
94 </body>
95 </html>
96 """
98 REG_CONTENT = """
99 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" \
100 "http://www.w3.org/TR/html4/loose.dtd">
101 <html>
102 <head>
103 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
104 <title>%s</title>
105 </head>
106 <body>
107 <h1>Create a user account!</h1>
109 <h3>Enter your data below:</h3>
110 <form method="get">
111 <label>First name: </label><input type="text"><br><br>
112 <label>Surname: </label><input type="text"><br><br>
113 <label>User name: </label><input type="text"><br><br>
114 <label>password: </label><input type="password"><br><br>
115 <label>retype password: </label><input type="password"><br><br>
116 <input type="submit" value="Register">
117 </form>
118 <hr>
119 <p><a href="%s">return to home page</a></p>
120 </body>
121 </html>
124 def CreateMockSiteOne(self):
125 """Site One has a registration form.
127 self.files['site1_home'] = 'site1_index.html'
128 self.files['site1_page1'] = 'site1_page1.html'
129 self.files['site1_page2'] = 'site1_page2.html'
130 self.files['site1_page3'] = 'site1_page3.html'
131 self.files['site1_signin'] = 'site1_signin.html'
132 self.files['site1_reg'] = 'site1_register.html'
134 file_content = {}
135 file_content[self.files['site1_home']] = self.HOME_CONTENT % (
136 'Site One home page', 'Welcome to site one. It has a reg page!',
137 self.files['site1_page1'], self.files['site1_page2'],
138 self.files['site1_page3'], self.files['site1_signin'])
140 file_content[self.files['site1_page1']] = self.SIMPLE_PAGE_CONTENT % (
141 'Site One page 1',
142 'Page 1!', 'This is a useless page. It does almost nothing.',
143 self.files['site1_page2'], 'page 2', self.files['site1_page3'],
144 'page 3', self.files['site1_home'])
146 file_content[self.files['site1_page2']] = self.SIMPLE_PAGE_CONTENT % (
147 'Site One page 2', 'Page 2!',
148 'This is another useless page. It does almost what the page 1 does.',
149 self.files['site1_page1'], 'page 1', self.files['site1_page3'],
150 'page 3', self.files['site1_home'])
152 file_content[self.files['site1_page3']] = self.SIMPLE_PAGE_CONTENT % (
153 'Site One page 3', 'Page 3!',
154 "This is the last useless page. It doesn't do anything useful at all.",
155 self.files['site1_page1'], 'page 1', self.files['site1_page2'],
156 'page 2', self.files['site1_home'])
158 file_content[self.files['site1_signin']] = self.SIGNIN_CONTENT % (
159 'Site One signin',
160 'If you don\'t have a user account click <a href="%s">here</a>.' \
161 % self.files['site1_reg'],
162 self.files['site1_home'])
164 file_content[self.files['site1_reg']] = self.REG_CONTENT % (
165 'Site One signin', self.files['site1_home'])
167 for filename, content in file_content.iteritems():
168 f = open(filename, 'w')
169 try:
170 f.write(content)
171 finally:
172 f.close()
174 def CreateMockSiteTwo(self):
175 """ Site Two has no registration page."""
177 self.files['site2_home'] = 'site2_index.html'
178 self.files['site2_page1'] = 'site2_page1.html'
179 self.files['site2_page2'] = 'site2_page2.html'
180 self.files['site2_page3'] = 'site2_page3.html'
181 self.files['site2_signin'] = 'site2_signin.html'
183 file_content = {}
184 file_content[self.files['site2_home']] = self.HOME_CONTENT % (
185 'Site Two home page', 'Welcome to site two. It has no reg page!',
186 self.files['site2_page1'], self.files['site2_page2'],
187 self.files['site2_page3'], self.files['site2_signin'])
189 file_content[self.files['site2_page1']] = self.SIMPLE_PAGE_CONTENT % (
190 'Site Two page 1',
191 'Page 1!', 'This is a useless page. It does almost nothing.',
192 self.files['site2_page2'], 'page 2', self.files['site2_page3'],
193 'page 3', self.files['site2_home'])
195 file_content[self.files['site2_page2']] = self.SIMPLE_PAGE_CONTENT % (
196 'Site Two page 2', 'Page 2!',
197 'This is another useless page. It does almost what the page 1 does.',
198 self.files['site2_page1'], 'page 1', self.files['site2_page3'],
199 'page 3', self.files['site2_home'])
201 file_content[self.files['site2_page3']] = self.SIMPLE_PAGE_CONTENT % (
202 'Site Two page 3', 'Page 3!',
203 "This is the last useless page. It doesn't do anything useful at all.",
204 self.files['site2_page1'], 'page 1', self.files['site2_page2'],
205 'page 2', self.files['site2_home'])
207 file_content[self.files['site2_signin']] = self.SIGNIN_CONTENT % (
208 'Site Two signin', 'You cannot register online with this site.',
209 self.files['site2_home'])
211 for filename, content in file_content.iteritems():
212 f = open(filename, 'w')
213 try:
214 f.write(content)
215 finally:
216 f.close()
218 def setUp(self):
219 self.cwd = os.getcwdu()
220 self.temp_dir = tempfile.mkdtemp()
221 os.chdir(self.temp_dir)
223 self.files = {}
225 self.CreateMockSiteOne()
226 self.CreateMockSiteTwo()
227 self.files['cookie'] = 'test.cookie'
228 self.url1 = 'http://localhost:%s/%s' % (self.PORT1,
229 self.files['site1_home'])
230 self.url2 = 'http://localhost:%s/%s' % (self.PORT2,
231 self.files['site2_home'])
232 self.domain1 = 'localhost:%s' %self.PORT1
233 self.files['url'] = 'urls.txt'
234 url_file_handler = open(self.files['url'], 'w')
235 try:
236 url_file_handler.write('URLs to crawl:')
237 url_file_handler.write(os.linesep)
238 for url in (self.url1, self.url2):
239 url_file_handler.write(url)
240 url_file_handler.write(os.linesep)
241 finally:
242 url_file_handler.close()
244 command_line = 'python -u -m SimpleHTTPServer %s' % self.PORT1
245 args = command_line.split()
246 self.server1 = subprocess.Popen(
247 args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
248 self.server1.stdout.readline() # Needed in order for the server to start up
250 command_line = 'python -u -m SimpleHTTPServer %s' % self.PORT2
251 args = command_line.split()
252 self.server2 = subprocess.Popen(
253 args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
254 self.server2.stdout.readline() # Needed in order for the server to start up
256 def tearDown(self):
257 self.server1.terminate()
258 self.server2.terminate()
260 for filename in self.files.values():
261 if os.path.isfile(filename):
262 os.unlink(filename)
263 os.chdir(self.cwd)
264 os.rmdir(self.temp_dir)
266 def testRetrieverDownloadsPage(self):
267 """Verify the retriever can download a page."""
268 r = webforms_aggregator.Retriever(self.url1, self.domain1,
269 self.files['cookie'])
270 self.assertTrue(r.Download(),
271 msg='Retriever could not download "%s"' % self.url1)
273 def testCrawlerFindsRegPageFromUrl(self):
274 """Verify that the crawler is able to find a reg page from the given URL."""
275 c = webforms_aggregator.Crawler(self.url1)
276 self.assertTrue(
277 c.Run(), msg='Crawler could not find the reg page of "%s"' % self.url1)
279 def testCrawlerCannotFindNonExistentRegPageFromUrl(self):
280 """Verify that the crawler won't find a non existent reg page
281 from the given URL."""
282 c = webforms_aggregator.Crawler(self.url2)
283 self.assertFalse(
284 c.Run(),
285 msg='Crawler found a non existent reg page of "%s"' % self.url1)
287 def testThreadedCrawlerFindsRegPageFromUrlsFile(self):
288 """Verify the threaded crawler finds reg page from a file of URLs."""
289 c = webforms_aggregator.ThreadedCrawler(self.files['url'])
290 self.assertNotEqual(
291 c.Run(), -1,
292 msg='Threaded crawler could not find the reg page from the URLs file')
if __name__ == '__main__':
  # Run only this file's test case, with verbose per-test output.
  suite = unittest.TestLoader().loadTestsFromTestCase(
      WebformsAggregatorTest)
  result = unittest.TextTestRunner(verbosity=2).run(suite)
  # Exit with a non-zero status when any test failed or errored, so that
  # callers of this script can detect the failure.
  raise SystemExit(0 if result.wasSuccessful() else 1)