cygprofile: increase timeouts to allow showing web contents
[chromium-blink-merge.git] / chrome / common / extensions / docs / server2 / document_parser_test.py
blob855561a06ff4e603e4015821bcb78b3fbf513fda
1 #!/usr/bin/env python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 import unittest
8 from document_parser import ParseDocument, RemoveTitle
# HTML fixture for testWholeDocument, which feeds it to both RemoveTitle and
# ParseDocument. That test asserts warning positions by line and column, so
# changing this literal's layout will likely require updating those numbers.
11 _WHOLE_DOCUMENT = '''
12 Preamble before heading.
14 <h1 id='main' class='header'>Main header</h1>
15 Some intro to the content.
17 <h2 id='banana' class='header' title=''>Bananas</h2>
18 Something about bananas.
20 <h2 id='orange' title='hello'>Oranges</h2>
21 Something about oranges.
23 <h3 id='valencia'>Valencia Oranges</h3>
24 A description of valencia oranges.
26 <h3 id='seville'>Seville Oranges</h3>
27 A description of seville oranges.
29 <h2>Grapefruit</h3>
30 Grapefruit closed a h2 with a h3. This should be a warning.
32 <h1 id='not-main'>Not the main header</h1>
33 But it should still show up in the TOC as though it were an h2.
35 <h2>Not <h3>a banana</h2>
36 The embedded h3 should be ignored.
38 <h4>It's a h4</h4>
39 h4 are part of the document structure, but this is not inside a h3.
41 <h3>Plantains</h3>
42 Now I'm just getting lazy.
44 <h4>Another h4</h4>
45 This h4 is inside a h3 so will show up.
47 <h5>Header 5</h5>
48 Header 5s are not parsed.
49 '''
# What testWholeDocument expects RemoveTitle(_WHOLE_DOCUMENT) to return as the
# remaining document: the same fixture minus the leading
# <h1 id='main' ...>Main header</h1> element. Every other line is unchanged,
# including the later <h1 id='not-main'> heading.
52 _WHOLE_DOCUMENT_WITHOUT_TITLE = '''
53 Preamble before heading.
56 Some intro to the content.
58 <h2 id='banana' class='header' title=''>Bananas</h2>
59 Something about bananas.
61 <h2 id='orange' title='hello'>Oranges</h2>
62 Something about oranges.
64 <h3 id='valencia'>Valencia Oranges</h3>
65 A description of valencia oranges.
67 <h3 id='seville'>Seville Oranges</h3>
68 A description of seville oranges.
70 <h2>Grapefruit</h3>
71 Grapefruit closed a h2 with a h3. This should be a warning.
73 <h1 id='not-main'>Not the main header</h1>
74 But it should still show up in the TOC as though it were an h2.
76 <h2>Not <h3>a banana</h2>
77 The embedded h3 should be ignored.
79 <h4>It's a h4</h4>
80 h4 are part of the document structure, but this is not inside a h3.
82 <h3>Plantains</h3>
83 Now I'm just getting lazy.
85 <h4>Another h4</h4>
86 This h4 is inside a h3 so will show up.
88 <h5>Header 5</h5>
89 Header 5s are not parsed.
90 '''
class DocumentParserUnittest(unittest.TestCase):
  '''Tests for RemoveTitle and ParseDocument from document_parser.

  Test methods are described with comments rather than docstrings so that the
  names unittest reports for them stay exactly as they were.
  '''

  def _AssertParseResult(self, parsed, title, title_attributes, sections,
                         warnings):
    '''Checks the four top-level fields of a ParseDocument result, in the
    order title, title_attributes, sections, warnings.
    '''
    self.assertEqual(title, parsed.title)
    self.assertEqual(title_attributes, parsed.title_attributes)
    self.assertEqual(sections, parsed.sections)
    self.assertEqual(warnings, parsed.warnings)

  def _AssertLeafEntry(self, entry, name, attributes):
    '''Checks a table-of-contents entry that must have no child entries.
    '''
    self.assertEqual(name, entry.name)
    self.assertEqual(attributes, entry.attributes)
    self.assertEqual([], entry.entries)

  def testEmptyDocument(self):
    # An empty string has no <h1> for RemoveTitle to strip.
    self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle(''))

    # Title not requested: nothing is found and nothing is reported.
    self._AssertParseResult(ParseDocument(''),
                            title=None,
                            title_attributes=None,
                            sections=[],
                            warnings=[])

    # Title requested: empty title/attributes plus a warning about its absence.
    self._AssertParseResult(ParseDocument('', expect_title=True),
                            title='',
                            title_attributes={},
                            sections=[],
                            warnings=['Expected a title'])

  def testRemoveTitle(self):
    # Malformed titles: the document is expected back untouched, paired with a
    # message naming the problem.
    malformed_cases = (
      ('<h1>No closing tag', 'No closing </h1> was found'),
      ('No opening tag</h1>', 'No opening <h1> was found'),
      ('</h1>Tags in wrong order<h1>', 'The </h1> appeared before the <h1>'),
    )
    for document, message in malformed_cases:
      self.assertEqual((document, message), RemoveTitle(document))

    # Well-formed titles: only the first <h1>...</h1> is expected to be removed,
    # whatever the case of the tag names, and no message is returned.
    well_formed_cases = (
      ('<h1>First header</h1> and <h1>Second header</h1>',
       ' and <h1>Second header</h1>'),
      ('<H1>Upper case header tag</H1> hi', ' hi'),
      ('<H1>Mixed case header tag</h1> hi', ' hi'),
    )
    for document, remainder in well_formed_cases:
      self.assertEqual((remainder, None), RemoveTitle(document))

  def testOnlyTitleDocument(self):
    title_only = '<h1 id="header">heading</h1>'
    self.assertEqual(('', None), RemoveTitle(title_only))

    # A title that was not asked for is reported as a warning, not returned.
    self._AssertParseResult(ParseDocument(title_only),
                            title=None,
                            title_attributes=None,
                            sections=[],
                            warnings=['Found unexpected title "heading"'])

    # When asked for, the title text and its attributes come back cleanly.
    self._AssertParseResult(ParseDocument(title_only, expect_title=True),
                            title='heading',
                            title_attributes={'id': 'header'},
                            sections=[],
                            warnings=[])

  def testWholeDocument(self):
    self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None),
                     RemoveTitle(_WHOLE_DOCUMENT))

    parsed = ParseDocument(_WHOLE_DOCUMENT, expect_title=True)
    self.assertEqual('Main header', parsed.title)
    self.assertEqual({'id': 'main', 'class': 'header'},
                     parsed.title_attributes)
    expected_warnings = [
      'Found closing </h3> while processing a <h2> (line 19, column 15)',
      'Found multiple <h1> tags. Subsequent <h1> tags will be classified as '
      '<h2> for the purpose of the structure (line 22, column 1)',
      'Found <h3> in the middle of processing a <h2> (line 25, column 9)',
      # TODO(kalman): Re-enable this warning once the reference pages have
      # their references fixed.
      #'Found <h4> without any preceding <h3> (line 28, column 1)',
    ]
    self.assertEqual(expected_warnings, parsed.warnings)

    # The non-trivial table of contents assertions...
    self.assertEqual(1, len(parsed.sections))
    toc = parsed.sections[0].structure

    # Four top-level entries are expected; the 'banana' heading (empty title
    # attribute) is not among them.
    self.assertEqual(4, len(toc), toc)
    oranges, grapefruit, second_h1, not_a_banana = toc

    # The 'orange' heading is listed under its title attribute value, and that
    # attribute is not part of the entry's attributes.
    self.assertEqual('hello', oranges.name)
    self.assertEqual({'id': 'orange'}, oranges.attributes)
    self.assertEqual(2, len(oranges.entries))
    valencia, seville = oranges.entries

    self._AssertLeafEntry(valencia, 'Valencia Oranges', {'id': 'valencia'})
    self._AssertLeafEntry(seville, 'Seville Oranges', {'id': 'seville'})

    self._AssertLeafEntry(grapefruit, 'Grapefruit', {})

    self._AssertLeafEntry(second_h1, 'Not the main header', {'id': 'not-main'})

    self.assertEqual('Not a banana', not_a_banana.name)
    self.assertEqual({}, not_a_banana.attributes)
    self.assertEqual(2, len(not_a_banana.entries))
    stray_h4, plantains = not_a_banana.entries

    self._AssertLeafEntry(stray_h4, 'It\'s a h4', {})

    self.assertEqual('Plantains', plantains.name)
    self.assertEqual({}, plantains.attributes)
    self.assertEqual(1, len(plantains.entries))
    (another_h4,) = plantains.entries

    self._AssertLeafEntry(another_h4, 'Another h4', {})

  def testSingleExplicitSection(self):
    # A single section, one with the title inside the section, the other out.
    title_outside_section = ('<h1>Header</h1>'
                             '<section>'
                             'Just a single section here.'
                             '<h2>An inner header</h2>'
                             '</section>')
    title_inside_section = ('<section>'
                            'Another single section here.'
                            '<h1>Header</h1>'
                            '<h2>An inner header</h2>'
                            '</section>')
    for document in (title_outside_section, title_inside_section):
      parsed = ParseDocument(document, expect_title=True)
      self.assertEqual([], parsed.warnings)
      self.assertEqual('Header', parsed.title)
      self.assertEqual(1, len(parsed.sections))
      # Single-element unpacking doubles as a check for exactly one item.
      (only_section,) = parsed.sections
      (only_entry,) = only_section.structure
      self.assertEqual('An inner header', only_entry.name)

  def testMultipleSections(self):
    document = ('<h1>Header</h1>'
                '<h2>First header</h2>'
                'This content outside a section is the first section.'
                '<section>'
                'Second section'
                '<h2>Second header</h2>'
                '</section>'
                '<section>'
                'Third section'
                '<h2>Third header</h2>'
                '</section>')
    parsed = ParseDocument(document, expect_title=True)
    self.assertEqual([], parsed.warnings)
    self.assertEqual('Header', parsed.title)
    self.assertEqual(3, len(parsed.sections))
    first, second, third = parsed.sections

    # Each section is expected to contain exactly one header, in document order.
    expected_headers = ('First header', 'Second header', 'Third header')
    for section, header in zip((first, second, third), expected_headers):
      self.assertEqual(1, len(section.structure))
      self.assertEqual(header, section.structure[0].name)
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()