3 from translate
.convert
import html2po
4 from translate
.convert
import po2html
5 from translate
.convert
import test_convert
6 from translate
.misc
import wStringIO
def html2po(self, markup):
    """Helper to convert html to po without a file.

    ``markup`` is wrapped in an in-memory ``wStringIO.StringIO`` and handed to
    ``html2po.html2po().convertfile`` under the name "test".

    Returns whatever ``convertfile`` produces; the helpers in this file read
    its ``units`` attribute.
    """
    inputfile = wStringIO.StringIO(markup)
    convertor = html2po.html2po()
    # The two trailing flags are passed positionally as False.
    # NOTE(review): their meaning is defined by convertfile - confirm there.
    outputpo = convertor.convertfile(inputfile, "test", False, False)
    # Callers use the result (``pofile = self.html2po(markup)``), so it has
    # to be returned rather than dropped.
    return outputpo
def po2html(self, posource, htmltemplate):
    """Helper to convert po to html without a file.

    Both inputs are wrapped in in-memory streams, ``po2html.converthtml`` is
    run over them, and the text written to the output stream is returned.
    The conversion result is asserted to be truthy.
    """
    po_stream = wStringIO.StringIO(posource)
    html_stream = wStringIO.StringIO()
    template_stream = wStringIO.StringIO(htmltemplate)
    converted = po2html.converthtml(po_stream, html_stream, template_stream)
    assert converted
    return html_stream.getvalue()
def countunits(self, pofile, expected):
    """Helper to check that we got the expected number of messages.

    A leading unit that reports itself as a header is not counted.  An
    empty store counts as zero units instead of failing on ``units[0]``.

    Raises AssertionError when the count differs from ``expected``.
    """
    units = pofile.units
    actual = len(units)
    # Only look at the first unit when there is one; otherwise indexing an
    # empty store would raise IndexError before the count is compared.
    if actual > 0 and units[0].isheader():
        actual = actual - 1
    assert actual == expected, \
        "expected %r units but found %r" % (expected, actual)
33 def compareunit(self
, pofile
, unitnumber
, expected
):
34 """helper to validate a PO message"""
35 if not pofile
.units
[0].isheader():
36 unitnumber
= unitnumber
- 1
37 print 'unit source: ' + str(pofile
.units
[unitnumber
].source
) + '|'
38 print 'expected: ' + expected
.encode('utf-8') + '|'
39 assert unicode(pofile
.units
[unitnumber
].source
) == unicode(expected
)
def check_single(self, markup, itemtext):
    """Check that converting ``markup`` yields exactly one unit equal to ``itemtext``."""
    converted = self.html2po(markup)
    only_unit = 1
    self.countunits(converted, only_unit)
    self.compareunit(converted, only_unit, itemtext)
def check_null(self, markup):
    """Check that converting ``markup`` yields no units at all."""
    self.countunits(self.html2po(markup), 0)
52 def test_htmllang(self
):
53 """test to ensure that we no longer use the lang attribure"""
54 markup
= '''<html lang="en"><head><title>My title</title></head><body></body></html>'''
55 pofile
= self
.html2po(markup
)
56 self
.countunits(pofile
, 1)
57 # Check that the first item is the <title> not <head>
58 self
.compareunit(pofile
, 1, "My title")
61 """test that we can extract the <title> tag"""
62 self
.check_single("<html><head><title>My title</title></head><body></body></html>", "My title")
64 def test_title_with_linebreak(self
):
65 """Test a linebreak in the <title> tag"""
75 self
.check_single(htmltext
, "My title")
78 """Test that we can extract certain <meta> info from <head>."""
79 self
.check_single('''<html><head><meta name="keywords" content="these are keywords"></head><body></body></html>''', "these are keywords")
82 """test that we can extract the <p> tag"""
83 self
.check_single("<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph.")
84 markup
= "<p>First line.<br>Second line.</p>"
85 pofile
= self
.html2po(markup
)
86 self
.compareunit(pofile
, 1, "First line.<br>Second line.")
88 def test_tag_p_with_linebreak(self
):
89 """Test newlines within the <p> tag."""
95 A paragraph is a section in a piece of writing, usually highlighting a
96 particular point or topic. It always begins on a new line and usually
97 with indentation, and it consists of at least one sentence.
102 self
.check_single(htmltext
, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
103 markup
= "<p>First\nline.<br>Second\nline.</p>"
104 pofile
= self
.html2po(markup
)
105 self
.compareunit(pofile
, 1, "First line.<br>Second line.")
107 def test_tag_div(self
):
108 """test that we can extract the <div> tag"""
109 self
.check_single("<html><head></head><body><div>A paragraph.</div></body></html>", "A paragraph.")
110 markup
= "<div>First line.<br>Second line.</div>"
111 pofile
= self
.html2po(markup
)
112 self
.compareunit(pofile
, 1, "First line.<br>Second line.")
114 def test_tag_div_with_linebreaks(self
):
115 """Test linebreaks within a <div> tag."""
121 A paragraph is a section in a piece of writing, usually highlighting a
122 particular point or topic. It always begins on a new line and usually
123 with indentation, and it consists of at least one sentence.
128 self
.check_single(htmltext
, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
129 markup
= "<div>First\nline.<br>Second\nline.</div>"
130 pofile
= self
.html2po(markup
)
131 self
.compareunit(pofile
, 1, "First line.<br>Second line.")
133 def test_tag_a(self
):
134 """test that we can extract the <a> tag"""
135 self
.check_single('<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>', 'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.')
137 def test_tag_a_with_linebreak(self
):
138 """Test that we can extract the <a> tag with newlines in it."""
146 href="http://translate.org.za/">hyperlink</a>
148 newlines.</p></body></html>
150 self
.check_single(htmltext
, 'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.')
152 def test_tag_img(self
):
153 """Test that we can extract the alt attribute from the <img> tag."""
154 self
.check_single('''<html><head></head><body><img src="picture.png" alt="A picture"></body></html>''', "A picture")
156 def test_img_empty(self
):
157 """Test that we can extract the alt attribute from the <img> tag."""
158 htmlsource
= '''<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>'''
159 self
.check_null(htmlsource
)
161 def test_tag_table_summary(self
):
162 """Test that we can extract the summary attribute."""
163 self
.check_single( '''<html><head></head><body><table summary="Table summary"></table></body></html>''', "Table summary")
165 def test_table_simple(self
):
166 """Test that we can fully extract a simple table."""
167 markup
= '''<html><head></head><body><table><tr><th>Heading One</th><th>Heading Two</th><tr><td>One</td><td>Two</td></tr></table></body></html>'''
168 pofile
= self
.html2po(markup
)
169 self
.countunits(pofile
, 4)
170 self
.compareunit(pofile
, 1, "Heading One")
171 self
.compareunit(pofile
, 2, "Heading Two")
172 self
.compareunit(pofile
, 3, "One")
173 self
.compareunit(pofile
, 4, "Two")
175 def test_table_complex(self
):
176 markup
= '''<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>'''
177 pofile
= self
.html2po(markup
)
178 self
.countunits(pofile
, 9)
179 self
.compareunit(pofile
, 1, "This is the summary")
180 self
.compareunit(pofile
, 2, "A caption")
181 self
.compareunit(pofile
, 3, "Head 1")
182 self
.compareunit(pofile
, 4, "Heading One")
183 self
.compareunit(pofile
, 5, "Heading Two")
184 self
.compareunit(pofile
, 6, "Foot One")
185 self
.compareunit(pofile
, 7, "Foot Two")
186 self
.compareunit(pofile
, 8, "One")
187 self
.compareunit(pofile
, 9, "Two")
189 def test_table_empty(self
):
190 """Test that we ignore tables that are empty.
192 A table is deemed empty if it has no translatable content.
195 self
.check_null('''<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>''')
196 self
.check_null('''<html><head></head><body><table><tr><td> </td></tr></table></body></html>''')
197 self
.check_null('''<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>''')
199 def test_address(self
):
200 """Test to see if the address element is extracted"""
201 self
.check_single("<body><address>My address</address></body>", "My address")
203 def test_headings(self
):
204 """Test to see if the h* elements are extracted"""
205 markup
= "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
206 pofile
= self
.html2po(markup
)
207 self
.countunits(pofile
, 6)
208 self
.compareunit(pofile
, 1, "Heading One")
209 self
.compareunit(pofile
, 2, "Heading Two")
210 self
.compareunit(pofile
, 3, "Heading Three")
211 self
.compareunit(pofile
, 4, "Heading Four")
212 self
.compareunit(pofile
, 5, "Heading Five")
213 self
.compareunit(pofile
, 6, "Heading Six")
215 def test_headings_with_linebreaks(self
):
216 """Test to see if h* elements with newlines can be extracted"""
217 markup
= "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
218 pofile
= self
.html2po(markup
)
219 self
.countunits(pofile
, 6)
220 self
.compareunit(pofile
, 1, "Heading One")
221 self
.compareunit(pofile
, 2, "Heading Two")
222 self
.compareunit(pofile
, 3, "Heading Three")
223 self
.compareunit(pofile
, 4, "Heading Four")
224 self
.compareunit(pofile
, 5, "Heading Five")
225 self
.compareunit(pofile
, 6, "Heading Six")
228 """Test to see if the definition list title (dt) element is extracted"""
229 self
.check_single("<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>", "Definition List Item Title")
232 """Test to see if the definition list description (dd) element is extracted"""
233 self
.check_single("<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>", "Definition List Item Description")
236 """test to check that we don't double extract a span item"""
237 self
.check_single("<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>", "You are a <span>Spanish</span> sentence.")
240 """Test to see if the list item <li> is exracted"""
241 markup
= "<html><head></head><body><ul><li>Unordered One</li><li>Unordered Two</li></ul><ol><li>Ordered One</li><li>Ordered Two</li></ol></body></html>"
242 pofile
= self
.html2po(markup
)
243 self
.countunits(pofile
, 4)
244 self
.compareunit(pofile
, 1, "Unordered One")
245 self
.compareunit(pofile
, 2, "Unordered Two")
246 self
.compareunit(pofile
, 3, "Ordered One")
247 self
.compareunit(pofile
, 4, "Ordered Two")
249 def test_duplicates(self
):
250 """check that we use the default style of msgid_comments to disambiguate duplicate messages"""
251 markup
= "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
252 pofile
= self
.html2po(markup
)
253 self
.countunits(pofile
, 2)
254 # FIXME change this so that we check that the KDE comment is correctly added
255 self
.compareunit(pofile
, 1, "Duplicate")
256 self
.compareunit(pofile
, 2, "Duplicate")
258 def wtest_multiline_reflow(self
):
259 """check that we reflow multiline content to make it more readable for translators"""
260 self
.check_single('''<td valign="middle" width="96%"><font class="headingwhite">South
261 Africa</font></td>''', '''<font class="headingwhite">South Africa</font>''')
def wtest_nested_tags(self):
    """Check that we can extract items within nested tags."""
    # NOTE(review): the "wtest_" prefix presumably keeps the runner from
    # collecting this test - confirm before renaming.
    html = "<div><p>Extract this</p>And this</div>"
    pieces = ("Extract this", "And this")
    store = self.html2po(html)
    self.countunits(store, len(pieces))
    for offset, text in enumerate(pieces):
        self.compareunit(store, offset + 1, text)
271 def test_carriage_return(self
):
272 """Remove carriage returns from files in dos format."""
273 htmlsource
= '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
274 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
276 <!-- InstanceBeginEditable name="doctitle" -->\r
277 <link href="fmfi.css" rel="stylesheet" type="text/css">\r
281 <p>The rapid expansion of telecommunications infrastructure in recent\r
282 years has helped to bridge the digital divide to a limited extent.</p> \r
284 <!-- InstanceEnd --></html>\r
287 self
.check_single(htmlsource
, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.')
289 def test_encoding_latin1(self
):
290 """Convert HTML input in iso-8859-1 correctly to unicode."""
291 htmlsource
= '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
292 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
294 <!-- InstanceBeginEditable name="doctitle" -->
295 <title>FMFI - South Africa - CSIR Openphone - Overview</title>
296 <!-- InstanceEndEditable -->
297 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
298 <meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">
300 <!-- InstanceBeginEditable name="head" -->
301 <!-- InstanceEndEditable -->
302 <link href="../../../fmfi.css" rel="stylesheet" type="text/css">
306 <p>We aim to please \x96 will you aim too, please?</p>
307 <p>South Africa\x92s language diversity can be challenging.</p>
311 pofile
= self
.html2po(htmlsource
)
313 self
.countunits(pofile
, 4)
314 self
.compareunit(pofile
, 3, u
'We aim to please \x96 will you aim too, please?')
315 self
.compareunit(pofile
, 4, u
'South Africa\x92s language diversity can be challenging.')
317 def test_strip_html(self
):
318 """Ensure that unnecessary html is stripped from the resulting unit."""
320 htmlsource
= '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
323 <title>FMFI - Contact</title>
326 <table width="100%" border="0" cellpadding="0" cellspacing="0">
327 <tr align="left" valign="top">
328 <td width="150" height="556">
329 <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
331 <td align="left" valign="top" height="555">
332 <table width="100%" border="0" cellspacing="0" cellpadding="2">
333 <tr align="left" valign="top" bgcolor="#660000">
334 <td width="4%"><strong></strong></td>
335 <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
337 <tr align="left" valign="top">
338 <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
339 <td width="96%"><a href="index.html">Home Page</a></td>
349 pofile
= self
.html2po(htmlsource
)
350 self
.countunits(pofile
, 3)
351 self
.compareunit(pofile
, 2, u
'Projects')
352 self
.compareunit(pofile
, 3, u
'Home Page')
354 # Translate and convert back:
355 pofile
.units
[1].target
= 'Projekte'
356 pofile
.units
[2].target
= 'Tuisblad'
357 htmlresult
= self
.po2html(str(pofile
), htmlsource
).replace('\n', ' ').replace('= "', '="').replace('> <', '><')
358 snippet
= '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
359 assert snippet
in htmlresult
360 snippet
= '<td width="96%"><a href="index.html">Tuisblad</a></td>'
361 assert snippet
in htmlresult
363 class TestHTML2POCommand(test_convert
.TestConvertCommand
, TestHTML2PO
):
364 """Tests running actual html2po commands on files"""
365 convertmodule
= html2po
366 defaultoptions
= {"progress": "none"}
369 """tests getting help"""
370 options
= test_convert
.TestConvertCommand
.test_help(self
)
371 options
= self
.help_check(options
, "-P, --pot")
372 options
= self
.help_check(options
, "--duplicates=DUPLICATESTYLE")
373 options
= self
.help_check(options
, "-u, --untagged", last
=True)