fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / tools / test_pocount.py
blob 0a0ca1dafa72a2b602a111248b5047a91076c289
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 from translate.storage import po
5 from translate.storage import statsdb
class TestPOCount:
    """Word-count tests for PO units, as counted by ``statsdb.wordsinunit``."""

    def count(self, source, expectedsource, target=None, expectedtarget=None):
        """Check the word counts of a PO unit built from ``source``/``target``.

        A ``po.pounit`` is created from ``source``; when ``target`` is given
        it is assigned as the unit's target.  The unit is then passed to
        ``statsdb.wordsinunit`` and the returned source count is asserted to
        equal ``expectedsource``.  The target count is only checked against
        ``expectedtarget`` when a ``target`` was supplied.
        """
        poelement = po.pounit(source)
        if target is not None:
            poelement.target = target
        wordssource, wordstarget = statsdb.wordsinunit(poelement)
        # Single-argument, parenthesised print: emits the same text under
        # the Python 2 print statement and the Python 3 print function.
        print('Source (expected=%d; actual=%d): "%s"' % (expectedsource, wordssource, source))
        assert wordssource == expectedsource
        if target is not None:
            print('Target (expected=%d; actual=%d): "%s"' % (expectedtarget, wordstarget, target))
            assert wordstarget == expectedtarget

    def test_simple_count_zero(self):
        """no content"""
        self.count("", 0)

    def test_simple_count_one(self):
        """simplest one word count"""
        self.count("One", 1)

    def test_simple_count_two(self):
        """simplest two word count"""
        self.count("One two", 2)

    def test_punctuation_divides_words(self):
        """test that we break words when there is punctuation"""
        self.count("One. Two", 2)
        self.count("One.Two", 2)

    def test_xml_tags(self):
        """test that we do not count XML tags as words"""
        # <br> is a word break
        self.count("A word<br>Another word", 4)
        self.count("A word<br/>Another word", 4)
        self.count("A word<br />Another word", 4)
        # \n is a word break
        self.count("<p>A word</p>\n<p>Another word</p>", 4)

    def test_newlines(self):
        """test to see that newlines divide words"""
        # newlines break words
        self.count("A word.\nAnother word", 4)
        # raw string: a literal backslash-backslash-n followed by backslash-n
        self.count(r"A word.\\n\nAnother word", 4)

    def test_variables_are_words(self):
        """test that we count variables as words"""
        self.count("%PROGRAMNAME %PROGRAM% %s $file $1", 5)

    def test_plurals(self):
        """test that we can handle plural PO elements"""
        # TODO: this test has no assertions yet; the sample plural unit
        # below is only recorded as a comment and is never exercised.
        # #: gdk-pixbuf/gdk-pixdata.c:430
        # #, c-format
        # msgid "failed to allocate image buffer of %u byte"
        # msgid_plural "failed to allocate image buffer of %u bytes"
        # msgstr[0] "e paletšwe go hwetša sešireletši sa seswantšho sa paete ya %u"
        # msgstr[1] "e paletšwe go hwetša sešireletši sa seswantšho sa dipaete tša %u"

    def test_plurals_kde(self):
        """test that we correctly count old style KDE plurals"""
        self.count("_n: Singular\\n\nPlural", 2, "Een\\n\ntwee\\n\ndrie", 3)

    def test_msgid_blank(self):
        """counts a message id"""
        self.count(" ", 0)
73 # Counting strings
74 # We also need to check how we count strings, and whether we call them translated or untranslated,
75 # i.e. an all-spaces msgid should count as translated if there are spaces in the msgstr
77 # Make sure we don't count obsolete messages
79 # Do we correctly identify a translated yet blank message?
81 # Need to test that we can differentiate between fuzzy, translated and untranslated