# -*- coding: utf-8 -*-
#
# Copyright 2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Module to provide statistics and related functionality.

@organization: Zuza Software Foundation
@copyright: 2007 Zuza Software Foundation
@license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
"""
29 from translate
import lang
30 from translate
.lang
import factory
# TODO:
# Calling classifyunits() in the constructor is probably not ideal.
# Idea: have a property for .classification that calls it if necessary.
#
# If we add units or change translations, the statistics are out of date.
# Compare with modules/Status.py in pootling, which uses a bitmask to
# filter units.
#
# Add support for reading and writing Pootle style .stats files.
#
# Consider providing quickstats.
class Statistics(object):
    """Manages statistics for storage objects.

    C{self.classification} maps a class name (for example "fuzzy") to a
    sorted list of unit indexes, and C{self.sourcewordcounts} /
    C{self.targetwordcounts} hold per-unit word counts.  The units are
    reached through C{getunits()} and C{unit_iter()}; only a placeholder
    C{getunits()} is defined here, so C{unit_iter()} presumably comes from
    the class this one is combined with -- verify against the storage classes.
    """

    def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
        """Stores the languages and starts with an empty classification.

        @param sourcelanguage: code of the source language; it selects the
            language object used for word counting
        @param targetlanguage: code of the target language (only stored)
        @param checkerstyle: currently unused, because the call to
            L{init_checker} below is disabled
        """
        self.sourcelanguage = sourcelanguage
        self.targetlanguage = targetlanguage
        self.language = lang.factory.getlanguage(self.sourcelanguage)
        # The quality checker is disabled for now:
        # self.init_checker(checkerstyle)

        # Filled on demand by classifyunits().
        self.classification = {}

    def init_checker(self, checkerstyle=None):
        """Creates C{self.checker}, a tee checker over two checker classes.

        @param checkerstyle: checker class to use instead of
            C{checks.StandardChecker}
        """
        from translate.filters import checks
        from translate.filters import pofilter
        checkerclasses = [checkerstyle or checks.StandardChecker, pofilter.StandardPOChecker]
        self.checker = pofilter.POTeeChecker(checkerclasses=checkerclasses)

    def _units_in_class(self, classname):
        """Returns the units whose index is filed under C{classname}.

        The classification is built first if it is still empty.
        """
        if not self.classification:
            self.classifyunits()
        units = self.getunits()
        return [units[item] for item in self.classification[classname]]

    def fuzzy_units(self):
        """Return a list of fuzzy units."""
        return self._units_in_class("fuzzy")

    def fuzzy_unitcount(self):
        """Returns the number of fuzzy units."""
        return len(self.fuzzy_units())

    def translated_units(self):
        """Return a list of translated units."""
        return self._units_in_class("translated")

    def translated_unitcount(self):
        """Returns the number of translated units."""
        return len(self.translated_units())

    def untranslated_units(self):
        """Return a list of untranslated units (classification "blank")."""
        return self._units_in_class("blank")

    def untranslated_unitcount(self):
        """Returns the number of untranslated units."""
        return len(self.untranslated_units())

    def getunits(self):
        """Returns a list of all units in this object.

        This placeholder returns an empty list.
        """
        return []

    def get_source_text(self, units):
        """Joins the unit source strings in a single string of text.

        Every source string, and every additional plural form found in the
        source's optional C{strings} attribute, is followed by a newline.
        Terminating the plural forms as well keeps the last plural of one
        unit from running into the source of the next unit, which would
        merge two words into one.

        @param units: iterable of units that have a C{source} attribute
        @return: the joined text; empty if there are no units
        """
        pieces = []
        for unit in units:
            pieces.append(unit.source)
            # strings[0] is taken to duplicate unit.source itself, as the
            # original [1:] slice implies.
            pieces.extend(getattr(unit.source, "strings", [])[1:])
        return "".join(piece + "\n" for piece in pieces)

    def wordcount(self, text):
        """Returns the number of words in the given text."""
        return len(self.language.words(text))

    def source_wordcount(self):
        """Returns the number of words in the source text."""
        source_text = self.get_source_text(self.getunits())
        return self.wordcount(source_text)

    def translated_wordcount(self):
        """Returns the number of source words in the translated units."""
        text = self.get_source_text(self.translated_units())
        return self.wordcount(text)

    def untranslated_wordcount(self):
        """Returns the number of source words in the untranslated units."""
        text = self.get_source_text(self.untranslated_units())
        return self.wordcount(text)

    def classifyunit(self, unit):
        """Returns a list of the classes that the unit belongs to.

        @param unit: the unit to classify
        @return: list of class names; always contains "total"
        """
        classes = ["total"]
        if unit.isfuzzy():
            classes.append("fuzzy")
        if unit.gettargetlen() == 0:
            classes.append("blank")
        if unit.istranslated():
            classes.append("translated")
        #TODO: we don't handle checking plurals at all yet, as this is tricky...
        source = unit.source
        target = unit.target
        # bytes / type(u"") are str / unicode on Python 2, so this is the
        # original byte-string versus unicode test, and it also runs on
        # Python 3, where the unicode builtin no longer exists.
        if isinstance(source, bytes) and isinstance(target, type(u"")):
            source = source.decode(getattr(unit, "encoding", "utf-8"))
        #TODO: decoding should not be done here
        # The checker is disabled, so there are no check results to report:
        # checkresult = self.checker.run_filters(unit, source, target)
        checkresult = {}
        # Only the check names are needed; plain iteration replaces the
        # Python 2 only iteritems().
        for checkname in checkresult:
            classes.append("check-" + checkname)
        return classes

    def classifyunits(self):
        """Makes a dictionary of which units fall into which classifications.

        This method iterates over all units and then refreshes the word
        counts through L{countwords}.
        """
        self.classification = {
            "fuzzy": [],
            "blank": [],
            "translated": [],
            "has-suggestion": [],
            "total": [],
        }
        # for checkname in self.checker.getfilters().keys():
        #     self.classification["check-" + checkname] = []
        for item, unit in enumerate(self.unit_iter()):
            classes = self.classifyunit(unit)
            # if self.basefile.getsuggestions(item):
            #     classes.append("has-suggestion")
            for classname in classes:
                # A class name seen for the first time must start a list;
                # storing the bare index broke the next append.
                self.classification.setdefault(classname, []).append(item)
        self.countwords()

    def _wordcounts(self, text):
        """Returns the word count of each string form of C{text}.

        Text with a C{strings} attribute is counted form by form.  Plain
        text is counted as a single form (it used to be replaced by an
        empty string and so always counted as zero words), and a missing
        text counts as empty.
        """
        strings = getattr(text, "strings", None)
        if strings is None:
            strings = [text or ""]
        return [self.wordcount(string) for string in strings]

    def countwords(self):
        """Counts the source and target words in each of the units."""
        self.sourcewordcounts = []
        self.targetwordcounts = []
        for unit in self.unit_iter():
            self.sourcewordcounts.append(self._wordcounts(unit.source))
            self.targetwordcounts.append(self._wordcounts(unit.target))

    def reclassifyunit(self, item):
        """Updates the classification of a unit in self.classification.

        The word counts of the unit are refreshed as well, so
        L{countwords} (called by L{classifyunits}) must have run before.

        @param item: an integer that is an index in .getunits().
        """
        unit = self.getunits()[item]
        # Same counting rule as countwords(), so plain strings work too.
        self.sourcewordcounts[item] = self._wordcounts(unit.source)
        self.targetwordcounts[item] = self._wordcounts(unit.target)
        classes = self.classifyunit(unit)
        # if self.basefile.getsuggestions(item):
        #     classes.append("has-suggestion")
        # Make sure every class of the unit has an entry, otherwise a class
        # that is new to the classification would never be recorded.
        for classname in classes:
            self.classification.setdefault(classname, [])
        for classname, matchingitems in self.classification.items():
            if (classname in classes) != (item in matchingitems):
                if classname in classes:
                    matchingitems.append(item)
                else:
                    matchingitems.remove(item)
                matchingitems.sort()