fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / storage / statistics.py
blobe7e4ec47ec1537d57dafa829fe22a004a5dfbbee
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Module to provide statistics and related functionality.
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """
29 from translate import lang
30 from translate.lang import factory
32 # calling classifyunits() in the constructor is probably not ideal.
33 # idea: have a property for .classification that calls it if necessary
35 # If we add units or change translations, statistics are out of date
36 # Compare with modules/Status.py in pootling that uses a bitmask to
37 # filter units
39 # Add support for reading and writing Pootle style .stats files
41 # Consider providing quickstats
class Statistics(object):
    """Manages statistics for storage objects.

    The statistics are kept in C{self.classification}, a dictionary that
    maps a class name ("total", "fuzzy", "blank", "translated", ...) to a
    sorted list of unit indexes, i.e. positions in the list returned by
    L{getunits}.  The dictionary is built lazily by L{classifyunits} the
    first time one of the query methods needs it.

    Subclasses are expected to override L{getunits}.  If the object also
    has a C{unit_iter()} method, that is used to iterate over the units;
    it must yield the units in the same order as L{getunits}.
    """

    def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
        """Sets up an empty statistics object.

        @param sourcelanguage: language code used to look up the language
            object that does the word splitting
        @param targetlanguage: language code of the translations
        @param checkerstyle: only used by L{init_checker}, which is
            currently not called from here
        """
        self.sourcelanguage = sourcelanguage
        self.targetlanguage = targetlanguage
        self.language = lang.factory.getlanguage(self.sourcelanguage)
        # Quality checks are switched off for now:
        # self.init_checker(checkerstyle)
        self.classification = {}

    def init_checker(self, checkerstyle=None):
        """Creates C{self.checker}, combining the given checker with the
        standard PO checker.

        @param checkerstyle: a checker class; defaults to
            C{checks.StandardChecker} when not given
        """
        # Imported here rather than at module level, as in the original code.
        from translate.filters import checks
        from translate.filters import pofilter
        checkerclasses = [checkerstyle or checks.StandardChecker, pofilter.StandardPOChecker]
        self.checker = pofilter.POTeeChecker(checkerclasses=checkerclasses)

    def _iter_units(self):
        """Returns an iterator over all units.

        Uses C{unit_iter()} when the object provides one and falls back to
        L{getunits} otherwise, so that the class also works on its own.
        """
        unit_iter = getattr(self, "unit_iter", None)
        if unit_iter is not None:
            return unit_iter()
        return iter(self.getunits())

    def _units_in_class(self, classname):
        """Returns the units whose index is listed under the given class
        name, classifying all units first if that has not happened yet."""
        if not self.classification:
            self.classifyunits()
        units = self.getunits()
        return [units[item] for item in self.classification[classname]]

    def _wordcounts(self, text):
        """Returns a list with the word count of each string in text.

        Objects with a C{strings} attribute (plural forms) give one count
        per string; anything else is counted as a single string.  An empty
        or missing text counts as zero words.
        """
        strings = getattr(text, "strings", None)
        if strings is None:
            strings = [text or ""]
        return [self.wordcount(string) for string in strings]

    def fuzzy_units(self):
        """Return a list of fuzzy units."""
        return self._units_in_class("fuzzy")

    def fuzzy_unitcount(self):
        """Returns the number of fuzzy units."""
        return len(self.fuzzy_units())

    def translated_units(self):
        """Return a list of translated units."""
        return self._units_in_class("translated")

    def translated_unitcount(self):
        """Returns the number of translated units."""
        return len(self.translated_units())

    def untranslated_units(self):
        """Return a list of untranslated units."""
        return self._units_in_class("blank")

    def untranslated_unitcount(self):
        """Returns the number of untranslated units."""
        return len(self.untranslated_units())

    def getunits(self):
        """Returns a list of all units in this object."""
        return []

    def get_source_text(self, units):
        """Joins the unit source strings in a single string of text.

        Every source string, and every additional plural form, is followed
        by a newline so that adjacent strings never run into each other.

        @param units: the units whose source text should be joined
        """
        parts = []
        for unit in units:
            parts.append(unit.source)
            # The first plural form is skipped because the source itself
            # was just added (assumes strings[0] is the source - TODO confirm).
            parts.extend(getattr(unit.source, "strings", [])[1:])
        return "".join([part + "\n" for part in parts])

    def wordcount(self, text):
        """Returns the number of words in the given text."""
        return len(self.language.words(text))

    def source_wordcount(self):
        """Returns the number of words in the source text."""
        source_text = self.get_source_text(self.getunits())
        return self.wordcount(source_text)

    def translated_wordcount(self):
        """Returns the number of translated words in this object.

        The words are counted in the source text of the translated units.
        """
        text = self.get_source_text(self.translated_units())
        return self.wordcount(text)

    def untranslated_wordcount(self):
        """Returns the number of untranslated words in this object.

        The words are counted in the source text of the untranslated units.
        """
        text = self.get_source_text(self.untranslated_units())
        return self.wordcount(text)

    def classifyunit(self, unit):
        """Returns a list of the classes that the unit belongs to.

        @param unit: the unit to classify
        """
        classes = ["total"]
        if unit.isfuzzy():
            classes.append("fuzzy")
        if unit.gettargetlen() == 0:
            classes.append("blank")
        if unit.istranslated():
            classes.append("translated")
        #TODO: we don't handle checking plurals at all yet, as this is tricky...
        # The quality checks are disabled, so there are no results to add.
        # When they are re-enabled the checker needs unit.source and
        # unit.target in the same string type; that decoding should be done
        # where the unit is read, not here.
        # checkresult = self.checker.run_filters(unit, source, target)
        checkresult = {}
        for checkname in checkresult:
            classes.append("check-" + checkname)
        return classes

    def classifyunits(self):
        """Makes a dictionary of which units fall into which classifications.

        This method iterates over all units.
        """
        self.classification = {}
        # These classes are always present, even when no unit is in them.
        for classname in ("fuzzy", "blank", "translated", "has-suggestion", "total"):
            self.classification[classname] = []
        # for checkname in self.checker.getfilters().keys():
        #     self.classification["check-" + checkname] = []
        for item, unit in enumerate(self._iter_units()):
            classes = self.classifyunit(unit)
            # if self.basefile.getsuggestions(item):
            #     classes.append("has-suggestion")
            for classname in classes:
                # A class that was not prepared above starts a new list.
                self.classification.setdefault(classname, []).append(item)
        self.countwords()

    def countwords(self):
        """Counts the source and target words in each of the units.

        The results are stored in C{self.sourcewordcounts} and
        C{self.targetwordcounts}: one list per unit, holding one count per
        string of that unit.
        """
        self.sourcewordcounts = []
        self.targetwordcounts = []
        for unit in self._iter_units():
            self.sourcewordcounts.append(self._wordcounts(unit.source))
            self.targetwordcounts.append(self._wordcounts(unit.target))

    def reclassifyunit(self, item):
        """Updates the classification of a unit in self.classification.

        @param item: an integer that is an index in .getunits().
        """
        unit = self.getunits()[item]
        if not self.classification:
            # Nothing has been classified yet, so there are no word counts
            # to update either; classifying everything covers this unit.
            self.classifyunits()
            return
        self.sourcewordcounts[item] = self._wordcounts(unit.source)
        self.targetwordcounts[item] = self._wordcounts(unit.target)
        classes = self.classifyunit(unit)
        # if self.basefile.getsuggestions(item):
        #     classes.append("has-suggestion")
        for classname in classes:
            # Same as in classifyunits(): unknown classes get a new list.
            self.classification.setdefault(classname, [])
        for classname, matchingitems in self.classification.items():
            belongs = classname in classes
            if belongs == (item in matchingitems):
                continue
            if belongs:
                matchingitems.append(item)
                matchingitems.sort()
            else:
                matchingitems.remove(item)
        # self.savestats()