storage/statistics.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """Module to provide statistics and related functionality.
  23
  24 @organization: Zuza Software Foundation
  25 @copyright: 2007 Zuza Software Foundation
  26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
  27 """
  28
  29 from translate import lang
  30 from translate.lang import factory
  31
  32 # calling classifyunits() in the constructor is probably not ideal.
  33 # idea: have a property for .classification that calls it if necessary
  34
  35 # If we add units or change translations, statistics are out of date
  36 # Compare with modules/Status.py in pootling that uses a bitmask to
  37 # filter units
  38
  39 # Add support for reading and writing Pootle style .stats files
  40
  41 # Consider providing quickstats
  42
  43 class Statistics(object):
  44     """Manages statistics for storage objects."""
  45
  46     def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
  47         self.sourcelanguage = sourcelanguage
  48         self.targetlanguage = targetlanguage
  49         self.language = lang.factory.getlanguage(self.sourcelanguage)
  50 #        self.init_checker(checkerstyle)
  51
  52         self.classification = {}
  53
  54     def init_checker(self, checkerstyle=None):
  55         from translate.filters import checks
  56         from translate.filters import pofilter
  57         checkerclasses = [checkerstyle or checks.StandardChecker, pofilter.StandardPOChecker]
  58         self.checker = pofilter.POTeeChecker(checkerclasses=checkerclasses)
  59
  60     def fuzzy_units(self):
  61         """Return a list of fuzzy units."""
  62         if not self.classification:
  63             self.classifyunits()
  64         units = self.getunits()
  65         return [units[item] for item in self.classification["fuzzy"]]
  66
  67     def fuzzy_unitcount(self):
  68         """Returns the number of fuzzy units."""
  69         return len(self.fuzzy_units())
  70
  71     def translated_units(self):
  72         """Return a list of translated units."""
  73         if not self.classification:
  74             self.classifyunits()
  75         units = self.getunits()
  76         return [units[item] for item in self.classification["translated"]]
  77
  78     def translated_unitcount(self):
  79         """Returns the number of translated units."""
  80         return len(self.translated_units())
  81
  82     def untranslated_units(self):
  83         """Return a list of untranslated units."""
  84         if not self.classification:
  85             self.classifyunits()
  86         units = self.getunits()
  87         return [units[item] for item in self.classification["blank"]]
  88
  89     def untranslated_unitcount(self):
  90         """Returns the number of untranslated units."""
  91
  92         return len(self.untranslated_units())
  93
  94     def getunits(self):
  95         """Returns a list of all units in this object."""
  96         return []
  97
  98     def get_source_text(self, units):
  99         """Joins the unit source strings in a single string of text."""
 100         source_text = ""
 101         for unit in units:
 102             source_text += unit.source + "\n"
 103             plurals = getattr(unit.source, "strings", [])
 104             if plurals:
 105                 source_text += "\n".join(plurals[1:])
 106         return source_text
 107
 108     def wordcount(self, text):
 109         """Returns the number of words in the given text."""
 110         return len(self.language.words(text))
 111
 112     def source_wordcount(self):
 113         """Returns the number of words in the source text."""
 114         source_text = self.get_source_text(self.getunits())
 115         return self.wordcount(source_text)
 116
 117     def translated_wordcount(self):
 118         """Returns the number of translated words in this object."""
 119
 120         text = self.get_source_text(self.translated_units())
 121         return self.wordcount(text)
 122
 123     def untranslated_wordcount(self):
 124         """Returns the number of untranslated words in this object."""
 125
 126         text = self.get_source_text(self.untranslated_units())
 127         return self.wordcount(text)
 128
 129     def classifyunit(self, unit):
 130         """Returns a list of the classes that the unit belongs to.
 131
 132         @param unit: the unit to classify
 133         """
 134         classes = ["total"]
 135         if unit.isfuzzy():
 136             classes.append("fuzzy")
 137         if unit.gettargetlen() == 0:
 138             classes.append("blank")
 139         if unit.istranslated():
 140             classes.append("translated")
 141         #TODO: we don't handle checking plurals at all yet, as this is tricky...
 142         source = unit.source
 143         target = unit.target
 144         if isinstance(source, str) and isinstance(target, unicode):
 145             source = source.decode(getattr(unit, "encoding", "utf-8"))
 146         #TODO: decoding should not be done here
 147 #        checkresult = self.checker.run_filters(unit, source, target)
 148         checkresult = {}
 149         for checkname, checkmessage in checkresult.iteritems():
 150             classes.append("check-" + checkname)
 151         return classes
 152
 153     def classifyunits(self):
 154         """Makes a dictionary of which units fall into which classifications.
 155
 156         This method iterates over all units.
 157         """
 158         self.classification = {}
 159         self.classification["fuzzy"] = []
 160         self.classification["blank"] = []
 161         self.classification["translated"] = []
 162         self.classification["has-suggestion"] = []
 163         self.classification["total"] = []
 164 #        for checkname in self.checker.getfilters().keys():
 165 #            self.classification["check-" + checkname] = []
 166         for item, unit in enumerate(self.unit_iter()):
 167             classes = self.classifyunit(unit)
 168 #            if self.basefile.getsuggestions(item):
 169 #                classes.append("has-suggestion")
 170             for classname in classes:
 171                 if classname in self.classification:
 172                     self.classification[classname].append(item)
 173                 else:
 174                     self.classification[classname] = item
 175         self.countwords()
 176
 177     def countwords(self):
 178         """Counts the source and target words in each of the units."""
 179         self.sourcewordcounts = []
 180         self.targetwordcounts = []
 181         for unit in self.unit_iter():
 182             self.sourcewordcounts.append([self.wordcount(text) for text in getattr(unit.source, "strings", [""])])
 183             self.targetwordcounts.append([self.wordcount(text) for text in getattr(unit.target, "strings", [""])])
 184
 185     def reclassifyunit(self, item):
 186         """Updates the classification of a unit in self.classification.
 187
 188         @param item: an integer that is an index in .getunits().
 189         """
 190         unit = self.getunits()[item]
 191         self.sourcewordcounts[item] = [self.wordcount(text) for text in unit.source.strings]
 192         self.targetwordcounts[item] = [self.wordcount(text) for text in unit.target.strings]
 193         classes = self.classifyunit(unit)
 194 #        if self.basefile.getsuggestions(item):
 195 #            classes.append("has-suggestion")
 196         for classname, matchingitems in self.classification.items():
 197             if (classname in classes) != (item in matchingitems):
 198                 if classname in classes:
 199                     self.classification[classname].append(item)
 200                 else:
 201                     self.classification[classname].remove(item)
 202                 self.classification[classname].sort()
 203 #        self.savestats()
 204
 205