tools/pocount.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2003-2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """Create string and word counts for supported localization files including:
XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc.
  24
  25 See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and
  26 usage instructions
  27 """
  28
  29 from translate.storage import factory
  30 from translate.storage import statsdb
  31 import sys
  32 import os
  33
  34 def calcstats_old(filename):
  35     """This is the previous implementation of calcstats() and is left for
  36     comparison and debuging purposes."""
  37     # ignore totally blank or header units
  38     try:
  39         store = factory.getobject(filename)
  40     except ValueError, e:
  41         print str(e)
  42         return {}
  43     units = filter(lambda unit: not unit.isheader(), store.units)
  44     translated = translatedmessages(units)
  45     fuzzy = fuzzymessages(units)
  46     review = filter(lambda unit: unit.isreview(), units)
  47     untranslated = untranslatedmessages(units)
  48     wordcounts = dict(map(lambda unit: (unit, statsdb.wordsinunit(unit)), units))
  49     sourcewords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][0], elementlist))
  50     targetwords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][1], elementlist))
  51     stats = {}
  52
  53     #units
  54     stats["translated"] = len(translated)
  55     stats["fuzzy"] = len(fuzzy)
  56     stats["untranslated"] = len(untranslated)
  57     stats["review"] = len(review)
  58     stats["total"] = stats["translated"] + stats["fuzzy"] + stats["untranslated"]
  59
  60     #words
  61     stats["translatedsourcewords"] = sourcewords(translated)
  62     stats["translatedtargetwords"] = targetwords(translated)
  63     stats["fuzzysourcewords"] = sourcewords(fuzzy)
  64     stats["untranslatedsourcewords"] = sourcewords(untranslated)
  65     stats["reviewsourcewords"] = sourcewords(review)
  66     stats["totalsourcewords"] = stats["translatedsourcewords"] + \
  67                                 stats["fuzzysourcewords"] + \
  68                                 stats["untranslatedsourcewords"]
  69     return stats
  70
  71 def calcstats(filename):
  72     statscache = statsdb.StatsCache()
  73     return statscache.filetotals(filename)
  74
  75 def summarize(title, stats, CSVstyle=False):
  76     def percent(denominator, devisor):
  77         if devisor == 0:
  78             return 0
  79         else:
  80             return denominator*100/devisor
  81
  82     if CSVstyle:
  83         print "%s, " % title,
  84         print "%d, %d, %d," % (stats["translated"], stats["translatedsourcewords"], stats["translatedtargetwords"]),
  85         print "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
  86         print "%d, %d," % (stats["untranslated"], stats["untranslatedsourcewords"]),
  87         print "%d, %d" % (stats["total"], stats["totalsourcewords"]),
  88         if stats["review"] > 0:
  89             print ", %d, %d" % (stats["review"], stats["reviewsourdcewords"]),
  90         print
  91     else:
  92         print title
  93         print "type              strings      words (source)    words (translation)"
  94         print "translated:   %5d (%3d%%) %10d (%3d%%) %15d" % \
  95                 (stats["translated"], \
  96                 percent(stats["translated"], stats["total"]), \
  97                 stats["translatedsourcewords"], \
  98                 percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \
  99                 stats["translatedtargetwords"])
 100         print "fuzzy:        %5d (%3d%%) %10d (%3d%%)             n/a" % \
 101                 (stats["fuzzy"], \
 102                 percent(stats["fuzzy"], stats["total"]), \
 103                 stats["fuzzysourcewords"], \
 104                 percent(stats["fuzzysourcewords"], stats["totalsourcewords"]))
 105         print "untranslated: %5d (%3d%%) %10d (%3d%%)             n/a" % \
 106                 (stats["untranslated"], \
 107                 percent(stats["untranslated"], stats["total"]), \
 108                 stats["untranslatedsourcewords"], \
 109                 percent(stats["untranslatedsourcewords"], stats["totalsourcewords"]))
 110         print "Total:        %5d %17d %22d" % \
 111                 (stats["total"], \
 112                 stats["totalsourcewords"], \
 113                 stats["translatedtargetwords"])
 114         if stats["review"] > 0:
 115             print "review:       %5d %17d                    n/a" % \
 116                     (stats["review"], stats["reviewsourcewords"])
 117         print
 118
 119 def fuzzymessages(units):
 120     return filter(lambda unit: unit.isfuzzy() and unit.target, units)
 121
 122 def translatedmessages(units):
 123     return filter(lambda unit: unit.istranslated(), units)
 124
 125 def untranslatedmessages(units):
 126     return filter(lambda unit: not (unit.istranslated() or unit.isfuzzy()) and unit.source, units)
 127
 128 class summarizer:
 129     def __init__(self, filenames, CSVstyle):
 130         self.totals = {}
 131         self.filecount = 0
 132         self.CSVstyle = CSVstyle
 133         if self.CSVstyle:
 134             print "Filename, Translated Messages, Translated Source Words, Translated \
 135 Target Words, Fuzzy Messages, Fuzzy Source Words, Untranslated Messages, \
 136 Untranslated Source Words, Total Message, Total Source Words, \
 137 Review Messages, Review Source Words"
 138         for filename in filenames:
 139             if not os.path.exists(filename):
 140                 print >> sys.stderr, "cannot process %s: does not exist" % filename
 141                 continue
 142             elif os.path.isdir(filename):
 143                 self.handledir(filename)
 144             else:
 145                 self.handlefile(filename)
 146         if self.filecount > 1 and not self.CSVstyle:
 147             summarize("TOTAL:", self.totals)
 148             print "File count:   %5d" % (self.filecount)
 149             print
 150
 151     def updatetotals(self, stats):
 152         """Update self.totals with the statistics in stats."""
 153         for key in stats.keys():
 154             if not self.totals.has_key(key):
 155                 self.totals[key] = 0
 156             self.totals[key] += stats[key]
 157
 158     def handlefile(self, filename):
 159         stats = calcstats(filename)
 160         if stats:
 161             self.updatetotals(stats)
 162             summarize(filename, stats, self.CSVstyle)
 163             self.filecount += 1
 164
 165     def handlefiles(self, dirname, filenames):
 166         for filename in filenames:
 167             pathname = os.path.join(dirname, filename)
 168             if os.path.isdir(pathname):
 169                 self.handledir(pathname)
 170             else:
 171                 self.handlefile(pathname)
 172
 173     def handledir(self, dirname):
 174         path, name = os.path.split(dirname)
 175         if name in ["CVS", ".svn", "_darcs"]:
 176             return
 177         entries = os.listdir(dirname)
 178         self.handlefiles(dirname, entries)
 179
 180 def main():
 181     # TODO: make this handle command line options using optparse...
 182     CSVstyle = False
 183     if "--csv" in sys.argv:
 184         sys.argv.remove("--csv")
 185         CSVstyle = True
 186     try:
 187         import psyco
 188         psyco.full()
 189     except Exception:
 190         pass
 191     summarizer(sys.argv[1:], CSVstyle)
 192
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()