for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / tools / pocount.py
blob1c3f3c55baa46fbcf31704ebb349bf5eb4b0b8bb
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2003-2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Create string and word counts for supported localization files including:
23 XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc
25 See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and
26 usage instructions
27 """
29 from translate.storage import factory
30 from translate.storage import statsdb
31 import sys
32 import os
def calcstats_old(filename):
    """Compute unit and word statistics for filename without the stats cache.

    This is the previous implementation of calcstats(); it is kept only for
    comparison and debugging purposes.

    Returns a dictionary of unit counts ("translated", "fuzzy",
    "untranslated", "review", "total") and word counts (the
    "...sourcewords" keys and "translatedtargetwords").  If the storage
    factory raises ValueError for the file, the error text is printed and an
    empty dictionary is returned.
    """
    try:
        store = factory.getobject(filename)
    except ValueError:
        # The active exception is fetched through sys so that this clause
        # parses the same way on every Python version.
        print(str(sys.exc_info()[1]))
        return {}

    # Header units never take part in the counts.
    units = [unit for unit in store.units if not unit.isheader()]
    translated = translatedmessages(units)
    fuzzy = fuzzymessages(units)
    review = [unit for unit in units if unit.isreview()]
    untranslated = untranslatedmessages(units)

    # Count the words of every unit once; item 0 of each result is used as
    # the source word count and item 1 as the target word count.
    wordcounts = {}
    for unit in units:
        wordcounts[unit] = statsdb.wordsinunit(unit)

    def sourcewords(elementlist):
        return sum([wordcounts[unit][0] for unit in elementlist])

    def targetwords(elementlist):
        return sum([wordcounts[unit][1] for unit in elementlist])

    stats = {
        # units
        "translated": len(translated),
        "fuzzy": len(fuzzy),
        "untranslated": len(untranslated),
        "review": len(review),
        # words
        "translatedsourcewords": sourcewords(translated),
        "translatedtargetwords": targetwords(translated),
        "fuzzysourcewords": sourcewords(fuzzy),
        "untranslatedsourcewords": sourcewords(untranslated),
        "reviewsourcewords": sourcewords(review),
    }
    # Review units are deliberately left out of both totals.
    stats["total"] = (stats["translated"] + stats["fuzzy"]
                      + stats["untranslated"])
    stats["totalsourcewords"] = (stats["translatedsourcewords"]
                                 + stats["fuzzysourcewords"]
                                 + stats["untranslatedsourcewords"])
    return stats
def calcstats(filename):
    """Return the totals that a new statsdb.StatsCache reports for filename."""
    return statsdb.StatsCache().filetotals(filename)
def summarize(title, stats, CSVstyle=False):
    """Print the statistics in stats under the heading title.

    title    -- label for the report (a file name, or "TOTAL:")
    stats    -- mapping holding the keys "translated", "fuzzy",
                "untranslated", "review", "total", "translatedsourcewords",
                "translatedtargetwords", "fuzzysourcewords",
                "untranslatedsourcewords", "totalsourcewords" and, when
                "review" is greater than zero, "reviewsourcewords"
    CSVstyle -- if true, print one comma separated row; otherwise print a
                small human readable table followed by a blank line

    Raises KeyError if a required key is missing from stats.
    """
    def percent(part, whole):
        """Return part as a whole-number percentage of whole (0 if whole is 0)."""
        if whole == 0:
            return 0
        return part * 100 // whole

    if CSVstyle:
        fields = [
            "%s, " % title,
            "%d, %d, %d," % (stats["translated"],
                             stats["translatedsourcewords"],
                             stats["translatedtargetwords"]),
            "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
            "%d, %d," % (stats["untranslated"],
                         stats["untranslatedsourcewords"]),
            "%d, %d" % (stats["total"], stats["totalsourcewords"]),
        ]
        if stats["review"] > 0:
            # This used to look up the misspelt key "reviewsourdcewords" and
            # therefore failed with KeyError for any file with review units.
            fields.append(", %d, %d" % (stats["review"],
                                        stats["reviewsourcewords"]))
        # The fields are joined with single spaces so that the row keeps the
        # exact layout produced by the earlier chain of print statements.
        print(" ".join(fields))
    else:
        print(title)
        print("type strings words (source) words (translation)")
        print("translated: %5d (%3d%%) %10d (%3d%%) %15d" % (
            stats["translated"],
            percent(stats["translated"], stats["total"]),
            stats["translatedsourcewords"],
            percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
            stats["translatedtargetwords"]))
        print("fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["fuzzy"],
            percent(stats["fuzzy"], stats["total"]),
            stats["fuzzysourcewords"],
            percent(stats["fuzzysourcewords"], stats["totalsourcewords"])))
        print("untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["untranslated"],
            percent(stats["untranslated"], stats["total"]),
            stats["untranslatedsourcewords"],
            percent(stats["untranslatedsourcewords"],
                    stats["totalsourcewords"])))
        print("Total: %5d %17d %22d" % (
            stats["total"],
            stats["totalsourcewords"],
            stats["translatedtargetwords"]))
        if stats["review"] > 0:
            print("review: %5d %17d n/a" % (
                stats["review"], stats["reviewsourcewords"]))
        # Blank separator line after each table.
        print("")
def fuzzymessages(units):
    """Return a list of the units that are fuzzy and have a non-empty target.

    A list is always returned, whatever kind of iterable units is, so the
    result can safely be measured with len() and iterated more than once.
    """
    return [unit for unit in units if unit.isfuzzy() and unit.target]
def translatedmessages(units):
    """Return a list of the units whose istranslated() is true.

    A list is always returned, whatever kind of iterable units is, so the
    result can safely be measured with len() and iterated more than once.
    """
    return [unit for unit in units if unit.istranslated()]
def untranslatedmessages(units):
    """Return a list of the units that are neither translated nor fuzzy.

    Units with an empty source are left out.  A list is always returned,
    whatever kind of iterable units is, so the result can safely be measured
    with len() and iterated more than once.
    """
    return [unit for unit in units
            if not (unit.istranslated() or unit.isfuzzy()) and unit.source]
class summarizer:
    """Print statistics for the given files and directories.

    All of the work is done by the constructor: every name in filenames is
    processed in turn, directories are walked recursively, and a summary is
    printed for each file that yields statistics.  The running sums are kept
    in self.totals and the number of summarized files in self.filecount.
    """

    # Version control bookkeeping directories that are never descended into.
    ignoreddirs = ("CVS", ".svn", "_darcs")

    def __init__(self, filenames, CSVstyle):
        """Process filenames, printing CSV rows if CSVstyle is true.

        Names that do not exist are reported on stderr and skipped.  When
        more than one file was summarized and CSV output is off, a "TOTAL:"
        table and the file count are printed at the end.
        """
        self.totals = {}
        self.filecount = 0
        self.CSVstyle = CSVstyle
        if self.CSVstyle:
            print("Filename, Translated Messages, Translated Source Words, "
                  "Translated Target Words, Fuzzy Messages, "
                  "Fuzzy Source Words, Untranslated Messages, "
                  "Untranslated Source Words, Total Message, "
                  "Total Source Words, "
                  "Review Messages, Review Source Words")
        for filename in filenames:
            if not os.path.exists(filename):
                sys.stderr.write("cannot process %s: does not exist\n"
                                 % filename)
            elif os.path.isdir(filename):
                self.handledir(filename)
            else:
                self.handlefile(filename)
        if self.filecount > 1 and not self.CSVstyle:
            summarize("TOTAL:", self.totals)
            print("File count: %5d" % self.filecount)
            print("")

    def updatetotals(self, stats):
        """Update self.totals with the statistics in stats."""
        for key in stats:
            self.totals[key] = self.totals.get(key, 0) + stats[key]

    def handlefile(self, filename):
        """Summarize one file; a file with no statistics is skipped."""
        stats = calcstats(filename)
        if stats:
            self.updatetotals(stats)
            summarize(filename, stats, self.CSVstyle)
            self.filecount += 1

    def handlefiles(self, dirname, filenames):
        """Process each entry of filenames, which live in dirname."""
        for filename in filenames:
            pathname = os.path.join(dirname, filename)
            if os.path.isdir(pathname):
                self.handledir(pathname)
            else:
                self.handlefile(pathname)

    def handledir(self, dirname):
        """Process everything in dirname unless it is a version control dir."""
        # normpath drops any trailing separator first, so that "CVS/" is
        # recognised just like "CVS" (a bare split would give an empty name).
        name = os.path.basename(os.path.normpath(dirname))
        if name in self.ignoreddirs:
            return
        self.handlefiles(dirname, os.listdir(dirname))
def main():
    """Run pocount over the names given on the command line.

    A "--csv" argument switches the output to CSV rows; its first occurrence
    is removed from sys.argv before the remaining arguments are processed.
    """
    # TODO: make this handle command line options using optparse...
    CSVstyle = "--csv" in sys.argv
    if CSVstyle:
        sys.argv.remove("--csv")
    # psyco is an optional accelerator: any failure to load or enable it is
    # deliberately ignored.
    try:
        import psyco
        psyco.full()
    except Exception:
        pass
    summarizer(sys.argv[1:], CSVstyle)
# Only run the command line tool when executed as a script, not on import.
if __name__ == "__main__":
    main()