2 # -*- coding: utf-8 -*-
4 # Copyright 2003-2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Create string and word counts for supported localization files including:
XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc.

See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and
usage instructions.
"""
import os
import sys

from translate.storage import factory
from translate.storage import statsdb
def calcstats_old(filename):
    """This is the previous implementation of calcstats() and is left for
    comparison and debugging purposes.

    Loads ``filename`` through ``factory.getobject()``, classifies every
    non-header unit and returns a dictionary with these keys:

    - unit counts: ``translated``, ``fuzzy``, ``untranslated``, ``review``,
      ``total`` (translated + fuzzy + untranslated; review units are not
      added to the total)
    - word counts: ``translatedsourcewords``, ``translatedtargetwords``,
      ``fuzzysourcewords``, ``untranslatedsourcewords``,
      ``reviewsourcewords``, ``totalsourcewords``
    """
    # NOTE(review): no error handling is visible around getobject(); confirm
    # whether load failures are meant to be caught here.
    store = factory.getobject(filename)

    # ignore totally blank or header units
    units = [unit for unit in store.units if not unit.isheader()]

    # list() keeps len() and repeated iteration safe whatever kind of
    # iterable the helper functions hand back.
    translated = list(translatedmessages(units))
    fuzzy = list(fuzzymessages(units))
    review = [unit for unit in units if unit.isreview()]
    untranslated = list(untranslatedmessages(units))

    # Count the words of every unit once; index 0 of the result is used as
    # the source word count and index 1 as the target word count.
    wordcounts = dict((unit, statsdb.wordsinunit(unit)) for unit in units)

    def sourcewords(elementlist):
        """Sum the source word counts of the units in ``elementlist``."""
        return sum(wordcounts[unit][0] for unit in elementlist)

    def targetwords(elementlist):
        """Sum the target word counts of the units in ``elementlist``."""
        return sum(wordcounts[unit][1] for unit in elementlist)

    stats = {}

    # units
    stats["translated"] = len(translated)
    stats["fuzzy"] = len(fuzzy)
    stats["untranslated"] = len(untranslated)
    stats["review"] = len(review)
    stats["total"] = (stats["translated"]
                      + stats["fuzzy"]
                      + stats["untranslated"])

    # words
    stats["translatedsourcewords"] = sourcewords(translated)
    stats["translatedtargetwords"] = targetwords(translated)
    stats["fuzzysourcewords"] = sourcewords(fuzzy)
    stats["untranslatedsourcewords"] = sourcewords(untranslated)
    stats["reviewsourcewords"] = sourcewords(review)
    stats["totalsourcewords"] = (stats["translatedsourcewords"]
                                 + stats["fuzzysourcewords"]
                                 + stats["untranslatedsourcewords"])
    return stats
def calcstats(filename):
    """Return the totals for ``filename`` as reported by ``filetotals()`` of
    a freshly created ``statsdb.StatsCache``."""
    return statsdb.StatsCache().filetotals(filename)
def summarize(title, stats, CSVstyle=False):
    """Print the statistics in ``stats`` to standard output.

    ``title`` is the heading of the report (the file name, or a label such
    as "TOTAL:").  ``stats`` is a dictionary holding the keys
    ``translated``, ``translatedsourcewords``, ``translatedtargetwords``,
    ``fuzzy``, ``fuzzysourcewords``, ``untranslated``,
    ``untranslatedsourcewords``, ``total``, ``totalsourcewords``,
    ``review`` and ``reviewsourcewords``.

    With ``CSVstyle`` true a single comma separated row is printed, starting
    with ``title``; the two review columns are only appended when there are
    review units.  Otherwise a small table is printed under ``title``.
    """
    def percent(part, whole):
        """Return ``part`` as a whole-number percentage of ``whole``."""
        if whole == 0:
            # An empty file has nothing to take a percentage of.
            return 0
        return part * 100 // whole

    if CSVstyle:
        # The cells are joined with single spaces, which yields the same
        # spacing as consecutive Python 2 ``print ...,`` statements.
        cells = [
            "%s," % title,
            "%d, %d, %d," % (stats["translated"],
                             stats["translatedsourcewords"],
                             stats["translatedtargetwords"]),
            "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
            "%d, %d," % (stats["untranslated"],
                         stats["untranslatedsourcewords"]),
            "%d, %d" % (stats["total"], stats["totalsourcewords"]),
        ]
        if stats["review"] > 0:
            cells.append(", %d, %d" % (stats["review"],
                                       stats["reviewsourcewords"]))
        print(" ".join(cells))
        return

    print(title)
    print("type strings words (source) words (translation)")
    print("translated: %5d (%3d%%) %10d (%3d%%) %15d" % (
        stats["translated"],
        percent(stats["translated"], stats["total"]),
        stats["translatedsourcewords"],
        percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
        stats["translatedtargetwords"]))
    print("fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % (
        stats["fuzzy"],
        percent(stats["fuzzy"], stats["total"]),
        stats["fuzzysourcewords"],
        percent(stats["fuzzysourcewords"], stats["totalsourcewords"])))
    print("untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % (
        stats["untranslated"],
        percent(stats["untranslated"], stats["total"]),
        stats["untranslatedsourcewords"],
        percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])))
    print("Total: %5d %17d %22d" % (
        stats["total"],
        stats["totalsourcewords"],
        stats["translatedtargetwords"]))
    if stats["review"] > 0:
        print("review: %5d %17d n/a" % (
            stats["review"], stats["reviewsourcewords"]))
    # Blank line so that consecutive reports do not run together.
    print("")
def fuzzymessages(units):
    """Return a list of the units that are fuzzy and have a non-empty
    target, in their original order."""
    return [unit for unit in units if unit.isfuzzy() and unit.target]
def translatedmessages(units):
    """Return a list of the units whose ``istranslated()`` is true, in
    their original order."""
    return [unit for unit in units if unit.istranslated()]
def untranslatedmessages(units):
    """Return a list of the units that are neither translated nor fuzzy and
    have a non-empty source, in their original order."""
    return [unit for unit in units
            if not (unit.istranslated() or unit.isfuzzy()) and unit.source]
class summarizer:
    """Print statistics for a list of files and directories.

    Constructing an instance does all the work: each name in ``filenames``
    is reported through summarize(), directories are walked recursively,
    and running totals are kept in ``self.totals`` with the number of
    processed files in ``self.filecount``.
    """

    def __init__(self, filenames, CSVstyle):
        """Process ``filenames`` and print the per-file reports.

        ``CSVstyle`` selects comma separated output; in that mode a header
        row is printed first and no grand total is printed at the end.
        """
        self.totals = {}
        self.filecount = 0
        self.CSVstyle = CSVstyle
        if self.CSVstyle:
            print("Filename, Translated Messages, Translated Source Words, "
                  "Translated Target Words, Fuzzy Messages, "
                  "Fuzzy Source Words, Untranslated Messages, "
                  "Untranslated Source Words, Total Message, "
                  "Total Source Words, "
                  "Review Messages, Review Source Words")
        for filename in filenames:
            if not os.path.exists(filename):
                sys.stderr.write(
                    "cannot process %s: does not exist\n" % filename)
            elif os.path.isdir(filename):
                self.handledir(filename)
            else:
                self.handlefile(filename)
        # A grand total is only useful for more than one file.
        if self.filecount > 1 and not self.CSVstyle:
            summarize("TOTAL:", self.totals)
            print("File count: %5d" % (self.filecount))

    def updatetotals(self, stats):
        """Update self.totals with the statistics in stats."""
        for key, value in stats.items():
            # Keys seen for the first time start counting from zero.
            self.totals[key] = self.totals.get(key, 0) + value

    def handlefile(self, filename):
        """Calculate, accumulate and print the statistics of one file."""
        stats = calcstats(filename)
        self.filecount += 1
        self.updatetotals(stats)
        summarize(filename, stats, self.CSVstyle)

    def handlefiles(self, dirname, filenames):
        """Process the entries ``filenames`` of the directory ``dirname``,
        descending into subdirectories."""
        for filename in filenames:
            pathname = os.path.join(dirname, filename)
            if os.path.isdir(pathname):
                self.handledir(pathname)
            else:
                self.handlefile(pathname)

    def handledir(self, dirname):
        """Process every entry of ``dirname`` unless it is a version
        control metadata directory."""
        # normpath() drops a trailing separator so that "x/.svn/" is
        # recognised just like "x/.svn".
        name = os.path.basename(os.path.normpath(dirname))
        if name in ("CVS", ".svn", "_darcs"):
            return
        entries = os.listdir(dirname)
        self.handlefiles(dirname, entries)
def main():
    """Command line entry point: summarize every file or directory named on
    the command line.

    A ``--csv`` argument selects comma separated output; it is removed from
    ``sys.argv`` before the remaining arguments are treated as paths.
    """
    # TODO: make this handle command line options using optparse...
    CSVstyle = "--csv" in sys.argv
    if CSVstyle:
        sys.argv.remove("--csv")
    summarizer(sys.argv[1:], CSVstyle)


if __name__ == '__main__':
    main()