3 """pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, GNU) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs. Martin
von Loewis' work[1] helps considerably in this regard.
There's one hole, though: xgettext, the program that scans source code
looking for message strings, groks only C (or C++). Python introduces
a few wrinkles, such as dual quoting characters, triple quoted strings, and
raw strings. xgettext understands none of this.
15 Enter pygettext, which uses Python's standard tokenize module to scan Python
16 source code, generating .pot files identical to what GNU xgettext[2] generates
17 for C and C++ code. From there, the standard GNU tools can be used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
gettext_noop. But those names are a lot of text to repeat all over your code.
C and C++ have a trick: they use the C preprocessor. Most internationalized C
source includes a #define that aliases gettext() to _() so that what has to be
written in the source is much shorter. Thus these are both translatable strings:
26 gettext("Translatable String")
27 _("Translatable String")
29 Python of course has no preprocessor so this doesn't work so well. Thus,
30 pygettext searches only for _() by default, but see the -k/--keyword flag
31 below for how to augment this.
33 [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
34 [2] http://www.gnu.org/software/gettext/gettext.html
37 NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
40 Usage: pygettext [options] filename ...
49 --default-domain=default-domain
50 Rename the default output file from messages.pot to default-domain.pot
Additional keywords to look for. Giving the option without `word' disables
the default keywords. The default keywords, which are always looked for
unless explicitly disabled, are: _
The default keyword list differs from that of GNU xgettext. Multiple -k
flags may be given on the command line.
62 Do not write filename/lineno location comments
65 --add-location[=style]
66 Write filename/lineno location comments indicating where each
67 extracted string is found in the source. These lines appear before
68 each msgid. Two styles are supported:
70 Solaris # File: filename, line: line-number
If style is omitted, GNU is used. The style name is case-insensitive.
By default, locations are included.
78 print this help message and exit
93 def usage(code
, msg
=''):
94 print __doc__
% globals()
102 # This converts the various Python string types into a format that is
103 # appropriate for .po files, namely much closer to C style.
105 # unwrap quotes, safely
106 s
= eval(s
, {'__builtins__':{}}, {})
107 # now escape any embedded double quotes
110 i
= string
.find(s
, '"')
112 # find the number of preceding backslashes
115 while j
>= 0 and s
[i
] == '\\':
119 parts
.append(s
[last
:j
])
123 parts
.append(s
[last
:i
])
125 i
= string
.find(s
, '"', i
+1)
127 parts
.append(s
[last
:])
129 return '"' + string
.join(parts
, '') + '"'
136 def __init__(self
, options
):
137 self
.__options
= options
139 self
.__state
= self
.__waiting
def __call__(self, ttype, tstring, stup, etup, line):
    """Feed one token to the handler for the current scanner state.

    The instance is handed to ``tokenize.tokenize()`` as the token
    callback, so this signature takes the five values that callback
    receives.  Only the token type, the token text and the first element
    of ``stup`` (which the state handlers receive as ``lineno``) are
    forwarded; ``etup`` and ``line`` are accepted but not used.
    """
    # self.__state always holds the bound handler for the current state.
    self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
    """State handler: wait for a NAME token that is a configured keyword.

    When the token is a ``tokenize.NAME`` whose text appears in
    ``self.__options.keywords``, the scanner switches to the
    ``__keywordseen`` handler.  Any other token leaves the state
    unchanged.  ``lineno`` is part of the common handler signature and is
    not used here.
    """
    if ttype == tokenize.NAME and tstring in self.__options.keywords:
        self.__state = self.__keywordseen
151 def __keywordseen(self
, ttype
, tstring
, lineno
):
152 if ttype
== tokenize
.OP
and tstring
== '(':
154 self
.__lineno
= lineno
155 self
.__state
= self
.__openseen
157 self
.__state
= self
.__waiting
159 def __openseen(self
, ttype
, tstring
, lineno
):
160 if ttype
== tokenize
.OP
and tstring
== ')':
161 # We've seen the last of the translatable strings. Record the
162 # line number of the first line of the strings and update the list
163 # of messages seen. Reset state for the next batch. If there
164 # were no strings inside _(), then just ignore this entry.
166 msg
= string
.join(self
.__data
, '')
167 entry
= (self
.__curfile
, self
.__lineno
)
168 linenos
= self
.__messages
.get(msg
)
170 self
.__messages
[msg
] = [entry
]
172 linenos
.append(entry
)
173 self
.__state
= self
.__waiting
174 elif ttype
== tokenize
.STRING
:
175 self
.__data
.append(normalize(tstring
))
# TBD: should we warn if we see anything else?
def set_filename(self, filename):
    """Remember *filename* as the file whose tokens are being scanned.

    The stored name is paired with a line number whenever a message
    entry is recorded, so it must be set before that file is tokenized.
    """
    self.__curfile = filename
182 options
= self
.__options
183 timestamp
= time
.ctime(time
.time())
187 print '# POT file generated by pygettext.py', __version__
190 for k
, v
in self
.__messages
.items():
191 for filename
, lineno
in v
:
# location comments differ between the Solaris and GNU styles
193 if options
.location
== options
.SOLARIS
:
194 print '# File: %s,' % filename
, 'line: %d' % lineno
195 elif options
.location
== options
.GNU
:
196 print '#: %s:%d' % (filename
, lineno
)
197 # TBD: sorting, normalizing
202 sys
.stdout
= sys
.__stdout
__
206 default_keywords
= ['_']
208 opts
, args
= getopt
.getopt(
211 ['keyword', 'default-domain', 'help',
212 'add-location=', 'no-location'])
213 except getopt
.error
, msg
:
216 # for holding option values
223 outfile
= 'messages.pot'
227 locations
= {'gnu' : options
.GNU
,
228 'solaris' : options
.SOLARIS
,
232 for opt
, arg
in opts
:
233 if opt
in ('-h', '--help'):
235 elif opt
in ('-k', '--keyword'):
237 default_keywords
= []
238 options
.keywords
.append(arg
)
239 elif opt
in ('-d', '--default-domain'):
240 options
.outfile
= arg
+ '.pot'
241 elif opt
in ('-n', '--add-location'):
245 options
.location
= locations
[string
.lower(arg
)]
247 usage(1, 'Invalid value for --add-location: ' + arg
)
248 elif opt
in ('--no-location',):
251 # calculate all keywords
252 options
.keywords
.extend(default_keywords
)
254 # slurp through all the files
255 eater
= TokenEater(options
)
256 for filename
in args
:
258 eater
.set_filename(filename
)
259 tokenize
.tokenize(fp
.readline
, eater
)
262 fp
= open(options
.outfile
, 'w')
268 if __name__
== '__main__':