# -*- coding: utf-8 -*-
#
# Copyright 2002-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""script to convert a mozilla .dtd UTF-8 localization format to a
gettext .po localization file using the po and dtd modules, and the
dtd2po convertor class which is in this module
You can convert back to .dtd using po2dtd.py"""
from translate.storage import po
from translate.storage import dtd
from translate.misc import quote
class dtd2po:
    """Converts Mozilla .dtd units and stores into gettext .po units and stores.

    A label entity (``.label``/``.title``) and its matching access-key entity
    (``.accesskey``/``.accessKey``/``.akey``) are merged into one po unit whose
    source marks the access key with ``&``.

    NOTE(review): the reviewed text of this class had lost its ``class``
    statement and a number of single-line statements (loop bodies, early
    returns, ``else`` branches).  They are restored here from context; the
    less obvious ones carry their own NOTE(review) marker - please confirm
    them against version control.
    """

    # labelsuffixes and accesskeysuffixes are combined to accelerator notation
    labelsuffixes = (".label", ".title")
    accesskeysuffixes = (".accesskey", ".accessKey", ".akey")

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Sets up the convertor.

        blankmsgstr: when true, mergestore() leaves every target empty.
        duplicatestyle: handed to the po store's removeduplicates().
        """
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        # filled in by findmixedentities(); initialised here so that
        # convertdtdunit() can be called on a fresh convertor
        self.mixedentities = {}

    def convertcomments(self, thedtd, thepo):
        """Copies the location and the comments of thedtd onto thepo as notes."""
        entity = quote.rstripeol(thedtd.entity)
        # NOTE(review): guard restored - convertunit() blanks the entity of
        # DONT_TRANSLATE units and then relies on such units having no location
        if len(entity) > 0:
            thepo.addlocation(thedtd.entity)
        for commenttype, comment in thedtd.comments:
            # handle groups: remember the open group until its end marker
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # handle automatic comment
            if commenttype == "automaticcomment":
                thepo.addnote(comment, origin="developer")
            # handle normal comments
            else:
                thepo.addnote(quote.stripcomment(comment), origin="developer")
        # handle group stuff
        if self.currentgroup is not None:
            thepo.addnote(quote.stripcomment(self.currentgroup), origin="translator")
        if entity.endswith(".height") or entity.endswith(".width") or entity.endswith(".size"):
            thepo.addnote("Do not translate this. Only change the numeric values if you need this dialogue box to appear bigger", origin="developer")

    def convertstrings(self, thedtd, thepo):
        """Sets thepo.source from the unquoted definition of thedtd and blanks the target."""
        # extract the string, get rid of quoting
        unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
        # now split the string into lines and drop blank leading/trailing ones
        lines = unquoted.split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            # every line but the last keeps a single trailing space so that the
            # words of consecutive lines stay separated
            # NOTE(review): the final list term was restored from context
            thepo.source = "\n".join([lines[0].rstrip() + ' '] +
                                     [line.strip() + ' ' for line in lines[1:-1]] +
                                     [lines[-1].lstrip()])
        elif lines:
            thepo.source = lines[0]
        else:
            thepo.source = ""
        # NOTE(review): restored - a freshly converted unit has no translation
        thepo.target = ""

    def convertunit(self, thedtd):
        """converts a dtd unit to a po unit, returns None if empty or not for translation"""
        # convertmixedunit() passes None when one half of a pair is missing
        if thedtd is None:
            return None
        if getattr(thedtd, "entityparameter", None) == "SYSTEM":
            return None
        thepo = po.pounit(encoding="UTF-8")
        # remove unwanted stuff
        for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
            # only localization notes are inspected here
            if commenttype != 'locnote':
                continue
            # parse the locnote into the entity and the actual note
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            # parse the id
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            entity = locnote[idstart + 1:idend].strip()
            # parse the actual note
            actualnotestart = locnote.find(':', idend + 1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            # if it's for this entity, process it
            if thedtd.entity != entity:
                continue
            # if it says don't translate (and nothing more),
            if actualnote.startswith("DONT_TRANSLATE"):
                # take out the entity,definition and the DONT_TRANSLATE comment
                thedtd.entity = ""
                thedtd.definition = ""
                del thedtd.comments[commentnum]
                # the list was just modified, so stop iterating over it
                break
            else:
                # convert it into an automatic comment, to be processed by convertcomments
                thedtd.comments[commentnum] = ("automaticcomment", actualnote)
        # do a standard translation
        self.convertcomments(thedtd, thepo)
        self.convertstrings(thedtd, thepo)
        if thepo.isblank() and not thepo.getlocations():
            return None
        return thepo

    def _findaccesskeypos(self, label, accesskey):
        """Returns the index in label where the access-key marker belongs, or -1.

        Characters between '&' and ';' (entity references) are skipped.  The
        first occurrence equal to accesskey.upper() wins; failing that, the
        first occurrence equal to accesskey.lower() is used.
        """
        upperkey = accesskey.upper()
        lowerkey = accesskey.lower()
        searchpos = 0
        accesskeypos = -1
        inentity = False
        accesskeyaltcasepos = -1
        while (accesskeypos < 0) and searchpos < len(label):
            searchchar = label[searchpos]
            if searchchar == '&':
                inentity = True
            elif searchchar == ';':
                inentity = False
            elif not inentity:
                if searchchar == upperkey:
                    # always prefer uppercase
                    accesskeypos = searchpos
                if searchchar == lowerkey and accesskeyaltcasepos == -1:
                    # only remember the first lower-case position; keep on
                    # looping in the hope of an upper-case match
                    accesskeyaltcasepos = searchpos
            searchpos += 1
        # if we didn't find an upper-case match, use the alternate one if available
        if accesskeypos == -1:
            accesskeypos = accesskeyaltcasepos
        return accesskeypos

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Merges a label dtd unit and its access-key dtd unit into one po unit.

        Returns the other unit's plain conversion when one of the two converts
        to None, and None when the access key is empty or does not occur in
        the label.
        """
        labelpo = self.convertunit(labeldtd)
        accesskeypo = self.convertunit(accesskeydtd)
        if labelpo is None:
            return accesskeypo
        if accesskeypo is None:
            return labelpo
        thepo = po.pounit(encoding="UTF-8")
        thepo.addlocations(labelpo.getlocations())
        thepo.addlocations(accesskeypo.getlocations())
        thepo.msgidcomment = thepo._extract_msgidcomments() + labelpo._extract_msgidcomments()
        thepo.msgidcomment = thepo._extract_msgidcomments() + accesskeypo._extract_msgidcomments()
        thepo.addnote(labelpo.getnotes("developer"), "developer")
        thepo.addnote(accesskeypo.getnotes("developer"), "developer")
        thepo.addnote(labelpo.getnotes("translator"), "translator")
        thepo.addnote(accesskeypo.getnotes("translator"), "translator")
        # redo the strings from original dtd...
        label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
        accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
        if len(accesskey) == 0:
            return None
        # try and put the & in front of the accesskey in the label...
        accesskeypos = self._findaccesskeypos(label, accesskey)
        if accesskeypos < 0:
            # can't currently mix accesskey if it's not in label
            return None
        label = label[:accesskeypos] + '&' + label[accesskeypos:]
        thepo.source = label.encode("UTF-8", "replace")
        thepo.target = ""
        return thepo

    def findmixedentities(self, thedtdfile):
        """creates self.mixedentities from the dtd file..."""
        # those entities which have a .label/.title and .accesskey combined
        self.mixedentities = {}
        for entity in thedtdfile.index.keys():
            for labelsuffix in self.labelsuffixes:
                if entity.endswith(labelsuffix):
                    entitybase = entity[:entity.rfind(labelsuffix)]
                    # see if there is a matching accesskey, making this a
                    # mixed entity
                    for akeytype in self.accesskeysuffixes:
                        if entitybase + akeytype in thedtdfile.index:
                            # add both versions to the list of mixed entities
                            self.mixedentities[entity] = {}
                            self.mixedentities[entitybase + akeytype] = {}

    def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
        """converts a dtd unit from thedtdfile to a po unit, handling mixed entities along the way...

        mixbucket is the key under which the outcome of a mix attempt is
        recorded in self.mixedentities (True: merged, False: merge failed).
        Returns None for the second half of a pair that was already merged.
        """
        # keep track of whether accesskey and label were combined
        if thedtd.entity in self.mixedentities:
            # use special convertmixed unit which produces one pounit with
            # both combined for the label and None for the accesskey
            alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
            if alreadymixed:
                # we are successfully throwing this away...
                return None
            elif alreadymixed is None:
                # depending on what we come across first, work out the label and the accesskey
                labeldtd, accesskeydtd = None, None
                labelentity, accesskeyentity = None, None
                # case 1: we were handed the label - look for its accesskey
                for labelsuffix in self.labelsuffixes:
                    if thedtd.entity.endswith(labelsuffix):
                        entitybase = thedtd.entity[:thedtd.entity.rfind(labelsuffix)]
                        for akeytype in self.accesskeysuffixes:
                            if entitybase + akeytype in thedtdfile.index:
                                labelentity, labeldtd = thedtd.entity, thedtd
                                accesskeyentity = labelentity[:labelentity.rfind(labelsuffix)] + akeytype
                                accesskeydtd = thedtdfile.index[accesskeyentity]
                                break
                # case 2: we were handed the accesskey - look for its label
                for akeytype in self.accesskeysuffixes:
                    if thedtd.entity.endswith(akeytype):
                        accesskeyentity, accesskeydtd = thedtd.entity, thedtd
                        for labelsuffix in self.labelsuffixes:
                            labelentity = accesskeyentity[:accesskeyentity.rfind(akeytype)] + labelsuffix
                            if labelentity in thedtdfile.index:
                                labeldtd = thedtdfile.index[labelentity]
                                break
                        else:
                            # no label in this file: nothing will be recorded
                            labelentity = None
                            accesskeyentity = None
                thepo = self.convertmixedunit(labeldtd, accesskeydtd)
                if thepo is not None:
                    if accesskeyentity is not None:
                        self.mixedentities[accesskeyentity][mixbucket] = True
                    if labelentity is not None:
                        self.mixedentities[labelentity][mixbucket] = True
                    return thepo
                # otherwise the mix failed. add each one separately and remember they weren't mixed
                if accesskeyentity is not None:
                    self.mixedentities[accesskeyentity][mixbucket] = False
                if labelentity is not None:
                    self.mixedentities[labelentity][mixbucket] = False
        return self.convertunit(thedtd)

    def convertstore(self, thedtdfile):
        """Converts a whole dtd store into a new po store and returns it."""
        thetargetfile = po.pofile()
        targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")
        thetargetfile.addunit(targetheader)
        thedtdfile.makeindex()
        self.findmixedentities(thedtdfile)
        # go through the dtd and convert each unit
        for thedtd in thedtdfile.units:
            # NOTE(review): guard restored from context; assumes dtd units
            # provide isnull() - confirm against translate.storage.dtd
            if thedtd.isnull():
                continue
            thepo = self.convertdtdunit(thedtdfile, thedtd)
            if thepo is not None:
                thetargetfile.addunit(thepo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origdtdfile, translateddtdfile):
        """Builds a po store with sources from origdtdfile and targets from translateddtdfile.

        Targets are left empty when self.blankmsgstr is true or when the
        translated store has no unit for the entity.
        """
        thetargetfile = po.pofile()
        # NOTE(review): unlike convertstore() this header carries no
        # x_accelerator_marker - left unchanged, confirm whether intended
        targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit")
        targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")
        thetargetfile.addunit(targetheader)
        origdtdfile.makeindex()
        self.findmixedentities(origdtdfile)
        translateddtdfile.makeindex()
        # this second call replaces the table built from origdtdfile
        self.findmixedentities(translateddtdfile)
        # go through the dtd files and convert each unit
        for origdtd in origdtdfile.units:
            # NOTE(review): guard restored from context; assumes dtd units
            # provide isnull() - confirm against translate.storage.dtd
            if origdtd.isnull():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
            if origdtd.entity in self.mixedentities:
                mixedentitydict = self.mixedentities[origdtd.entity]
                if "orig" not in mixedentitydict:
                    # this means that the entity is mixed in the translation, but not the original - treat as unmixed
                    # NOTE(review): bucket value restored from context
                    mixbucket = "orig"
                    del self.mixedentities[origdtd.entity]
                elif mixedentitydict["orig"]:
                    # the original entity is already mixed successfully
                    mixbucket = "translate"
                else:
                    # NOTE(review): branch restored from context
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # this means its a mixed entity (with accesskey) that's already been dealt with)
                continue
            if origdtd.entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[origdtd.entity]
                translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
            else:
                translatedpo = None
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            thetargetfile.addunit(origpo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile
def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """reads in inputfile and templatefile using dtd, converts using dtd2po, writes to outputfile

    Without a template the input is converted on its own; with one, the
    template supplies the sources and the input supplies the translations.
    pot is passed to dtd2po as blankmsgstr.

    NOTE(review): the two return values were restored from context (falsy
    when the store is empty and nothing was written, truthy otherwise) -
    confirm against the convert framework.
    """
    inputstore = dtd.dtdfile(inputfile)
    convertor = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is None:
        outputstore = convertor.convertstore(inputstore)
    else:
        templatestore = dtd.dtdfile(templatefile)
        outputstore = convertor.mergestore(templatestore, inputstore)
    if outputstore.isempty():
        # nothing to write
        return 0
    outputfile.write(str(outputstore))
    return 1
def main(argv=None):
    """Command-line entry point: builds the option parser for dtd -> po and runs it.

    NOTE(review): the ``def`` line and the ``parser.run`` call were restored
    from context; confirm that ConvertOptionParser.run() accepts argv.
    """
    # imported here so that merely importing this module does not pull in the
    # command-line machinery
    from translate.convert import convert
    formats = {"dtd": ("po", convertdtd), ("dtd", "dtd"): ("po", convertdtd)}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, usepots=True, description=__doc__)
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)


if __name__ == '__main__':
    main()