# -*- coding: utf-8 -*-
#
# Copyright 2002-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""script that converts a .po file to a UTF-8 encoded .dtd file as used by mozilla
either done using a template or just using the .po file"""

import warnings

from translate.misc import quote
from translate.storage import dtd
from translate.storage import po

# labelsuffixes and accesskeysuffixes are combined to accelerator notation
labelsuffixes = (".label", ".title")
accesskeysuffixes = (".accesskey", ".accessKey", ".akey")

def getlabel(unquotedstr):
    """retrieve the label from a mixed label+accesskey entity

    The first ``&`` that does not introduce an ``&name;`` style reference is
    treated as the access key marker and is removed; everything else is kept.

    :param unquotedstr: the mixed string, either text or UTF-8 encoded bytes
    :return: the label as a UTF-8 encoded byte string (the input re-encoded
        unchanged when it holds no access key marker)
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this keeps the original
    # behaviour there while not calling .decode() on Python 3 text
    if isinstance(unquotedstr, bytes):
        unquotedstr = unquotedstr.decode("UTF-8")
    # mixed labels just need the & taken out
    # except that &entity; needs to be avoided...
    amppos = 0
    while amppos >= 0:
        amppos = unquotedstr.find("&", amppos)
        if amppos != -1:
            # step past the ampersand so the next search starts after it
            amppos += 1
            semipos = unquotedstr.find(";", amppos)
            if semipos != -1:
                if unquotedstr[amppos:semipos].isalnum():
                    # &name; is an entity reference, keep looking
                    continue
            # otherwise, cut it out... only the first one need be changed
            # (see below to see how the accesskey is done)
            unquotedstr = unquotedstr[:amppos-1] + unquotedstr[amppos:]
            break
    return unquotedstr.encode("UTF-8")

def getaccesskey(unquotedstr):
    """retrieve the access key from a mixed label+accesskey entity

    The access key is the single character following the first ``&`` that
    does not introduce an ``&name;`` style reference.

    :param unquotedstr: the mixed string, either text or UTF-8 encoded bytes
    :return: the access key as a UTF-8 encoded byte string, or an empty
        byte string when no access key marker is found
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this keeps the original
    # behaviour there while not calling .decode() on Python 3 text
    if isinstance(unquotedstr, bytes):
        unquotedstr = unquotedstr.decode("UTF-8")
    # mixed access keys need the key extracted from after the &
    # but we must avoid proper entities i.e. &gt; etc...
    amppos = 0
    while amppos >= 0:
        amppos = unquotedstr.find("&", amppos)
        if amppos != -1:
            # step past the ampersand so amppos points at the candidate key
            amppos += 1
            semipos = unquotedstr.find(";", amppos)
            if semipos != -1:
                if unquotedstr[amppos:semipos].isalnum():
                    # what we have found is an entity, not a shortcut key...
                    continue
            if amppos >= len(unquotedstr):
                # a trailing & has no character after it to act as the key
                break
            # otherwise, we found the shortcut key
            return unquotedstr[amppos].encode("UTF-8")
    # if we didn't find the shortcut key, return an empty string rather than the original string
    # this will come out as "don't have a translation for this" because the string is not changed...
    # so the string from the original dtd will be used instead
    return b""

def removeinvalidamps(entity, unquotedstr):
    """find ampersands that aren't part of an entity definition...

    An ampersand is kept when it starts ``&name;`` (alphanumerics, dots
    allowed) or ``&#digits;``; every other ampersand is removed and a single
    warning naming the entity is issued.

    :param entity: name of the dtd entity, only used in the warning message
    :param unquotedstr: the string to clean up
    :return: the string with the invalid ampersands removed
    """
    amppos = 0
    invalidamps = []
    while amppos >= 0:
        amppos = unquotedstr.find("&", amppos)
        if amppos != -1:
            # step past the ampersand so the next search starts after it
            amppos += 1
            semipos = unquotedstr.find(";", amppos)
            if semipos != -1:
                checkentity = unquotedstr[amppos:semipos]
                if checkentity.replace('.', '').isalnum():
                    # what we have found is an entity, not a problem...
                    continue
                # startswith() rather than [0] so that "&;" cannot raise IndexError
                elif checkentity.startswith('#') and checkentity[1:].isalnum():
                    # what we have found is an entity, not a problem...
                    continue
            # otherwise, we found a problem
            invalidamps.append(amppos-1)
    if invalidamps:
        warnings.warn("invalid ampersands in dtd entity %s" % (entity))
        # positions were recorded against the untouched string, so shift each
        # one left by the number of characters already removed
        for removed, amppos in enumerate(invalidamps):
            cut = amppos - removed
            unquotedstr = unquotedstr[:cut] + unquotedstr[cut+1:]
    return unquotedstr

def getmixedentities(entities):
    """returns a list of mixed .label and .accesskey entities from a list of entities

    An entity is "mixed" when it ends in one of ``labelsuffixes`` and the same
    base name with one of ``accesskeysuffixes`` is also present in *entities*.

    :param entities: the entity names to inspect
    :return: a list holding, for every match, the label entity followed by
        its access key entity (empty when there are none)
    """
    mixedentities = []  # those entities which have a .label and .accesskey combined
    # search for mixed entities...
    for entity in entities:
        for labelsuffix in labelsuffixes:
            if entity.endswith(labelsuffix):
                entitybase = entity[:entity.rfind(labelsuffix)]
                # see if there is a matching accesskey, making this a mixed entity
                for akeytype in accesskeysuffixes:
                    if entitybase + akeytype in entities:
                        # add both versions to the list of mixed entities
                        mixedentities += [entity, entitybase+akeytype]
    return mixedentities

def applytranslation(entity, dtdunit, inputunit, mixedentities):
    """applies the translation for entity in the po unit to the dtd unit

    :param entity: name of the dtd entity being translated
    :param dtdunit: the dtd unit whose ``definition`` is updated in place
    :param inputunit: the po unit; its ``target`` supplies the translation
    :param mixedentities: entities whose label and access key share one po
        string (see ``getmixedentities``)

    Nothing is changed when the translation is blank, or when it ends up
    empty after processing (e.g. no access key could be extracted).
    """
    # this converts the po-style string to a dtd-style string
    unquotedstr = inputunit.target
    # check there aren't missing entities...
    if len(unquotedstr.strip()) == 0:
        return
    # handle mixed entities
    # NOTE(review): the branch structure below was reconstructed from an
    # incomplete listing - confirm against the upstream file
    ismixed = entity in mixedentities
    if ismixed and entity.endswith(labelsuffixes):
        unquotedstr = getlabel(unquotedstr)
    elif ismixed and entity.endswith(accesskeysuffixes):
        unquotedstr = getaccesskey(unquotedstr)
        if not unquotedstr:
            warnings.warn("Could not find accesskey for %s" % entity)
        else:
            # follow the case of the access key used in the template
            original = dtd.unquotefromdtd(dtdunit.definition)
            if original.isupper() and unquotedstr.islower():
                unquotedstr = unquotedstr.upper()
            elif original.islower() and unquotedstr.isupper():
                unquotedstr = unquotedstr.lower()
    # handle invalid left-over ampersands (usually unneeded access key shortcuts)
    unquotedstr = removeinvalidamps(entity, unquotedstr)
    # finally set the new definition in the dtd, but not if its empty
    if len(unquotedstr) > 0:
        dtdunit.definition = dtd.quotefordtd(unquotedstr)

class redtd:
    """this is a convertor class that creates a new dtd based on a template using translations in a po"""
    def __init__(self, dtdfile):
        """remember the template dtd store that the translations are applied to"""
        self.dtdfile = dtdfile

    def convertstore(self, inputstore, includefuzzy=False):
        """apply every usable unit of inputstore to the template

        :param inputstore: the po store; its ``units`` are iterated
        :param includefuzzy: when true, fuzzy units are applied as well
        :return: the template dtd store, updated in place
        """
        # translate the strings
        for inunit in inputstore.units:
            # there may be more than one entity due to msguniq merge
            if includefuzzy or not inunit.isfuzzy():
                self.handleinunit(inunit)
        return self.dtdfile

    def handleinunit(self, inunit):
        """apply one po unit to every template entity listed in its locations"""
        entities = inunit.getlocations()
        mixedentities = getmixedentities(entities)
        for entity in entities:
            # ``in`` instead of has_key(), which no longer exists on Python 3 dicts
            if entity in self.dtdfile.index:
                # now we need to replace the definition of entity with msgstr
                dtdunit = self.dtdfile.index[entity]  # find the dtd
                applytranslation(entity, dtdunit, inunit, mixedentities)

# NOTE(review): the class statement was missing from the listing; the name
# po2dtd is reconstructed - confirm against the upstream file
class po2dtd:
    """this is a convertor class that creates a new dtd file based on a po file without a template"""
    def convertcomments(self, inputunit, dtdunit):
        """fill in the entity name and the comments of dtdunit from inputunit

        The first location of the po unit becomes the entity name; translator
        notes become comments and msgid comments become localization notes.
        """
        entities = inputunit.getlocations()
        if len(entities) > 1:
            # don't yet handle multiple entities
            dtdunit.comments.append(("conversionnote",'<!-- CONVERSION NOTE - multiple entities -->\n'))
            dtdunit.entity = entities[0]
        elif len(entities) == 1:
            dtdunit.entity = entities[0]
        else:
            # this produces a blank entity, which doesn't write anything out
            dtdunit.entity = ""

        if inputunit.isfuzzy():
            dtdunit.comments.append(("potype", "fuzzy\n"))
        for note in inputunit.getnotes("translator").split("\n"):
            if not note:
                # blank lines carry no comment text
                continue
            note = quote.unstripcomment(note)
            if (note.find('LOCALIZATION NOTE') == -1) or (note.find('GROUP') == -1):
                dtdunit.comments.append(("comment", note))
        # msgidcomments are special - they're actually localization notes
        msgidcomment = inputunit._extract_msgidcomments()
        if msgidcomment:
            locnote = quote.unstripcomment("LOCALIZATION NOTE ("+dtdunit.entity+"): "+msgidcomment)
            dtdunit.comments.append(("locnote", locnote))

    def convertstrings(self, inputunit, dtdunit):
        """set the dtd definition from the translation, or from the source when untranslated"""
        if inputunit.istranslated():
            unquoted = inputunit.target
        else:
            unquoted = inputunit.source
        unquoted = removeinvalidamps(dtdunit.entity, unquoted)
        dtdunit.definition = dtd.quotefordtd(unquoted)

    def convertunit(self, inputunit):
        """build and return a new dtd unit from one po unit"""
        dtdunit = dtd.dtdunit()
        self.convertcomments(inputunit, dtdunit)
        self.convertstrings(inputunit, dtdunit)
        return dtdunit

    def convertstore(self, inputstore, includefuzzy=False):
        """convert a whole po store into a new dtd store

        :param inputstore: the po store; its ``units`` are iterated
        :param includefuzzy: when true, fuzzy units are converted as well
        :return: the newly created dtd store
        """
        outputstore = dtd.dtdfile()
        self.currentgroups = []
        for inputunit in inputstore.units:
            if includefuzzy or not inputunit.isfuzzy():
                dtdunit = self.convertunit(inputunit)
                if dtdunit is not None:
                    outputstore.addunit(dtdunit)
        return outputstore

def convertdtd(inputfile, outputfile, templatefile, includefuzzy=False):
    """read a po file and write the converted dtd to outputfile

    :param inputfile: the po input, passed to ``po.pofile``
    :param outputfile: file-like object the resulting dtd text is written to
    :param templatefile: the dtd template, or None to convert without one
    :param includefuzzy: when true, fuzzy translations are used as well
    :return: 1 after the output has been written
    """
    inputstore = po.pofile(inputfile)
    if templatefile is None:
        # NOTE(review): the template-less converter's class name (po2dtd) is
        # reconstructed from an incomplete listing - confirm
        convertor = po2dtd()
    else:
        templatestore = dtd.dtdfile(templatefile)
        convertor = redtd(templatestore)
    outputstore = convertor.convertstore(inputstore, includefuzzy)
    outputfile.write(str(outputstore))
    # NOTE(review): the return value is reconstructed; presumably the convert
    # framework treats a true result as success - confirm
    return 1

# NOTE(review): the enclosing def, the parser invocation and the body of the
# __main__ guard were missing from the listing and are reconstructed - confirm
def main(argv=None):
    """run the converter from the command line

    :param argv: optional argument list handed to the option parser; when
        None the parser is run without arguments
    """
    # handle command line options
    from translate.convert import convert
    formats = {"po": ("dtd", convertdtd), ("po", "dtd"): ("dtd", convertdtd)}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_fuzzy_option()
    if argv is None:
        parser.run()
    else:
        parser.run(argv)


if __name__ == '__main__':
    main()