fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / convert / po2dtd.py
blobf2677adaf5df0d3aaeb0fab65958bc41875770e9
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2002-2006 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """script that converts a .po file to a UTF-8 encoded .dtd file as used by mozilla
23 either done using a template or just using the .po file"""
25 from translate.storage import dtd
26 from translate.storage import po
27 from translate.misc import quote
28 import warnings
# An entity whose name ends in one of labelsuffixes and an entity whose name
# ends in one of accesskeysuffixes are combined to accelerator notation.
labelsuffixes = (
    ".label",
    ".title",
)
accesskeysuffixes = (
    ".accesskey",
    ".accessKey",
    ".akey",
)
def getlabel(unquotedstr):
    """retrieve the label from a mixed label+accesskey entity

    The label is the input with the first accesskey marker removed. An
    accesskey marker is an ``&`` that does not start an entity reference.
    Named references (``&amp;``, ``&brand.name;``) and numeric references
    (``&#169;``) are skipped and left untouched, using the same test that
    removeinvalidamps applies.

    unquotedstr may be a byte string (decoded as UTF-8) or already decoded
    text.  The result is UTF-8 encoded.  When there is no marker the text is
    returned unchanged (but still encoded).
    """
    # type(u"") is the text type on every interpreter, so anything else is
    # treated as encoded bytes and decoded before searching
    if not isinstance(unquotedstr, type(u"")):
        unquotedstr = unquotedstr.decode("UTF-8")
    amppos = unquotedstr.find("&")
    while amppos != -1:
        namestart = amppos + 1
        semipos = unquotedstr.find(";", namestart)
        if semipos != -1:
            name = unquotedstr[namestart:semipos]
            isnamed = name.replace(".", "").isalnum()
            isnumeric = name[:1] == "#" and name[1:].isalnum()
            if isnamed or isnumeric:
                # this & opens an entity reference, keep looking after it
                amppos = unquotedstr.find("&", namestart)
                continue
        # otherwise, cut it out... only the first marker is removed, since
        # only the first one supplies the accesskey
        unquotedstr = unquotedstr[:amppos] + unquotedstr[namestart:]
        break
    return unquotedstr.encode("UTF-8")
def getaccesskey(unquotedstr):
    """retrieve the access key from a mixed label+accesskey entity

    The access key is the single character that follows the first ``&`` that
    does not start an entity reference.  Named references (``&amp;``,
    ``&brand.name;``) and numeric references (``&#169;``) are skipped, using
    the same test that removeinvalidamps applies.

    unquotedstr may be a byte string (decoded as UTF-8) or already decoded
    text.  The key is returned UTF-8 encoded.  An empty string is returned
    when no access key is found, including when the only candidate ``&`` is
    the last character of the text.
    """
    # type(u"") is the text type on every interpreter, so anything else is
    # treated as encoded bytes and decoded before searching
    if not isinstance(unquotedstr, type(u"")):
        unquotedstr = unquotedstr.decode("UTF-8")
    amppos = unquotedstr.find("&")
    while amppos != -1:
        keypos = amppos + 1
        if keypos >= len(unquotedstr):
            # a trailing & has no character after it to act as the key
            break
        semipos = unquotedstr.find(";", keypos)
        if semipos != -1:
            name = unquotedstr[keypos:semipos]
            isnamed = name.replace(".", "").isalnum()
            isnumeric = name[:1] == "#" and name[1:].isalnum()
            if isnamed or isnumeric:
                # what we have found is an entity, not a shortcut key...
                amppos = unquotedstr.find("&", keypos)
                continue
        # otherwise, we found the shortcut key
        return unquotedstr[keypos].encode("UTF-8")
    # if we didn't find the shortcut key, return an empty string rather than
    # the original string, so the caller can tell that nothing was found
    return ""
def removeinvalidamps(entity, unquotedstr):
    """find ampersands that aren't part of an entity definition...

    Every ``&`` in unquotedstr must open a reference that is closed by a
    ``;``.  The name in between has to be alphanumeric once any dots are
    ignored, or be ``#`` followed by alphanumeric characters.  Any other
    ``&`` is removed.

    entity is only used to name the offending entity in the warning that is
    issued when at least one ampersand is removed.  The cleaned string is
    returned; it is the input itself when nothing had to be removed.
    """
    invalidamps = []
    amppos = unquotedstr.find("&")
    while amppos != -1:
        namestart = amppos + 1
        semipos = unquotedstr.find(";", namestart)
        isentity = False
        if semipos != -1:
            checkentity = unquotedstr[namestart:semipos]
            # startswith() instead of [0] so that an empty name ("&;") is
            # simply reported as invalid rather than raising IndexError
            isentity = (checkentity.replace('.', '').isalnum()
                        or (checkentity.startswith('#') and checkentity[1:].isalnum()))
        if not isentity:
            # we found a problem
            invalidamps.append(amppos)
        amppos = unquotedstr.find("&", namestart)
    if invalidamps:
        warnings.warn("invalid ampersands in dtd entity %s" % (entity))
        # rebuild the string from the pieces between the bad ampersands
        pieces = []
        start = 0
        for badpos in invalidamps:
            pieces.append(unquotedstr[start:badpos])
            start = badpos + 1
        pieces.append(unquotedstr[start:])
        unquotedstr = "".join(pieces)
    return unquotedstr
def getmixedentities(entities):
    """returns a list of mixed .label and .accesskey entities from a list of entities

    An entity is mixed when its name ends in one of labelsuffixes and the
    same base name with one of accesskeysuffixes is also present in entities.
    Both names of each such pair are appended to the result.
    """
    mixedentities = []
    for entity in entities:
        for labelsuffix in labelsuffixes:
            if not entity.endswith(labelsuffix):
                continue
            # the name without its label suffix
            entitybase = entity[:len(entity) - len(labelsuffix)]
            # see if there is a matching accesskey, making this a mixed entity
            for akeytype in accesskeysuffixes:
                akeyentity = entitybase + akeytype
                if akeyentity in entities:
                    # add both versions to the list of mixed entities
                    mixedentities.extend([entity, akeyentity])
    return mixedentities
def applytranslation(entity, dtdunit, inputunit, mixedentities):
    """applies the translation for entity in the po unit to the dtd unit

    Nothing happens when the target of inputunit is empty or only
    whitespace.  Otherwise the target is taken as the new text and:

    - if entity ends in a label suffix and is listed in mixedentities, the
      text is reduced to its label with getlabel;
    - else, if entity ends in an accesskey suffix and is listed in
      mixedentities, the text is reduced to its access key with
      getaccesskey.  A warning is issued when no key is found; otherwise the
      case of the key is switched to match the existing dtd definition when
      that definition is all upper case or all lower case and the key is the
      opposite.

    Invalid ampersands are then removed, and the definition of dtdunit is
    replaced unless the resulting text is empty.
    """
    # this converts the po-style string to a dtd-style string
    unquotedstr = inputunit.target
    # check there aren't missing entities...
    if not unquotedstr.strip():
        return

    # handle mixed entities: labels first, access keys only if no label matched
    handledaslabel = False
    for labelsuffix in labelsuffixes:
        if entity.endswith(labelsuffix) and entity in mixedentities:
            unquotedstr = getlabel(unquotedstr)
            handledaslabel = True
            break
    if not handledaslabel:
        for akeytype in accesskeysuffixes:
            if not (entity.endswith(akeytype) and entity in mixedentities):
                continue
            unquotedstr = getaccesskey(unquotedstr)
            if not unquotedstr:
                warnings.warn("Could not find accesskey for %s" % entity)
                continue
            # follow the case of the access key already in the dtd
            original = dtd.unquotefromdtd(dtdunit.definition)
            if original.isupper() and unquotedstr.islower():
                unquotedstr = unquotedstr.upper()
            elif original.islower() and unquotedstr.isupper():
                unquotedstr = unquotedstr.lower()

    # handle invalid left-over ampersands (usually unneeded access key shortcuts)
    unquotedstr = removeinvalidamps(entity, unquotedstr)
    # finally set the new definition in the dtd, but not if its empty
    if unquotedstr:
        dtdunit.definition = dtd.quotefordtd(unquotedstr)
class redtd:
    """this is a convertor class that creates a new dtd based on a template using translations in a po"""

    def __init__(self, dtdfile):
        """remember the template dtd store whose units will be updated in place"""
        self.dtdfile = dtdfile

    def convertstore(self, inputstore, includefuzzy=False):
        """apply every usable unit of inputstore to the template and return the template

        Fuzzy units are skipped unless includefuzzy is true.
        """
        # translate the strings
        for inunit in inputstore.units:
            # there may be more than one entity due to msguniq merge
            if includefuzzy or not inunit.isfuzzy():
                self.handleinunit(inunit)
        return self.dtdfile

    def handleinunit(self, inunit):
        """apply the translation of inunit to each of its locations found in the template index"""
        entities = inunit.getlocations()
        mixedentities = getmixedentities(entities)
        for entity in entities:
            # "in" replaces the deprecated has_key(), which no longer exists
            # on dictionaries in Python 3
            if entity in self.dtdfile.index:
                # now we need to replace the definition of entity with msgstr
                dtdunit = self.dtdfile.index[entity] # find the dtd
                applytranslation(entity, dtdunit, inunit, mixedentities)
class po2dtd:
    """this is a convertor class that creates a new dtd file based on a po file without a template"""

    def convertcomments(self, inputunit, dtdunit):
        """fill in the entity name and the comments of dtdunit from inputunit

        The entity is the first location of inputunit, or an empty string
        when there are no locations.  Comments are appended for: more than
        one location, a fuzzy unit, each non-empty translator note line, and
        the msgid comment (stored as a localization note).
        """
        locations = inputunit.getlocations()
        if len(locations) > 1:
            # don't yet handle multiple entities
            dtdunit.comments.append(("conversionnote",'<!-- CONVERSION NOTE - multiple entities -->\n'))
        if locations:
            dtdunit.entity = locations[0]
        else:
            # this produces a blank entity, which doesn't write anything out
            dtdunit.entity = ""

        if inputunit.isfuzzy():
            dtdunit.comments.append(("potype", "fuzzy\n"))

        for rawnote in inputunit.getnotes("translator").split("\n"):
            if rawnote:
                note = quote.unstripcomment(rawnote)
                # only a note containing both markers is left out
                if 'LOCALIZATION NOTE' not in note or 'GROUP' not in note:
                    dtdunit.comments.append(("comment", note))

        # msgidcomments are special - they're actually localization notes
        msgidcomment = inputunit._extract_msgidcomments()
        if msgidcomment:
            notetext = "LOCALIZATION NOTE ("+dtdunit.entity+"): "+msgidcomment
            dtdunit.comments.append(("locnote", quote.unstripcomment(notetext)))

    def convertstrings(self, inputunit, dtdunit):
        """set the definition of dtdunit from the target, or from the source if untranslated"""
        unquoted = inputunit.source
        if inputunit.istranslated():
            unquoted = inputunit.target
        cleaned = removeinvalidamps(dtdunit.entity, unquoted)
        dtdunit.definition = dtd.quotefordtd(cleaned)

    def convertunit(self, inputunit):
        """build and return a new dtd unit holding the comments and string of inputunit"""
        newunit = dtd.dtdunit()
        self.convertcomments(inputunit, newunit)
        self.convertstrings(inputunit, newunit)
        return newunit

    def convertstore(self, inputstore, includefuzzy=False):
        """convert every usable unit of inputstore and return them in a new dtd store

        Fuzzy units are skipped unless includefuzzy is true.
        """
        outputstore = dtd.dtdfile()
        self.currentgroups = []
        for inputunit in inputstore.units:
            if not includefuzzy and inputunit.isfuzzy():
                continue
            converted = self.convertunit(inputunit)
            if converted is not None:
                outputstore.addunit(converted)
        return outputstore
def convertdtd(inputfile, outputfile, templatefile, includefuzzy=False):
    """read a po file from inputfile and write the resulting dtd to outputfile

    When templatefile is None the dtd is built from the po file alone with
    po2dtd; otherwise the template is loaded and updated with redtd.
    includefuzzy is passed on to the convertor.  Always returns 1.
    """
    inputstore = po.pofile(inputfile)
    if templatefile is not None:
        convertor = redtd(dtd.dtdfile(templatefile))
    else:
        convertor = po2dtd()
    converted = convertor.convertstore(inputstore, includefuzzy)
    outputfile.write(str(converted))
    return 1
def main(argv=None):
    """run the converter as a command line tool, passing argv to the option parser"""
    # handle command line options
    from translate.convert import convert
    # both format keys, "po" and ("po", "dtd"), lead to the same output
    dtdoutput = ("dtd", convertdtd)
    formats = {
        "po": dtdoutput,
        ("po", "dtd"): dtdoutput,
    }
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_fuzzy_option()
    parser.run(argv)


if __name__ == '__main__':
    main()