fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / convert / pot2po.py
blobd3c979de4cdf6b04eda328a63ef79d036df605d4
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """convert Gettext PO templates (.pot) to PO localization files, preserving existing translations
24 See: http://translate.sourceforge.net/wiki/toolkit/pot2po for examples and
25 usage instructions
26 """
28 from translate.storage import po
29 from translate.storage import factory
30 from translate.search import match
31 from translate.misc.multistring import multistring
# Building the translation memory matcher is costly, so the single instance
# is kept at module level and shared by every call to memory().
tmmatcher = None


def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared TM matcher, creating it on the first call only.

    tmfiles is either one TM file or a list of them; each one is loaded with
    factory.getobject().  The loaded store(s) and the three limits are handed
    to match.matcher().  Once a matcher has been created it is returned
    unchanged, so the arguments of any later call are ignored.
    """
    global tmmatcher
    # Fast path: a matcher was already built by an earlier call.
    if tmmatcher is not None:
        return tmmatcher
    if isinstance(tmfiles, list):
        stores = [factory.getobject(tmfile) for tmfile in tmfiles]
    else:
        stores = factory.getobject(tmfiles)
    tmmatcher = match.matcher(
        stores,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length)
    return tmmatcher
def convertpot(inputpotfile, outputpofile, templatepofile, tm=None, min_similarity=75, fuzzymatching=True, **kwargs):
    """Convert the POT file inputpotfile and write the resulting PO to outputpofile.

    inputpotfile is parsed with po.pofile(); templatepofile is parsed the same
    way unless it is None.  Both stores, together with the remaining options,
    are passed to convertpot_stores() and the string form of the store it
    returns is written to outputpofile.  Always returns 1.
    """
    potstore = po.pofile(inputpotfile)
    if templatepofile is None:
        templatestore = None
    else:
        templatestore = po.pofile(templatepofile)
    merged = convertpot_stores(potstore, templatestore, tm, min_similarity, fuzzymatching, **kwargs)
    outputpofile.write(str(merged))
    return 1
def convertpot_stores(inputpot, templatepo, tm=None, min_similarity=75, fuzzymatching=True, **kwargs):
    """Build a new PO store from the POT store inputpot, reusing translations from templatepo.

    Arguments:
    - inputpot: the parsed POT store.  Its units are merged into in place and
      then added to the returned store.
    - templatepo: an existing PO store to take header values and translations
      from, or None.
    - tm: translation memory file(s), handed to memory() when fuzzymatching
      is enabled.
    - min_similarity: minimum similarity passed to both fuzzy matchers.
    - fuzzymatching: when false, no fuzzy matcher is created at all.
    - kwargs: accepted for interface compatibility only; the values are not
      used and are never forwarded to the header.

    Side effects on templatepo: obsolete units are resurrected when
    fuzzymatching is enabled, units used as a fuzzy match get a "reused"
    attribute, and translated units that are neither found in inputpot nor
    reused are made obsolete.

    Returns the new po.pofile: header first, then the units of inputpot,
    then the obsolete units of templatepo.
    """
    inputpot.makeindex()
    thetargetfile = po.pofile()
    # header values
    charset = "UTF-8"
    encoding = "8bit"
    project_id_version = None
    pot_creation_date = None
    po_revision_date = None
    last_translator = None
    language_team = None
    mime_version = None
    plural_forms = None
    # Extra header fields are collected in their own dict; the caller's
    # **kwargs are deliberately not passed on to makeheader().
    headerkwargs = {}
    fuzzyfilematcher = None
    if templatepo is not None:
        if fuzzymatching:
            # Resurrect obsolete units before the matcher is built
            # (presumably so that they can serve as fuzzy candidates).
            for unit in templatepo.units:
                if unit.isobsolete():
                    unit.resurrect()
            try:
                fuzzyfilematcher = match.matcher(templatepo, max_candidates=1, min_similarity=min_similarity, max_length=1000, usefuzzy=True)
                fuzzyfilematcher.addpercentage = False
            except ValueError:
                # Probably no usable units; carry on without a file matcher
                pass

        templatepo.makeindex()
        templateheadervalues = templatepo.parseheader()
        for key, value in templateheadervalues.iteritems():
            if key == "Project-Id-Version":
                project_id_version = value
            elif key == "Last-Translator":
                last_translator = value
            elif key == "Language-Team":
                language_team = value
            elif key == "PO-Revision-Date":
                po_revision_date = value
            elif key == "Content-Transfer-Encoding":
                encoding = value
            elif key == "Plural-Forms":
                plural_forms = value
            elif key in ("POT-Creation-Date", "MIME-Version"):
                # these two are taken from the input POT instead
                pass
            else:
                # Content-Type and every unrecognised field are passed through
                headerkwargs[key] = value

    fuzzyglobalmatcher = None
    if fuzzymatching and tm:
        fuzzyglobalmatcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        fuzzyglobalmatcher.addpercentage = False

    inputheadervalues = inputpot.parseheader()
    for key, value in inputheadervalues.iteritems():
        if key in ("Project-Id-Version", "Last-Translator", "Language-Team", "PO-Revision-Date", "Content-Type", "Content-Transfer-Encoding", "Plural-Forms"):
            # want to carry these from the template so we ignore them
            pass
        elif key == "POT-Creation-Date":
            pot_creation_date = value
        elif key == "MIME-Version":
            mime_version = value
        else:
            # may override an extra field of the same name from the template
            headerkwargs[key] = value

    targetheader = thetargetfile.makeheader(charset=charset, encoding=encoding, project_id_version=project_id_version,
        pot_creation_date=pot_creation_date, po_revision_date=po_revision_date, last_translator=last_translator,
        language_team=language_team, mime_version=mime_version, plural_forms=plural_forms, **headerkwargs)

    # Get the header comments and fuzziness state
    if templatepo is not None and len(templatepo.units) > 0:
        templateheader = templatepo.units[0]
        if templateheader.isheader():
            translatornotes = templateheader.getnotes("translator")
            if translatornotes:
                targetheader.addnote(translatornotes, "translator")
            # inputpot may hold no units at all, so check before indexing
            if len(inputpot.units) > 0:
                developernotes = inputpot.units[0].getnotes("developer")
                if developernotes:
                    targetheader.addnote(developernotes, "developer")
            targetheader.markfuzzy(templateheader.isfuzzy())
    elif len(inputpot.units) > 0 and inputpot.units[0].isheader():
        targetheader.addnote(inputpot.units[0].getnotes())
    thetargetfile.addunit(targetheader)

    # Do matching
    for inputpotunit in inputpot.units:
        if inputpotunit.isheader() or inputpotunit.isobsolete():
            continue
        exactmatch = False
        fuzzycandidates = []
        if templatepo:
            # Candidates come from the location index, or from a lookup by
            # source text when the unit has no locations.
            locations = inputpotunit.getlocations()
            possiblematches = []
            for location in locations:
                templatepounit = templatepo.locationindex.get(location, None)
                if templatepounit is not None:
                    possiblematches.append(templatepounit)
            if len(locations) == 0:
                templatepounit = templatepo.findunit(inputpotunit.source)
                if templatepounit:
                    possiblematches.append(templatepounit)
            # Only a translated candidate with identical source counts as exact
            for templatepounit in possiblematches:
                if inputpotunit.source == templatepounit.source and templatepounit.target:
                    inputpotunit.merge(templatepounit, authoritative=True)
                    exactmatch = True
                    break
            if not exactmatch and fuzzyfilematcher:
                fuzzycandidates = fuzzyfilematcher.matches(inputpotunit.source)
                if fuzzycandidates:
                    inputpotunit.merge(fuzzycandidates[0])
                    # Flag the template unit so it is not made obsolete below
                    original = templatepo.findunit(fuzzycandidates[0].source)
                    if original:
                        original.reused = True
        # Fall back to the global TM when nothing was found in the template
        if not exactmatch and fuzzyglobalmatcher and not fuzzycandidates:
            fuzzycandidates = fuzzyglobalmatcher.matches(inputpotunit.source)
            if fuzzycandidates:
                inputpotunit.merge(fuzzycandidates[0])
        if inputpotunit.hasplural() and len(inputpotunit.target) == 0:
            # Let's ensure that we have the correct number of plural forms:
            nplurals, _plural = thetargetfile.getheaderplural()
            if nplurals and nplurals.isdigit() and nplurals != '2':
                inputpotunit.target = multistring([""]*int(nplurals))
        thetargetfile.addunit(inputpotunit)

    # Let's take care of obsoleted messages
    if templatepo:
        newlyobsoleted = []
        for unit in templatepo.units:
            if unit.isheader():
                continue
            if unit.target and not (inputpot.findunit(unit.source) or hasattr(unit, "reused")):
                # not in .pot, make it obsolete
                unit.makeobsolete()
                newlyobsoleted.append(unit)
            elif unit.isobsolete():
                thetargetfile.addunit(unit)
        # Newly obsoleted units go after the ones that were already obsolete
        for unit in newlyobsoleted:
            thetargetfile.addunit(unit)
    return thetargetfile
def main(argv=None):
    """Command-line entry point: set up the option parser and run it on argv.

    Creates a convert.ConvertOptionParser for "pot" input (with or without a
    "po" template) that uses convertpot, adds the --tm, -s/--similarity and
    --nofuzzymatching options, appends each option's destination name to
    parser.passthrough (presumably so the values reach convertpot as keyword
    arguments) and finally calls parser.run(argv).
    """
    from translate.convert import convert

    # Both format keys map to the same output extension and converter.
    converter = ("po", convertpot)
    formats = {"pot": converter, ("pot", "po"): converter}
    parser = convert.ConvertOptionParser(
        formats,
        usepots=True,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__)

    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching")
    parser.passthrough.append("tm")

    defaultsimilarity = 75
    similarityhelp = "The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity
    parser.add_option(
        "-s", "--similarity", dest="min_similarity", type="float",
        default=defaultsimilarity, help=similarityhelp)
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching", action="store_false",
        default=True, help="Disable fuzzy matching")
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()