2 # -*- coding: utf-8 -*-
4 # Copyright 2003-2006 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""convert Comma-Separated Value (.csv) files to Gettext PO localization files

See: http://translate.sourceforge.net/wiki/toolkit/csv2po for examples and
usage instructions
"""
import sys

from translate.misc import sparse
from translate.storage import po
from translate.storage import csvl10n
def replacestrings(source, *pairs):
    """Apply a series of substring replacements to source and return the result.

    Each item of pairs is an (orig, new) tuple. The replacements are applied
    one after another, so a later pair operates on the output of the earlier
    ones. With no pairs, source is returned unchanged.
    """
    for orig, new in pairs:
        source = source.replace(orig, new)
    # hand the result back: quotecsvstr concatenates it with quote characters
    return source
def quotecsvstr(source):
    """Return source wrapped in double quotes after a fixed set of replacements.

    The replacements are applied in this order via replacestrings():
    backslash-quote becomes a plain double quote, every double quote becomes
    backslash-quote, a doubled backslash before a single quote is reduced to
    one backslash, and a doubled backslash before "n" is reduced to one.
    """
    substitutions = (
        ('\\"', '"'),
        ('"', '\\"'),
        ("\\\\'", "\\'"),
        ('\\\\n', '\\n'),
    )
    escaped = replacestrings(source, *substitutions)
    return '"' + escaped + '"'
def simplify(string):
    """Return only the alphanumeric characters of string, in their original order.

    Used as a loose matching key, so that strings differing only in
    punctuation or whitespace compare equal once simplified.
    """
    # Joining the characters keeps the result a string on every Python
    # version (filter() on a string returns an iterator on Python 3).
    # The sparse-tokenizer variant that used to follow the first return was
    # unreachable and has been dropped.
    return "".join([char for char in string if char.isalnum()])
class csv2po:
    """a class that takes translations from a .csv file and puts them in a .po file"""

    def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
        """construct the converter...

        templatepo is an optional po store to merge translations into; when
        it is given, the lookup indexes over its units are built immediately.
        charset, when not None, is used to decode csv source and target
        values. duplicatestyle is handed to the po store's removeduplicates().
        """
        self.pofile = templatepo
        self.charset = charset
        self.duplicatestyle = duplicatestyle
        # number of csv entries that could not be merged into the template
        self.unmatched = 0
        if self.pofile is not None:
            self.makeindex()

    def makeindex(self):
        """makes indexes required for searching...

        Builds three lookups over self.pofile.units:
        commentindex  - joined locations -> unit (ambiguous keys removed)
        sourceindex   - exact source text -> unit
        simpleindex   - simplify(source) -> list of candidate units
        """
        self.commentindex = {}
        self.sourceindex = {}
        self.simpleindex = {}
        self.duplicatecomments = []
        for pounit in self.pofile.units:
            joinedcomment = " ".join(pounit.getlocations())
            source = pounit.source
            # the definitive way to match is by source comment (joinedcomment)
            if joinedcomment in self.commentindex:
                # unless more than one thing matches...
                self.duplicatecomments.append(joinedcomment)
            else:
                self.commentindex[joinedcomment] = pounit
            # do simpler matching in case things have been mangled...
            simpleid = simplify(source)
            # but check for duplicates
            if simpleid in self.simpleindex and source not in self.sourceindex:
                # keep a list of them...
                self.simpleindex[simpleid].append(pounit)
            else:
                self.simpleindex[simpleid] = [pounit]
            # also match by standard msgid
            self.sourceindex[source] = pounit
        # a location shared by several units cannot identify any of them
        for comment in self.duplicatecomments:
            if comment in self.commentindex:
                del self.commentindex[comment]

    def convertunit(self, csvunit):
        """converts csv unit to po unit"""
        pounit = po.pounit(encoding="UTF-8")
        # an empty comment carries no location, matching how handlecsvunit
        # treats blank comments as absent
        if csvunit.comment:
            pounit.addlocation(csvunit.comment)
        pounit.source = csvunit.source
        pounit.target = csvunit.target
        return pounit

    def _reportunmatched(self, message):
        """writes message to stderr and counts the csv entry as unmatched"""
        sys.stderr.write(message + "\n")
        self.unmatched += 1

    def handlecsvunit(self, csvunit):
        """handles reintegrating a csv unit into the .po file

        The matching po unit is looked up by location comment first, then by
        exact source text, then by simplified source text. Entries that
        cannot be matched unambiguously are reported on stderr, counted in
        self.unmatched and otherwise ignored.
        """
        if len(csvunit.comment.strip()) > 0 and csvunit.comment in self.commentindex:
            pounit = self.commentindex[csvunit.comment]
        elif csvunit.source in self.sourceindex:
            pounit = self.sourceindex[csvunit.source]
        else:
            # simpleindex values are never empty lists, so a falsy result
            # means the simplified source is not indexed at all
            thepolist = self.simpleindex.get(simplify(csvunit.source))
            csvfilename = getattr(self.csvfile, "filename", "(unknown)")
            if not thepolist:
                self._reportunmatched("%s - csv entry not found in pofile:\n location\t%s\n original\t%s\n translation\t%s" % (csvfilename, csvunit.comment, csvunit.source, csvunit.target))
                return
            if len(thepolist) > 1:
                matches = "\n ".join(["possible match: " + candidate.source for candidate in thepolist])
                self._reportunmatched("%s - csv entry not found in pofile, multiple matches found:\n location\t%s\n original\t%s\n translation\t%s\n %s" % (csvfilename, csvunit.comment, csvunit.source, csvunit.target, matches))
                return
            pounit = thepolist[0]
        if pounit.hasplural():
            # we need to work out whether we matched the singular or the plural
            singularid = pounit.source.strings[0]
            pluralid = pounit.source.strings[1]
            if csvunit.source == singularid:
                pounit.msgstr[0] = csvunit.target
            elif csvunit.source == pluralid:
                pounit.msgstr[1] = csvunit.target
            elif simplify(csvunit.source) == simplify(singularid):
                pounit.msgstr[0] = csvunit.target
            elif simplify(csvunit.source) == simplify(pluralid):
                pounit.msgstr[1] = csvunit.target
            else:
                self._reportunmatched("couldn't work out singular or plural: %r, %r, %r" %
                                      (csvunit.source, singularid, pluralid))
                return
        else:
            pounit.target = csvunit.target

    def convertstore(self, thecsvfile):
        """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction"""
        self.csvfile = thecsvfile
        # with a template we merge into its units; without one we build a
        # fresh po store from the csv rows
        mergemode = self.pofile is not None
        if not mergemode:
            self.pofile = po.pofile()
        if self.pofile.units and self.pofile.units[0].isheader():
            targetheader = self.pofile.units[0]
            targetheader.msgstr = [line.replace("CHARSET", "UTF-8").replace("ENCODING", "8bit") for line in targetheader.msgstr]
        else:
            # NOTE(review): the unit returned by makeheader() is not added to
            # self.pofile here - confirm whether makeheader registers it
            targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
        targetheader.addnote("extracted from %s" % self.csvfile.filename, "developer")
        mightbeheader = True
        for csvunit in self.csvfile.units:
            if self.charset is not None:
                csvunit.source = csvunit.source.decode(self.charset)
                csvunit.target = csvunit.target.decode(self.charset)
            if mightbeheader:
                # ignore typical header strings...
                # (only the first row can be a header row)
                mightbeheader = False
                columns = [item.strip().lower() for item in (csvunit.comment, csvunit.source, csvunit.target)]
                if columns == ["comment", "original", "translation"]:
                    continue
                if len(csvunit.comment.strip()) == 0 and csvunit.source.find("Content-Type:") != -1:
                    continue
            if mergemode:
                self.handlecsvunit(csvunit)
            else:
                pounit = self.convertunit(csvunit)
                self.pofile.addunit(pounit)
        self.pofile.removeduplicates(self.duplicatestyle)
        return self.pofile
def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="msgctxt"):
    """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile

    templatefile may be None, in which case a new po store is built from the
    csv rows; otherwise the csv translations are merged into the template.
    charset, columnorder and duplicatestyle are passed through to the csv
    reader and the converter.

    Returns 0 when the resulting store is empty (nothing is written) and 1
    once the store has been written to outputfile.
    """
    inputstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    if templatefile is None:
        convertor = csv2po(charset=charset, duplicatestyle=duplicatestyle)
    else:
        # only parse a template when one was actually supplied
        templatestore = po.pofile(templatefile)
        convertor = csv2po(templatestore, charset=charset, duplicatestyle=duplicatestyle)
    outputstore = convertor.convertstore(inputstore)
    if outputstore.isempty():
        return 0
    outputfile.write(str(outputstore))
    return 1
def main(argv=None):
    """Build the csv2po command line parser and run it.

    argv is handed to the parser's run() method; None leaves the choice of
    arguments to the parser.
    """
    from translate.convert import convert
    formats = {("csv", "po"): ("po", convertcsv), ("csv", "pot"): ("po", convertcsv),
               ("csv", None): ("po", convertcsv)}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_option("", "--charset", dest="charset", default=None,
        help="set charset to decode from csv files", metavar="CHARSET")
    parser.add_option("", "--columnorder", dest="columnorder", default=None,
        help="specify the order and position of columns (comment,source,target)")
    parser.add_duplicates_option()
    # these option names match convertcsv's keyword parameters; presumably
    # the parser forwards them to the convert function - confirm in convert
    parser.passthrough.append("charset")
    parser.passthrough.append("columnorder")
    parser.run(argv)


if __name__ == '__main__':
    main()