# -*- coding: utf-8 -*-
#
# Copyright 2006-2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""convert Comma-Separated Value (.csv) files to a TermBase eXchange (.tbx) glossary file"""
from translate.misc import sparse
from translate.storage import csvl10n
from translate.storage import tbx
class csv2tbx:
    """Take the units of a parsed .csv store and put them in a .tbx store."""

    def __init__(self, charset=None):
        """Construct the converter.

        :param charset: character set associated with the CSV input. It is
            only stored on the instance; this class does not read it back.
        """
        self.charset = charset

    @staticmethod
    def _isheader(unit):
        """Return True when *unit* looks like a header row rather than a term.

        Two shapes are recognised: the literal column titles
        (comment / original / translation, ignoring case and surrounding
        whitespace), and a row with an empty comment whose source contains
        a "Content-Type:" line.
        """
        # The tuple must be parenthesised: the bare ``for x in a, b, c`` form
        # inside a list comprehension is a syntax error on Python 3.
        fields = [item.strip().lower()
                  for item in (unit.comment, unit.source, unit.target)]
        if fields == ["comment", "original", "translation"]:
            return True
        return not unit.comment.strip() and "Content-Type:" in unit.source

    def convertfile(self, thecsvfile):
        """Convert a CSV store to a TBX store and return it.

        :param thecsvfile: object exposing a ``units`` sequence whose items
            have ``comment``, ``source`` and ``target`` string attributes.
        :return: the ``tbx.tbxfile`` that was filled in; it is also kept on
            the instance as ``self.tbxfile``.
        """
        self.tbxfile = tbx.tbxfile()
        for position, thecsv in enumerate(thecsvfile.units):
            # ignore typical header strings... A header can only be the very
            # first row, so later rows are never dropped by this check.
            if position == 0 and self._isheader(thecsv):
                continue
            term = tbx.tbxunit.buildfromunit(thecsv)
            # TODO: we might want to get the location or other information from CSV
            self.tbxfile.addunit(term)
        return self.tbxfile
def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None):
    """Read inputfile using csvl10n, convert using csv2tbx, write to outputfile.

    :param inputfile: CSV input handed to ``csvl10n.csvfile``.
    :param outputfile: object whose ``write()`` receives the converted store
        rendered with ``str()``.
    :param templatefile: accepted for signature compatibility; not used here.
    :param charset: forwarded to the ``csv2tbx`` constructor.
    :param columnorder: forwarded to ``csvl10n.csvfile`` as ``fieldnames``.
    :return: 0 when the conversion produced no units (nothing is written),
        1 once the output has been written.
    """
    inputstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    convertor = csv2tbx(charset=charset)
    outputstore = convertor.convertfile(inputstore)
    # An empty result is reported as "nothing converted" instead of writing
    # a term base that contains no entries.
    if not outputstore.units:
        return 0
    outputfile.write(str(outputstore))
    return 1
def main(argv=None):
    """Build the csv2tbx command-line option parser and run it.

    Registers ``convertcsv`` for csv input (with or without an explicit tbx
    output format) and adds the ``--charset`` and ``--columnorder`` options,
    both of which are passed through to the converter function.

    :param argv: argument list handed to the parser's ``run()``.
    """
    # Imported here so the option-parsing machinery is only loaded when the
    # module is used as a command-line tool.
    from translate.convert import convert

    formats = {
        ("csv", "tbx"): ("tbx", convertcsv),
        ("csv", None): ("tbx", convertcsv),
    }
    parser = convert.ConvertOptionParser(formats, usetemplates=False, description=__doc__)
    parser.add_option(
        "", "--charset", dest="charset", default=None,
        help="set charset to decode from csv files", metavar="CHARSET")
    parser.add_option(
        "", "--columnorder", dest="columnorder", default=None,
        help="specify the order and position of columns (comment,source,target)")
    # Names listed in passthrough are forwarded as keyword arguments to the
    # converter function (convertcsv's charset / columnorder parameters).
    parser.passthrough.append("charset")
    parser.passthrough.append("columnorder")
    parser.run(argv)
76 if __name__
== '__main__':