convert/csv2tbx.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2006-2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """convert Comma-Separated Value (.csv) files to a TermBase eXchange (.tbx) glossary file"""
  23
  24 from translate.misc import sparse
  25 from translate.storage import tbx
  26 from translate.storage import csvl10n
  27
  28 class csv2tbx:
  29     """a class that takes translations from a .csv file and puts them in a .tbx file"""
  30     def __init__(self, charset=None):
  31         """construct the converter..."""
  32         self.charset = charset
  33
  34     def convertfile(self, thecsvfile):
  35         """converts a csvfile to a tbxfile, and returns it. uses templatepo if given at construction"""
  36         mightbeheader = True
  37         self.tbxfile = tbx.tbxfile()
  38         for thecsv in thecsvfile.units:
  39             if mightbeheader:
  40                 # ignore typical header strings...
  41                 mightbeheader = False
  42                 if [item.strip().lower() for item in thecsv.comment, thecsv.source, thecsv.target] == \
  43                      ["comment", "original", "translation"]:
  44                     continue
  45                 if len(thecsv.comment.strip()) == 0 and thecsv.source.find("Content-Type:") != -1:
  46                     continue
  47             term = tbx.tbxunit.buildfromunit(thecsv)
  48             # TODO: we might want to get the location or other information from CSV
  49             self.tbxfile.addunit(term)
  50         return self.tbxfile
  51
  52 def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None):
  53     """reads in inputfile using csvl10n, converts using csv2tbx, writes to outputfile"""
  54     inputstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
  55     convertor = csv2tbx(charset=charset)
  56     outputstore = convertor.convertfile(inputstore)
  57     if len(outputstore.units) == 0:
  58         return 0
  59     outputfile.write(str(outputstore))
  60     return 1
  61
  62 def main():
  63     from translate.convert import convert
  64     formats = {("csv", "tbx"): ("tbx", convertcsv), ("csv", None): ("tbx", convertcsv)}
  65     parser = convert.ConvertOptionParser(formats, usetemplates=False, description=__doc__)
  66     parser.add_option("", "--charset", dest="charset", default=None,
  67         help="set charset to decode from csv files", metavar="CHARSET")
  68     parser.add_option("", "--columnorder", dest="columnorder", default=None,
  69         help="specify the order and position of columns (comment,source,target)")
  70     parser.passthrough.append("charset")
  71     parser.passthrough.append("columnorder")
  72     parser.run()
  73
  74
  75
  76 if __name__ == '__main__':
  77     main()