storage/csvl10n.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2002-2006 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """classes that hold units of comma-separated values (.csv) files (csvunit)
  23 or entire files (csvfile) for use with localisation
  24 """
  25
  26 import csv
  27
  28 from translate.misc import sparse
  29 from translate.storage import base
  30
  31 class SimpleDictReader:
  32     def __init__(self, fileobj, fieldnames):
  33         self.fieldnames = fieldnames
  34         self.contents = fileobj.read()
  35         self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
  36         self.parser.stringescaping = 0
  37         self.parser.quotechars = '"'
  38         self.tokens = self.parser.tokenize(self.contents)
  39         self.tokenpos = 0
  40
  41     def __iter__(self):
  42         return self
  43
  44     def getvalue(self, value):
  45         """returns a value, evaluating strings as neccessary"""
  46         if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
  47             return sparse.stringeval(value)
  48         else:
  49             return value
  50
  51     def next(self):
  52         lentokens = len(self.tokens)
  53         while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
  54             self.tokenpos += 1
  55         if self.tokenpos >= lentokens:
  56             raise StopIteration()
  57         thistokens = []
  58         while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
  59             thistokens.append(self.tokens[self.tokenpos])
  60             self.tokenpos += 1
  61         while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
  62             self.tokenpos += 1
  63         fields = []
  64         # patch together fields since we can have quotes inside a field
  65         currentfield = ''
  66         fieldparts = 0
  67         for token in thistokens:
  68             if token == ',':
  69                 # a field is only quoted if the whole thing is quoted
  70                 if fieldparts == 1:
  71                     currentfield = self.getvalue(currentfield)
  72                 fields.append(currentfield)
  73                 currentfield = ''
  74                 fieldparts = 0
  75             else:
  76                 currentfield += token
  77                 fieldparts += 1
  78         # things after the last comma...
  79         if fieldparts:
  80             if fieldparts == 1:
  81                 currentfield = self.getvalue(currentfield)
  82             fields.append(currentfield)
  83         values = {}
  84         for fieldnum in range(len(self.fieldnames)):
  85             if fieldnum >= len(fields):
  86                 values[self.fieldnames[fieldnum]] = ""
  87             else:
  88                 values[self.fieldnames[fieldnum]] = fields[fieldnum]
  89         return values
  90
  91 class csvunit(base.TranslationUnit):
  92     spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]
  93     def __init__(self, source=None):
  94         super(csvunit, self).__init__(source)
  95         self.comment = ""
  96         self.source = source
  97         self.target = ""
  98
  99     def add_spreadsheet_escapes(self, source, target):
 100         """add common spreadsheet escapes to two strings"""
 101         for unescaped, escaped in self.spreadsheetescapes:
 102             if source.startswith(unescaped):
 103                 source = source.replace(unescaped, escaped, 1)
 104             if target.startswith(unescaped):
 105                 target = target.replace(unescaped, escaped, 1)
 106         return source, target
 107
 108     def remove_spreadsheet_escapes(self, source, target):
 109         """remove common spreadsheet escapes from two strings"""
 110         for unescaped, escaped in self.spreadsheetescapes:
 111             if source.startswith(escaped):
 112                 source = source.replace(escaped, unescaped, 1)
 113             if target.startswith(escaped):
 114                 target = target.replace(escaped, unescaped, 1)
 115         return source, target
 116
 117     def fromdict(self, cedict):
 118         self.comment = cedict.get('comment', '').decode('utf-8')
 119         self.source = cedict.get('source', '').decode('utf-8')
 120         self.target = cedict.get('target', '').decode('utf-8')
 121         if self.comment is None: self.comment = ''
 122         if self.source is None: self.source = ''
 123         if self.target is None: self.target = ''
 124         self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)
 125
 126     def todict(self, encoding='utf-8'):
 127         comment, source, target = self.comment, self.source, self.target
 128         source, target = self.add_spreadsheet_escapes(source, target)
 129         if isinstance(comment, unicode):
 130             comment = comment.encode(encoding)
 131         if isinstance(source, unicode):
 132             source = source.encode(encoding)
 133         if isinstance(target, unicode):
 134             target = target.encode(encoding)
 135         return {'comment':comment, 'source': source, 'target': target}
 136
 137 class csvfile(base.TranslationStore):
 138     """This class represents a .csv file with various lines.
 139     The default format contains three columns: comments, source, target"""
 140     UnitClass = csvunit
 141     def __init__(self, inputfile=None, fieldnames=None):
 142         base.TranslationStore.__init__(self, unitclass = self.UnitClass)
 143         self.units = []
 144         if fieldnames is None:
 145             self.fieldnames = ['comment', 'source', 'target']
 146         else:
 147             if isinstance(fieldnames, basestring):
 148                 fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
 149             self.fieldnames = fieldnames
 150         self.filename = getattr(inputfile, 'name', '')
 151         if inputfile is not None:
 152             csvsrc = inputfile.read()
 153             inputfile.close()
 154             self.parse(csvsrc)
 155
 156     def parse(self, csvsrc):
 157         csvfile = csv.StringIO(csvsrc)
 158         reader = SimpleDictReader(csvfile, self.fieldnames)
 159         for row in reader:
 160             newce = self.UnitClass()
 161             newce.fromdict(row)
 162             self.addunit(newce)
 163
 164     def __str__(self):
 165         """convert to a string. double check that unicode is handled somehow here"""
 166         source = self.getoutput()
 167         if isinstance(source, unicode):
 168             return source.encode(getattr(self, "encoding", "UTF-8"))
 169         return source
 170
 171     def getoutput(self):
 172         csvfile = csv.StringIO()
 173         writer = csv.DictWriter(csvfile, self.fieldnames)
 174         for ce in self.units:
 175             cedict = ce.todict()
 176             writer.writerow(cedict)
 177         csvfile.reset()
 178         return "".join(csvfile.readlines())
 179
 180
 181 if __name__ == '__main__':
 182     import sys
 183     cf = csvfile()
 184     cf.parse(sys.stdin.read())
 185     sys.stdout.write(str(cf))
 186
 187