fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / storage / csvl10n.py
blob692b2dd8bcf0cf402c6dc184e08bf8b49ec6a046
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2002-2006 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
24 """
26 import csv
28 from translate.misc import sparse
29 from translate.storage import base
class SimpleDictReader:
    """Iterator that yields one dict per row of CSV text, keyed by fieldnames.

    The whole of fileobj is read and tokenized up front in the constructor;
    iteration then walks the token list row by row.
    """

    def __init__(self, fileobj, fieldnames):
        """Read and tokenize fileobj.

        fileobj    -- object with a read() method returning the CSV text
        fieldnames -- sequence of keys used, in column order, for each row
        """
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        # "," and "\n" are the structural tokens; "\r" is handed to the
        # parser as a whitespace character (presumably so that CRLF line
        # endings are tolerated -- confirm against sparse.SimpleParser).
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        return self

    def getvalue(self, value):
        """returns a value, evaluating strings as necessary"""
        if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
            return sparse.stringeval(value)
        return value

    def _skipnewlines(self):
        """advance tokenpos past any run of newline tokens"""
        lentokens = len(self.tokens)
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1

    def _readrowtokens(self):
        """collect (and consume) the tokens up to the next newline token"""
        lentokens = len(self.tokens)
        rowtokens = []
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
            rowtokens.append(self.tokens[self.tokenpos])
            self.tokenpos += 1
        return rowtokens

    def _joinfields(self, rowtokens):
        """patch the tokens of one row together into a list of field values"""
        fields = []
        # patch together fields since we can have quotes inside a field
        currentfield = ''
        fieldparts = 0
        for token in rowtokens:
            if token == ',':
                # a field is only quoted if the whole thing is quoted
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # things after the last comma...
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        return fields

    def next(self):
        """Return the next row as a dict mapping each fieldname to its value.

        Blank lines before and after the row are skipped.  Fieldnames with
        no corresponding column get "", and columns beyond the last
        fieldname are dropped.  Raises StopIteration when no rows remain.
        """
        self._skipnewlines()
        if self.tokenpos >= len(self.tokens):
            raise StopIteration()
        rowtokens = self._readrowtokens()
        self._skipnewlines()
        fields = self._joinfields(rowtokens)
        values = {}
        for fieldnum, fieldname in enumerate(self.fieldnames):
            if fieldnum < len(fields):
                values[fieldname] = fields[fieldnum]
            else:
                values[fieldname] = ""
        return values

    # Python 3 spells the iterator protocol method __next__; the alias lets
    # "for row in reader" work on either major version.
    __next__ = next
class csvunit(base.TranslationUnit):
    """A single row of a CSV localisation file: comment, source and target."""

    # (unescaped prefix, escaped prefix) pairs.  Only a match at the very
    # start of a string is escaped/unescaped -- presumably because
    # spreadsheet applications give a leading +, -, = or ' special meaning.
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        super(csvunit, self).__init__(source)
        self.comment = ""
        self.source = source
        self.target = ""

    def add_spreadsheet_escapes(self, source, target):
        """add common spreadsheet escapes to two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(unescaped):
                source = source.replace(unescaped, escaped, 1)
            if target.startswith(unescaped):
                target = target.replace(unescaped, escaped, 1)
        return source, target

    def remove_spreadsheet_escapes(self, source, target):
        """remove common spreadsheet escapes from two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(escaped):
                source = source.replace(escaped, unescaped, 1)
            if target.startswith(escaped):
                target = target.replace(escaped, unescaped, 1)
        return source, target

    def _decodefield(self, value):
        """return a raw field value as unicode, treating None as empty"""
        # The None check has to come before decoding: calling .decode() on
        # None raises AttributeError, which made the original guards
        # (placed after the decode) unreachable.
        if value is None:
            value = ''
        # Already-decoded text is passed through untouched; decoding a
        # unicode object would first force an implicit ASCII encode.
        if isinstance(value, unicode):
            return value
        return value.decode('utf-8')

    def fromdict(self, cedict):
        """Populate this unit from a row dict.

        cedict -- mapping that may hold 'comment', 'source' and 'target';
                  missing or None entries become empty strings, byte
                  strings are decoded as UTF-8.

        Spreadsheet escapes are removed from source and target afterwards.
        """
        self.comment = self._decodefield(cedict.get('comment', ''))
        self.source = self._decodefield(cedict.get('source', ''))
        self.target = self._decodefield(cedict.get('target', ''))
        self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)

    def todict(self, encoding='utf-8'):
        """Return this unit as a dict with 'comment', 'source' and 'target'.

        Spreadsheet escapes are added to source and target first; any
        unicode value is then encoded with the given encoding.
        """
        comment, source, target = self.comment, self.source, self.target
        source, target = self.add_spreadsheet_escapes(source, target)
        if isinstance(comment, unicode):
            comment = comment.encode(encoding)
        if isinstance(source, unicode):
            source = source.encode(encoding)
        if isinstance(target, unicode):
            target = target.encode(encoding)
        return {'comment': comment, 'source': source, 'target': target}
class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: comments, source, target"""
    UnitClass = csvunit

    def __init__(self, inputfile=None, fieldnames=None):
        """Create a store, optionally parsing inputfile straight away.

        inputfile  -- object with read() and close(); it is read completely
                      and then closed.  None creates an empty store.
        fieldnames -- column names in order, either as a sequence or as a
                      single comma-separated string.  Defaults to
                      comment, source, target.
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        if fieldnames is None:
            self.fieldnames = ['comment', 'source', 'target']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # close the file even if reading it fails
            try:
                csvsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        """Parse the CSV text in csvsrc, adding one unit per row."""
        # NOTE(review): csv.StringIO is whatever StringIO class the csv
        # module happens to import internally, not a documented csv API.
        srcfile = csv.StringIO(csvsrc)
        reader = SimpleDictReader(srcfile, self.fieldnames)
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            self.addunit(newce)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Return all units serialised as CSV text, one row per unit."""
        outfile = csv.StringIO()
        # Units always produce comment/source/target keys.  With the default
        # extrasaction='raise', a custom fieldnames list that leaves one of
        # them out would make writerow() raise ValueError, so extra keys are
        # ignored instead.
        writer = csv.DictWriter(outfile, self.fieldnames, extrasaction='ignore')
        for ce in self.units:
            writer.writerow(ce.todict())
        # getvalue() returns the whole buffer regardless of position and,
        # unlike reset(), does not depend on the buffer being a cStringIO.
        return outfile.getvalue()
if __name__ == '__main__':
    # Filter mode: parse CSV from standard input and write the
    # re-serialised store to standard output.
    import sys
    store = csvfile()
    stdin_text = sys.stdin.read()
    store.parse(stdin_text)
    serialised = str(store)
    sys.stdout.write(serialised)