2 # -*- coding: utf-8 -*-
4 # Copyright 2002-2006 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
28 from translate
.misc
import sparse
29 from translate
.storage
import base
31 class SimpleDictReader
:
32 def __init__(self
, fileobj
, fieldnames
):
33 self
.fieldnames
= fieldnames
34 self
.contents
= fileobj
.read()
35 self
.parser
= sparse
.SimpleParser(defaulttokenlist
=[",", "\n"], whitespacechars
="\r")
36 self
.parser
.stringescaping
= 0
37 self
.parser
.quotechars
= '"'
38 self
.tokens
= self
.parser
.tokenize(self
.contents
)
44 def getvalue(self
, value
):
45 """returns a value, evaluating strings as necessary"""
46 if (value
.startswith("'") and value
.endswith("'")) or (value
.startswith('"') and value
.endswith('"')):
47 return sparse
.stringeval(value
)
52 lentokens
= len(self
.tokens
)
53 while self
.tokenpos
< lentokens
and self
.tokens
[self
.tokenpos
] == "\n":
55 if self
.tokenpos
>= lentokens
:
58 while self
.tokenpos
< lentokens
and self
.tokens
[self
.tokenpos
] != "\n":
59 thistokens
.append(self
.tokens
[self
.tokenpos
])
61 while self
.tokenpos
< lentokens
and self
.tokens
[self
.tokenpos
] == "\n":
64 # patch together fields since we can have quotes inside a field
67 for token
in thistokens
:
69 # a field is only quoted if the whole thing is quoted
71 currentfield
= self
.getvalue(currentfield
)
72 fields
.append(currentfield
)
78 # things after the last comma...
81 currentfield
= self
.getvalue(currentfield
)
82 fields
.append(currentfield
)
84 for fieldnum
in range(len(self
.fieldnames
)):
85 if fieldnum
>= len(fields
):
86 values
[self
.fieldnames
[fieldnum
]] = ""
88 values
[self
.fieldnames
[fieldnum
]] = fields
[fieldnum
]
91 class csvunit(base
.TranslationUnit
):
92 spreadsheetescapes
= [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]
93 def __init__(self
, source
=None):
94 super(csvunit
, self
).__init
__(source
)
def add_spreadsheet_escapes(self, source, target):
    """escape a leading spreadsheet-special character in each of two strings

    Every (unescaped, escaped) pair in self.spreadsheetescapes is tried in
    order; when a string starts with the unescaped form, that leading
    occurrence is swapped for the escaped form.  Returns the pair
    (source, target) after escaping.
    """
    def _escape_start(text):
        # only the first occurrence is replaced, and only when it is a prefix
        for plain, quoted in self.spreadsheetescapes:
            if text.startswith(plain):
                text = text.replace(plain, quoted, 1)
        return text

    return _escape_start(source), _escape_start(target)
def remove_spreadsheet_escapes(self, source, target):
    """strip a leading spreadsheet escape from each of two strings

    Every (unescaped, escaped) pair in self.spreadsheetescapes is tried in
    order; when a string starts with the escaped form, that leading
    occurrence is swapped back to the unescaped form.  Returns the pair
    (source, target) after unescaping.
    """
    cleaned = []
    for text in (source, target):
        for plain, quoted in self.spreadsheetescapes:
            # escapes elsewhere in the string are left untouched
            if text.startswith(quoted):
                text = text.replace(quoted, plain, 1)
        cleaned.append(text)
    return cleaned[0], cleaned[1]
def fromdict(self, cedict):
    """fill in comment, source and target from a dictionary of csv fields

    cedict is looked up under the keys 'comment', 'source' and 'target'.
    Each value is decoded from UTF-8; a missing key or a None value gives
    an empty string.  Spreadsheet escapes are then removed from the start
    of source and target.
    """
    def _decoded(key):
        raw = cedict.get(key)
        # None must be caught before decoding: checking afterwards can never
        # succeed, because calling .decode on None raises AttributeError
        if raw is None:
            return u''
        return raw.decode('utf-8')

    self.comment = _decoded('comment')
    self.source = _decoded('source')
    self.target = _decoded('target')
    self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)
def todict(self, encoding='utf-8'):
    """return this unit as a dictionary of csv fields

    The result has the keys 'comment', 'source' and 'target'.  Source and
    target get spreadsheet escapes added first; any value that is a unicode
    object is then encoded with the given encoding, other values are passed
    through unchanged.
    """
    comment = self.comment
    escaped_source, escaped_target = self.add_spreadsheet_escapes(self.source, self.target)
    row = {'comment': comment, 'source': escaped_source, 'target': escaped_target}
    # fixed key order keeps the encoding sequence comment, source, target
    for key in ('comment', 'source', 'target'):
        if isinstance(row[key], unicode):
            row[key] = row[key].encode(encoding)
    return row
137 class csvfile(base
.TranslationStore
):
138 """This class represents a .csv file with various lines.
139 The default format contains three columns: comments, source, target"""
141 def __init__(self
, inputfile
=None, fieldnames
=None):
142 base
.TranslationStore
.__init
__(self
, unitclass
= self
.UnitClass
)
144 if fieldnames
is None:
145 self
.fieldnames
= ['comment', 'source', 'target']
147 if isinstance(fieldnames
, basestring
):
148 fieldnames
= [fieldname
.strip() for fieldname
in fieldnames
.split(",")]
149 self
.fieldnames
= fieldnames
150 self
.filename
= getattr(inputfile
, 'name', '')
151 if inputfile
is not None:
152 csvsrc
= inputfile
.read()
156 def parse(self
, csvsrc
):
157 csvfile
= csv
.StringIO(csvsrc
)
158 reader
= SimpleDictReader(csvfile
, self
.fieldnames
)
160 newce
= self
.UnitClass()
165 """convert to a string. double check that unicode is handled somehow here"""
166 source
= self
.getoutput()
167 if isinstance(source
, unicode):
168 return source
.encode(getattr(self
, "encoding", "UTF-8"))
172 csvfile
= csv
.StringIO()
173 writer
= csv
.DictWriter(csvfile
, self
.fieldnames
)
174 for ce
in self
.units
:
176 writer
.writerow(cedict
)
178 return "".join(csvfile
.readlines())
181 if __name__
== '__main__':
184 cf
.parse(sys
.stdin
.read())
185 sys
.stdout
.write(str(cf
))