2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Produces a clean file from an unclean file (Trados/Wordfast) by stripping
out the tw4win indicators.

This does not convert an RTF file to PO/XLIFF; it produces the target file
containing only the target text, taken from a text version of the RTF.
"""
import re

from translate.misc.multistring import multistring
from translate.storage import factory
# Matches one tw4win-delimited segment of the form
#     {0>...<}NN{>...<0}
# where NN is a one- to three-digit number.  Group 1 captures the text
# between "{>" and "<0}", which is what a substitution with r"\1" keeps.
# DOTALL lets "." span line breaks inside a segment.
tw4winre = re.compile(
    r"\{0>.*?<\}\d{1,3}\{>(.*?)<0\}",
    re.MULTILINE | re.DOTALL
)
def cleanunit(unit):
    """Strip the tw4win markers from the target(s) of ``unit`` in place.

    ``unit.target`` may be a plain string or a ``multistring``; for a
    ``multistring`` the individual strings are taken from its ``strings``
    attribute.  In every string the literal text ``\\par`` is removed and
    each segment matched by ``tw4winre`` is replaced by its captured group.
    """
    if isinstance(unit.target, multistring):
        strings = unit.target.strings
    else:
        strings = [unit.target]
    for index, string in enumerate(strings):
        # Raw string: "\p" is not a valid escape sequence, so the non-raw
        # spelling only produced a backslash by accident.
        string = string.replace(r"\par", "")
        strings[index] = tw4winre.sub(r"\1", string)
    if len(strings) == 1:
        unit.target = strings[0]
    else:
        # Keep every cleaned form instead of collapsing to the first one.
        unit.target = strings


def cleanfile(thefile):
    """Clean every unit of ``thefile`` in place and return ``thefile``.

    ``thefile`` only needs a ``units`` iterable; each unit in it is passed
    to ``cleanunit``.
    """
    for unit in thefile.units:
        cleanunit(unit)
    return thefile
def runclean(inputfile, outputfile, templatefile):
    """Read ``inputfile``, clean its units and write the result to ``outputfile``.

    ``inputfile`` is loaded through ``factory.getobject``, every unit is
    cleaned via ``cleanfile``, and the string form of the store is written
    to ``outputfile``.  ``templatefile`` is part of the converter call
    signature but is not used here.

    Returns ``True`` to signal success to the caller.
    NOTE(review): the convert framework is expected to discard the output
    when a converter returns a false value -- confirm against
    ``ConvertOptionParser``.
    """
    fromfile = factory.getobject(inputfile)
    cleanfile(fromfile)
    # NOTE: an empty-store check (``fromfile.isempty()``) was left disabled
    # here, so empty input is still written out.
    outputfile.write(str(fromfile))
    return True
def main(argv=None):
    """Command-line entry point: build the convert option parser and run it.

    ``argv`` is handed straight to the parser's ``run`` method; ``None``
    leaves the choice of arguments to the parser.
    """
    # Kept function-local, as in the original layout of this module.
    from translate.convert import convert
    # Keys are input formats, values are (output format, converter) pairs;
    # the ``None`` key is presumably the fallback for unknown input --
    # verify against ConvertOptionParser.
    formats = {
        "po": ("po", runclean),
        "xlf": ("xlf", runclean),
        None: ("po", runclean),
    }
    parser = convert.ConvertOptionParser(formats, usetemplates=False,
                                         description=__doc__)
    parser.run(argv)


if __name__ == '__main__':
    main()