2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Segment Gettext PO, XLIFF and TMX localization files at the sentence level
24 See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and
28 from translate
.storage
import factory
29 from translate
.lang
import factory
as lang_factory
def __init__(self, sourcelang, targetlang, stripspaces=True):
    """Remember the language objects and whitespace policy for segmenting.

    sourcelang and targetlang are kept as-is; elsewhere in this class
    their sentences() method is called to split unit text.  stripspaces
    is forwarded to those calls as the ``strip`` keyword argument.
    """
    self.sourcelang, self.targetlang = sourcelang, targetlang
    self.stripspaces = stripspaces
40 def segmentunit(self
, unit
):
41 if unit
.isheader() or unit
.hasplural():
43 sourcesegments
= self
.sourcelang
.sentences(unit
.source
, strip
=self
.stripspaces
)
44 targetsegments
= self
.targetlang
.sentences(unit
.target
, strip
=self
.stripspaces
)
45 if unit
.istranslated() and (len(sourcesegments
) != len(targetsegments
)):
48 for i
in range(len(sourcesegments
)):
50 newunit
.source
= sourcesegments
[i
]
51 if not unit
.istranslated():
54 newunit
.target
= targetsegments
[i
]
def convertstore(self, fromstore):
    """Build a new store containing the segmented units of fromstore.

    An empty store of the same type as fromstore is created.  Every unit
    in fromstore.units is passed to self.segmentunit(), and each unit that
    call yields is added to the new store, preserving order.

    Returns the newly populated store.
    """
    tostore = type(fromstore)()
    for unit in fromstore.units:
        for newunit in self.segmentunit(unit):
            tostore.addunit(newunit)
    # segmentfile() serialises the result of this call with str(), so the
    # populated store must be handed back rather than dropped.
    return tostore
66 def segmentfile(inputfile
, outputfile
, templatefile
, sourcelanguage
="en", targetlanguage
=None, stripspaces
=True):
67 """reads in inputfile, segments it, then writes to outputfile"""
68 # note that templatefile is not used, but it is required by the converter...
69 inputstore
= factory
.getobject(inputfile
)
70 if inputstore
.isempty():
72 sourcelang
= lang_factory
.getlanguage(sourcelanguage
)
73 targetlang
= lang_factory
.getlanguage(targetlanguage
)
74 convertor
= segment(sourcelang
, targetlang
, stripspaces
=stripspaces
)
75 outputstore
= convertor
.convertstore(inputstore
)
76 outputfile
.write(str(outputstore
))
80 from translate
.convert
import convert
81 formats
= {"po":("po", segmentfile
), "xlf":("xlf", segmentfile
), "tmx": ("tmx", segmentfile
)}
82 parser
= convert
.ConvertOptionParser(formats
, usepots
=True, description
=__doc__
)
83 parser
.add_option("-l", "--language", dest
="targetlanguage", default
=None,
84 help="the target language code", metavar
="LANG")
85 parser
.add_option("", "--source-language", dest
="sourcelanguage", default
=None,
86 help="the source language code (default 'en')", metavar
="LANG")
87 parser
.passthrough
.append("sourcelanguage")
88 parser
.passthrough
.append("targetlanguage")
89 parser
.add_option("", "--keepspaces", dest
="stripspaces", action
="store_false",
90 default
=True, help="Disable automatic stripping of whitespace")
91 parser
.passthrough
.append("stripspaces")
95 if __name__
== '__main__':