for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / tools / posegment.py
bloba69bc103a4719398b155d79500e78e56a25b0f92
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Segment Gettext PO, XLIFF and TMX localization files at the sentence level
24 See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and
25 usage instructions
26 """
28 from translate.storage import factory
29 from translate.lang import factory as lang_factory
30 import os
31 import re
class segment:
    """Split translation units into one unit per sentence.

    Sentence boundaries come from calling ``sentences(text, strip=...)`` on
    the source and target language objects given to the constructor.
    """

    def __init__(self, sourcelang, targetlang, stripspaces=True):
        """Store the language objects and the whitespace policy.

        ``sourcelang`` and ``targetlang`` must provide a
        ``sentences(text, strip=...)`` method; ``stripspaces`` is handed to
        that method as its ``strip`` argument.
        """
        self.sourcelang = sourcelang
        self.targetlang = targetlang
        self.stripspaces = stripspaces

    def segmentunit(self, unit):
        """Return the list of units that should replace ``unit``.

        The unit itself is returned (as a one-element list) when it is a
        header, has plurals, produces no source segments at all, or is
        translated but its source and target split into a different number
        of segments.  Otherwise one copy of the unit is returned per source
        segment; copies of an untranslated unit get an empty target.
        """
        if unit.isheader() or unit.hasplural():
            return [unit]
        sourcesegments = self.sourcelang.sentences(unit.source, strip=self.stripspaces)
        targetsegments = self.targetlang.sentences(unit.target, strip=self.stripspaces)
        if not sourcesegments:
            # Nothing to split: keep the unit as it is.  Returning an empty
            # list here would make convertstore() drop it from the output.
            return [unit]
        # The original unit is never modified below, so ask only once.
        translated = unit.istranslated()
        if translated and len(sourcesegments) != len(targetsegments):
            # Segments cannot be paired up one-to-one; leave the unit whole.
            return [unit]
        units = []
        for index, sourcesegment in enumerate(sourcesegments):
            newunit = unit.copy()
            newunit.source = sourcesegment
            if translated:
                newunit.target = targetsegments[index]
            else:
                newunit.target = ""
            units.append(newunit)
        return units

    def convertstore(self, fromstore):
        """Return a new store of the same type holding the segmented units.

        A fresh store is created by calling ``type(fromstore)`` without
        arguments, and every unit produced by ``segmentunit`` is added to it
        in the original order.
        """
        tostore = type(fromstore)()
        for unit in fromstore.units:
            for newunit in self.segmentunit(unit):
                tostore.addunit(newunit)
        return tostore
def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True):
    """Segment the store read from inputfile and write it to outputfile.

    Returns 0, writing nothing, when the input store is empty; returns 1
    once the segmented store has been written.
    """
    # templatefile is never used here; it is only present because the
    # converter requires it in the signature.
    store = factory.getobject(inputfile)
    if store.isempty():
        return 0
    segmenter = segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces
    )
    outputfile.write(str(segmenter.convertstore(store)))
    return 1
def main():
    """Build the posegment command-line parser and run it.

    Registers ``segmentfile`` as the converter for po, xlf and tmx input and
    adds the language and whitespace options, each of which is listed in
    ``parser.passthrough``.
    """
    from translate.convert import convert
    formats = {"po": ("po", segmentfile), "xlf": ("xlf", segmentfile), "tmx": ("tmx", segmentfile)}
    parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
    parser.add_option("-l", "--language", dest="targetlanguage", default=None,
                      help="the target language code", metavar="LANG")
    # The help text promises 'en' as the default, so the option default has
    # to be 'en' as well.
    # NOTE(review): assumes passthrough options are handed to segmentfile as
    # keyword arguments, so a None default here would override its "en"
    # default -- confirm against ConvertOptionParser.
    parser.add_option("", "--source-language", dest="sourcelanguage", default="en",
                      help="the source language code (default 'en')", metavar="LANG")
    parser.passthrough.append("sourcelanguage")
    parser.passthrough.append("targetlanguage")
    parser.add_option("", "--keepspaces", dest="stripspaces", action="store_false",
                      default=True, help="Disable automatic stripping of whitespace")
    parser.passthrough.append("stripspaces")
    parser.run()
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()