tools/posegment.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """Segment Gettext PO, XLIFF and TMX localization files at the sentence level
  23
  24 See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and
  25 usage instructions
  26 """
  27
  28 from translate.storage import factory
  29 from translate.lang import factory as lang_factory
  30 import os
  31 import re
  32
  33 class segment:
  34
  35     def __init__(self, sourcelang, targetlang, stripspaces=True):
  36         self.sourcelang = sourcelang
  37         self.targetlang = targetlang
  38         self.stripspaces = stripspaces
  39
  40     def segmentunit(self, unit):
  41         if unit.isheader() or unit.hasplural():
  42             return [unit]
  43         sourcesegments = self.sourcelang.sentences(unit.source, strip=self.stripspaces)
  44         targetsegments = self.targetlang.sentences(unit.target, strip=self.stripspaces)
  45         if unit.istranslated() and (len(sourcesegments) != len(targetsegments)):
  46             return [unit]
  47         units = []
  48         for i in range(len(sourcesegments)):
  49             newunit = unit.copy()
  50             newunit.source = sourcesegments[i]
  51             if not unit.istranslated():
  52                 newunit.target = ""
  53             else:
  54                 newunit.target = targetsegments[i]
  55             units.append(newunit)
  56         return units
  57
  58     def convertstore(self, fromstore):
  59         tostore = type(fromstore)()
  60         for unit in fromstore.units:
  61             newunits = self.segmentunit(unit)
  62             for newunit in newunits:
  63                 tostore.addunit(newunit)
  64         return tostore
  65
  66 def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True):
  67     """reads in inputfile, segments it then, writes to outputfile"""
  68     # note that templatefile is not used, but it is required by the converter...
  69     inputstore = factory.getobject(inputfile)
  70     if inputstore.isempty():
  71         return 0
  72     sourcelang = lang_factory.getlanguage(sourcelanguage)
  73     targetlang = lang_factory.getlanguage(targetlanguage)
  74     convertor = segment(sourcelang, targetlang, stripspaces=stripspaces)
  75     outputstore = convertor.convertstore(inputstore)
  76     outputfile.write(str(outputstore))
  77     return 1
  78
  79 def main():
  80     from translate.convert import convert
  81     formats = {"po":("po", segmentfile), "xlf":("xlf", segmentfile), "tmx": ("tmx", segmentfile)}
  82     parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
  83     parser.add_option("-l", "--language", dest="targetlanguage", default=None,
  84             help="the target language code", metavar="LANG")
  85     parser.add_option("", "--source-language", dest="sourcelanguage", default=None,
  86             help="the source language code (default 'en')", metavar="LANG")
  87     parser.passthrough.append("sourcelanguage")
  88     parser.passthrough.append("targetlanguage")
  89     parser.add_option("", "--keepspaces", dest="stripspaces", action="store_false",
  90             default=True, help="Disable automatic stripping of whitespace")
  91     parser.passthrough.append("stripspaces")
  92     parser.run()
  93
  94
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()