storage/txt.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """This module implements the functionality for handling plain text files, or
  23 similar wiki type files.
  24
  25 Supported formats are
  26   - Plain text
  27   - dokuwiki
  28   - MediaWiki
  29 """
  30
  31 from translate.storage import base
  32 import re
  33
  34 dokuwiki = []
  35 dokuwiki.append(("Dokuwiki heading", re.compile(r"( ?={2,6}[\s]*)(.+)"), re.compile("([\s]*={2,6}[\s]*)$")))
  36 dokuwiki.append(("Dokuwiki bullet", re.compile(r"([\s]{2,}\*[\s]*)(.+)"), re.compile("[\s]+$")))
  37 dokuwiki.append(("Dokuwiki numbered item", re.compile(r"([\s]{2,}-[\s]*)(.+)"), re.compile("[\s]+$")))
  38
  39 mediawiki = []
  40 mediawiki.append(("MediaWiki heading", re.compile(r"(={2,5}[\s]*)(.+)"), re.compile("([\s]*={2,5}[\s]*)$")))
  41 mediawiki.append(("MediaWiki bullet", re.compile(r"(\*+[\s]*)(.+)"), re.compile("[\s]+$")))
  42 mediawiki.append(("MediaWiki numbered item", re.compile(r"(#+[\s]*)(.+)"), re.compile("[\s]+$")))
  43
  44 flavours = {
  45 "dokuwiki": dokuwiki,
  46 "mediawiki": mediawiki,
  47 None: [],
  48 "plain": []
  49 }
  50
  51 class TxtUnit(base.TranslationUnit):
  52     """This class represents a block of text from a text file"""
  53     def __init__(self, source="", encoding="utf-8"):
  54         """Construct the txtunit"""
  55         self.encoding = encoding
  56         super(TxtUnit, self).__init__(source)
  57         self.source = source
  58         self.pretext = ""
  59         self.posttext = ""
  60         self.location = []
  61
  62     def __str__(self):
  63         """Convert a txt unit to a string"""
  64         string = u"".join([self.pretext, self.source, self.posttext])
  65         if isinstance(string, unicode):
  66             return string.encode(self.encoding)
  67         return string
  68
  69     # Note that source and target are equivalent for monolingual units
  70     def setsource(self, source):
  71         """Sets the definition to the quoted value of source"""
  72         if isinstance(source, str):
  73             source = source.decode(self.encoding)
  74         self._source = source
  75
  76     def getsource(self):
  77         """gets the unquoted source string"""
  78         return self._source
  79     source = property(getsource, setsource)
  80
  81     def settarget(self, target):
  82         """Sets the definition to the quoted value of target"""
  83         self.source = target
  84
  85     def gettarget(self):
  86         """gets the unquoted target string"""
  87         return self.source
  88     target = property(gettarget, settarget)
  89
  90     def addlocation(self, location):
  91         self.location.append(location)
  92
  93     def getlocations(self):
  94         return self.location
  95
  96 class TxtFile(base.TranslationStore):
  97     """This class represents a text file, made up of txtunits"""
  98     UnitClass = TxtUnit
  99     def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
 100         base.TranslationStore.__init__(self, unitclass=self.UnitClass)
 101         self.filename = getattr(inputfile, 'name', '')
 102         self.flavour = flavours.get(flavour, [])
 103         if inputfile is not None:
 104             txtsrc = inputfile.readlines()
 105             self.parse(txtsrc)
 106         self.encoding = "utf-8"
 107
 108     def parse(self, lines):
 109         """Read in text lines and create txtunits from the blocks of text"""
 110         block = []
 111         startline = 0
 112         pretext = ""
 113         posttext = ""
 114         if not isinstance(lines, list):
 115             lines = lines.split("\n")
 116         for linenum in range(len(lines)):
 117             line = lines[linenum].rstrip("\n").rstrip("\r")
 118             for rule, prere, postre in self.flavour:
 119                 match = prere.match(line)
 120                 if match:
 121                     pretext, source = match.groups()
 122                     postmatch = postre.search(source)
 123                     if postmatch:
 124                         posttext = postmatch.group()
 125                         source = source[:postmatch.start()]
 126                     block.append(source)
 127                     isbreak = True
 128                     break
 129             else:
 130                 isbreak = not line.strip()
 131             if isbreak and block:
 132                 unit = self.addsourceunit("\n".join(block))
 133                 unit.addlocation("%s:%d" % (self.filename, startline + 1))
 134                 unit.pretext = pretext
 135                 unit.posttext = posttext
 136                 pretext = ""
 137                 posttext = ""
 138                 block = []
 139             elif not isbreak:
 140                 if not block:
 141                     startline = linenum
 142                 block.append(line)
 143         if block:
 144             unit = self.addsourceunit("\n".join(block))
 145             unit.addlocation("%s:%d" % (self.filename, startline + 1))
 146
 147     def __str__(self):
 148         source = self.getoutput()
 149         if isinstance(source, unicode):
 150             return source.encode(getattr(self, "encoding", "UTF-8"))
 151         return source
 152
 153     def getoutput(self):
 154         """Convert the units back to blocks"""
 155         blocks = [str(unit) for unit in self.units]
 156         string = "\n\n".join(blocks)
 157         return string