fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / storage / txt.py
blob0ffdb4a006b2131977e62456fbf5a11887e928cf
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""This module implements the functionality for handling plain text files, or
similar wiki-type files.

Supported formats are:
  - Plain text
  - DokuWiki
  - MediaWiki
"""
31 from translate.storage import base
32 import re
# Each flavour is a list of (rule name, prefix regex, suffix regex) tuples.
# The prefix regex is matched against the start of a line and captures two
# groups: the leading markup and the remaining text.  The suffix regex is then
# searched in that remaining text to split off trailing markup or whitespace.
# All patterns are raw strings so that "\s" and "\*" reach the regex engine
# untouched instead of relying on Python passing unknown escapes through.
dokuwiki = [
    ("Dokuwiki heading",
     re.compile(r"( ?={2,6}[\s]*)(.+)"),
     re.compile(r"([\s]*={2,6}[\s]*)$")),
    ("Dokuwiki bullet",
     re.compile(r"([\s]{2,}\*[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
    ("Dokuwiki numbered item",
     re.compile(r"([\s]{2,}-[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
]

mediawiki = [
    ("MediaWiki heading",
     re.compile(r"(={2,5}[\s]*)(.+)"),
     re.compile(r"([\s]*={2,5}[\s]*)$")),
    ("MediaWiki bullet",
     re.compile(r"(\*+[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
    ("MediaWiki numbered item",
     re.compile(r"(#+[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
]

# Maps a flavour name to its rule list; None and "plain" apply no rules.
flavours = {
    "dokuwiki": dokuwiki,
    "mediawiki": mediawiki,
    None: [],
    "plain": [],
}
class TxtUnit(base.TranslationUnit):
    """A single block of text taken from a text file.

    The unit is monolingual: ``target`` is just another name for ``source``.
    ``pretext`` and ``posttext`` are emitted around the source text by
    ``__str__``.
    """

    def __init__(self, source="", encoding="utf-8"):
        """Create a unit for ``source``, converting text with ``encoding``"""
        # The encoding has to exist before anything assigns to self.source,
        # because the source setter reads it to decode byte strings.
        self.encoding = encoding
        super(TxtUnit, self).__init__(source)
        self.source = source
        self.pretext = ""
        self.posttext = ""
        self.location = []

    def __str__(self):
        """Return pretext + source + posttext, encoded if it is unicode"""
        pieces = (self.pretext, self.source, self.posttext)
        text = u"".join(pieces)
        if not isinstance(text, unicode):
            return text
        return text.encode(self.encoding)

    # Note that source and target are equivalent for monolingual units
    def setsource(self, source):
        """Store ``source``, first decoding it if it is a byte string"""
        text = source
        if isinstance(text, str):
            text = text.decode(self.encoding)
        self._source = text

    def getsource(self):
        """Return the stored source text"""
        return self._source
    source = property(getsource, setsource)

    def settarget(self, target):
        """Store ``target`` as the source text (the unit is monolingual)"""
        self.source = target

    def gettarget(self):
        """Return the source text (the unit is monolingual)"""
        return self.source
    target = property(gettarget, settarget)

    def addlocation(self, location):
        """Record one more location for this unit"""
        self.location.append(location)

    def getlocations(self):
        """Return the list of recorded locations"""
        return self.location
class TxtFile(base.TranslationStore):
    """A text file represented as a sequence of TxtUnit blocks.

    Paragraphs (runs of non-blank lines) become one unit each.  When a wiki
    flavour is selected, every line matching one of its rules (heading,
    bullet, numbered item) becomes a unit of its own, with the surrounding
    markup kept in the unit's ``pretext`` and ``posttext``.
    """
    UnitClass = TxtUnit

    def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
        """Create the store and, if ``inputfile`` is given, parse it.

        inputfile -- object with ``readlines()`` (and optionally ``name``),
                     or None for an empty store
        flavour   -- key into ``flavours``; unknown keys apply no rules
        encoding  -- encoding used to decode the input and encode the output
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        self.flavour = flavours.get(flavour, [])
        # Must be set before parsing so that the units are created with the
        # requested encoding rather than a hard-coded one.
        self.encoding = encoding
        if inputfile is not None:
            self.parse(inputfile.readlines())

    def _addblock(self, block, startline, pretext="", posttext=""):
        """Add the collected lines of ``block`` to the store as one unit"""
        encoding = getattr(self, "encoding", "utf-8")
        text = "\n".join(block)
        if isinstance(text, str):
            # Decode here with the store's encoding; otherwise the unit would
            # decode the byte string with its own default encoding.
            text = text.decode(encoding)
        unit = self.addsourceunit(text)
        # Let the unit encode itself the same way the file was decoded.
        unit.encoding = encoding
        # startline is 0-based, reported locations are 1-based.
        unit.addlocation("%s:%d" % (self.filename, startline + 1))
        unit.pretext = pretext
        unit.posttext = posttext
        return unit

    def parse(self, lines):
        """Read in text lines and create txtunits from the blocks of text.

        ``lines`` is either a list of lines or a single string, which is
        split on newlines.
        """
        block = []
        startline = 0
        if not isinstance(lines, list):
            lines = lines.split("\n")
        for linenum, rawline in enumerate(lines):
            line = rawline.rstrip("\n").rstrip("\r")
            for rule, prere, postre in self.flavour:
                match = prere.match(line)
                if match is None:
                    continue
                # A marked-up line is always a unit of its own.  Emit any
                # paragraph still being collected first, so that this line's
                # markup is not attached to the paragraph's text.
                if block:
                    self._addblock(block, startline)
                    block = []
                pretext, source = match.groups()
                posttext = ""
                postmatch = postre.search(source)
                if postmatch:
                    posttext = postmatch.group()
                    source = source[:postmatch.start()]
                # The location is this very line, not the start of whatever
                # block came before it.
                self._addblock([source], linenum, pretext, posttext)
                break
            else:
                # No rule matched: blank lines end a paragraph, anything else
                # extends (or starts) one.
                if line.strip():
                    if not block:
                        startline = linenum
                    block.append(line)
                elif block:
                    self._addblock(block, startline)
                    block = []
        if block:
            # Input ended without a trailing blank line.
            self._addblock(block, startline)

    def __str__(self):
        """Return the file content, encoded if the output is unicode"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Convert the units back to blocks separated by blank lines"""
        blocks = [str(unit) for unit in self.units]
        return "\n\n".join(blocks)