2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""This module implements the functionality for handling plain text files, or
similar wiki-type files.
31 from translate
.storage
import base
# Dokuwiki markup rules, one (rule name, prefix regex, suffix regex) tuple per
# construct. The prefix regex is matched at the start of a line and captures
# (leading markup, remaining text); the suffix regex is searched in the
# remaining text to find trailing markup or whitespace.
# All patterns are raw strings so that "\s" reaches the regex engine verbatim
# instead of relying on Python passing through an unrecognised escape.
dokuwiki.append((
    "Dokuwiki heading",
    re.compile(r"( ?={2,6}[\s]*)(.+)"),
    re.compile(r"([\s]*={2,6}[\s]*)$"),
))
dokuwiki.append((
    "Dokuwiki bullet",
    re.compile(r"([\s]{2,}\*[\s]*)(.+)"),
    re.compile(r"[\s]+$"),
))
dokuwiki.append((
    "Dokuwiki numbered item",
    re.compile(r"([\s]{2,}-[\s]*)(.+)"),
    re.compile(r"[\s]+$"),
))
# MediaWiki markup rules, one (rule name, prefix regex, suffix regex) tuple per
# construct. The prefix regex is matched at the start of a line and captures
# (leading markup, remaining text); the suffix regex is searched in the
# remaining text to find trailing markup or whitespace.
# All patterns are raw strings so that "\s" reaches the regex engine verbatim
# instead of relying on Python passing through an unrecognised escape.
mediawiki.append((
    "MediaWiki heading",
    re.compile(r"(={2,5}[\s]*)(.+)"),
    re.compile(r"([\s]*={2,5}[\s]*)$"),
))
mediawiki.append((
    "MediaWiki bullet",
    re.compile(r"(\*+[\s]*)(.+)"),
    re.compile(r"[\s]+$"),
))
mediawiki.append((
    "MediaWiki numbered item",
    re.compile(r"(#+[\s]*)(.+)"),
    re.compile(r"[\s]+$"),
))
46 "mediawiki": mediawiki
,
51 class TxtUnit(base
.TranslationUnit
):
52 """This class represents a block of text from a text file"""
def __init__(self, source="", encoding="utf-8"):
    """Construct the txtunit.

    :param source: text of the unit, handed to the base class initializer.
    :param encoding: encoding used when converting the unit's text between
        byte strings and unicode.
    """
    # NOTE(review): encoding is assigned before the base initializer runs;
    # presumably the base class assigns ``source``, whose setter reads
    # ``self.encoding`` -- confirm against base.TranslationUnit.
    self.encoding = encoding
    super(TxtUnit, self).__init__(source)
    # String conversion joins pretext/source/posttext and addlocation()
    # appends to location, so every instance needs these attributes even
    # when the parser never assigns them.
    self.pretext = ""
    self.posttext = ""
    self.location = []
63 """Convert a txt unit to a string"""
64 string
= u
"".join([self
.pretext
, self
.source
, self
.posttext
])
65 if isinstance(string
, unicode):
66 return string
.encode(self
.encoding
)
69 # Note that source and target are equivalent for monolingual units
70 def setsource(self
, source
):
71 """Sets the definition to the quoted value of source"""
72 if isinstance(source
, str):
73 source
= source
.decode(self
.encoding
)
77 """gets the unquoted source string"""
79 source
= property(getsource
, setsource
)
81 def settarget(self
, target
):
82 """Sets the definition to the quoted value of target"""
86 """gets the unquoted target string"""
88 target
= property(gettarget
, settarget
)
def addlocation(self, location):
    """Append *location* to this unit's ``location`` collection."""
    recorded = self.location
    recorded.append(location)
93 def getlocations(self
):
96 class TxtFile(base
.TranslationStore
):
97 """This class represents a text file, made up of txtunits"""
def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
    """Construct the store, optionally loading units from *inputfile*.

    :param inputfile: object providing ``readlines()`` (its ``name``
        attribute, when present, is kept as the filename), or None to
        create an empty store.
    :param flavour: key looked up in ``flavours`` to choose the markup
        rules; a key that is not found selects no rules.
    :param encoding: encoding recorded on the store as ``self.encoding``.
    """
    base.TranslationStore.__init__(self, unitclass=self.UnitClass)
    self.filename = getattr(inputfile, 'name', '')
    self.flavour = flavours.get(flavour, [])
    # Record the caller's choice rather than a fixed "utf-8", and do so
    # before parsing so the attribute exists as soon as units are created.
    self.encoding = encoding
    if inputfile is not None:
        # Feed the file's lines to parse() so the store is populated.
        self.parse(inputfile.readlines())
108 def parse(self
, lines
):
109 """Read in text lines and create txtunits from the blocks of text"""
114 if not isinstance(lines
, list):
115 lines
= lines
.split("\n")
116 for linenum
in range(len(lines
)):
117 line
= lines
[linenum
].rstrip("\n").rstrip("\r")
118 for rule
, prere
, postre
in self
.flavour
:
119 match
= prere
.match(line
)
121 pretext
, source
= match
.groups()
122 postmatch
= postre
.search(source
)
124 posttext
= postmatch
.group()
125 source
= source
[:postmatch
.start()]
130 isbreak
= not line
.strip()
131 if isbreak
and block
:
132 unit
= self
.addsourceunit("\n".join(block
))
133 unit
.addlocation("%s:%d" % (self
.filename
, startline
+ 1))
134 unit
.pretext
= pretext
135 unit
.posttext
= posttext
144 unit
= self
.addsourceunit("\n".join(block
))
145 unit
.addlocation("%s:%d" % (self
.filename
, startline
+ 1))
148 source
= self
.getoutput()
149 if isinstance(source
, unicode):
150 return source
.encode(getattr(self
, "encoding", "UTF-8"))
154 """Convert the units back to blocks"""
155 blocks
= [str(unit
) for unit
in self
.units
]
156 string
= "\n\n".join(blocks
)