2 # -*- coding: utf-8 -*-
4 # Copyright 2004-2008 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""This is a set of string filters that strings can be passed through before
certain tests."""
import re

from translate.filters import decoration
from translate.misc import quote
def removekdecomments(str1):
    """Remove KDE-style PO comments from the start of a message.

    A KDE comment begins on a line that starts with ``_:`` and runs up to and
    including the line whose stripped text ends with a literal backslash
    followed by ``n``.  Scanning stops at the first non-blank line that is not
    part of such a comment, so only leading comments are removed.

    :param str1: the (unicode) message text
    :return: the text with the comment lines dropped, joined with newlines
    :raises AssertionError: if str1 is not a unicode string
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3
    # so the type check keeps its meaning on both.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)

    iskdecomment = False
    lines = str1.split("\n")
    # A set keeps the membership test below O(1) per line.
    removelines = set()
    for linenum, line in enumerate(lines):
        stripped = line.strip()
        if line.startswith("_:"):
            iskdecomment = True
        if iskdecomment:
            removelines.add(linenum)
        if stripped and not iskdecomment:
            # First line of real content: nothing after it is a KDE comment.
            break
        if iskdecomment and stripped.endswith("\\n"):
            iskdecomment = False
    kept = [line for linenum, line in enumerate(lines)
            if linenum not in removelines]
    return "\n".join(kept)
def filteraccelerators(accelmarker):
    """Return a function that filters accelerators marked with accelmarker.

    :param accelmarker: the accelerator marker string, or None for no marker
    :return: a function ``f(str1, acceptlist=None)`` returning the filtered
             string
    """
    if accelmarker is None:
        accelmarkerlen = 0
    else:
        accelmarkerlen = len(accelmarker)

    def filtermarkedaccelerators(str1, acceptlist=None):
        """Return str1 with the accelerator markers removed.

        The accelerator locations come from ``decoration.findaccelerators``;
        the second value it returns is not needed here.
        """
        acclocs, _badlocs = decoration.findaccelerators(str1, accelmarker,
                                                        acceptlist)
        pieces = []
        pos = 0
        for accelstart, accelerator in acclocs:
            # Text between the previous accelerator and this one.
            pieces.append(str1[pos:accelstart])
            # NOTE(review): the statement between the two updates was lost
            # from the source; keeping the accelerator character (and so
            # dropping only the marker) is the assumed behaviour -- confirm.
            pieces.append(accelerator)
            pos = accelstart + accelmarkerlen + len(accelerator)
        # Whatever follows the last accelerator.
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedaccelerators
def varname(variable, startmarker, endmarker):
    """A simple variable filter that returns the variable name unchanged.

    The name is returned as given, on the understanding that it arrives
    without its marking punctuation.

    :param variable: the variable name
    :param startmarker: the marker that precedes variables (unused)
    :param endmarker: the marker that follows variables (unused)
    :return: variable, as passed in
    """
    # If the punctuation were included in ``variable`` it would have to be
    # sliced off here: from after the first startmarker (when not None) up to
    # the last endmarker (when not None).
    return variable
def varnone(variable, startmarker, endmarker):
    """A simple variable filter that returns an empty string.

    All three arguments are accepted for signature compatibility with the
    other variable filters and are ignored.
    """
    return ""
def filtervariables(startmarker, endmarker, varfilter):
    """Return a function that filters variables marked in strings.

    :param startmarker: string that precedes a variable, or None
    :param endmarker: string that follows a variable, None, or an int
    :param varfilter: callable ``(variable, startmarker, endmarker)`` whose
                      return value replaces each marked variable
    :return: a function ``f(str1)`` returning the filtered string
    """
    if startmarker is None:
        startmarkerlen = 0
    else:
        startmarkerlen = len(startmarker)

    if endmarker is None:
        endmarkerlen = 0
    elif isinstance(endmarker, int):
        # NOTE(review): the body of this branch was lost from the source.
        # An int has no length, so it is assumed to contribute no end-marker
        # text to skip -- confirm against decoration.findmarkedvariables.
        endmarkerlen = 0
    else:
        endmarkerlen = len(endmarker)

    def filtermarkedvariables(str1):
        """Return str1 with each marked variable replaced by varfilter's output."""
        varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
        pieces = []
        pos = 0
        for varstart, variable in varlocs:
            # Text between the previous variable and this one.
            pieces.append(str1[pos:varstart])
            pieces.append(varfilter(variable, startmarker, endmarker))
            # Skip the start marker, the variable name and the end marker.
            pos = varstart + startmarkerlen + len(variable) + endmarkerlen
        # Whatever follows the last variable.
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedvariables
# a list of special words with punctuation
# all apostrophes in the middle of the word are handled already
wordswithpunctuation = [
    "'n", "'t",  # Afrikaans
]
# map all the words to their non-punctified equivalent
# (joined explicitly: on Python 3 filter() returns an iterator, not a string)
wordswithpunctuation = dict(
    (word, "".join(char for char in word if char.isalnum()))
    for word in wordswithpunctuation
)
def filterwordswithpunctuation(str1):
    """Remove the punctuation from known words that contain punctuation.

    Two kinds of occurrence are replaced: every word listed in the
    module-level ``wordswithpunctuation`` mapping, and every run of word
    characters with a single apostrophe inside it.  Each is replaced by its
    alphanumeric characters only.

    :param str1: the (unicode) text to filter
    :return: the filtered text; str1 itself when nothing matched
    :raises AssertionError: if str1 is not a unicode string
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)

    occurrences = []
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend((pos, word, replacement)
                           for pos in quote.find_all(str1, word))
    for match in re.finditer(r"(?u)\w+'\w+", str1):
        word = match.group()
        replacement = "".join(char for char in word if char.isalnum())
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1

    # Replacements are spliced in left to right, so order them by position.
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # Overlaps a replacement already made (e.g. the listed "'t"
            # inside "don't"); splicing it in too would duplicate text.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)