small improvements of the version control systems handling:
[translate_toolkit.git] / filters / prefilters.py
blob962fadd089cf482cafd32cb590af448bf91e00e6
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """This is a set of string filters that strings can be passed through before
23 certain tests."""
25 from translate.filters import decoration
26 from translate.misc import quote
27 import re
def removekdecomments(str1):
    """Remove a leading KDE-style PO comment from str1.

    A KDE comment starts on a line beginning with "_:" and runs up to and
    including the first line that ends (ignoring surrounding whitespace)
    with a literal backslash-n sequence. Only comments that appear before
    the first line of real content are removed: scanning stops at the first
    non-blank line that is not part of a comment, and everything from there
    on is returned untouched.

    str1 must be a unicode string (AssertionError otherwise); the result is
    the remaining lines joined with newlines.
    """
    # Python 2 spells the text type "unicode"; on Python 3 it is "str".
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)
    lines = str1.split("\n")
    iskdecomment = False
    keptlines = []
    for linenum, line in enumerate(lines):
        if line.startswith("_:"):
            iskdecomment = True
        if not iskdecomment:
            if line.strip():
                # First line of real content: keep it and all that follows
                # verbatim, even if a later line looks like a comment.
                keptlines.extend(lines[linenum:])
                break
            # Blank lines outside a comment are preserved.
            keptlines.append(line)
        elif line.strip().endswith("\\n"):
            # The literal "\n" marker closes the comment; this line is still
            # part of the comment and is dropped with it.
            iskdecomment = False
    return "\n".join(keptlines)
def filteraccelerators(accelmarker):
    """Build a filter for accelerators that are marked with accelmarker.

    The returned function takes a string (and an optional acceptlist that is
    passed straight through to decoration.findaccelerators) and returns the
    string rebuilt from the text between accelerators plus the accelerators
    themselves, skipping len(accelmarker) characters at each accelerator
    position. A marker of None is treated as having length zero.
    """
    markerlen = 0
    if accelmarker is not None:
        markerlen = len(accelmarker)

    def filtermarkedaccelerators(str1, acceptlist=None):
        """Return str1 with each located accelerator rewritten without its marker."""
        # Only the accepted locations are used; the rejected ones are ignored.
        found, _rejected = decoration.findaccelerators(str1, accelmarker, acceptlist)
        pieces = []
        cursor = 0
        for accelstart, accelerator in found:
            # Text up to the accelerator, then the accelerator itself.
            pieces.append(str1[cursor:accelstart])
            pieces.append(accelerator)
            # Resume after the marker and the accelerator in the source text.
            cursor = accelstart + markerlen + len(accelerator)
        pieces.append(str1[cursor:])
        return "".join(pieces)

    return filtermarkedaccelerators
def varname(variable, startmarker, endmarker):
    """A simple variable filter that returns the variable name as given.

    The variable is handed over without its marking punctuation, so it is
    returned unchanged. startmarker and endmarker are accepted only so that
    the signature matches what filtervariables passes to its varfilter.
    """
    # If the punctuation were ever included in variable, it would have to be
    # stripped here: everything up to and including the first startmarker,
    # and everything from the last endmarker onwards (either may be None).
    return variable
def varnone(variable, startmarker, endmarker):
    """A variable filter that discards the variable.

    All three arguments are ignored; the result is always an empty string.
    """
    return ""
def filtervariables(startmarker, endmarker, varfilter):
    """Build a filter for variables marked with startmarker and endmarker.

    startmarker may be a string or None. endmarker may be a string, None or
    an integer; None and integers are treated as taking up no characters in
    the text (presumably an integer is a fixed variable length understood by
    decoration.findmarkedvariables -- confirm there).

    varfilter is called as varfilter(variable, startmarker, endmarker) for
    every variable found and its return value replaces the whole marked
    variable (markers included) in the output.
    """
    if startmarker is None:
        startmarkerlen = 0
    else:
        startmarkerlen = len(startmarker)
    # isinstance (rather than an exact type comparison) so that int
    # subclasses are not mistaken for strings and passed to len().
    if endmarker is None or isinstance(endmarker, int):
        endmarkerlen = 0
    else:
        endmarkerlen = len(endmarker)

    def filtermarkedvariables(str1):
        """Return str1 with every marked variable replaced by varfilter's result."""
        varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
        pieces = []
        pos = 0
        for varstart, variable in varlocs:
            pieces.append(str1[pos:varstart])
            pieces.append(varfilter(variable, startmarker, endmarker))
            # Skip the start marker, the variable and the end marker.
            pos = varstart + startmarkerlen + len(variable) + endmarkerlen
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedvariables
# a list of special words with punctuation
# all apostrophes in the middle of the word are handled already
wordswithpunctuation = ["'n", "'t",  # Afrikaans
                        ]
# map all the words to their non-punctified equivalent
# (built with an explicit join: filter() on a string does not return a
# string on Python 3)
wordswithpunctuation = dict([(word, "".join([char for char in word if char.isalnum()]))
                             for word in wordswithpunctuation])
def filterwordswithpunctuation(str1):
    """Remove the punctuation from words that are known to contain it.

    Two kinds of words are rewritten: the entries of wordswithpunctuation
    (located with quote.find_all) and any run of word characters with an
    apostrophe in the middle (e.g. "don't"). Each occurrence is replaced by
    its alphanumeric characters only.

    str1 must be a unicode string (AssertionError otherwise). If nothing is
    found, str1 itself is returned.
    """
    # Python 2 spells the text type "unicode"; on Python 3 it is "str".
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)
    occurrences = []
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend([(pos, word, replacement) for pos in quote.find_all(str1, word)])
    for match in re.finditer(r"(?u)\w+'\w+", str1):
        word = match.group()
        replacement = u"".join([char for char in word if char.isalnum()])
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1
    # Process the occurrences from left to right.
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # This occurrence starts inside one that was already replaced
            # (e.g. a listed word such as "'t" found inside "don't");
            # applying it as well would duplicate characters in the output.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)