filters/prefilters.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2004-2008 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """This is a set of string filters that strings can be passed through before
  23 certain tests."""
  24
  25 from translate.filters import decoration
  26 from translate.misc import quote
  27 import re
  28
  29 def removekdecomments(str1):
  30     """removed kde-style po comments i.e. starting with _: and ending with litteral \\n"""
  31     assert isinstance(str1, unicode)
  32     iskdecomment = False
  33     lines = str1.split("\n")
  34     removelines = []
  35     for linenum in range(len(lines)):
  36         line = lines[linenum]
  37         if line.startswith("_:"):
  38             lines[linenum] = ""
  39             iskdecomment = True
  40         if iskdecomment:
  41             removelines.append(linenum)
  42         if line.strip() and not iskdecomment:
  43             break
  44         if iskdecomment and line.strip().endswith("\\n"):
  45             iskdecomment = False
  46     lines = [lines[linenum] for linenum in range(len(lines)) if linenum not in removelines]
  47     return "\n".join(lines)
  48
  49 def filteraccelerators(accelmarker):
  50     """returns a function that filters accelerators marked using accelmarker in strings"""
  51     if accelmarker is None: accelmarkerlen = 0
  52     else: accelmarkerlen = len(accelmarker)
  53     def filtermarkedaccelerators(str1, acceptlist=None):
  54         """modifies the accelerators in str1 marked with a given marker, using a given filter"""
  55         acclocs, badlocs = decoration.findaccelerators(str1, accelmarker, acceptlist)
  56         fstr1, pos = "", 0
  57         for accelstart, accelerator in acclocs:
  58             fstr1 += str1[pos:accelstart]
  59             fstr1 += accelerator
  60             pos = accelstart + accelmarkerlen + len(accelerator)
  61         fstr1 += str1[pos:]
  62         return fstr1
  63     return filtermarkedaccelerators
  64
  65 def varname(variable, startmarker, endmarker):
  66     """a simple variable filter that returns the variable name without the marking punctuation"""
  67     return variable
  68     # if the punctuation were included, we'd do the following:
  69     if startmarker is None:
  70         return variable[:variable.rfind(endmarker)]
  71     elif endmarker is None:
  72         return variable[variable.find(startmarker)+len(startmarker):]
  73     else:
  74         return variable[variable.find(startmarker)+len(startmarker):variable.rfind(endmarker)]
  75
  76 def varnone(variable, startmarker, endmarker):
  77     """a simple variable filter that returns an emoty string"""
  78     return ""
  79
  80 def filtervariables(startmarker, endmarker, varfilter):
  81     """returns a function that filters variables marked using startmarker and
  82     endmarker in strings"""
  83     if startmarker is None:
  84         startmarkerlen = 0
  85     else:
  86         startmarkerlen = len(startmarker)
  87     if endmarker is None:
  88         endmarkerlen = 0
  89     elif type(endmarker) == int:
  90         endmarkerlen = 0
  91     else:
  92         endmarkerlen = len(endmarker)
  93
  94     def filtermarkedvariables(str1):
  95         """modifies the variables in str1 marked with a given marker, using a given filter"""
  96         varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
  97         fstr1, pos = "", 0
  98         for varstart, variable in varlocs:
  99             fstr1 += str1[pos:varstart]
 100             fstr1 += varfilter(variable, startmarker, endmarker)
 101             pos = varstart + startmarkerlen + len(variable) + endmarkerlen
 102         fstr1 += str1[pos:]
 103         return fstr1
 104     return filtermarkedvariables
 105
 106 # a list of special words with punctuation
 107 # all apostrophes in the middle of the word are handled already
 108 wordswithpunctuation = ["'n","'t" # Afrikaans
 109                        ]
 110 # map all the words to their non-punctified equivalent
 111 wordswithpunctuation = dict([(word, filter(str.isalnum, word)) for word in wordswithpunctuation])
 112
 113 def filterwordswithpunctuation(str1):
 114     """goes through a list of known words that have punctuation and removes the
 115     punctuation from them"""
 116     assert isinstance(str1, unicode)
 117     occurrences = []
 118     for word, replacement in wordswithpunctuation.iteritems():
 119         occurrences.extend([(pos, word, replacement) for pos in quote.find_all(str1, word)])
 120     for match in re.finditer("(?u)\w+'\w+", str1):
 121         word = match.group()
 122         replacement = filter(unicode.isalnum, word)
 123         occurrences.append((match.start(), word, replacement))
 124     occurrences.sort()
 125     if occurrences:
 126         lastpos = 0
 127         newstr1 = ""
 128         for pos, word, replacement in occurrences:
 129             newstr1 += str1[lastpos:pos]
 130             newstr1 += replacement
 131             lastpos = pos + len(word)
 132         newstr1 += str1[lastpos:]
 133         return newstr1
 134     else:
 135         return str1
 136