filters/checks.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2004-2008 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """This is a set of validation checks that can be performed on translation
  23 units.
  24
  25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
  26 and derivatives of TranslationChecker (like StandardChecker) check
  27 (source, target) translation pairs.
  28
  29 When adding a new test here, please document and explain the behaviour on the
  30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
  31 """
  32
  33 from translate.filters import helpers
  34 from translate.filters import decoration
  35 from translate.filters import prefilters
  36 from translate.filters import spelling
  37 from translate.lang import factory
  38 from translate.lang import data
  39 # The import of xliff could fail if the user doesn't have lxml installed. For
  40 # now we try to continue gracefully to help users who aren't interested in
  41 # support for XLIFF or other XML formats.
  42 try:
  43     from translate.storage import xliff
  44 except ImportError, e:
  45     xliff = None
  46 import re
  47
  48 # These are some regular expressions that are compiled for use in some tests
  49
  50 # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as
  51 # this should capture printf types defined in other platforms.
  52 printf_pat = re.compile('%((?:(?P<ord>\d+)\$)*(?P<fullvar>[+#-]*(?:\d+)*(?:\.\d+)*(hh\|h\|l\|ll)*(?P<type>[\w%])))')
  53
  54 # The name of the XML tag
  55 tagname_re = re.compile("<[\s]*([\w\/]*)")
  56
  57 # We allow escaped quotes, probably for old escaping style of OOo helpcontent
  58 #TODO: remove escaped strings once usage is audited
  59 property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))")
  60
  61 # The whole tag
  62 tag_re = re.compile("<[^>]+>")
  63
  64 def tagname(string):
  65     """Returns the name of the XML/HTML tag in string"""
  66     return tagname_re.match(string).groups(1)[0]
  67
  68 def intuplelist(pair, list):
  69     """Tests to see if pair == (a,b,c) is in list, but handles None entries in
  70     list as wildcards (only allowed in positions "a" and "c"). We take a shortcut
  71     by only considering "c" if "b" has already matched."""
  72     a, b, c = pair
  73     if (b, c) == (None, None):
  74         #This is a tagname
  75         return pair
  76     for pattern in list:
  77         x, y, z = pattern
  78         if (x, y) in [(a, b), (None, b)]:
  79             if z in [None, c]:
  80                 return pattern
  81     return pair
  82
  83 def tagproperties(strings, ignore):
  84     """Returns all the properties in the XML/HTML tag string as
  85     (tagname, propertyname, propertyvalue), but ignore those combinations
  86     specified in ignore."""
  87     properties = []
  88     for string in strings:
  89         tag = tagname(string)
  90         properties += [(tag, None, None)]
  91         #Now we isolate the attribute pairs.
  92         pairs = property_re.findall(string)
  93         for property, value, a, b in pairs:
  94             #Strip the quotes:
  95             value = value[1:-1]
  96
  97             canignore = False
  98             if (tag, property, value) in ignore or \
  99                     intuplelist((tag,property,value), ignore) != (tag,property,value):
 100                 canignore = True
 101                 break
 102             if not canignore:
 103                 properties += [(tag, property, value)]
 104     return properties
 105
 106
 107 class FilterFailure(Exception):
 108     """This exception signals that a Filter didn't pass, and gives an explanation
 109     or a comment"""
 110     def __init__(self, messages):
 111         if not isinstance(messages, list):
 112             messages = [messages]
 113         assert isinstance(messages[0], unicode)  # Assumption: all of same type
 114         Exception.__init__(self, u", ".join(messages))
 115
 116 class SeriousFilterFailure(FilterFailure):
 117     """This exception signals that a Filter didn't pass, and the bad translation
 118     might break an application (so the string will be marked fuzzy)"""
 119     pass
 120
 121 #(tag, attribute, value) specifies a certain attribute which can be changed/
 122 #ignored if it exists inside tag. In the case where there is a third element
 123 #in the tuple, it indicates a property value that can be ignored if present
 124 #(like defaults, for example)
 125 #If a certain item is None, it indicates that it is relevant for all values of
 126 #the property/tag that is specified as None. A non-None value of "value"
 127 #indicates that the value of the attribute must be taken into account.
 128 common_ignoretags = [(None, "xml-lang", None)]
 129 common_canchangetags = [("img", "alt", None)]
 130
 131 class CheckerConfig(object):
 132     """object representing the configuration of a checker"""
 133     def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
 134                     notranslatewords=None, musttranslatewords=None, validchars=None,
 135                     punctuation=None, endpunctuation=None, ignoretags=None,
 136                     canchangetags=None, criticaltests=None, credit_sources=None):
 137         # Init lists
 138         self.accelmarkers = self._init_list(accelmarkers)
 139         self.varmatches = self._init_list(varmatches)
 140         self.criticaltests = self._init_list(criticaltests)
 141         self.credit_sources = self._init_list(credit_sources)
 142         # Lang data
 143         self.targetlanguage = targetlanguage
 144         self.updatetargetlanguage(targetlanguage)
 145         self.sourcelang = factory.getlanguage('en')
 146         # Inits with default values
 147         self.punctuation = self._init_default(data.forceunicode(punctuation),  self.lang.punctuation)
 148         self.endpunctuation = self._init_default(data.forceunicode(endpunctuation), self.lang.sentenceend)
 149         self.ignoretags = self._init_default(ignoretags, common_ignoretags)
 150         self.canchangetags = self._init_default(canchangetags, common_canchangetags)
 151         # Other data
 152         # TODO: allow user configuration of untranslatable words
 153         self.notranslatewords = dict.fromkeys([data.forceunicode(key) for key in self._init_list(notranslatewords)])
 154         self.musttranslatewords = dict.fromkeys([data.forceunicode(key) for key in self._init_list(musttranslatewords)])
 155         validchars = data.forceunicode(validchars)
 156         self.validcharsmap = {}
 157         self.updatevalidchars(validchars)
 158
 159     def _init_list(self, list):
 160         """initialise configuration paramaters that are lists
 161
 162         @type list: List
 163         @param list: None (we'll initialise a blank list) or a list paramater
 164         @rtype: List
 165         """
 166         if list is None:
 167             list = []
 168         return list
 169
 170     def _init_default(self, param, default):
 171         """initialise parameters that can have default options
 172
 173         @param param: the user supplied paramater value
 174         @param default: default values when param is not specified
 175         @return: the paramater as specified by the user of the default settings
 176         """
 177         if param is None:
 178             return default
 179         return param
 180
 181     def update(self, otherconfig):
 182         """combines the info in otherconfig into this config object"""
 183         self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
 184         self.updatetargetlanguage(self.targetlanguage)
 185         self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
 186         self.varmatches.extend(otherconfig.varmatches)
 187         self.notranslatewords.update(otherconfig.notranslatewords)
 188         self.musttranslatewords.update(otherconfig.musttranslatewords)
 189         self.validcharsmap.update(otherconfig.validcharsmap)
 190         self.punctuation += otherconfig.punctuation
 191         self.endpunctuation += otherconfig.endpunctuation
 192         #TODO: consider also updating in the following cases:
 193         self.ignoretags = otherconfig.ignoretags
 194         self.canchangetags = otherconfig.canchangetags
 195         self.criticaltests.extend(otherconfig.criticaltests)
 196         self.credit_sources = otherconfig.credit_sources
 197
 198     def updatevalidchars(self, validchars):
 199         """updates the map that eliminates valid characters"""
 200         if validchars is None:
 201             return True
 202         validcharsmap = dict([(ord(validchar), None) for validchar in data.forceunicode(validchars)])
 203         self.validcharsmap.update(validcharsmap)
 204
 205     def updatetargetlanguage(self, langcode):
 206         """Updates the target language in the config to the given target language"""
 207         self.lang = factory.getlanguage(langcode)
 208
 209 def cache_results(f):
 210     def cached_f(self, param1):
 211         key = (f.__name__, param1)
 212         res_cache = self.results_cache
 213         if key in res_cache:
 214             return res_cache[key]
 215         else:
 216             value = f(self, param1)
 217             res_cache[key] = value
 218             return value
 219     return cached_f
 220
class UnitChecker(object):
    """Parent Checker class which does the checking based on functions available
    in derived classes.

    Every callable attribute of an instance that is not also defined on
    UnitChecker itself is treated as a test (filter). Tests receive the unit
    and either return a true value (pass), return a false value (fail) or
    raise FilterFailure with an explanation."""
    # Maps the name of a test to the names of the tests that must be skipped
    # when that test fails (see run_filters).
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        # errorhandler, when given, is called as
        # errorhandler(functionname, source, target, exception) for unexpected
        # exceptions in a test, and its return value becomes the test result.
        self.errorhandler = errorhandler
        if checkerconfig is None:
            self.setconfig(CheckerConfig())
        else:
            self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests...
        self.helperfunctions = {}
        for functionname in dir(UnitChecker):
            function = getattr(self, functionname)
            if callable(function):
                self.helperfunctions[functionname] = function
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)

        # Per-unit memo used by the cache_results decorated helpers below
        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        filters = {}
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        for functionname in limitfilters:
            if functionname in excludefilters: continue
            if functionname in self.helperfunctions: continue
            # the error handler is a callable attribute, but not a test
            if functionname == "errorhandler": continue
            filterfunction = getattr(self, functionname, None)
            # skips plain data attributes and names that don't exist here
            if not callable(filterfunction): continue
            filters[functionname] = filterfunction
        return filters

    def setconfig(self, config):
        """sets the configuration and rebuilds the accelerator and variable
        prefilters that are derived from it"""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
        # One filter per variable style; built with prefilters.varname for
        # filtervariables() and with prefilters.varnone for removevariables()
        self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
                for startmatch, endmatch in self.config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone)
                for startmatch, endmatch in self.config.varmatches]

    def setsuggestionstore(self, store):
        """Sets the filename that a checker should use for evaluating suggestions."""
        self.suggestion_store = store

    def filtervariables(self, str1):
        """filter out variables from str1"""
        return helpers.multifilter(str1, self.varfilters)
    filtervariables = cache_results(filtervariables)

    def removevariables(self, str1):
        """remove variables from str1"""
        return helpers.multifilter(str1, self.removevarfilter)
    removevariables = cache_results(removevariables)

    def filteraccelerators(self, str1):
        """filter out accelerators from str1"""
        return helpers.multifilter(str1, self.accfilters, None)
    filteraccelerators = cache_results(filteraccelerators)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """filter out accelerators from str1"""
        # Not cached: cache_results only supports a single argument
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    def filterwordswithpunctuation(self, str1):
        """replaces words with punctuation with their unpunctuated equivalents"""
        return prefilters.filterwordswithpunctuation(str1)
    filterwordswithpunctuation = cache_results(filterwordswithpunctuation)

    def filterxml(self, str1):
        """filter out XML from the string so only text remains"""
        return tag_re.sub("", str1)
    filterxml = cache_results(filterxml)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal operation"""
        return test(unit)

    def run_filters(self, unit):
        """run all the tests in this suite, return failures as testname, message_or_exception"""
        # Cached helper results belong to a single unit, so start clean
        self.results_cache = {}
        failures = {}
        # Copy, because failed preconditions append to this list below
        ignores = self.config.lang.ignoretests[:]
        functionnames = self.defaultfilters.keys()
        # Preconditions run first so that the tests depending on them can be
        # skipped; they run even when they are not among the default filters.
        priorityfunctionnames = self.preconditions.keys()
        otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
        for functionname in priorityfunctionnames + otherfunctionnames:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if using TeeChecker
            if filterfunction is None:
                continue
            # The test's docstring is the failure message unless the test
            # raises a FilterFailure carrying its own explanation
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure, e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception, e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" % \
                            (functionname, unit.source, unit.target, e))
                else:
                    filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
            if not filterresult:
                # we test some preconditions that aren't actually a cause for failure
                if functionname in self.defaultfilters:
                    failures[functionname] = filtermessage
                if functionname in self.preconditions:
                    for ignoredfunctionname in self.preconditions[functionname]:
                        ignores.append(ignoredfunctionname)
        # Drop the cached strings again once the unit is done
        self.results_cache = {}
        return failures
 344
 345 class TranslationChecker(UnitChecker):
 346     """A checker that passes source and target strings to the checks, not the
 347     whole unit.
 348
 349     This provides some speedup and simplifies testing."""
 350     def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
 351         super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
 352
 353     def run_test(self, test, unit):
 354         """Runs the given test on the given unit.
 355
 356         Note that this can raise a FilterFailure as part of normal operation."""
 357         if self.hasplural:
 358             filtermessages = []
 359             filterresult = True
 360             for pluralform in unit.target.strings:
 361                 try:
 362                     if not test(self.str1, pluralform):
 363                         filterresult = False
 364                 except FilterFailure, e:
 365                     filterresult = False
 366                     filtermessages.append( str(e).decode('utf-8') )
 367             if not filterresult and filtermessages:
 368                 raise FilterFailure(filtermessages)
 369             else:
 370                 return True
 371         else:
 372             return test(self.str1, self.str2)
 373
 374     def run_filters(self, unit):
 375         """Do some optimisation by caching some data of the unit for the benefit
 376         of run_test()."""
 377         self.str1 = data.forceunicode(unit.source)
 378         self.str2 = data.forceunicode(unit.target)
 379         self.hasplural = unit.hasplural()
 380         return super(TranslationChecker, self).run_filters(unit)
 381
 382 class TeeChecker:
 383     """A Checker that controls multiple checkers."""
 384     def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
 385             checkerclasses=None, errorhandler=None, languagecode=None):
 386         """construct a TeeChecker from the given checkers"""
 387         self.limitfilters = limitfilters
 388         if checkerclasses is None:
 389             checkerclasses = [StandardChecker]
 390         self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses]
 391         if languagecode:
 392             for checker in self.checkers:
 393                 checker.config.updatetargetlanguage(languagecode)
 394             # Let's hook up the language specific checker
 395             lang_checker = self.checkers[0].config.lang.checker
 396             if lang_checker:
 397                 self.checkers.append(lang_checker)
 398
 399         self.combinedfilters = self.getfilters(excludefilters, limitfilters)
 400         self.config = checkerconfig or self.checkers[0].config
 401
 402     def getfilters(self, excludefilters=None, limitfilters=None):
 403         """returns dictionary of available filters, including/excluding those in
 404         the given lists"""
 405         if excludefilters is None:
 406             excludefilters = {}
 407         filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
 408         self.combinedfilters = {}
 409         for filters in filterslist:
 410             self.combinedfilters.update(filters)
 411         # TODO: move this somewhere more sensible (a checkfilters method?)
 412         if limitfilters is not None:
 413             for filtername in limitfilters:
 414                 if not filtername in self.combinedfilters:
 415                     import sys
 416                     print >> sys.stderr, "warning: could not find filter %s" % filtername
 417         return self.combinedfilters
 418
 419     def run_filters(self, unit):
 420         """run all the tests in the checker's suites"""
 421         failures = {}
 422         for checker in self.checkers:
 423             failures.update(checker.run_filters(unit))
 424         return failures
 425
 426     def setsuggestionstore(self, store):
 427         """Sets the filename that a checker should use for evaluating suggestions."""
 428         for checker in self.checkers:
 429             checker.setsuggestionstore(store)
 430
 431
 432 class StandardChecker(TranslationChecker):
 433     """The basic test suite for source -> target translations."""
 434     def untranslated(self, str1, str2):
 435         """checks whether a string has been translated at all"""
 436         str2 = prefilters.removekdecomments(str2)
 437         return not (len(str1.strip()) > 0 and len(str2) == 0)
 438
 439     def unchanged(self, str1, str2):
 440         """checks whether a translation is basically identical to the original string"""
 441         str1 = self.filteraccelerators(str1)
 442         str2 = self.filteraccelerators(str2)
 443         if len(str1.strip()) == 0:
 444             return True
 445         if str1.isupper() and str1 == str2:
 446             return True
 447         if self.config.notranslatewords:
 448             words1 = str1.split()
 449             if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
 450                 return True
 451         str1 = self.removevariables(str1)
 452         str2 = self.removevariables(str2)
 453         if not (str1.strip().isdigit() or len(str1) < 2 or decoration.ispurepunctuation(str1.strip())) and (str1.strip().lower() == str2.strip().lower()):
 454             raise FilterFailure(u"please translate")
 455         return True
 456
 457     def blank(self, str1, str2):
 458         """checks whether a translation only contains spaces"""
 459         len1 = len(str1.strip())
 460         len2 = len(str2.strip())
 461         return not (len1 > 0 and len(str2) != 0 and len2 == 0)
 462
 463     def short(self, str1, str2):
 464         """checks whether a translation is much shorter than the original string"""
 465         len1 = len(str1.strip())
 466         len2 = len(str2.strip())
 467         return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
 468
 469     def long(self, str1, str2):
 470         """checks whether a translation is much longer than the original string"""
 471         len1 = len(str1.strip())
 472         len2 = len(str2.strip())
 473         return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
 474
 475     def escapes(self, str1, str2):
 476         """checks whether escaping is consistent between the two strings"""
 477         if not helpers.countsmatch(str1, str2, ("\\", "\\\\")):
 478             escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if "\\" in word])
 479             escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if "\\" in word])
 480             raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
 481         else:
 482             return True
 483
 484     def newlines(self, str1, str2):
 485         """checks whether newlines are consistent between the two strings"""
 486         if not helpers.countsmatch(str1, str2, ("\n", "\r")):
 487             raise FilterFailure(u"line endings in original don't match line endings in translation")
 488         else:
 489             return True
 490
 491     def tabs(self, str1, str2):
 492         """checks whether tabs are consistent between the two strings"""
 493         if not helpers.countmatch(str1, str2, "\t"):
 494             raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
 495         else:
 496             return True
 497
 498
 499     def singlequoting(self, str1, str2):
 500         """checks whether singlequoting is consistent between the two strings"""
 501         str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
 502         str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
 503         return helpers.countsmatch(str1, str2, ("'", "''", "\\'"))
 504
 505     def doublequoting(self, str1, str2):
 506         """checks whether doublequoting is consistent between the two strings"""
 507         str1 = self.filteraccelerators(self.filtervariables(str1))
 508         str1 = self.filterxml(str1)
 509         str1 = self.config.lang.punctranslate(str1)
 510         str2 = self.filteraccelerators(self.filtervariables(str2))
 511         str2 = self.filterxml(str2)
 512         return helpers.countsmatch(str1, str2, ('"', '""', '\\"', u"«", u"»"))
 513
 514     def doublespacing(self, str1, str2):
 515         """checks for bad double-spaces by comparing to original"""
 516         str1 = self.filteraccelerators(str1)
 517         str2 = self.filteraccelerators(str2)
 518         return helpers.countmatch(str1, str2, "  ")
 519
 520     def puncspacing(self, str1, str2):
 521         """checks for bad spacing after punctuation"""
 522         if str1.find(u" ") == -1:
 523             return True
 524         str1 = self.filteraccelerators(self.filtervariables(str1))
 525         str1 = self.config.lang.punctranslate(str1)
 526         str2 = self.filteraccelerators(self.filtervariables(str2))
 527         for puncchar in self.config.punctuation:
 528             plaincount1 = str1.count(puncchar)
 529             plaincount2 = str2.count(puncchar)
 530             if not plaincount1 or plaincount1 != plaincount2:
 531                 continue
 532             spacecount1 = str1.count(puncchar+" ")
 533             spacecount2 = str2.count(puncchar+" ")
 534             if spacecount1 != spacecount2:
 535                 # handle extra spaces that are because of transposed punctuation
 536                 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1:
 537                     continue
 538                 return False
 539         return True
 540
 541     def printf(self, str1, str2):
 542         """checks whether printf format strings match"""
 543         count1 = count2 = None
 544         for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
 545             count2 = var_num2 + 1
 546             if match2.group('ord'):
 547                 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
 548                     count1 = var_num1 + 1
 549                     if int(match2.group('ord')) == var_num1 + 1:
 550                         if match2.group('fullvar') != match1.group('fullvar'):
 551                             return 0
 552             else:
 553                 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
 554                     count1 = var_num1 + 1
 555                     if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
 556                         return 0
 557
 558         if count2 is None:
 559             if list(printf_pat.finditer(str1)):
 560                 return 0
 561
 562         if (count1 or count2) and (count1 != count2):
 563             return 0
 564         return 1
 565
 566     def accelerators(self, str1, str2):
 567         """checks whether accelerators are consistent between the two strings"""
 568         str1 = self.filtervariables(str1)
 569         str2 = self.filtervariables(str2)
 570         messages = []
 571         for accelmarker in self.config.accelmarkers:
 572             counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
 573             counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
 574             count1, countbad1 = counter1(str1)
 575             count2, countbad2 = counter2(str2)
 576             getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
 577             accel2, bad2 = getaccel(str2)
 578             if count1 == count2:
 579                 continue
 580             if count1 == 1 and count2 == 0:
 581                 if countbad2 == 1:
 582                     messages.append("accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
 583                 else:
 584                     messages.append(u"accelerator %s is missing from translation" % accelmarker)
 585             elif count1 == 0:
 586                 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
 587             elif count1 == 1 and count2 > count1:
 588                 messages.append("accelerator %s is repeated in translation" % accelmarker)
 589             else:
 590                 messages.append("accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
 591         if messages:
 592             if "accelerators" in self.config.criticaltests:
 593                 raise SeriousFilterFailure(messages)
 594             else:
 595                 raise FilterFailure(messages)
 596         return True
 597
 598 #    def acceleratedvariables(self, str1, str2):
 599 #        """checks that no variables are accelerated"""
 600 #        messages = []
 601 #        for accelerator in self.config.accelmarkers:
 602 #            for variablestart, variableend in self.config.varmatches:
 603 #                error = accelerator + variablestart
 604 #                if str1.find(error) >= 0:
 605 #                    messages.append("original has an accelerated variable")
 606 #                if str2.find(error) >= 0:
 607 #                    messages.append("translation has an accelerated variable")
 608 #        if messages:
 609 #            raise FilterFailure(messages)
 610 #        return True
 611
 612     def variables(self, str1, str2):
 613         """checks whether variables of various forms are consistent between the two strings"""
 614         messages = []
 615         mismatch1, mismatch2 = [], []
 616         varnames1, varnames2 = [], []
 617         for startmarker, endmarker in self.config.varmatches:
 618             varchecker = decoration.getvariables(startmarker, endmarker)
 619             if startmarker and endmarker:
 620                 if isinstance(endmarker, int):
 621                     redecorate = lambda var: startmarker + var
 622                 else:
 623                     redecorate = lambda var: startmarker + var + endmarker
 624             elif startmarker:
 625                 redecorate = lambda var: startmarker + var
 626             else:
 627                 redecorate = lambda var: var
 628             vars1 = varchecker(str1)
 629             vars2 = varchecker(str2)
 630             if vars1 != vars2:
 631                 # we use counts to compare so we can handle multiple variables
 632                 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
 633                 # filter variable names we've already seen, so they aren't matched by more than one filter...
 634                 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
 635                 varnames1.extend(vars1)
 636                 varnames2.extend(vars2)
 637                 vars1 = map(redecorate, vars1)
 638                 vars2 = map(redecorate, vars2)
 639                 mismatch1.extend(vars1)
 640                 mismatch2.extend(vars2)
 641         if mismatch1:
 642             messages.append("do not translate: %s" % ", ".join(mismatch1))
 643         elif mismatch2:
 644             messages.append("translation contains variables not in original: %s" % ", ".join(mismatch2))
 645         if messages and mismatch1:
 646             raise SeriousFilterFailure(messages)
 647         elif messages:
 648             raise FilterFailure(messages)
 649         return True
 650
 651     def functions(self, str1, str2):
 652         """checks that function names are not translated"""
 653         return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
 654
 655     def emails(self, str1, str2):
 656         """checks that emails are not translated"""
 657         return helpers.funcmatch(str1, str2, decoration.getemails)
 658
 659     def urls(self, str1, str2):
 660         """checks that URLs are not translated"""
 661         return helpers.funcmatch(str1, str2, decoration.geturls)
 662
 663     def numbers(self, str1, str2):
 664         """checks whether numbers of various forms are consistent between the two strings"""
 665         return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
 666
 667     def startwhitespace(self, str1, str2):
 668         """checks whether whitespace at the beginning of the strings matches"""
 669         str1 = self.filteraccelerators(self.filtervariables(str1))
 670         str2 = self.filteraccelerators(self.filtervariables(str2))
 671         return helpers.funcmatch(str1, str2, decoration.spacestart)
 672
 673     def endwhitespace(self, str1, str2):
 674         """checks whether whitespace at the end of the strings matches"""
 675         str1 = self.filteraccelerators(self.filtervariables(str1))
 676         str2 = self.filteraccelerators(self.filtervariables(str2))
 677         return helpers.funcmatch(str1, str2, decoration.spaceend)
 678
 679     def startpunc(self, str1, str2):
 680         """checks whether punctuation at the beginning of the strings match"""
 681         str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
 682         str1 = self.config.lang.punctranslate(str1)
 683         str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
 684         return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
 685
 686     def endpunc(self, str1, str2):
 687         """checks whether punctuation at the end of the strings match"""
 688         str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
 689         str1 = self.config.lang.punctranslate(str1)
 690         str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
 691         return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation)
 692
 693     def purepunc(self, str1, str2):
 694         """checks that strings that are purely punctuation are not changed"""
 695         # this test is a subset of startandend
 696         if (decoration.ispurepunctuation(str1)):
 697             return str1 == str2
 698         else:
 699             return not decoration.ispurepunctuation(str2)
 700
 701     def brackets(self, str1, str2):
 702         """checks that the number of brackets in both strings match"""
 703         str1 = self.filtervariables(str1)
 704         str2 = self.filtervariables(str2)
 705         messages = []
 706         missing = []
 707         extra = []
 708         for bracket in ("[", "]", "{", "}", "(", ")"):
 709             count1 = str1.count(bracket)
 710             count2 = str2.count(bracket)
 711             if count2 < count1:
 712                 missing.append("'%s'" % bracket)
 713             elif count2 > count1:
 714                 extra.append("'%s'" % bracket)
 715         if missing:
 716             messages.append(u"translation is missing %s" % ", ".join(missing))
 717         if extra:
 718             messages.append(u"translation has extra %s" % ", ".join(extra))
 719         if messages:
 720             raise FilterFailure(messages)
 721         return True
 722
 723     def sentencecount(self, str1, str2):
 724         """checks that the number of sentences in both strings match"""
 725         sentences1 = len(self.config.sourcelang.sentences(str1))
 726         sentences2 = len(self.config.lang.sentences(str2))
 727         if not sentences1 == sentences2:
 728             raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
 729         return True
 730
 731     def options(self, str1, str2):
 732         """checks that options are not translated"""
 733         str1 = self.filtervariables(str1)
 734         for word1 in str1.split():
 735             if word1 != "--" and word1.startswith("--") and word1[-1].isalnum():
 736                 parts = word1.split("=")
 737                 if not parts[0] in str2:
 738                     raise FilterFailure("The option %s does not occur or is translated in the translation." % parts[0])
 739                 if len(parts) > 1 and parts[1] in str2:
 740                     raise FilterFailure("The parameter %(param)s in option %(option)s is not translated." % {"param": parts[0], "option": parts[1]})
 741         return True
 742
 743     def startcaps(self, str1, str2):
 744         """checks that the message starts with the correct capitalisation"""
 745         str1 = self.filteraccelerators(str1)
 746         str2 = self.filteraccelerators(str2)
 747         if len(str1) > 1 and len(str2) > 1:
 748             return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
 749         if len(str1) == 0 and len(str2) == 0:
 750             return True
 751         if len(str1) == 0 or len(str2) == 0:
 752             return False
 753         return True
 754
 755     def simplecaps(self, str1, str2):
 756         """checks the capitalisation of two strings isn't wildly different"""
 757         str1 = self.removevariables(str1)
 758         str2 = self.removevariables(str2)
 759         # TODO: review this. The 'I' is specific to English, so it probably serves
 760         # no purpose to get sourcelang.sentenceend
 761         str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, " i ", str1)
 762         capitals1 = helpers.filtercount(str1, type(str1).isupper)
 763         capitals2 = helpers.filtercount(str2, type(str2).isupper)
 764         alpha1 = helpers.filtercount(str1, type(str1).isalpha)
 765         alpha2 = helpers.filtercount(str2, type(str2).isalpha)
 766         # Capture the all caps case
 767         if capitals1 == alpha1:
 768             return capitals2 == alpha2
 769         # some heuristic tests to try and see that the style of capitals is vaguely the same
 770         if capitals1 == 0 or capitals1 == 1:
 771             return capitals2 == capitals1
 772         elif capitals1 < len(str1) / 10:
 773             return capitals2 < len(str2) / 8
 774         elif len(str1) < 10:
 775             return abs(capitals1 - capitals2) < 3
 776         elif capitals1 > len(str1) * 6 / 10:
 777             return capitals2 > len(str2) * 6 / 10
 778         else:
 779             return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
 780
 781     def acronyms(self, str1, str2):
 782         """checks that acronyms that appear are unchanged"""
 783         acronyms = []
 784         allowed = []
 785         for startmatch, endmatch in self.config.varmatches:
 786             allowed += decoration.getvariables(startmatch, endmatch)(str1)
 787         allowed += self.config.musttranslatewords.keys()
 788         str1 = self.filteraccelerators(self.filtervariables(str1))
 789         iter = self.config.lang.word_iter(str1)
 790         str2 = self.filteraccelerators(self.filtervariables(str2))
 791         #TODO: strip XML? - should provide better error messsages
 792         # see mail/chrome/messanger/smime.properties.po
 793         #TODO: consider limiting the word length for recognising acronyms to
 794         #something like 5/6 characters
 795         for word in iter:
 796             if word.isupper() and len(word) > 1 and word not in allowed:
 797                 if str2.find(word) == -1:
 798                     acronyms.append(word)
 799         if acronyms:
 800             raise FilterFailure("acronyms should not be translated: " + ", ".join(acronyms))
 801         return True
 802
 803     def doublewords(self, str1, str2):
 804         """checks for repeated words in the translation"""
 805         lastword = ""
 806         without_newlines = "\n".join(str2.split("\n"))
 807         words = self.filteraccelerators(self.removevariables(without_newlines)).replace(".", "").lower().split()
 808         for word in words:
 809             if word == lastword:
 810                 raise FilterFailure("The word '%s' is repeated" % word)
 811             lastword = word
 812         return True
 813
 814     def notranslatewords(self, str1, str2):
 815         """checks that words configured as untranslatable appear in the translation too"""
 816         if not self.config.notranslatewords:
 817             return True
 818         str1 = self.filtervariables(str1)
 819         str2 = self.filtervariables(str2)
 820         #The above is full of strange quotes and things in utf-8 encoding.
 821         #single apostrophe perhaps problematic in words like "doesn't"
 822         for seperator in self.config.punctuation:
 823             str1 = str1.replace(seperator, u" ")
 824             str2 = str2.replace(seperator, u" ")
 825         words1 = self.filteraccelerators(str1).split()
 826         words2 = self.filteraccelerators(str2).split()
 827         stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2]
 828         if stopwords:
 829             raise FilterFailure("do not translate: %s" % (", ".join(stopwords)))
 830         return True
 831
 832     def musttranslatewords(self, str1, str2):
 833         """checks that words configured as definitely translatable don't appear in
 834         the translation"""
 835         if not self.config.musttranslatewords:
 836             return True
 837         str1 = self.removevariables(str1)
 838         str2 = self.removevariables(str2)
 839         #The above is full of strange quotes and things in utf-8 encoding.
 840         #single apostrophe perhaps problematic in words like "doesn't"
 841         for seperator in self.config.punctuation:
 842             str1 = str1.replace(seperator, " ")
 843             str2 = str2.replace(seperator, " ")
 844         words1 = self.filteraccelerators(str1).split()
 845         words2 = self.filteraccelerators(str2).split()
 846         stopwords = [word for word in words1 if word in self.config.musttranslatewords and word in words2]
 847         if stopwords:
 848             raise FilterFailure("please translate: %s" % (", ".join(stopwords)))
 849         return True
 850
 851     def validchars(self, str1, str2):
 852         """checks that only characters specified as valid appear in the translation"""
 853         if not self.config.validcharsmap:
 854             return True
 855         invalid1 = str1.translate(self.config.validcharsmap)
 856         invalid2 = str2.translate(self.config.validcharsmap)
 857         invalidchars = [u"'%s' (\\u%04x)" % (invalidchar, ord(invalidchar)) for invalidchar in invalid2 if invalidchar not in invalid1]
 858         if invalidchars:
 859             raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
 860         return True
 861
 862     def filepaths(self, str1, str2):
 863         """checks that file paths have not been translated"""
 864         for word1 in self.filteraccelerators(str1).split():
 865             if word1.startswith("/"):
 866                 if not helpers.countsmatch(str1, str2, (word1,)):
 867                     return False
 868         return True
 869
 870     def xmltags(self, str1, str2):
 871         """checks that XML/HTML tags have not been translated"""
 872         tags1 = tag_re.findall(str1)
 873         if len(tags1) > 0:
 874             if (len(tags1[0]) == len(str1)) and not "=" in tags1[0]:
 875                 return True
 876             tags2 = tag_re.findall(str2)
 877             properties1 = tagproperties(tags1, self.config.ignoretags)
 878             properties2 = tagproperties(tags2, self.config.ignoretags)
 879             filtered1 = []
 880             filtered2 = []
 881             for property1 in properties1:
 882                 filtered1 += [intuplelist(property1, self.config.canchangetags)]
 883             for property2 in properties2:
 884                 filtered2 += [intuplelist(property2, self.config.canchangetags)]
 885
 886             #TODO: consider the consequences of different ordering of attributes/tags
 887             if filtered1 != filtered2:
 888                 return False
 889         else:
 890             # No tags in str1, let's just check that none were added in str2. This
 891             # might be useful for fuzzy strings wrongly unfuzzied, for example.
 892             tags2 = tag_re.findall(str2)
 893             if len(tags2) > 0:
 894                 return False
 895         return True
 896
 897     def kdecomments(self, str1, str2):
 898         """checks to ensure that no KDE style comments appear in the translation"""
 899         return str2.find("\n_:") == -1 and not str2.startswith("_:")
 900
 901     def compendiumconflicts(self, str1, str2):
 902         """checks for Gettext compendium conflicts (#-#-#-#-#)"""
 903         return str2.find("#-#-#-#-#") == -1
 904
 905     def simpleplurals(self, str1, str2):
 906         """checks for English style plural(s) for you to review"""
 907         def numberofpatterns(string, patterns):
 908             number = 0
 909             for pattern in patterns:
 910                 number += len(re.findall(pattern, string))
 911             return number
 912
 913         sourcepatterns = ["\(s\)"]
 914         targetpatterns = ["\(s\)"]
 915         sourcecount = numberofpatterns(str1, sourcepatterns)
 916         targetcount = numberofpatterns(str2, targetpatterns)
 917         if self.config.lang.nplurals == 1:
 918             return not targetcount
 919         return sourcecount == targetcount
 920
 921     def spellcheck(self, str1, str2):
 922         """checks words that don't pass a spell check"""
 923         if not self.config.targetlanguage:
 924             return True
 925         if not spelling.available:
 926             return True
 927         str1 = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
 928         str2 = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
 929         ignore1 = []
 930         messages = []
 931         for word, index, suggestions in spelling.check(str1, lang="en"):
 932             ignore1.append(word)
 933         for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
 934             if word in self.config.notranslatewords:
 935                 continue
 936             if word in ignore1:
 937                 continue
 938             # hack to ignore hyphenisation rules
 939             if word in suggestions:
 940                 continue
 941             messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions)))
 942         if messages:
 943             raise FilterFailure(messages)
 944         return True
 945
 946     def credits(self, str1, str2):
 947         """checks for messages containing translation credits instead of normal translations."""
 948         return not str1 in self.config.credit_sources
 949
    # If the precondition filter is run and fails then the other tests listed are ignored
    # Each key is the name of a precondition test; its value is the tuple of
    # test names that are skipped once that precondition has failed.
    # "untranslated" lists the same tests as "blank" plus "blank" and "nplurals".
    preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                    "accelerators", "brackets", "endpunc",
                                    "acronyms", "xmltags", "startpunc",
                                    "endwhitespace", "startwhitespace",
                                    "escapes", "doublequoting", "singlequoting",
                                    "filepaths", "purepunc", "doublespacing",
                                    "sentencecount", "numbers", "isfuzzy",
                                    "isreview", "notranslatewords", "musttranslatewords",
                                    "emails", "simpleplurals", "urls", "printf",
                                    "tabs", "newlines", "functions", "options",
                                    "blank", "nplurals"),
                    "blank":        ("simplecaps", "variables", "startcaps",
                                    "accelerators", "brackets", "endpunc",
                                    "acronyms", "xmltags", "startpunc",
                                    "endwhitespace", "startwhitespace",
                                    "escapes", "doublequoting", "singlequoting",
                                    "filepaths", "purepunc", "doublespacing",
                                    "sentencecount", "numbers", "isfuzzy",
                                    "isreview", "notranslatewords", "musttranslatewords",
                                    "emails", "simpleplurals", "urls", "printf",
                                    "tabs", "newlines", "functions", "options"),
                    "credits":      ("simplecaps", "variables", "startcaps",
                                    "accelerators", "brackets", "endpunc",
                                    "acronyms", "xmltags", "startpunc",
                                    "escapes", "doublequoting", "singlequoting",
                                    "filepaths", "doublespacing",
                                    "sentencecount", "numbers",
                                    "emails", "simpleplurals", "urls", "printf",
                                    "tabs", "newlines", "functions", "options"),
                   "purepunc":      ("startcaps", "options"),
                   "startcaps":     ("simplecaps",),
                   "endwhitespace": ("endpunc",),
                   "startwhitespace":("startpunc",),
                   "unchanged":     ("doublewords",),
                   "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                    "numbers", "startpunc", "long", "variables",
                                    "startcaps", "sentencecount", "simplecaps",
                                    "doublespacing", "endpunc", "xmltags",
                                    "startwhitespace", "endwhitespace",
                                    "singlequoting", "doublequoting",
                                    "filepaths", "purepunc", "doublewords", "printf") }
 992
 993 # code to actually run the tests (use unittest?)
 994
 995 openofficeconfig = CheckerConfig(
 996     accelmarkers = ["~"],
 997     varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
 998     ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)],
 999     canchangetags = [("link", "name", None)]
1000     )
1001
class OpenOfficeChecker(StandardChecker):
    """StandardChecker with the openofficeconfig settings merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge openofficeconfig into the "checkerconfig" keyword argument.

        A new CheckerConfig is created when the argument is absent or None;
        all keyword arguments are then handed on to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
1010
# Checker settings merged into the configuration of every MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    # (startmarker, endmarker) pairs used to recognise variables
    varmatches=[
        ("&", ";"),
        ("%", "%"),
        ("%", 1),
        ("$", "$"),
        ("$", None),
        ("#", 1),
        ("${", "}"),
        ("$(^", ")"),
    ],
    criticaltests=["accelerators"]
)
1016
class MozillaChecker(StandardChecker):
    """StandardChecker with the mozillaconfig settings merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge mozillaconfig into the "checkerconfig" keyword argument.

        A new CheckerConfig is created when the argument is absent or None;
        all keyword arguments are then handed on to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
1025
# Checker settings merged into the configuration of every GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    # (startmarker, endmarker) pairs used to recognise variables
    varmatches=[
        ("%", 1),
        ("$(", ")"),
    ],
    # originals that ask for translator credits (see the credits test)
    credit_sources=[u"translator-credits"]
)
1031
class GnomeChecker(StandardChecker):
    """StandardChecker with the gnomeconfig settings merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge gnomeconfig into the "checkerconfig" keyword argument.

        A new CheckerConfig is created when the argument is absent or None;
        all keyword arguments are then handed on to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)
1040
# Checker settings merged into the configuration of every KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    # (startmarker, endmarker) pairs used to recognise variables
    varmatches=[("%", 1)],
    # originals that ask for translator credits (see the credits test)
    credit_sources=[
        u"Your names",
        u"Your emails",
        u"ROLES_OF_TRANSLATORS",
    ]
)
1046
class KdeChecker(StandardChecker):
    """StandardChecker with the kdeconfig settings merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge kdeconfig into the "checkerconfig" keyword argument.

        A new CheckerConfig is created when the argument is absent or None;
        all keyword arguments are then handed on to StandardChecker.
        """
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1057
# Checker settings merged into the configuration of every CCLicenseChecker.
cclicenseconfig = CheckerConfig(
    # (startmarker, endmarker) pairs used to recognise variables
    varmatches=[("@", "@")]
)
class CCLicenseChecker(StandardChecker):
    """StandardChecker with the cclicenseconfig settings merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge cclicenseconfig into the "checkerconfig" keyword argument.

        A new CheckerConfig is created when the argument is absent or None;
        all keyword arguments are then handed on to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1067
# Maps a project name to the checker class carrying that project's settings.
# "wx" uses the KDE checker.
projectcheckers = dict(
    openoffice=OpenOfficeChecker,
    mozilla=MozillaChecker,
    kde=KdeChecker,
    wx=KdeChecker,
    gnome=GnomeChecker,
    creativecommons=CCLicenseChecker
)
1076
1077
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy."""
        marked = unit.isfuzzy()
        return not marked

    def isreview(self, unit):
        """Check if the unit has been marked review."""
        marked = unit.isreview()
        return not marked

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations."""
        if not unit.hasplural():
            return True
        # if we don't have a valid nplurals value, don't run the test
        expected = self.config.lang.nplurals
        if not expected > 0:
            return True
        return len(unit.target.strings) == expected

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions come from self.suggestion_store (units with the same
        source) when one is set, otherwise from the unit's alternative
        translations if it is an XLIFF unit. The test fails when any exist.
        """
        # make sure the attribute exists even when no store was ever assigned
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        store = self.suggestion_store
        if store:
            source = unit.source
            suggestions = [candidate for candidate in store.units if candidate.source == source]
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1108
1109
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Both strings are converted with data.forceunicode, wrapped in a
    TranslationUnit and run through a StandardChecker that excludes the
    filters named in ignorelist. Every failure is printed, and the failures
    are returned.
    """
    from translate.storage import base
    source = data.forceunicode(str1)
    target = data.forceunicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for testname, message in failures:
        report = "failure: %s: %s\n  %r\n  %r" % (testname, message, source, target)
        print(report)
    return failures
1122
def batchruntests(pairs):
    """runs test on a batch of string pairs

    pairs is a sequence of (original, translation) tuples. Each pair is run
    through runtests, which prints its failures; a summary line with the
    number of pairs that passed is printed at the end.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures, so a pair passed only when that is empty
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))

# Ad-hoc self test: when the module is executed as a script, run a handful of
# sample (original, translation) pairs through batchruntests.
if __name__ == '__main__':
    # each entry is an (original, translation) pair
    testset = [(r"simple", r"somple"),
            (r"\this equals \that", r"does \this equal \that?"),
            (r"this \'equals\' that", r"this 'equals' that"),
            (r" start and end! they must match.", r"start and end! they must match."),
            (r"check for matching %variables marked like %this", r"%this %variable is marked"),
            (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
            (r"check for mismatching %variables% too", r"how many %variable% are marked"),
            (r"%% %%", r"%%"),
            (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
            (r"simple lowercase", r"it is all lowercase"),
            (r"simple lowercase", r"It Is All Lowercase"),
            (r"Simple First Letter Capitals", r"First Letters"),
            (r"SIMPLE CAPITALS", r"First Letters"),
            (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
            (r"forgot to translate", r"  ")
            ]
    batchruntests(testset)
1150