lang/km.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """This module represents Khmer language.
  23
  24 For more information, see U{http://en.wikipedia.org/wiki/Khmer_language}
  25 """
  26
  27 import re
  28
  29 from translate.lang import common
  30
  31 class km(common.Common):
  32     """This class represents Khmer."""
  33
  34     khmerpunc = u"។៕៖៘"
  35     """These marks are only used for Khmer."""
  36
  37     punctuation = u"".join([common.Common.commonpunc, common.Common.quotes, common.Common.miscpunc, khmerpunc])
  38
  39     sentenceend = u"!?…។៕៘"
  40
  41     sentencere = re.compile(r"""(?s)    #make . also match newlines
  42                             .*?         #anything, but match non-greedy
  43                             [%s]        #the puntuation for sentence ending
  44                             \s+         #the spacing after the puntuation
  45                             (?=[^a-z\d])#lookahead that next part starts with caps
  46                             """ % sentenceend, re.VERBOSE)
  47     #\u00a0 is non-breaking space
  48     puncdict = {
  49         u".": u"\u00a0។",
  50         u":": u"\u00a0៖",
  51         u"!": u"\u00a0!",
  52         u"?": u"\u00a0?",
  53     }
  54
  55     ignoretests = ["startcaps", "simplecaps"]