lang/zh.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """This module represents Chinese language. (Both traditional and simplified)
  23
  24 For more information, see U{http://en.wikipedia.org/wiki/Chinese_language}
  25 """
  26
  27 from translate.lang import common
  28 import re
  29
  30 class zh(common.Common):
  31     """This class represents Chinese."""
  32
  33     listseperator = u"、"
  34
  35     sentenceend = u"。！？…"
  36
  37     # Compared to common.py, we make the space after the sentence ending
  38     # optional and don't demand an uppercase letter to follow.
  39     sentencere = re.compile(r"""(?s)    #make . also match newlines
  40                             .*?         #any text, but match non-greedy
  41                             [%s]        #the puntuation for sentence ending
  42                             \s*         #the optional space after the puntuation
  43                             """ % sentenceend, re.VERBOSE)
  44
  45     # The following transformation rules should be mostly useful for all types
  46     # of Chinese. The comma (,) is not handled here, since it maps to two
  47     # different characters, depending on context.
  48     # If comma is used as seperation of sentence, then it is converted to a
  49     # fullwidth comma ("，"). If comma is used as seperation of list items like
  50     # "apple, orange, grape, .....", "、" is used.
  51     puncdict = {
  52         u". ": u"。",
  53         u"; ": u"；",
  54         u": ": u"：",
  55         u"! ": u"！",
  56         u"? ": u"？",
  57         u".\n": u"。\n",
  58         u";\n": u"；\n",
  59         u":\n": u"：\n",
  60         u"!\n": u"！\n",
  61         u"?\n": u"？",
  62         u"% ": u"%",
  63     }
  64
  65     ignoretests = ["startcaps", "simplecaps"]