# -*- coding: utf-8 -*-
#
# Copyright 2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""This module represents the Khmer language.

For more information, see U{http://en.wikipedia.org/wiki/Khmer_language}
"""
import re

from translate.lang import common
class km(common.Common):
    """This class represents Khmer.

    The class body only defines data: the punctuation sets used for Khmer
    text, the characters that end a sentence, a compiled regular expression
    built from those characters, and a list of test names to ignore.
    """

    # NOTE(review): ``punctuation`` below refers to ``khmerpunc``, but no
    # assignment to it was present, so executing the class body raised
    # NameError.  The value here is a reconstruction consisting of the Khmer
    # signs KHAN (U+17D4), BARIYOOSAN (U+17D5), CAMNUC PII KUUH (U+17D6) and
    # BEYYAL (U+17D8) -- confirm against the upstream file.
    khmerpunc = u"។៕៖៘"
    """These marks are only used for Khmer."""

    # Every punctuation character relevant to Khmer: the three shared sets
    # taken from common.Common followed by the Khmer-only marks.
    punctuation = u"".join([
        common.Common.commonpunc,
        common.Common.quotes,
        common.Common.miscpunc,
        khmerpunc,
    ])

    # Characters that terminate a sentence.  Note that the ASCII full stop
    # is not part of this set.
    sentenceend = u"!?…។៕៘"

    # Matches one sentence: the shortest run of text that ends in one of the
    # ``sentenceend`` characters followed by whitespace, provided the next
    # character is not a lowercase ASCII letter or a digit.  The pattern is
    # written in verbose mode, so its whitespace and ``#`` comments are not
    # part of the match.
    sentencere = re.compile(r"""(?s)        #make . also match newlines
                            .*?         #anything, but match non-greedy
                            [%s]        #the puntuation for sentence ending
                            \s+         #the spacing after the puntuation
                            (?=[^a-z\d])#lookahead that next part starts with caps
                            """ % sentenceend, re.VERBOSE)

    #\u00a0 is non-breaking space
    # NOTE(review): nothing in the visible source uses \u00a0 after this
    # comment; the code it annotated appears to be missing -- restore it from
    # upstream rather than guessing at its contents.

    # Names of tests to ignore for this language.
    # NOTE(review): presumably consumed by the quality checks; verify against
    # the code that reads ``ignoretests``.
    ignoretests = ["startcaps", "simplecaps"]