1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: casefolding.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "i18nutil/casefolding.hxx"
32 #include "casefolding_data.h"
33 #include "i18nutil/widthfolding.hxx"
35 using namespace com::sun::star::lang
;
36 using namespace com::sun::star::uno
;
38 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
40 static Mapping mapping_03a3
[] = {{0, 1, {0x03c2, 0, 0}},{0, 1, {0x03c3, 0, 0}}};
41 static Mapping mapping_0307
[] = {{0, 0, {0, 0, 0}},{0, 1, {0x0307, 0, 0}}};
42 static Mapping mapping_004a
[] = {{0, 2, {0x006a, 0x0307, 0}},{0, 1, {0x006a, 0, 0}}};
43 static Mapping mapping_012e
[] = {{0, 2, {0x012f, 0x0307, 0}},{0, 1, {0x012f, 0, 0}}};
44 static Mapping mapping_00cc
[] = {{0, 3, {0x0069, 0x0307, 0x0300}},{0, 1, {0x00ec, 0, 0}}};
45 static Mapping mapping_00cd
[] = {{0, 3, {0x0069, 0x0307, 0x0301}},{0, 1, {0x00ed, 0, 0}}};
46 static Mapping mapping_0128
[] = {{0, 3, {0x0069, 0x0307, 0x0303}},{0, 1, {0x0129, 0, 0}}};
47 static Mapping mapping_0049
[] = {{0, 2, {0x0069, 0x0307, 0}},{0, 1, {0x0131, 0, 0}},{0, 1, {0x0069, 0, 0}}};
48 static Mapping mapping_0069
[] = {{0, 1, {0x0130, 0, 0}},{0, 1, {0x0049, 0, 0}}};
49 static Mapping mapping_0130
[] = {{0, 1, {0x0069, 0, 0}},{0, 1, {0x0130, 0, 0}}};
// True when the language code of the locale equals `lang`.
// The expansion refers to a variable named aLocale, which must be in scope
// wherever the macro is used.
#define langIs(lang) \
    (aLocale.Language.compareToAscii(lang) == 0)

// True for U+0069 (i) and U+006A (j).
// Only these simple cases are tested; more complicated cases still need to be
// handled.
#define type_i(ch) \
    ((ch) == 0x0069 || (ch) == 0x006a)

// True when the case tables have an entry for `ch` and that entry's type
// carries the CasedLetter flag.
#define cased_letter(ch) \
    (CaseMappingIndex[(ch)>>8] >= 0 && \
     (CaseMappingValue[(CaseMappingIndex[(ch)>>8] << 8) + ((ch)&0xff)].type & CasedLetter))

// Lithuanian: when a capital I or J is lowercased and further accents above
// follow, the dot above has to be made explicit. This macro recognises the
// combining marks that count as "accent above" for that rule.
#define accent_above(ch) \
    (((ch) >= 0x0300 && (ch) <= 0x0314) || \
     ((ch) >= 0x033D && (ch) <= 0x0344) || \
     (ch) == 0x0346 || \
     ((ch) >= 0x034A && (ch) <= 0x034C))
62 Mapping
& casefolding::getConditionalValue(const sal_Unicode
* str
, sal_Int32 pos
, sal_Int32 len
, Locale
& aLocale
, sal_uInt8 nMappingType
) throw (RuntimeException
)
66 // final_sigma (not followed by cased and preceded by cased character)
67 // DOES NOT check ignorable sequence yet (more complicated implementation).
68 return !(pos
< len
&& cased_letter(str
[pos
+1])) && (pos
> 0 && cased_letter(str
[pos
-1])) ?
69 mapping_03a3
[0] : mapping_03a3
[1];
71 return (((nMappingType
== MappingTypeLowerToUpper
&& langIs("lt")) ||
72 (nMappingType
== MappingTypeUpperToLower
&& (langIs("tr") || langIs("az")))) &&
73 (pos
> 0 && type_i(str
[pos
-1]))) ? // after_i
74 mapping_0307
[0] : mapping_0307
[1];
76 return (langIs("tr") || langIs("az")) ? mapping_0130
[0] : mapping_0130
[1];
78 return (langIs("tr") || langIs("az")) ? mapping_0069
[0] : mapping_0069
[1];
79 case 0x0049: return langIs("lt") && pos
> len
&& accent_above(str
[pos
+1]) ? mapping_0049
[0] :
80 (langIs("tr") || langIs("az")) ? mapping_0049
[1] : mapping_0049
[2];
81 case 0x004a: return langIs("lt") && pos
> len
&& accent_above(str
[pos
+1]) ? mapping_004a
[0] : mapping_004a
[1];
82 case 0x012e: return langIs("lt") && pos
> len
&& accent_above(str
[pos
+1]) ? mapping_012e
[0] : mapping_012e
[1];
83 case 0x00cc: return langIs("lt") ? mapping_00cc
[0] : mapping_00cc
[1];
84 case 0x00cd: return langIs("lt") ? mapping_00cd
[0] : mapping_00cd
[1];
85 case 0x0128: return langIs("lt") ? mapping_0128
[0] : mapping_0128
[1];
87 // Should not come here
88 throw RuntimeException();
91 Mapping
& casefolding::getValue(const sal_Unicode
* str
, sal_Int32 pos
, sal_Int32 len
, Locale
& aLocale
, sal_uInt8 nMappingType
) throw (RuntimeException
)
93 static Mapping dummy
= { 0, 1, { 0, 0, 0 } };
94 sal_Int16 address
= CaseMappingIndex
[str
[pos
] >> 8] << 8;
96 dummy
.map
[0] = str
[pos
];
98 if (address
>= 0 && (CaseMappingValue
[address
+= (str
[pos
] & 0xFF)].type
& nMappingType
)) {
99 sal_uInt8 type
= CaseMappingValue
[address
].type
;
100 if (type
& ValueTypeNotValue
) {
101 if (CaseMappingValue
[address
].value
== 0)
102 return getConditionalValue(str
, pos
, len
, aLocale
, nMappingType
);
104 for (int map
= CaseMappingValue
[address
].value
;
105 map
< CaseMappingValue
[address
].value
+ MaxCaseMappingExtras
; map
++) {
106 if (CaseMappingExtra
[map
].type
& nMappingType
) {
107 if (CaseMappingExtra
[map
].type
& ValueTypeNotValue
)
108 return getConditionalValue(str
, pos
, len
, aLocale
, nMappingType
);
110 return CaseMappingExtra
[map
];
113 // Should not come here
114 throw RuntimeException();
117 dummy
.map
[0] = CaseMappingValue
[address
].value
;
// Helper for Japanese voiced / semi-voiced sound marks.
// Tests whether `next` is U+3099 or U+309A and, if so, asks
// widthfolding::getCompositionChar() for the composition of `current` with
// `next`. `current` is taken by non-const reference.
// NOTE(review): only the signature and the test below are visible here; the
// declaration of `c`, the statement controlled by the test and the return
// value are not shown - confirm against the full file.
122 inline sal_Bool SAL_CALL
123 is_ja_voice_sound_mark(sal_Unicode
& current
, sal_Unicode next
)
// The assignment inside the condition stores the composition result in `c`;
// the condition holds only when that result is non-zero.
127 if ((next
== 0x3099 || next
== 0x309a) && ( (c
= widthfolding::getCompositionChar(current
, next
)) != 0 ))
132 sal_Unicode
casefolding::getNextChar(const sal_Unicode
*str
, sal_Int32
& idx
, sal_Int32 len
, MappingElement
& e
, Locale
& aLocale
, sal_uInt8 nMappingType
, TransliterationModules moduleLoaded
) throw (RuntimeException
)
136 e
= MappingElement();
142 if (moduleLoaded
& TransliterationModules_IGNORE_CASE
) {
143 if( e
.current
>= e
.element
.nmap
) {
144 e
.element
= getValue(str
, idx
++, len
, aLocale
, nMappingType
);
147 c
= e
.element
.map
[e
.current
++];
152 if (moduleLoaded
& TransliterationModules_IGNORE_KANA
) {
153 if ((0x3040 <= c
&& c
<= 0x3094) || (0x309d <= c
&& c
<= 0x309f))
157 // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail
158 if (moduleLoaded
& TransliterationModules_IGNORE_WIDTH
) {
159 static oneToOneMapping
& half2fullTable
= widthfolding::gethalf2fullTable();
160 c
= half2fullTable
[c
];
161 if (0x3040 <= c
&& c
<= 0x30ff && idx
< len
&&
162 is_ja_voice_sound_mark(c
, half2fullTable
[*(str
+ idx
)]))