Update ooo320-m1
[ooovba.git] / i18npool / source / characterclassification / cclass_unicode.cxx
bloba1d7e40d3c3b194ed36ff9a07e3d1ba098e97593
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: cclass_unicode.cxx,v $
10 * $Revision: 1.13 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <cclass_unicode.hxx>
35 #include <com/sun/star/i18n/UnicodeScript.hpp>
36 #include <com/sun/star/i18n/UnicodeType.hpp>
37 #include <com/sun/star/i18n/KCharacterType.hpp>
38 #include <unicode/uchar.h>
39 #include <i18nutil/x_rtl_ustring.h>
40 #include <breakiteratorImpl.hxx>
42 using namespace ::com::sun::star::uno;
43 using namespace ::com::sun::star::lang;
44 using namespace ::rtl;
46 namespace com { namespace sun { namespace star { namespace i18n {
47 // ----------------------------------------------------
48 // class cclass_Unicode
49 // ----------------------------------------------------;
51 cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
52 pTable( NULL ),
53 pStart( NULL ),
54 pCont( NULL ),
55 nStartTypes( 0 ),
56 nContTypes( 0 ),
57 eState( ssGetChar ),
58 cGroupSep( ',' ),
59 cDecimalSep( '.' )
61 trans = new Transliteration_casemapping();
62 cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
65 cclass_Unicode::~cclass_Unicode() {
66 destroyParserTable();
67 delete trans;
71 OUString SAL_CALL
72 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
73 sal_Int32 len = Text.getLength();
74 if (nPos >= len)
75 return OUString();
76 if (nCount + nPos > len)
77 nCount = len - nPos;
79 trans->setMappingType(MappingTypeToUpper, rLocale);
80 return trans->transliterateString2String(Text, nPos, nCount);
83 OUString SAL_CALL
84 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
85 sal_Int32 len = Text.getLength();
86 if (nPos >= len)
87 return OUString();
88 if (nCount + nPos > len)
89 nCount = len - nPos;
91 trans->setMappingType(MappingTypeToLower, rLocale);
92 return trans->transliterateString2String(Text, nPos, nCount);
95 OUString SAL_CALL
96 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
97 sal_Int32 len = Text.getLength();
98 if (nPos >= len)
99 return OUString();
100 if (nCount + nPos > len)
101 nCount = len - nPos;
103 trans->setMappingType(MappingTypeToTitle, rLocale);
104 rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 );
105 sal_Unicode* out = pStr->buffer;
106 BreakIteratorImpl brk(xMSF);
107 Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
108 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
109 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
110 if (i >= bdy.endPos)
111 bdy = brk.nextWord(Text, bdy.endPos, rLocale,
112 WordType::ANYWORD_IGNOREWHITESPACES);
113 *out = (i == bdy.startPos) ?
114 trans->transliterateChar2Char(Text[i]) : Text[i];
116 *out = 0;
117 return OUString( pStr, SAL_NO_ACQUIRE );
120 sal_Int16 SAL_CALL
121 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
122 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
123 return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
126 sal_Int16 SAL_CALL
127 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
128 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129 return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
133 sal_Int16 SAL_CALL
134 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
135 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
136 // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
137 // while OO.o enum UnicideScript starts from 0.
138 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
139 return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
143 sal_Int32 SAL_CALL
144 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
145 using namespace ::com::sun::star::i18n::KCharacterType;
147 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
148 if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
149 switch ( u_charType(ch) ) {
150 // Upper
151 case U_UPPERCASE_LETTER :
152 return UPPER|LETTER|PRINTABLE|BASE_FORM;
154 // Lower
155 case U_LOWERCASE_LETTER :
156 return LOWER|LETTER|PRINTABLE|BASE_FORM;
158 // Title
159 case U_TITLECASE_LETTER :
160 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
162 // Letter
163 case U_MODIFIER_LETTER :
164 case U_OTHER_LETTER :
165 return LETTER|PRINTABLE|BASE_FORM;
167 // Digit
168 case U_DECIMAL_DIGIT_NUMBER:
169 case U_LETTER_NUMBER:
170 case U_OTHER_NUMBER:
171 return DIGIT|PRINTABLE|BASE_FORM;
173 // Base
174 case U_NON_SPACING_MARK:
175 case U_ENCLOSING_MARK:
176 case U_COMBINING_SPACING_MARK:
177 return BASE_FORM|PRINTABLE;
179 // Print
180 case U_SPACE_SEPARATOR:
182 case U_DASH_PUNCTUATION:
183 case U_INITIAL_PUNCTUATION:
184 case U_FINAL_PUNCTUATION:
185 case U_CONNECTOR_PUNCTUATION:
186 case U_OTHER_PUNCTUATION:
188 case U_MATH_SYMBOL:
189 case U_CURRENCY_SYMBOL:
190 case U_MODIFIER_SYMBOL:
191 case U_OTHER_SYMBOL:
192 return PRINTABLE;
194 // Control
195 case U_CONTROL_CHAR:
196 case U_FORMAT_CHAR:
197 return CONTROL;
199 case U_LINE_SEPARATOR:
200 case U_PARAGRAPH_SEPARATOR:
201 return CONTROL|PRINTABLE;
203 // for all others
204 default:
205 return U_GENERAL_OTHER_TYPES;
209 sal_Int32 SAL_CALL
210 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
211 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212 return getCharType(Text, &nPos, 0);
216 sal_Int32 SAL_CALL
217 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
218 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
220 sal_Int32 result = getCharType(Text, &nPos, 0);
221 for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
222 result |= getCharType(Text, &nPos, 1);
223 return result;
226 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
227 const OUString& Text,
228 sal_Int32 nPos,
229 const Locale& rLocale,
230 sal_Int32 startCharTokenType,
231 const OUString& userDefinedCharactersStart,
232 sal_Int32 contCharTokenType,
233 const OUString& userDefinedCharactersCont )
234 throw(RuntimeException)
236 ParseResult r;
237 if ( Text.getLength() <= nPos )
238 return r;
240 setupParserTable( rLocale,
241 startCharTokenType, userDefinedCharactersStart,
242 contCharTokenType, userDefinedCharactersCont );
243 parseText( r, Text, nPos );
245 return r;
249 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
250 sal_Int32 nTokenType,
251 const OUString& Text,
252 sal_Int32 nPos,
253 const Locale& rLocale,
254 sal_Int32 startCharTokenType,
255 const OUString& userDefinedCharactersStart,
256 sal_Int32 contCharTokenType,
257 const OUString& userDefinedCharactersCont )
258 throw(RuntimeException)
260 ParseResult r;
261 if ( Text.getLength() <= nPos )
262 return r;
264 setupParserTable( rLocale,
265 startCharTokenType, userDefinedCharactersStart,
266 contCharTokenType, userDefinedCharactersCont );
267 parseText( r, Text, nPos, nTokenType );
269 return r;
272 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
274 return OUString::createFromAscii(cClass);
278 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
280 return !rServiceName.compareToAscii(cClass);
283 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
285 Sequence< OUString > aRet(1);
286 aRet[0] = OUString::createFromAscii(cClass);
287 return aRet;
290 } } } }