Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / characterclassification / cclass_unicode.cxx
blob5e27fa50d1d6be1e736e5c4d3b5dc55d21289b3c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <cclass_unicode.hxx>
22 #include <com/sun/star/i18n/UnicodeScript.hpp>
23 #include <com/sun/star/i18n/UnicodeType.hpp>
24 #include <com/sun/star/i18n/KCharacterType.hpp>
25 #include <unicode/uchar.h>
26 #include <comphelper/string.hxx>
27 #include <breakiteratorImpl.hxx>
29 using namespace ::com::sun::star::uno;
30 using namespace ::com::sun::star::lang;
31 using namespace ::rtl;
33 namespace com { namespace sun { namespace star { namespace i18n {
34 // ----------------------------------------------------
35 // class cclass_Unicode
36 // ----------------------------------------------------;
38 cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) : m_xContext( rxContext ),
39 pTable( NULL ),
40 pStart( NULL ),
41 pCont( NULL ),
42 nStartTypes( 0 ),
43 nContTypes( 0 ),
44 eState( ssGetChar ),
45 cGroupSep( ',' ),
46 cDecimalSep( '.' )
48 trans = new Transliteration_casemapping();
49 cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
52 cclass_Unicode::~cclass_Unicode() {
53 destroyParserTable();
54 delete trans;
58 OUString SAL_CALL
59 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
60 sal_Int32 len = Text.getLength();
61 if (nPos >= len)
62 return OUString();
63 if (nCount + nPos > len)
64 nCount = len - nPos;
66 trans->setMappingType(MappingTypeToUpper, rLocale);
67 return trans->transliterateString2String(Text, nPos, nCount);
70 OUString SAL_CALL
71 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
72 sal_Int32 len = Text.getLength();
73 if (nPos >= len)
74 return OUString();
75 if (nCount + nPos > len)
76 nCount = len - nPos;
78 trans->setMappingType(MappingTypeToLower, rLocale);
79 return trans->transliterateString2String(Text, nPos, nCount);
82 OUString SAL_CALL
83 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
84 sal_Int32 len = Text.getLength();
85 if (nPos >= len)
86 return OUString();
87 if (nCount + nPos > len)
88 nCount = len - nPos;
90 trans->setMappingType(MappingTypeToTitle, rLocale);
91 rtl_uString* pStr = comphelper::string::rtl_uString_alloc(nCount);
92 sal_Unicode* out = pStr->buffer;
93 BreakIteratorImpl brk(m_xContext);
94 Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
95 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
96 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
97 if (i >= bdy.endPos)
98 bdy = brk.nextWord(Text, bdy.endPos, rLocale,
99 WordType::ANYWORD_IGNOREWHITESPACES);
100 *out = (i == bdy.startPos) ?
101 trans->transliterateChar2Char(Text[i]) : Text[i];
103 *out = 0;
104 return OUString( pStr, SAL_NO_ACQUIRE );
107 sal_Int16 SAL_CALL
108 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
109 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
110 return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
113 sal_Int16 SAL_CALL
114 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116 return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
120 sal_Int16 SAL_CALL
121 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
122 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
123 // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
124 // while OO.o enum UnicideScript starts from 0.
125 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
126 return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
130 sal_Int32 SAL_CALL
131 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
132 using namespace ::com::sun::star::i18n::KCharacterType;
134 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
135 if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
136 switch ( u_charType(ch) ) {
137 // Upper
138 case U_UPPERCASE_LETTER :
139 return UPPER|LETTER|PRINTABLE|BASE_FORM;
141 // Lower
142 case U_LOWERCASE_LETTER :
143 return LOWER|LETTER|PRINTABLE|BASE_FORM;
145 // Title
146 case U_TITLECASE_LETTER :
147 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
149 // Letter
150 case U_MODIFIER_LETTER :
151 case U_OTHER_LETTER :
152 return LETTER|PRINTABLE|BASE_FORM;
154 // Digit
155 case U_DECIMAL_DIGIT_NUMBER:
156 case U_LETTER_NUMBER:
157 case U_OTHER_NUMBER:
158 return DIGIT|PRINTABLE|BASE_FORM;
160 // Base
161 case U_NON_SPACING_MARK:
162 case U_ENCLOSING_MARK:
163 case U_COMBINING_SPACING_MARK:
164 return BASE_FORM|PRINTABLE;
166 // Print
167 case U_SPACE_SEPARATOR:
169 case U_DASH_PUNCTUATION:
170 case U_INITIAL_PUNCTUATION:
171 case U_FINAL_PUNCTUATION:
172 case U_CONNECTOR_PUNCTUATION:
173 case U_OTHER_PUNCTUATION:
175 case U_MATH_SYMBOL:
176 case U_CURRENCY_SYMBOL:
177 case U_MODIFIER_SYMBOL:
178 case U_OTHER_SYMBOL:
179 return PRINTABLE;
181 // Control
182 case U_CONTROL_CHAR:
183 case U_FORMAT_CHAR:
184 return CONTROL;
186 case U_LINE_SEPARATOR:
187 case U_PARAGRAPH_SEPARATOR:
188 return CONTROL|PRINTABLE;
190 // for all others
191 default:
192 return U_GENERAL_OTHER_TYPES;
196 sal_Int32 SAL_CALL
197 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
198 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
199 return getCharType(Text, &nPos, 0);
203 sal_Int32 SAL_CALL
204 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
205 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
207 sal_Int32 result = getCharType(Text, &nPos, 0);
208 for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
209 result |= getCharType(Text, &nPos, 1);
210 return result;
213 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
214 const OUString& Text,
215 sal_Int32 nPos,
216 const Locale& rLocale,
217 sal_Int32 startCharTokenType,
218 const OUString& userDefinedCharactersStart,
219 sal_Int32 contCharTokenType,
220 const OUString& userDefinedCharactersCont )
221 throw(RuntimeException)
223 ParseResult r;
224 if ( Text.getLength() <= nPos )
225 return r;
227 setupParserTable( rLocale,
228 startCharTokenType, userDefinedCharactersStart,
229 contCharTokenType, userDefinedCharactersCont );
230 parseText( r, Text, nPos );
232 return r;
236 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
237 sal_Int32 nTokenType,
238 const OUString& Text,
239 sal_Int32 nPos,
240 const Locale& rLocale,
241 sal_Int32 startCharTokenType,
242 const OUString& userDefinedCharactersStart,
243 sal_Int32 contCharTokenType,
244 const OUString& userDefinedCharactersCont )
245 throw(RuntimeException)
247 ParseResult r;
248 if ( Text.getLength() <= nPos )
249 return r;
251 setupParserTable( rLocale,
252 startCharTokenType, userDefinedCharactersStart,
253 contCharTokenType, userDefinedCharactersCont );
254 parseText( r, Text, nPos, nTokenType );
256 return r;
259 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
261 return OUString::createFromAscii(cClass);
265 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
267 return !rServiceName.compareToAscii(cClass);
270 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
272 Sequence< OUString > aRet(1);
273 aRet[0] = OUString::createFromAscii(cClass);
274 return aRet;
277 } } } }
279 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */