Version 6.1.4.1, tag libreoffice-6.1.4.1
[LibreOffice.git] / i18npool / source / characterclassification / cclass_unicode.cxx
blob70727dea8cb196795c6dd5db02526ceeefcf5ec6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <cclass_unicode.hxx>
21 #include <com/sun/star/i18n/UnicodeScript.hpp>
22 #include <com/sun/star/i18n/UnicodeType.hpp>
23 #include <com/sun/star/i18n/KCharacterType.hpp>
24 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
25 #include <unicode/uchar.h>
26 #include <comphelper/string.hxx>
27 #include <cppuhelper/exc_hlp.hxx>
28 #include <cppuhelper/supportsservice.hxx>
29 #include <breakiteratorImpl.hxx>
30 #include <rtl/ref.hxx>
32 using namespace ::com::sun::star;
33 using namespace ::com::sun::star::uno;
34 using namespace ::com::sun::star::i18n;
35 using namespace ::com::sun::star::lang;
37 namespace i18npool {
39 // class cclass_Unicode
40 // ----------------------------------------------------;
42 cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) :
43 trans( new Transliteration_casemapping() ),
44 m_xContext( rxContext ),
45 pTable( nullptr ),
46 pStart( nullptr ),
47 pCont( nullptr ),
48 nStartTypes( 0 ),
49 nContTypes( 0 ),
50 eState( ssGetChar ),
51 cGroupSep( ',' ),
52 cDecimalSep( '.' ),
53 cDecimalSepAlt( 0 )
57 cclass_Unicode::~cclass_Unicode() {
58 destroyParserTable();
62 OUString SAL_CALL
63 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
64 sal_Int32 len = Text.getLength();
65 if (nPos >= len)
66 return OUString();
67 if (nCount + nPos > len)
68 nCount = len - nPos;
70 trans->setMappingType(MappingType::ToUpper, rLocale);
71 return trans->transliterateString2String(Text, nPos, nCount);
74 OUString SAL_CALL
75 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
76 sal_Int32 len = Text.getLength();
77 if (nPos >= len)
78 return OUString();
79 if (nCount + nPos > len)
80 nCount = len - nPos;
82 trans->setMappingType(MappingType::ToLower, rLocale);
83 return trans->transliterateString2String(Text, nPos, nCount);
86 OUString SAL_CALL
87 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
88 try
90 sal_Int32 len = Text.getLength();
91 if (nPos >= len)
92 return OUString();
93 if (nCount + nPos > len)
94 nCount = len - nPos;
96 trans->setMappingType(MappingType::ToTitle, rLocale);
97 rtl_uString* pStr = rtl_uString_alloc(nCount);
98 sal_Unicode* out = pStr->buffer;
99 rtl::Reference< BreakIteratorImpl > xBrk(new BreakIteratorImpl(m_xContext));
100 Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale,
101 WordType::ANYWORD_IGNOREWHITESPACES, true);
102 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103 if (i >= bdy.endPos)
104 bdy = xBrk->nextWord(Text, bdy.endPos, rLocale,
105 WordType::ANYWORD_IGNOREWHITESPACES);
106 *out = (i == bdy.startPos) ?
107 trans->transliterateChar2Char(Text[i]) : Text[i];
109 *out = 0;
110 return OUString( pStr, SAL_NO_ACQUIRE );
112 catch (const RuntimeException&)
114 throw;
116 catch (const Exception& e)
118 uno::Any a(cppu::getCaughtException());
119 throw lang::WrappedTargetRuntimeException(
120 "wrapped " + a.getValueTypeName() + ": " + e.Message,
121 uno::Reference<uno::XInterface>(), a);
125 sal_Int16 SAL_CALL
126 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
127 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
128 return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0)));
131 sal_Int16 SAL_CALL
132 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
133 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
134 return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0)));
138 sal_Int16 SAL_CALL
139 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
140 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
141 // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
142 // while OO.o enum UnicideScript starts from 0.
143 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
144 return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
148 sal_Int32
149 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
150 using namespace ::com::sun::star::i18n::KCharacterType;
152 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
153 switch ( u_charType(ch) ) {
154 // Upper
155 case U_UPPERCASE_LETTER :
156 return UPPER|LETTER|PRINTABLE|BASE_FORM;
158 // Lower
159 case U_LOWERCASE_LETTER :
160 return LOWER|LETTER|PRINTABLE|BASE_FORM;
162 // Title
163 case U_TITLECASE_LETTER :
164 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
166 // Letter
167 case U_MODIFIER_LETTER :
168 case U_OTHER_LETTER :
169 return LETTER|PRINTABLE|BASE_FORM;
171 // Digit
172 case U_DECIMAL_DIGIT_NUMBER:
173 case U_LETTER_NUMBER:
174 case U_OTHER_NUMBER:
175 return DIGIT|PRINTABLE|BASE_FORM;
177 // Base
178 case U_NON_SPACING_MARK:
179 case U_ENCLOSING_MARK:
180 case U_COMBINING_SPACING_MARK:
181 return BASE_FORM|PRINTABLE;
183 // Print
184 case U_SPACE_SEPARATOR:
186 case U_DASH_PUNCTUATION:
187 case U_INITIAL_PUNCTUATION:
188 case U_FINAL_PUNCTUATION:
189 case U_CONNECTOR_PUNCTUATION:
190 case U_OTHER_PUNCTUATION:
192 case U_MATH_SYMBOL:
193 case U_CURRENCY_SYMBOL:
194 case U_MODIFIER_SYMBOL:
195 case U_OTHER_SYMBOL:
196 return PRINTABLE;
198 // Control
199 case U_CONTROL_CHAR:
200 case U_FORMAT_CHAR:
201 return CONTROL;
203 case U_LINE_SEPARATOR:
204 case U_PARAGRAPH_SEPARATOR:
205 return CONTROL|PRINTABLE;
207 // for all others
208 default:
209 return U_GENERAL_OTHER_TYPES;
213 sal_Int32 SAL_CALL
214 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
215 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
216 return getCharType(Text, &nPos, 0);
220 sal_Int32 SAL_CALL
221 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
222 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
224 sal_Int32 result = 0;
226 while (nCount > 0 && nPos < Text.getLength())
228 sal_Int32 nOrigPos = nPos;
229 result |= getCharType(Text, &nPos, 1);
230 sal_Int32 nUtf16Units = nPos - nOrigPos;
231 nCount -= nUtf16Units;
234 return result;
237 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
238 const OUString& Text,
239 sal_Int32 nPos,
240 const Locale& rLocale,
241 sal_Int32 startCharTokenType,
242 const OUString& userDefinedCharactersStart,
243 sal_Int32 contCharTokenType,
244 const OUString& userDefinedCharactersCont )
246 ParseResult r;
247 if ( Text.getLength() <= nPos )
248 return r;
250 setupParserTable( rLocale,
251 startCharTokenType, userDefinedCharactersStart,
252 contCharTokenType, userDefinedCharactersCont );
253 parseText( r, Text, nPos );
255 return r;
259 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
260 sal_Int32 nTokenType,
261 const OUString& Text,
262 sal_Int32 nPos,
263 const Locale& rLocale,
264 sal_Int32 startCharTokenType,
265 const OUString& userDefinedCharactersStart,
266 sal_Int32 contCharTokenType,
267 const OUString& userDefinedCharactersCont )
269 ParseResult r;
270 if ( Text.getLength() <= nPos )
271 return r;
273 setupParserTable( rLocale,
274 startCharTokenType, userDefinedCharactersStart,
275 contCharTokenType, userDefinedCharactersCont );
276 parseText( r, Text, nPos, nTokenType );
278 return r;
281 OUString SAL_CALL cclass_Unicode::getImplementationName()
283 return OUString("com.sun.star.i18n.CharacterClassification_Unicode");
286 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName)
288 return cppu::supportsService(this, rServiceName);
291 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames()
293 Sequence< OUString > aRet { "com.sun.star.i18n.CharacterClassification_Unicode" };
294 return aRet;
299 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
300 com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
301 css::uno::XComponentContext *context,
302 css::uno::Sequence<css::uno::Any> const &)
304 return cppu::acquire(new i18npool::cclass_Unicode(context));
307 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */