1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <cclass_unicode.hxx>
21 #include <com/sun/star/i18n/KCharacterType.hpp>
22 #include <com/sun/star/i18n/WordType.hpp>
23 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
24 #include <unicode/uchar.h>
25 #include <cppuhelper/exc_hlp.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <breakiteratorImpl.hxx>
28 #include <transliteration_body.hxx>
29 #include <rtl/ref.hxx>
30 #include <o3tl/string_view.hxx>
33 using namespace ::com::sun::star
;
34 using namespace ::com::sun::star::uno
;
35 using namespace ::com::sun::star::i18n
;
36 using namespace ::com::sun::star::lang
;
40 // class cclass_Unicode
41 // ----------------------------------------------------;
// Constructor.
// Creates one Transliteration_casemapping object for each of transToUpper,
// transToLower and transToTitle, and moves the by-value xContext argument
// into m_xContext.
// NOTE(review): the embedded original line numbers jump from 47 to 54, so
// further member initializers and the opening brace are not visible in this
// excerpt — confirm against the full file.
43 cclass_Unicode::cclass_Unicode( uno::Reference
< XComponentContext
> xContext
) :
44 transToUpper( new Transliteration_casemapping() ),
45 transToLower( new Transliteration_casemapping() ),
46 transToTitle( new Transliteration_casemapping() ),
47 m_xContext(std::move( xContext
)),
// Fix the mapping mode of each transliterator once, at construction time.
54 transToUpper
->setMappingType(MappingType::ToUpper
);
55 transToLower
->setMappingType(MappingType::ToLower
);
56 transToTitle
->setMappingType(MappingType::ToTitle
);
// Destructor.
// NOTE(review): the destructor body is not visible in this excerpt; check the
// full file for any cleanup it performs.
59 cclass_Unicode::~cclass_Unicode() {
// Converts a range of Text to upper case.
//   Text    - source string
//   nPos    - start index of the range
//   nCount  - requested length of the range
//   rLocale - locale set on the transliterator before converting
// Returns the result of transToUpper->transliterateString2String() for the
// range (Text, nPos, nCount).
// NOTE(review): the statement guarded by the "nCount + nPos > len" check is
// not visible in this excerpt (presumably it clamps nCount to the end of the
// string); original lines 67-68 and 70-71 are missing as well — confirm.
65 cclass_Unicode::toUpper( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) {
66 sal_Int32 len
= Text
.getLength();
// Detect a requested range that runs past the end of Text.
69 if (nCount
+ nPos
> len
)
72 transToUpper
->setLocale(rLocale
);
73 return transToUpper
->transliterateString2String(Text
, nPos
, nCount
);
// Converts a range of Text to lower case.
//   Text    - source string
//   nPos    - start index of the range
//   nCount  - requested length of the range
//   rLocale - locale set on the transliterator before converting
// Returns the result of transToLower->transliterateString2String() for the
// range (Text, nPos, nCount).
// NOTE(review): the statement guarded by the "nCount + nPos > len" check is
// not visible in this excerpt (presumably it clamps nCount to the end of the
// string); original lines 79-80 and 82-83 are missing as well — confirm.
77 cclass_Unicode::toLower( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) {
78 sal_Int32 len
= Text
.getLength();
// Detect a requested range that runs past the end of Text.
81 if (nCount
+ nPos
> len
)
84 transToLower
->setLocale(rLocale
);
85 return transToLower
->transliterateString2String(Text
, nPos
, nCount
);
// Converts a range of Text to title case: the character at the start of each
// word is passed through transToTitle->transliterateChar2Char(), every other
// character in the range is copied unchanged.
//   Text    - source string
//   nPos    - start index of the range
//   nCount  - requested length of the range
//   rLocale - locale used for the transliterator and for word breaking
// Word boundaries come from a BreakIteratorImpl created on m_xContext, using
// WordType::ANYWORD_IGNOREWHITESPACES.
// A caught RuntimeException has its own handler (body not visible here); any
// other caught Exception is rethrown as a WrappedTargetRuntimeException that
// carries the original exception as an Any.
// NOTE(review): several original lines are missing from this excerpt (the
// "try", the statement guarded by the range check, the condition on original
// line 105 that presumably guards the nextWord() call, and whatever follows
// the loop before the return) — confirm against the full file.
89 cclass_Unicode::toTitle( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) {
92 sal_Int32 len
= Text
.getLength();
// Detect a requested range that runs past the end of Text.
95 if (nCount
+ nPos
> len
)
98 transToTitle
->setLocale(rLocale
);
// Allocate the result buffer up front; it is filled through the raw "out"
// pointer and handed to the returned OUString below.
99 rtl_uString
* pStr
= rtl_uString_alloc(nCount
);
100 sal_Unicode
* out
= pStr
->buffer
;
101 rtl::Reference
< BreakIteratorImpl
> xBrk(new BreakIteratorImpl(m_xContext
));
// Boundary of the word containing the first position of the range.
102 Boundary bdy
= xBrk
->getWordBoundary(Text
, nPos
, rLocale
,
103 WordType::ANYWORD_IGNOREWHITESPACES
, true);
// Walk the range one index at a time, writing one output unit per input unit.
104 for (sal_Int32 i
= nPos
; i
< nCount
+ nPos
; i
++, out
++) {
// Advance to the next word, starting the search at the current word's end.
106 bdy
= xBrk
->nextWord(Text
, bdy
.endPos
, rLocale
,
107 WordType::ANYWORD_IGNOREWHITESPACES
);
// Only the character sitting exactly on a word start is title-cased.
108 *out
= (i
== bdy
.startPos
) ?
109 transToTitle
->transliterateChar2Char(Text
[i
]) : Text
[i
];
// SAL_NO_ACQUIRE: the OUString takes over pStr without acquiring it again.
112 return OUString( pStr
, SAL_NO_ACQUIRE
);
114 catch (const RuntimeException
&)
118 catch (const Exception
& e
)
// Capture the in-flight exception so it can be attached to the wrapper.
120 uno::Any
a(cppu::getCaughtException());
121 throw lang::WrappedTargetRuntimeException(
122 "wrapped " + a
.getValueTypeName() + ": " + e
.Message
,
123 uno::Reference
<uno::XInterface
>(), a
);
// Returns the ICU u_charType() value of the code point at nPos in Text,
// cast to sal_Int16.
// Returns 0 when nPos is negative or not less than Text.getLength().
// The code point is read with iterateCodePoints() using an increment of 0.
128 cclass_Unicode::getType( const OUString
& Text
, sal_Int32 nPos
) {
129 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
130 return static_cast<sal_Int16
>(u_charType(Text
.iterateCodePoints(&nPos
, 0)));
// Returns the ICU u_charDirection() value of the code point at nPos in Text,
// cast to sal_Int16.
// Returns 0 when nPos is negative or not less than Text.getLength().
// The code point is read with iterateCodePoints() using an increment of 0.
134 cclass_Unicode::getCharacterDirection( const OUString
& Text
, sal_Int32 nPos
) {
135 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
136 return static_cast<sal_Int16
>(u_charDirection(Text
.iterateCodePoints(&nPos
, 0)));
// Returns the ICU ublock_getCode() value of the code point at nPos in Text,
// cast to sal_Int16 and reduced by one.
// Returns 0 when nPos is negative or not less than Text.getLength().
141 cclass_Unicode::getScript( const OUString
& Text
, sal_Int32 nPos
) {
142 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
143 // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
144 // while OO.o enum UnicodeScript starts from 0.
145 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
146 return static_cast<sal_Int16
>(ublock_getCode(Text
.iterateCodePoints(&nPos
, 0)))-1;
// Maps the ICU general category of one code point to a combination of
// KCharacterType flags.
//   Text      - text to read from
//   nPos      - in/out index, passed on to o3tl::iterateCodePoints()
//   increment - increment passed on to o3tl::iterateCodePoints()
// The code point is fetched with o3tl::iterateCodePoints(Text, nPos,
// increment) and classified with u_charType().
// NOTE(review): some case groups below (space separator, the punctuation
// categories, currency/modifier symbols) have no visible return statement and
// the embedded original line numbers skip in those places, so lines are
// missing from this excerpt; do not read them as falling through to the
// CONTROL|PRINTABLE return without checking the full file.
151 cclass_Unicode::getCharType( std::u16string_view Text
, sal_Int32
* nPos
, sal_Int32 increment
) {
152 using namespace ::com::sun::star::i18n::KCharacterType
;
154 sal_uInt32 ch
= o3tl::iterateCodePoints(Text
, nPos
, increment
);
155 switch ( u_charType(ch
) ) {
// Cased letters: case flag plus LETTER|PRINTABLE|BASE_FORM.
157 case U_UPPERCASE_LETTER
:
158 return UPPER
|LETTER
|PRINTABLE
|BASE_FORM
;
161 case U_LOWERCASE_LETTER
:
162 return LOWER
|LETTER
|PRINTABLE
|BASE_FORM
;
165 case U_TITLECASE_LETTER
:
166 return TITLE_CASE
|LETTER
|PRINTABLE
|BASE_FORM
;
// Letters without a case flag.
169 case U_MODIFIER_LETTER
:
170 case U_OTHER_LETTER
:
171 return LETTER
|PRINTABLE
|BASE_FORM
;
// Numeric categories visible here are reported as DIGIT.
174 case U_DECIMAL_DIGIT_NUMBER
:
175 case U_LETTER_NUMBER
:
177 return DIGIT
|PRINTABLE
|BASE_FORM
;
// Mark categories.
180 case U_NON_SPACING_MARK
:
181 case U_ENCLOSING_MARK
:
182 case U_COMBINING_SPACING_MARK
:
183 return BASE_FORM
|PRINTABLE
;
186 case U_SPACE_SEPARATOR
:
188 case U_DASH_PUNCTUATION
:
189 case U_INITIAL_PUNCTUATION
:
190 case U_FINAL_PUNCTUATION
:
191 case U_CONNECTOR_PUNCTUATION
:
192 case U_OTHER_PUNCTUATION
:
195 case U_CURRENCY_SYMBOL
:
196 case U_MODIFIER_SYMBOL
:
205 case U_LINE_SEPARATOR
:
206 case U_PARAGRAPH_SEPARATOR
:
207 return CONTROL
|PRINTABLE
;
// Final return of the function: the ICU constant U_GENERAL_OTHER_TYPES.
211 return U_GENERAL_OTHER_TYPES
;
// Returns the getCharType() flags for the code point at nPos in Text.
// Returns 0 when nPos is negative or not less than Text.getLength().
// The Locale parameter is unnamed and therefore unused.
216 cclass_Unicode::getCharacterType( const OUString
& Text
, sal_Int32 nPos
, const Locale
& /*rLocale*/ ) {
217 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// Increment 0 is passed to getCharType() for this single-character query.
218 return getCharType(Text
, &nPos
, 0);
// Accumulates (bitwise OR) the getCharType() flags of the characters in a
// range of Text, starting at nPos and covering up to nCount units.
// Returns 0 when nPos is negative or not less than Text.getLength().
// The Locale parameter is unnamed and therefore unused.
// NOTE(review): the loop braces and the final return are not visible in this
// excerpt; presumably "result" is returned — confirm against the full file.
223 cclass_Unicode::getStringType( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& /*rLocale*/ ) {
224 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
226 sal_Int32 result
= 0;
// Stop when the requested count is used up or the end of Text is reached.
228 while (nCount
> 0 && nPos
< Text
.getLength())
230 sal_Int32 nOrigPos
= nPos
;
// getCharType() receives &nPos with increment 1 and moves nPos forward.
231 result
|= getCharType(Text
, &nPos
, 1);
// Charge the distance nPos moved against the remaining count.
232 sal_Int32 nUtf16Units
= nPos
- nOrigPos
;
233 nCount
-= nUtf16Units
;
// Parses a token from Text starting at nPos.
// Sets up the parser table from rLocale, the start/continuation character
// token types and the user-defined start/continuation characters, then calls
// parseText(r, Text, nPos).
// NOTE(review): the declarations of the nPos parameter (original line 241)
// and of "r", the statement guarded by the "Text.getLength() <= nPos" check,
// and the return statement are not visible in this excerpt — confirm against
// the full file.
239 ParseResult SAL_CALL
cclass_Unicode::parseAnyToken(
240 const OUString
& Text
,
242 const Locale
& rLocale
,
243 sal_Int32 startCharTokenType
,
244 const OUString
& userDefinedCharactersStart
,
245 sal_Int32 contCharTokenType
,
246 const OUString
& userDefinedCharactersCont
)
// Start position at or beyond the end of Text.
249 if ( Text
.getLength() <= nPos
)
252 setupParserTable( rLocale
,
253 startCharTokenType
, userDefinedCharactersStart
,
254 contCharTokenType
, userDefinedCharactersCont
);
255 parseText( r
, Text
, nPos
);
// Parses a token from Text starting at nPos, passing nTokenType on to
// parseText().
// Sets up the parser table from rLocale, the start/continuation character
// token types and the user-defined start/continuation characters, then calls
// parseText(r, Text, nPos, nTokenType).
// NOTE(review): the declarations of the nPos parameter (original line 264)
// and of "r", the statement guarded by the "Text.getLength() <= nPos" check,
// and the return statement are not visible in this excerpt — confirm against
// the full file.
261 ParseResult SAL_CALL
cclass_Unicode::parsePredefinedToken(
262 sal_Int32 nTokenType
,
263 const OUString
& Text
,
265 const Locale
& rLocale
,
266 sal_Int32 startCharTokenType
,
267 const OUString
& userDefinedCharactersStart
,
268 sal_Int32 contCharTokenType
,
269 const OUString
& userDefinedCharactersCont
)
// Start position at or beyond the end of Text.
272 if ( Text
.getLength() <= nPos
)
275 setupParserTable( rLocale
,
276 startCharTokenType
, userDefinedCharactersStart
,
277 contCharTokenType
, userDefinedCharactersCont
);
278 parseText( r
, Text
, nPos
, nTokenType
);
// Returns the fixed implementation name string of this class.
283 OUString SAL_CALL
cclass_Unicode::getImplementationName()
285 return u
"com.sun.star.i18n.CharacterClassification_Unicode"_ustr
;
// Reports whether rServiceName is supported by delegating to
// cppu::supportsService() with this object.
288 sal_Bool SAL_CALL
cclass_Unicode::supportsService(const OUString
& rServiceName
)
290 return cppu::supportsService(this, rServiceName
);
// Returns a sequence holding the single supported service name.
293 Sequence
< OUString
> SAL_CALL
cclass_Unicode::getSupportedServiceNames()
295 return { u
"com.sun.star.i18n.CharacterClassification_Unicode"_ustr
};
// Exported C-linkage factory function: constructs a new
// i18npool::cclass_Unicode from the given component context and returns the
// result of cppu::acquire() on it.
// The Sequence<Any> argument is unnamed and therefore unused.
300 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
301 com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
302 css::uno::XComponentContext
*context
,
303 css::uno::Sequence
<css::uno::Any
> const &)
305 return cppu::acquire(new i18npool::cclass_Unicode(context
));
308 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */