1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <cclass_unicode.hxx>
22 #include <com/sun/star/i18n/UnicodeScript.hpp>
23 #include <com/sun/star/i18n/UnicodeType.hpp>
24 #include <com/sun/star/i18n/KCharacterType.hpp>
25 #include <unicode/uchar.h>
26 #include <comphelper/string.hxx>
27 #include <breakiteratorImpl.hxx>
29 using namespace ::com::sun::star::uno
;
30 using namespace ::com::sun::star::lang
;
31 using namespace ::rtl
;
33 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
34 // ----------------------------------------------------
35 // class cclass_Unicode
36 // ----------------------------------------------------;
// Constructor: keeps the component context in m_xContext, creates the
// case-mapping transliteration object (trans) that toUpper/toLower/toTitle
// use, and sets cClass, the name string reported by getImplementationName(),
// supportsService() and getSupportedServiceNames().
// NOTE(review): the embedded source line numbers jump from 38 to 48, so any
// further member initializers and the braces of the body are not visible in
// this excerpt.
38 cclass_Unicode::cclass_Unicode( const uno::Reference
< XComponentContext
>& rxContext
) : m_xContext( rxContext
),
// Case-mapping helper, allocated with new.
48 trans
= new Transliteration_casemapping();
// Implementation/service name of this class.
49 cClass
= "com.sun.star.i18n.CharacterClassification_Unicode";
// Destructor.
// NOTE(review): the body is not visible in this excerpt; trans is allocated
// with new in the constructor, so confirm that it is released here.
52 cclass_Unicode::~cclass_Unicode() {
// Maps the range [nPos, nPos + nCount) of Text to upper case.
//
// Text     source string
// nPos     start index of the range
// nCount   length of the range
// rLocale  locale handed to the case-mapping transliteration
//
// Returns the result of trans->transliterateString2String() for the range.
// NOTE(review): the return-type line, the closing brace and the statements on
// source lines 61, 62 and 64 are not visible in this excerpt.
59 cclass_Unicode::toUpper( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
60 sal_Int32 len
= Text
.getLength();
// The requested range runs past the end of Text.
// NOTE(review): the statement guarded by this check is not visible here --
// presumably nCount is clamped to the remaining length; confirm.
63 if (nCount
+ nPos
> len
)
// Select the upper-case mapping for rLocale, then transliterate the range.
66 trans
->setMappingType(MappingTypeToUpper
, rLocale
);
67 return trans
->transliterateString2String(Text
, nPos
, nCount
);
// Maps the range [nPos, nPos + nCount) of Text to lower case.
//
// Text     source string
// nPos     start index of the range
// nCount   length of the range
// rLocale  locale handed to the case-mapping transliteration
//
// Returns the result of trans->transliterateString2String() for the range.
// NOTE(review): the return-type line, the closing brace and the statements on
// source lines 73, 74 and 76 are not visible in this excerpt.
71 cclass_Unicode::toLower( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
72 sal_Int32 len
= Text
.getLength();
// The requested range runs past the end of Text.
// NOTE(review): the statement guarded by this check is not visible here --
// presumably nCount is clamped to the remaining length; confirm.
75 if (nCount
+ nPos
> len
)
// Select the lower-case mapping for rLocale, then transliterate the range.
78 trans
->setMappingType(MappingTypeToLower
, rLocale
);
79 return trans
->transliterateString2String(Text
, nPos
, nCount
);
// Title-cases the range [nPos, nPos + nCount) of Text: the character at the
// start of each word (as reported by BreakIteratorImpl for rLocale) is passed
// through the title-case mapping, every other character is copied unchanged.
//
// Text     source string
// nPos     start index of the range
// nCount   length of the range
// rLocale  locale used for both the case mapping and the word breaking
//
// Returns an OUString built from a freshly allocated nCount-character buffer.
// NOTE(review): the return-type line, the closing braces and source lines
// 85, 86, 88, 97, 102 and 103 are not visible in this excerpt.
83 cclass_Unicode::toTitle( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
84 sal_Int32 len
= Text
.getLength();
// The requested range runs past the end of Text.
// NOTE(review): the statement guarded by this check is not visible here --
// presumably nCount is clamped to the remaining length; confirm.
87 if (nCount
+ nPos
> len
)
90 trans
->setMappingType(MappingTypeToTitle
, rLocale
);
// Output buffer sized for nCount characters; `out` is the write cursor.
91 rtl_uString
* pStr
= comphelper::string::rtl_uString_alloc(nCount
);
92 sal_Unicode
* out
= pStr
->buffer
;
// Word boundary looked up at nPos; its startPos marks the character to map.
93 BreakIteratorImpl
brk(m_xContext
);
94 Boundary bdy
= brk
.getWordBoundary(Text
, nPos
, rLocale
,
95 WordType::ANYWORD_IGNOREWHITESPACES
, sal_True
);
// One output character per input index in the range.
96 for (sal_Int32 i
= nPos
; i
< nCount
+ nPos
; i
++, out
++) {
// Move on to the word following the current boundary.
// NOTE(review): source line 97 is not visible here, so the condition (if
// any) that guards this call cannot be seen in this excerpt.
98 bdy
= brk
.nextWord(Text
, bdy
.endPos
, rLocale
,
99 WordType::ANYWORD_IGNOREWHITESPACES
);
// Only the character at a word start is transliterated.
100 *out
= (i
== bdy
.startPos
) ?
101 trans
->transliterateChar2Char(Text
[i
]) : Text
[i
];
// Hand the buffer to the returned OUString via the SAL_NO_ACQUIRE constructor.
104 return OUString( pStr
, SAL_NO_ACQUIRE
);
// Returns the ICU u_charType() value of the code point read from Text at
// nPos, cast to sal_Int16. Returns 0 when nPos is negative or not less than
// Text.getLength().
// NOTE(review): the return-type line and the closing brace are not visible in
// this excerpt.
108 cclass_Unicode::getType( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range position.
109 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// nPos is a by-value parameter, so passing its address does not affect the caller.
110 return (sal_Int16
) u_charType(Text
.iterateCodePoints(&nPos
, 0));
// Returns the ICU u_charDirection() value of the code point read from Text at
// nPos, cast to sal_Int16. Returns 0 when nPos is negative or not less than
// Text.getLength().
// NOTE(review): the return-type line and the closing brace are not visible in
// this excerpt.
114 cclass_Unicode::getCharacterDirection( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range position.
115 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// nPos is a by-value parameter, so passing its address does not affect the caller.
116 return (sal_Int16
) u_charDirection(Text
.iterateCodePoints(&nPos
, 0));
// Returns the ICU ublock_getCode() value of the code point read from Text at
// nPos, shifted down by one. Returns 0 when nPos is negative or not less than
// Text.getLength().
// NOTE(review): the return-type line and the closing brace are not visible in
// this excerpt.
121 cclass_Unicode::getScript( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range position.
122 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
123 // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
124 // while OO.o enum UnicodeScript starts from 0.
125 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
// The cast binds tighter than the subtraction: the block code is cast to
// sal_Int16 first and 1 is subtracted from the cast value.
126 return (sal_Int16
) ublock_getCode(Text
.iterateCodePoints(&nPos
, 0))-1;
// Translates the ICU general category (u_charType) of one code point of Text
// into a bit mask of KCharacterType flags.
//
// Text       source string
// nPos       in/out position, passed through to Text.iterateCodePoints()
// increment  step passed to iterateCodePoints(); the callers in this file
//            pass 0 or 1
//
// NOTE(review): the return-type line, the closing braces and several source
// lines inside the switch are not visible in this excerpt (see notes below).
131 cclass_Unicode::getCharType( const OUString
& Text
, sal_Int32
* nPos
, sal_Int32 increment
) {
// The unqualified flag names below (UPPER, LETTER, ...) presumably come from
// this namespace.
132 using namespace ::com::sun::star::i18n::KCharacterType
;
// Read a code point using the caller's step. For a positive step a second
// call with step 0 replaces ch -- presumably with the code point at the
// position nPos was advanced to; confirm against iterateCodePoints().
134 sal_uInt32 ch
= Text
.iterateCodePoints(nPos
, increment
);
135 if (increment
> 0) ch
= Text
.iterateCodePoints(nPos
, 0);
136 switch ( u_charType(ch
) ) {
// Cased letters: the case flag plus LETTER|PRINTABLE|BASE_FORM.
138 case U_UPPERCASE_LETTER
:
139 return UPPER
|LETTER
|PRINTABLE
|BASE_FORM
;
142 case U_LOWERCASE_LETTER
:
143 return LOWER
|LETTER
|PRINTABLE
|BASE_FORM
;
146 case U_TITLECASE_LETTER
:
147 return TITLE_CASE
|LETTER
|PRINTABLE
|BASE_FORM
;
// Letters without a case flag.
150 case U_MODIFIER_LETTER
:
151 case U_OTHER_LETTER
:
152 return LETTER
|PRINTABLE
|BASE_FORM
;
// Numbers.
155 case U_DECIMAL_DIGIT_NUMBER
:
156 case U_LETTER_NUMBER
:
158 return DIGIT
|PRINTABLE
|BASE_FORM
;
// Marks.
161 case U_NON_SPACING_MARK
:
162 case U_ENCLOSING_MARK
:
163 case U_COMBINING_SPACING_MARK
:
164 return BASE_FORM
|PRINTABLE
;
// NOTE(review): from here on the embedded source line numbers skip (168,
// 174-175, 178-185, 189-191), so return statements and case labels between
// the groups below are likely missing from this excerpt; do not read the
// visible labels as all falling through to CONTROL|PRINTABLE.
167 case U_SPACE_SEPARATOR
:
169 case U_DASH_PUNCTUATION
:
170 case U_INITIAL_PUNCTUATION
:
171 case U_FINAL_PUNCTUATION
:
172 case U_CONNECTOR_PUNCTUATION
:
173 case U_OTHER_PUNCTUATION
:
176 case U_CURRENCY_SYMBOL
:
177 case U_MODIFIER_SYMBOL
:
186 case U_LINE_SEPARATOR
:
187 case U_PARAGRAPH_SEPARATOR
:
188 return CONTROL
|PRINTABLE
;
// Value returned for categories not handled above.
// NOTE(review): the label this return belongs to is not visible here.
192 return U_GENERAL_OTHER_TYPES
;
// Returns the KCharacterType flags computed by getCharType() for the code
// point read from Text at nPos. Returns 0 when nPos is negative or not less
// than Text.getLength(). The locale argument is unused (its name is commented
// out).
// NOTE(review): the return-type line and the closing brace are not visible in
// this excerpt.
197 cclass_Unicode::getCharacterType( const OUString
& Text
, sal_Int32 nPos
, const Locale
& /*rLocale*/ ) throw(RuntimeException
) {
// Out-of-range position.
198 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// Step 0: classify without stepping.
199 return getCharType(Text
, &nPos
, 0);
// Accumulates (bitwise OR) the getCharType() flags of up to nCount code
// points of Text starting at nPos. Returns 0 when nPos is negative or not
// less than Text.getLength(). The locale argument is unused (its name is
// commented out).
// NOTE(review): the return-type line, the final return statement and the
// closing brace are not visible in this excerpt.
204 cclass_Unicode::getStringType( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& /*rLocale*/ ) throw(RuntimeException
) {
// Out-of-range position.
205 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// Flags of the first code point (step 0).
207 sal_Int32 result
= getCharType(Text
, &nPos
, 0);
// Remaining code points: getCharType() is called with step 1 and the shared
// nPos; the loop stops after nCount code points or once nPos reaches the end
// of Text.
208 for (sal_Int32 i
= 1; i
< nCount
&& nPos
< Text
.getLength(); i
++)
209 result
|= getCharType(Text
, &nPos
, 1);
// Parses a token from Text: sets up the parser table for rLocale with the
// given start/continuation character classes and user-defined characters,
// then calls parseText( r, Text, nPos ).
//
// Text                        string to parse
// rLocale                     locale passed to setupParserTable()
// startCharTokenType          token-type flags for the first character
// userDefinedCharactersStart  extra characters passed along for the start
// contCharTokenType           token-type flags for continuation characters
// userDefinedCharactersCont   extra characters passed along for continuation
//
// NOTE(review): the declaration of the nPos parameter (source line 215), the
// declaration of r, the braces, the statement guarded by the length check and
// the final return are not visible in this excerpt.
213 ParseResult SAL_CALL
cclass_Unicode::parseAnyToken(
214 const OUString
& Text
,
216 const Locale
& rLocale
,
217 sal_Int32 startCharTokenType
,
218 const OUString
& userDefinedCharactersStart
,
219 sal_Int32 contCharTokenType
,
220 const OUString
& userDefinedCharactersCont
)
221 throw(RuntimeException
)
// nPos is at or beyond the end of Text.
// NOTE(review): the guarded statement is not visible here -- presumably an
// early return; confirm.
224 if ( Text
.getLength() <= nPos
)
227 setupParserTable( rLocale
,
228 startCharTokenType
, userDefinedCharactersStart
,
229 contCharTokenType
, userDefinedCharactersCont
);
230 parseText( r
, Text
, nPos
);
// Parses a token of a requested type from Text: sets up the parser table for
// rLocale with the given start/continuation character classes and
// user-defined characters, then calls parseText( r, Text, nPos, nTokenType ).
//
// nTokenType                  token type forwarded to parseText()
// Text                        string to parse
// rLocale                     locale passed to setupParserTable()
// startCharTokenType          token-type flags for the first character
// userDefinedCharactersStart  extra characters passed along for the start
// contCharTokenType           token-type flags for continuation characters
// userDefinedCharactersCont   extra characters passed along for continuation
//
// NOTE(review): the declaration of the nPos parameter (source line 239), the
// declaration of r, the braces, the statement guarded by the length check and
// the final return are not visible in this excerpt.
236 ParseResult SAL_CALL
cclass_Unicode::parsePredefinedToken(
237 sal_Int32 nTokenType
,
238 const OUString
& Text
,
240 const Locale
& rLocale
,
241 sal_Int32 startCharTokenType
,
242 const OUString
& userDefinedCharactersStart
,
243 sal_Int32 contCharTokenType
,
244 const OUString
& userDefinedCharactersCont
)
245 throw(RuntimeException
)
// nPos is at or beyond the end of Text.
// NOTE(review): the guarded statement is not visible here -- presumably an
// early return; confirm.
248 if ( Text
.getLength() <= nPos
)
251 setupParserTable( rLocale
,
252 startCharTokenType
, userDefinedCharactersStart
,
253 contCharTokenType
, userDefinedCharactersCont
);
254 parseText( r
, Text
, nPos
, nTokenType
);
// Returns cClass (the name string set in the constructor) as an OUString.
// NOTE(review): the braces of the body are not visible in this excerpt.
259 OUString SAL_CALL
cclass_Unicode::getImplementationName() throw( RuntimeException
)
261 return OUString::createFromAscii(cClass
);
// Reports whether rServiceName names this service: the result is true exactly
// when rServiceName.compareToAscii(cClass) yields 0.
// NOTE(review): the braces of the body are not visible in this excerpt.
265 sal_Bool SAL_CALL
cclass_Unicode::supportsService(const OUString
& rServiceName
) throw( RuntimeException
)
267 return !rServiceName
.compareToAscii(cClass
);
// Builds a one-element sequence whose only entry is cClass as an OUString.
// NOTE(review): the braces and the return statement are not visible in this
// excerpt; presumably aRet is returned.
270 Sequence
< OUString
> SAL_CALL
cclass_Unicode::getSupportedServiceNames() throw( RuntimeException
)
272 Sequence
< OUString
> aRet(1);
273 aRet
[0] = OUString::createFromAscii(cClass
);
279 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */