1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: cclass_unicode.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <cclass_unicode.hxx>
35 #include <com/sun/star/i18n/UnicodeScript.hpp>
36 #include <com/sun/star/i18n/UnicodeType.hpp>
37 #include <com/sun/star/i18n/KCharacterType.hpp>
38 #include <unicode/uchar.h>
39 #include <i18nutil/x_rtl_ustring.h>
40 #include <breakiteratorImpl.hxx>
42 using namespace ::com::sun::star::uno
;
43 using namespace ::com::sun::star::lang
;
44 using namespace ::rtl
;
46 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
47 // ----------------------------------------------------
48 // class cclass_Unicode
49 // ----------------------------------------------------;
// Constructor.
//   xSMgr - service factory reference; copied into the member xMSF.
// Besides storing the factory, the visible statements allocate the case-mapping
// transliteration object used by toUpper/toLower/toTitle and set cClass to the
// name string returned by getImplementationName()/getSupportedServiceNames().
// NOTE(review): the rest of the member-initializer list and the braces around
// the body are not visible in this view of the file.
51 cclass_Unicode::cclass_Unicode( uno::Reference
< XMultiServiceFactory
> xSMgr
) : xMSF( xSMgr
),
// Raw `new`: the matching release is not visible here.
// NOTE(review): confirm the destructor deletes `trans`.
61 trans
= new Transliteration_casemapping();
// Name string compared against in supportsService().
62 cClass
= "com.sun.star.i18n.CharacterClassification_Unicode";
// Destructor.
// NOTE(review): the body is not visible in this view; confirm that it releases
// `trans`, which the constructor allocates with `new`.
65 cclass_Unicode::~cclass_Unicode() {
// toUpper: upper-case mapping of a range of Text.
//   Text    - source string
//   nPos    - start index of the range inside Text
//   nCount  - length of the range
//   rLocale - locale passed to the case mapping
// Returns the result of trans->transliterateString2String(Text, nPos, nCount)
// after switching `trans` to MappingTypeToUpper for rLocale.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
72 cclass_Unicode::toUpper( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
73 sal_Int32 len
= Text
.getLength();
// The requested range would run past the end of Text.
// NOTE(review): the statement guarded by this check is missing from this view
// (presumably it clamps nCount to len - nPos; confirm against the full file).
76 if (nCount
+ nPos
> len
)
// Select the upper-case mapping for this locale, then transliterate the range.
79 trans
->setMappingType(MappingTypeToUpper
, rLocale
);
80 return trans
->transliterateString2String(Text
, nPos
, nCount
);
// toLower: lower-case mapping of a range of Text.
//   Text    - source string
//   nPos    - start index of the range inside Text
//   nCount  - length of the range
//   rLocale - locale passed to the case mapping
// Returns the result of trans->transliterateString2String(Text, nPos, nCount)
// after switching `trans` to MappingTypeToLower for rLocale.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
84 cclass_Unicode::toLower( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
85 sal_Int32 len
= Text
.getLength();
// The requested range would run past the end of Text.
// NOTE(review): the statement guarded by this check is missing from this view
// (presumably it clamps nCount to len - nPos; confirm against the full file).
88 if (nCount
+ nPos
> len
)
// Select the lower-case mapping for this locale, then transliterate the range.
91 trans
->setMappingType(MappingTypeToLower
, rLocale
);
92 return trans
->transliterateString2String(Text
, nPos
, nCount
);
// toTitle: title-case mapping of a range of Text.
//   Text    - source string
//   nPos    - start index of the range inside Text
//   nCount  - length of the range
//   rLocale - locale passed to the case mapping and to the break iterator
// Walks the range index by index; an element whose index equals the start of
// the current word boundary is passed through trans->transliterateChar2Char
// (with `trans` set to MappingTypeToTitle), every other element is copied
// unchanged. The output buffer is wrapped in the returned OUString.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line, the closing braces and several statements
// are not visible in this view; see the notes below.
96 cclass_Unicode::toTitle( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& rLocale
) throw(RuntimeException
) {
97 sal_Int32 len
= Text
.getLength();
// The requested range would run past the end of Text.
// NOTE(review): the statement guarded by this check is missing from this view
// (presumably it clamps nCount to len - nPos; confirm against the full file).
100 if (nCount
+ nPos
> len
)
103 trans
->setMappingType(MappingTypeToTitle
, rLocale
);
// Output buffer sized from nCount; `out` is the write cursor into it.
104 rtl_uString
* pStr
= x_rtl_uString_new_WithLength( nCount
, 1 );
105 sal_Unicode
* out
= pStr
->buffer
;
// Word boundary containing/following nPos, found with a local break iterator
// constructed from the stored service factory.
106 BreakIteratorImpl
brk(xMSF
);
107 Boundary bdy
= brk
.getWordBoundary(Text
, nPos
, rLocale
,
108 WordType::ANYWORD_IGNOREWHITESPACES
, sal_True
);
// One output element per input index in [nPos, nPos + nCount).
109 for (sal_Int32 i
= nPos
; i
< nCount
+ nPos
; i
++, out
++) {
// Advance to the next word boundary, starting from the end of the current one.
// NOTE(review): as shown this runs on every iteration; a guard on bdy.endPos
// appears to be missing from this view - confirm against the full file.
111 bdy
= brk
.nextWord(Text
, bdy
.endPos
, rLocale
,
112 WordType::ANYWORD_IGNOREWHITESPACES
);
// Only the element at the start of a word is case-mapped.
113 *out
= (i
== bdy
.startPos
) ?
114 trans
->transliterateChar2Char(Text
[i
]) : Text
[i
];
// SAL_NO_ACQUIRE: presumably the OUString takes over pStr without adding a
// reference - confirm against the rtl::OUString documentation.
117 return OUString( pStr
, SAL_NO_ACQUIRE
);
// getType: general category of the character at nPos.
//   Text - source string
//   nPos - index into Text
// Returns 0 when nPos is negative or not less than Text.getLength(); otherwise
// the u_charType() value of the code point obtained from
// Text.iterateCodePoints(&nPos, 0), cast to sal_Int16.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
121 cclass_Unicode::getType( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range index: report 0 instead of reading outside the string.
122 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
123 return (sal_Int16
) u_charType(Text
.iterateCodePoints(&nPos
, 0));
// getCharacterDirection: directional class of the character at nPos.
//   Text - source string
//   nPos - index into Text
// Returns 0 when nPos is negative or not less than Text.getLength(); otherwise
// the u_charDirection() value of the code point obtained from
// Text.iterateCodePoints(&nPos, 0), cast to sal_Int16.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
127 cclass_Unicode::getCharacterDirection( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range index: report 0 instead of reading outside the string.
128 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
129 return (sal_Int16
) u_charDirection(Text
.iterateCodePoints(&nPos
, 0));
// getScript: script/block code of the character at nPos.
//   Text - source string
//   nPos - index into Text
// Returns 0 when nPos is negative or not less than Text.getLength(); otherwise
// ublock_getCode() of the code point obtained from
// Text.iterateCodePoints(&nPos, 0), minus 1.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
134 cclass_Unicode::getScript( const OUString
& Text
, sal_Int32 nPos
) throw(RuntimeException
) {
// Out-of-range index: report 0 instead of reading outside the string.
135 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
136 // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
137 // while OO.o enum UnicodeScript starts from 0.
138 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift by 1.
// The cast binds to the ublock_getCode() result; the 1 is subtracted afterwards.
139 return (sal_Int16
) ublock_getCode(Text
.iterateCodePoints(&nPos
, 0))-1;
// getCharType: maps the u_charType() category of one code point of Text to a
// bit mask built from the KCharacterType constants.
//   Text      - source string
//   nPos      - in/out index, passed through to Text.iterateCodePoints()
//   increment - step passed to Text.iterateCodePoints()
// NOTE(review): the return-type line, the closing braces and several case
// labels / return statements are missing from this view (the line numbers
// embedded in the text skip). Do not read the visible fall-through between
// case labels as the real mapping; check the full file.
144 cclass_Unicode::getCharType( const OUString
& Text
, sal_Int32
* nPos
, sal_Int32 increment
) {
145 using namespace ::com::sun::star::i18n::KCharacterType
;
147 sal_uInt32 ch
= Text
.iterateCodePoints(nPos
, increment
);
// After a forward step the code point is read a second time with increment 0.
// NOTE(review): presumably because the first call returns the code point from
// before the step - confirm against the OUString::iterateCodePoints contract.
148 if (increment
> 0) ch
= Text
.iterateCodePoints(nPos
, 0);
149 switch ( u_charType(ch
) ) {
// Cased letters: case flag plus LETTER|PRINTABLE|BASE_FORM.
151 case U_UPPERCASE_LETTER
:
152 return UPPER
|LETTER
|PRINTABLE
|BASE_FORM
;
155 case U_LOWERCASE_LETTER
:
156 return LOWER
|LETTER
|PRINTABLE
|BASE_FORM
;
159 case U_TITLECASE_LETTER
:
160 return TITLE_CASE
|LETTER
|PRINTABLE
|BASE_FORM
;
// Letters without a case flag.
163 case U_MODIFIER_LETTER
:
164 case U_OTHER_LETTER
:
165 return LETTER
|PRINTABLE
|BASE_FORM
;
// Numeric categories visible here are reported as DIGIT.
168 case U_DECIMAL_DIGIT_NUMBER
:
169 case U_LETTER_NUMBER
:
171 return DIGIT
|PRINTABLE
|BASE_FORM
;
// Mark categories.
174 case U_NON_SPACING_MARK
:
175 case U_ENCLOSING_MARK
:
176 case U_COMBINING_SPACING_MARK
:
177 return BASE_FORM
|PRINTABLE
;
// NOTE(review): lines are missing between the labels below; the return
// statements for the separator/punctuation/symbol groups are not visible.
180 case U_SPACE_SEPARATOR
:
182 case U_DASH_PUNCTUATION
:
183 case U_INITIAL_PUNCTUATION
:
184 case U_FINAL_PUNCTUATION
:
185 case U_CONNECTOR_PUNCTUATION
:
186 case U_OTHER_PUNCTUATION
:
189 case U_CURRENCY_SYMBOL
:
190 case U_MODIFIER_SYMBOL
:
199 case U_LINE_SEPARATOR
:
200 case U_PARAGRAPH_SEPARATOR
:
201 return CONTROL
|PRINTABLE
;
// Fallback result.
// NOTE(review): presumably reached for categories with no case label above;
// the enclosing braces / default label are not visible here.
205 return U_GENERAL_OTHER_TYPES
;
// getCharacterType: KCharacterType bit mask of the character at nPos.
//   Text    - source string
//   nPos    - index into Text
//   rLocale - unused (parameter name is commented out)
// Returns 0 when nPos is negative or not less than Text.getLength(); otherwise
// the result of getCharType(Text, &nPos, 0).
// Declared throw(RuntimeException).
// NOTE(review): the return-type line and closing brace are not visible here.
210 cclass_Unicode::getCharacterType( const OUString
& Text
, sal_Int32 nPos
, const Locale
& /*rLocale*/ ) throw(RuntimeException
) {
// Out-of-range index: report 0 instead of reading outside the string.
211 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
212 return getCharType(Text
, &nPos
, 0);
// getStringType: combined KCharacterType bit mask of a range of Text.
//   Text    - source string
//   nPos    - start index into Text
//   nCount  - maximum number of characters to classify
//   rLocale - unused (parameter name is commented out)
// Returns 0 when nPos is negative or not less than Text.getLength(). Otherwise
// ORs together the getCharType() results of the character at nPos and of the
// following characters, stopping after nCount characters or when nPos reaches
// the end of Text.
// Declared throw(RuntimeException).
// NOTE(review): the return-type line, the final return statement and the
// closing brace are not visible here.
217 cclass_Unicode::getStringType( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
, const Locale
& /*rLocale*/ ) throw(RuntimeException
) {
// Out-of-range index: report 0 instead of reading outside the string.
218 if ( nPos
< 0 || Text
.getLength() <= nPos
) return 0;
// First character is read in place (increment 0) ...
220 sal_Int32 result
= getCharType(Text
, &nPos
, 0);
// ... each further one after stepping nPos forward by one (increment 1);
// getCharType updates nPos through the pointer.
221 for (sal_Int32 i
= 1; i
< nCount
&& nPos
< Text
.getLength(); i
++)
222 result
|= getCharType(Text
, &nPos
, 1);
// parseAnyToken: parses a token of Text and reports it as a ParseResult.
//   Text                       - source string
//   rLocale                    - locale passed to setupParserTable()
//   startCharTokenType         - passed to setupParserTable()
//   userDefinedCharactersStart - passed to setupParserTable()
//   contCharTokenType          - passed to setupParserTable()
//   userDefinedCharactersCont  - passed to setupParserTable()
// Visible flow: a check for nPos being at or past the end of Text, then
// setupParserTable(...) followed by parseText( r, Text, nPos ).
// Declared throw(RuntimeException).
// NOTE(review): the declaration of the nPos parameter, the declaration of `r`,
// the statement guarded by the length check, the return statement and the
// braces are not visible in this view - confirm against the full file.
226 ParseResult SAL_CALL
cclass_Unicode::parseAnyToken(
227 const OUString
& Text
,
229 const Locale
& rLocale
,
230 sal_Int32 startCharTokenType
,
231 const OUString
& userDefinedCharactersStart
,
232 sal_Int32 contCharTokenType
,
233 const OUString
& userDefinedCharactersCont
)
234 throw(RuntimeException
)
// Start position is at or beyond the end of Text.
237 if ( Text
.getLength() <= nPos
)
// Configure the parser for this locale and the character classes given by the
// caller, then parse starting at nPos.
240 setupParserTable( rLocale
,
241 startCharTokenType
, userDefinedCharactersStart
,
242 contCharTokenType
, userDefinedCharactersCont
);
243 parseText( r
, Text
, nPos
);
// parsePredefinedToken: like parseAnyToken, but additionally passes nTokenType
// to parseText().
//   nTokenType                 - token type passed to parseText()
//   Text                       - source string
//   rLocale                    - locale passed to setupParserTable()
//   startCharTokenType         - passed to setupParserTable()
//   userDefinedCharactersStart - passed to setupParserTable()
//   contCharTokenType          - passed to setupParserTable()
//   userDefinedCharactersCont  - passed to setupParserTable()
// Visible flow: a check for nPos being at or past the end of Text, then
// setupParserTable(...) followed by parseText( r, Text, nPos, nTokenType ).
// Declared throw(RuntimeException).
// NOTE(review): the declaration of the nPos parameter, the declaration of `r`,
// the statement guarded by the length check, the return statement and the
// braces are not visible in this view - confirm against the full file.
249 ParseResult SAL_CALL
cclass_Unicode::parsePredefinedToken(
250 sal_Int32 nTokenType
,
251 const OUString
& Text
,
253 const Locale
& rLocale
,
254 sal_Int32 startCharTokenType
,
255 const OUString
& userDefinedCharactersStart
,
256 sal_Int32 contCharTokenType
,
257 const OUString
& userDefinedCharactersCont
)
258 throw(RuntimeException
)
// Start position is at or beyond the end of Text.
261 if ( Text
.getLength() <= nPos
)
// Configure the parser for this locale and the character classes given by the
// caller, then parse starting at nPos for the requested token type.
264 setupParserTable( rLocale
,
265 startCharTokenType
, userDefinedCharactersStart
,
266 contCharTokenType
, userDefinedCharactersCont
);
267 parseText( r
, Text
, nPos
, nTokenType
);
// getImplementationName: returns the cClass name string (set in the
// constructor) converted to an OUString.
// Declared throw( RuntimeException ).
// NOTE(review): the braces around the body are not visible here.
272 OUString SAL_CALL
cclass_Unicode::getImplementationName() throw( RuntimeException
)
274 return OUString::createFromAscii(cClass
);
// supportsService: tells whether rServiceName names this service.
//   rServiceName - service name to test
// Returns true exactly when rServiceName.compareToAscii(cClass) yields 0.
// Declared throw( RuntimeException ).
// NOTE(review): the braces around the body are not visible here.
278 sal_Bool SAL_CALL
cclass_Unicode::supportsService(const OUString
& rServiceName
) throw( RuntimeException
)
// compareToAscii() result is negated: a zero (equal) comparison becomes true.
280 return !rServiceName
.compareToAscii(cClass
);
// getSupportedServiceNames: builds a one-element Sequence<OUString> whose only
// entry is the cClass name string.
// Declared throw( RuntimeException ).
// NOTE(review): the return statement and the braces are not visible in this
// view; presumably aRet is returned - confirm against the full file.
283 Sequence
< OUString
> SAL_CALL
cclass_Unicode::getSupportedServiceNames() throw( RuntimeException
)
285 Sequence
< OUString
> aRet(1);
286 aRet
[0] = OUString::createFromAscii(cClass
);