/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <rtl/ref.hxx>
#include <i18nutil/casefolding.hxx>
#include <i18nutil/unicode.hxx>
#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
#include <com/sun/star/i18n/TransliterationType.hpp>
#include <comphelper/processfactory.hxx>

#include <characterclassificationImpl.hxx>

#include <transliteration_body.hxx>

#include <memory>

using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::lang;

38 Transliteration_body::Transliteration_body()
40 nMappingType
= MappingType::NONE
;
41 transliterationName
= "Transliteration_body";
42 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_body";
45 sal_Int16 SAL_CALL
Transliteration_body::getType()
47 return TransliterationType::ONE_TO_ONE
;
50 sal_Bool SAL_CALL
Transliteration_body::equals(
51 const OUString
& /*str1*/, sal_Int32
/*pos1*/, sal_Int32
/*nCount1*/, sal_Int32
& /*nMatch1*/,
52 const OUString
& /*str2*/, sal_Int32
/*pos2*/, sal_Int32
/*nCount2*/, sal_Int32
& /*nMatch2*/)
54 throw RuntimeException();
57 Sequence
< OUString
> SAL_CALL
58 Transliteration_body::transliterateRange( const OUString
& str1
, const OUString
& str2
)
60 Sequence
< OUString
> ostr(2);
66 static MappingType
lcl_getMappingTypeForToggleCase( MappingType nMappingType
, sal_Unicode cChar
)
68 MappingType nRes
= nMappingType
;
70 // take care of TOGGLE_CASE transliteration:
71 // nMappingType should not be a combination of flags, thuse we decide now
73 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
75 const sal_Int16 nType
= unicode::getUnicodeType( cChar
);
76 if (nType
& 0x02 /* lower case*/)
77 nRes
= MappingType::LowerToUpper
;
80 // should also work properly for non-upper characters like white spaces, numbers, ...
81 nRes
= MappingType::UpperToLower
;
89 Transliteration_body::transliterateImpl(
90 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
91 Sequence
< sal_Int32
>& offset
, bool useOffset
)
93 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
95 // Two different blocks to eliminate the if(useOffset) condition inside the
96 // inner k loop. Yes, on massive use even such small things do count.
99 sal_Int32 nOffCount
= 0, i
;
100 for (i
= 0; i
< nCount
; i
++)
102 // take care of TOGGLE_CASE transliteration:
103 MappingType nTmpMappingType
= nMappingType
;
104 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
105 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
107 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
108 nOffCount
+= map
.nmap
;
110 rtl_uString
* pStr
= rtl_uString_alloc(nOffCount
);
111 sal_Unicode
* out
= pStr
->buffer
;
113 if ( nOffCount
!= offset
.getLength() )
114 offset
.realloc( nOffCount
);
117 sal_Int32
* pArr
= offset
.getArray();
118 for (i
= 0; i
< nCount
; i
++)
120 // take care of TOGGLE_CASE transliteration:
121 MappingType nTmpMappingType
= nMappingType
;
122 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
123 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
125 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
126 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
128 pArr
[j
] = i
+ startPos
;
129 out
[j
++] = map
.map
[k
];
134 return OUString( pStr
, SAL_NO_ACQUIRE
);
138 // In the simple case of no offset sequence used we can eliminate the
139 // first getValue() loop. We could also assume that most calls result
140 // in identical string lengths, thus using a preallocated
141 // OUStringBuffer could be an easy way to assemble the return string
142 // without too much hassle. However, for single characters the
143 // OUStringBuffer::append() method is quite expensive compared to a
144 // simple array operation, so it pays here to copy the final result
147 // Allocate the max possible buffer. Try to use stack instead of heap,
148 // which would have to be reallocated most times anyways.
149 const sal_Int32 nLocalBuf
= 2048;
150 sal_Unicode aLocalBuf
[ nLocalBuf
* NMAPPINGMAX
], *out
= aLocalBuf
;
151 std::unique_ptr
<sal_Unicode
[]> pHeapBuf
;
152 if ( nCount
> nLocalBuf
) {
153 pHeapBuf
.reset(new sal_Unicode
[ nCount
* NMAPPINGMAX
]);
154 out
= pHeapBuf
.get();
158 for ( sal_Int32 i
= 0; i
< nCount
; i
++)
160 // take care of TOGGLE_CASE transliteration:
161 MappingType nTmpMappingType
= nMappingType
;
162 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
163 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
165 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
166 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
168 out
[j
++] = map
.map
[k
];
172 OUString
aRet( out
, j
);
178 Transliteration_body::transliterateChar2String( sal_Unicode inChar
)
180 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
181 rtl_uString
* pStr
= rtl_uString_alloc(map
.nmap
);
182 sal_Unicode
* out
= pStr
->buffer
;
185 for (i
= 0; i
< map
.nmap
; i
++)
189 return OUString( pStr
, SAL_NO_ACQUIRE
);
193 Transliteration_body::transliterateChar2Char( sal_Unicode inChar
)
195 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
197 throw MultipleCharsOutputException();
202 Transliteration_body::foldingImpl( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
203 Sequence
< sal_Int32
>& offset
, bool useOffset
)
205 return transliterateImpl(inStr
, startPos
, nCount
, offset
, useOffset
);
208 Transliteration_casemapping::Transliteration_casemapping()
210 nMappingType
= MappingType::NONE
;
211 transliterationName
= "casemapping(generic)";
212 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
216 Transliteration_casemapping::setMappingType( const MappingType rMappingType
, const Locale
& rLocale
)
218 nMappingType
= rMappingType
;
222 Transliteration_u2l::Transliteration_u2l()
224 nMappingType
= MappingType::UpperToLower
;
225 transliterationName
= "upper_to_lower(generic)";
226 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_u2l";
229 Transliteration_l2u::Transliteration_l2u()
231 nMappingType
= MappingType::LowerToUpper
;
232 transliterationName
= "lower_to_upper(generic)";
233 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_l2u";
236 Transliteration_togglecase::Transliteration_togglecase()
238 // usually nMappingType must NOT be a combination of different flags here,
239 // but we take care of that problem in Transliteration_body::transliterate above
240 // before that value is used. There we will decide which of both is to be used on
241 // a per character basis.
242 nMappingType
= MappingType::LowerToUpper
| MappingType::UpperToLower
;
243 transliterationName
= "toggle(generic)";
244 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
247 Transliteration_titlecase::Transliteration_titlecase()
249 nMappingType
= MappingType::ToTitle
;
250 transliterationName
= "title(generic)";
251 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
254 /// @throws RuntimeException
255 static OUString
transliterate_titlecase_Impl(
256 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
257 const Locale
&rLocale
,
258 Sequence
< sal_Int32
>& offset
)
260 const OUString
aText( inStr
.copy( startPos
, nCount
) );
263 if (!aText
.isEmpty())
265 Reference
< XComponentContext
> xContext
= ::comphelper::getProcessComponentContext();
266 rtl::Reference
< CharacterClassificationImpl
> xCharClassImpl( new CharacterClassificationImpl( xContext
) );
268 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
269 // an exception we need to handle the first chara manually...
271 // we don't want to change surrogates by accident, thuse we use proper code point iteration
273 sal_uInt32 cFirstChar
= aText
.iterateCodePoints( &nPos
);
274 OUString
aResolvedLigature( &cFirstChar
, 1 );
275 // toUpper can be used to properly resolve ligatures and characters like Beta
276 aResolvedLigature
= xCharClassImpl
->toUpper( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
277 // since toTitle will leave all-uppercase text unchanged we first need to
278 // use toLower to bring possible 2nd and following chars in lowercase
279 aResolvedLigature
= xCharClassImpl
->toLower( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
280 sal_Int32 nResolvedLen
= aResolvedLigature
.getLength();
282 // now we can properly use toTitle to get the expected result for the resolved string.
283 // The rest of the text should just become lowercase.
284 aRes
= xCharClassImpl
->toTitle( aResolvedLigature
, 0, nResolvedLen
, rLocale
);
285 aRes
+= xCharClassImpl
->toLower( aText
, 1, aText
.getLength() - 1, rLocale
);
286 offset
.realloc( aRes
.getLength() );
288 sal_Int32
*pOffset
= offset
.getArray();
289 sal_Int32 nLen
= offset
.getLength();
290 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
293 if (i
>= nResolvedLen
)
294 nIdx
= i
- nResolvedLen
+ 1;
301 // this function expects to be called on a word-by-word basis,
302 // namely that startPos points to the first char of the word
303 OUString
Transliteration_titlecase::transliterateImpl(
304 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
305 Sequence
< sal_Int32
>& offset
, bool )
307 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
310 Transliteration_sentencecase::Transliteration_sentencecase()
312 nMappingType
= MappingType::ToTitle
; // though only to be applied to the first word...
313 transliterationName
= "sentence(generic)";
314 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
317 // this function expects to be called on a sentence-by-sentence basis,
318 // namely that startPos points to the first word (NOT first char!) in the sentence
319 OUString
Transliteration_sentencecase::transliterateImpl(
320 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
321 Sequence
< sal_Int32
>& offset
, bool )
323 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */