/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <vector>

#include <rtl/ref.hxx>
#include <i18nutil/casefolding.hxx>
#include <i18nutil/unicode.hxx>
#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
#include <com/sun/star/i18n/TransliterationType.hpp>
#include <comphelper/processfactory.hxx>
#include <comphelper/sequence.hxx>

#include <characterclassificationImpl.hxx>

#include <transliteration_body.hxx>
34 using namespace ::com::sun::star::uno
;
35 using namespace ::com::sun::star::i18n
;
36 using namespace ::com::sun::star::lang
;
40 Transliteration_body::Transliteration_body()
42 nMappingType
= MappingType::NONE
;
43 transliterationName
= "Transliteration_body";
44 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_body";
47 sal_Int16 SAL_CALL
Transliteration_body::getType()
49 return TransliterationType::ONE_TO_ONE
;
52 sal_Bool SAL_CALL
Transliteration_body::equals(
53 const OUString
& /*str1*/, sal_Int32
/*pos1*/, sal_Int32
/*nCount1*/, sal_Int32
& /*nMatch1*/,
54 const OUString
& /*str2*/, sal_Int32
/*pos2*/, sal_Int32
/*nCount2*/, sal_Int32
& /*nMatch2*/)
56 throw RuntimeException();
59 Sequence
< OUString
> SAL_CALL
60 Transliteration_body::transliterateRange( const OUString
& str1
, const OUString
& str2
)
62 Sequence
< OUString
> ostr(2);
68 static MappingType
lcl_getMappingTypeForToggleCase( MappingType nMappingType
, sal_Unicode cChar
)
70 MappingType nRes
= nMappingType
;
72 // take care of TOGGLE_CASE transliteration:
73 // nMappingType should not be a combination of flags, thuse we decide now
75 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
77 const sal_Int16 nType
= unicode::getUnicodeType( cChar
);
78 if (nType
& 0x02 /* lower case*/)
79 nRes
= MappingType::LowerToUpper
;
82 // should also work properly for non-upper characters like white spaces, numbers, ...
83 nRes
= MappingType::UpperToLower
;
91 Transliteration_body::transliterateImpl(
92 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
93 Sequence
< sal_Int32
>& offset
, bool useOffset
)
95 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
97 // We could assume that most calls result in identical string lengths,
98 // thus using a preallocated OUStringBuffer could be an easy way
99 // to assemble the return string without too much hassle. However,
100 // for single characters the OUStringBuffer::append() method is quite
101 // expensive compared to a simple array operation, so it pays here
102 // to copy the final result instead.
104 // Allocate the max possible buffer. Try to use stack instead of heap,
105 // which would have to be reallocated most times anyways.
106 constexpr sal_Int32 nLocalBuf
= 2048;
107 sal_Unicode aLocalBuf
[ nLocalBuf
* NMAPPINGMAX
], *out
= aLocalBuf
;
108 std::unique_ptr
<sal_Unicode
[]> pHeapBuf
;
109 if (nCount
> nLocalBuf
)
111 pHeapBuf
.reset(new sal_Unicode
[ nCount
* NMAPPINGMAX
]);
112 out
= pHeapBuf
.get();
116 // Two different blocks to eliminate the if(useOffset) condition inside the loop.
117 // Yes, on massive use even such small things do count.
120 std::vector
<sal_Int32
> aVec
;
121 aVec
.reserve(std::max
<sal_Int32
>(nLocalBuf
, nCount
) * NMAPPINGMAX
);
123 for (sal_Int32 i
= 0; i
< nCount
; i
++)
125 // take care of TOGGLE_CASE transliteration:
126 MappingType nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
128 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
129 std::fill_n(std::back_inserter(aVec
), map
.nmap
, i
+ startPos
);
130 std::copy_n(map
.map
, map
.nmap
, out
+ j
);
134 offset
= comphelper::containerToSequence(aVec
);
138 for ( sal_Int32 i
= 0; i
< nCount
; i
++)
140 // take care of TOGGLE_CASE transliteration:
141 MappingType nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
143 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
144 std::copy_n(map
.map
, map
.nmap
, out
+ j
);
149 return OUString(out
, j
);
153 Transliteration_body::transliterateChar2String( sal_Unicode inChar
)
155 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
156 rtl_uString
* pStr
= rtl_uString_alloc(map
.nmap
);
157 sal_Unicode
* out
= pStr
->buffer
;
160 for (i
= 0; i
< map
.nmap
; i
++)
164 return OUString( pStr
, SAL_NO_ACQUIRE
);
168 Transliteration_body::transliterateChar2Char( sal_Unicode inChar
)
170 const i18nutil::Mapping
&map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
172 throw MultipleCharsOutputException();
177 Transliteration_body::foldingImpl( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
178 Sequence
< sal_Int32
>& offset
, bool useOffset
)
180 return transliterateImpl(inStr
, startPos
, nCount
, offset
, useOffset
);
183 Transliteration_casemapping::Transliteration_casemapping()
185 nMappingType
= MappingType::NONE
;
186 transliterationName
= "casemapping(generic)";
187 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
191 Transliteration_casemapping::setMappingType( const MappingType rMappingType
, const Locale
& rLocale
)
193 nMappingType
= rMappingType
;
197 Transliteration_u2l::Transliteration_u2l()
199 nMappingType
= MappingType::UpperToLower
;
200 transliterationName
= "upper_to_lower(generic)";
201 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_u2l";
204 Transliteration_l2u::Transliteration_l2u()
206 nMappingType
= MappingType::LowerToUpper
;
207 transliterationName
= "lower_to_upper(generic)";
208 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_l2u";
211 Transliteration_togglecase::Transliteration_togglecase()
213 // usually nMappingType must NOT be a combination of different flags here,
214 // but we take care of that problem in Transliteration_body::transliterate above
215 // before that value is used. There we will decide which of both is to be used on
216 // a per character basis.
217 nMappingType
= MappingType::LowerToUpper
| MappingType::UpperToLower
;
218 transliterationName
= "toggle(generic)";
219 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
222 Transliteration_titlecase::Transliteration_titlecase()
224 nMappingType
= MappingType::ToTitle
;
225 transliterationName
= "title(generic)";
226 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
229 /// @throws RuntimeException
230 static OUString
transliterate_titlecase_Impl(
231 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
232 const Locale
&rLocale
,
233 Sequence
< sal_Int32
>& offset
)
235 const OUString
aText( inStr
.copy( startPos
, nCount
) );
238 if (!aText
.isEmpty())
240 Reference
< XComponentContext
> xContext
= ::comphelper::getProcessComponentContext();
241 rtl::Reference
< CharacterClassificationImpl
> xCharClassImpl( new CharacterClassificationImpl( xContext
) );
243 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
244 // an exception we need to handle the first chara manually...
246 // we don't want to change surrogates by accident, thuse we use proper code point iteration
248 sal_uInt32 cFirstChar
= aText
.iterateCodePoints( &nPos
);
249 OUString
aResolvedLigature( &cFirstChar
, 1 );
250 // toUpper can be used to properly resolve ligatures and characters like Beta
251 aResolvedLigature
= xCharClassImpl
->toUpper( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
252 // since toTitle will leave all-uppercase text unchanged we first need to
253 // use toLower to bring possible 2nd and following chars in lowercase
254 aResolvedLigature
= xCharClassImpl
->toLower( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
255 sal_Int32 nResolvedLen
= aResolvedLigature
.getLength();
257 // now we can properly use toTitle to get the expected result for the resolved string.
258 // The rest of the text should just become lowercase.
259 aRes
= xCharClassImpl
->toTitle( aResolvedLigature
, 0, nResolvedLen
, rLocale
) +
260 xCharClassImpl
->toLower( aText
, 1, aText
.getLength() - 1, rLocale
);
261 offset
.realloc( aRes
.getLength() );
263 sal_Int32
* pOffset
= std::fill_n(offset
.begin(), nResolvedLen
, 0);
264 std::iota(pOffset
, offset
.end(), 1);
269 // this function expects to be called on a word-by-word basis,
270 // namely that startPos points to the first char of the word
271 OUString
Transliteration_titlecase::transliterateImpl(
272 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
273 Sequence
< sal_Int32
>& offset
, bool )
275 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
278 Transliteration_sentencecase::Transliteration_sentencecase()
280 nMappingType
= MappingType::ToTitle
; // though only to be applied to the first word...
281 transliterationName
= "sentence(generic)";
282 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
285 // this function expects to be called on a sentence-by-sentence basis,
286 // namely that startPos points to the first word (NOT first char!) in the sentence
287 OUString
Transliteration_sentencecase::transliterateImpl(
288 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
289 Sequence
< sal_Int32
>& offset
, bool )
291 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */