1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 // Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
20 #if defined __GNUC__ && !defined __clang__
21 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
24 #include <rtl/ref.hxx>
25 #include <i18nutil/casefolding.hxx>
26 #include <i18nutil/unicode.hxx>
27 #include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
28 #include <com/sun/star/i18n/TransliterationType.hpp>
29 #include <comphelper/processfactory.hxx>
30 #include <comphelper/sequence.hxx>
31 #include <o3tl/temporary.hxx>
33 #include <characterclassificationImpl.hxx>
35 #include <transliteration_body.hxx>
39 using namespace ::com::sun::star::uno
;
40 using namespace ::com::sun::star::i18n
;
41 using namespace ::com::sun::star::lang
;
45 Transliteration_body::Transliteration_body()
47 nMappingType
= MappingType::NONE
;
48 transliterationName
= "Transliteration_body";
49 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_body";
52 sal_Int16 SAL_CALL
Transliteration_body::getType()
54 return TransliterationType::ONE_TO_ONE
;
57 sal_Bool SAL_CALL
Transliteration_body::equals(
58 const OUString
& /*str1*/, sal_Int32
/*pos1*/, sal_Int32
/*nCount1*/, sal_Int32
& /*nMatch1*/,
59 const OUString
& /*str2*/, sal_Int32
/*pos2*/, sal_Int32
/*nCount2*/, sal_Int32
& /*nMatch2*/)
61 throw RuntimeException();
64 Sequence
< OUString
> SAL_CALL
65 Transliteration_body::transliterateRange( const OUString
& str1
, const OUString
& str2
)
67 return { str1
, str2
};
70 static MappingType
lcl_getMappingTypeForToggleCase( MappingType nMappingType
, sal_Unicode cChar
)
72 MappingType nRes
= nMappingType
;
74 // take care of TOGGLE_CASE transliteration:
75 // nMappingType should not be a combination of flags, thuse we decide now
77 if (nMappingType
== (MappingType::LowerToUpper
| MappingType::UpperToLower
))
79 const sal_Int16 nType
= unicode::getUnicodeType( cChar
);
80 if (nType
& 0x02 /* lower case*/)
81 nRes
= MappingType::LowerToUpper
;
84 // should also work properly for non-upper characters like white spaces, numbers, ...
85 nRes
= MappingType::UpperToLower
;
93 Transliteration_body::transliterateImpl(
94 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
95 Sequence
< sal_Int32
>* pOffset
)
97 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
99 // We could assume that most calls result in identical string lengths,
100 // thus using a preallocated OUStringBuffer could be an easy way
101 // to assemble the return string without too much hassle. However,
102 // for single characters the OUStringBuffer::append() method is quite
103 // expensive compared to a simple array operation, so it pays here
104 // to copy the final result instead.
106 // Allocate the max possible buffer. Try to use stack instead of heap,
107 // which would have to be reallocated most times anyways.
108 constexpr sal_Int32 nLocalBuf
= 2048;
110 std::unique_ptr
<sal_Unicode
[]> pHeapBuf
;
111 if (nCount
<= nLocalBuf
)
112 out
= static_cast<sal_Unicode
*>(alloca(nCount
* NMAPPINGMAX
* sizeof(sal_Unicode
)));
115 pHeapBuf
.reset(new sal_Unicode
[ nCount
* NMAPPINGMAX
]);
116 out
= pHeapBuf
.get();
120 // Two different blocks to eliminate the if(useOffset) condition inside the loop.
121 // Yes, on massive use even such small things do count.
124 sal_Int32
* offsetData
;
125 std::unique_ptr
<sal_Int32
[]> pOffsetHeapBuf
;
126 sal_Int32 nOffsetCount
= std::max
<sal_Int32
>(nLocalBuf
, nCount
);
127 if (nOffsetCount
<= nLocalBuf
)
128 offsetData
= static_cast<sal_Int32
*>(alloca(nOffsetCount
* NMAPPINGMAX
* sizeof(sal_Int32
)));
131 pOffsetHeapBuf
.reset(new sal_Int32
[ nOffsetCount
* NMAPPINGMAX
]);
132 offsetData
= pOffsetHeapBuf
.get();
134 sal_Int32
* offsetDataEnd
= offsetData
;
136 for (sal_Int32 i
= 0; i
< nCount
; i
++)
138 // take care of TOGGLE_CASE transliteration:
139 MappingType nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
141 const i18nutil::Mapping map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
142 std::fill_n(offsetDataEnd
, map
.nmap
, i
+ startPos
);
143 offsetDataEnd
+= map
.nmap
;
144 std::copy_n(map
.map
, map
.nmap
, out
+ j
);
148 *pOffset
= css::uno::Sequence
< sal_Int32
>(offsetData
, offsetDataEnd
- offsetData
);
152 for ( sal_Int32 i
= 0; i
< nCount
; i
++)
154 // take care of TOGGLE_CASE transliteration:
155 MappingType nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
157 const i18nutil::Mapping map
= i18nutil::casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
158 std::copy_n(map
.map
, map
.nmap
, out
+ j
);
163 return OUString(out
, j
);
167 Transliteration_body::transliterateChar2String( sal_Unicode inChar
)
169 const i18nutil::Mapping map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
170 rtl_uString
* pStr
= rtl_uString_alloc(map
.nmap
);
171 sal_Unicode
* out
= pStr
->buffer
;
174 for (i
= 0; i
< map
.nmap
; i
++)
178 return OUString( pStr
, SAL_NO_ACQUIRE
);
182 Transliteration_body::transliterateChar2Char( sal_Unicode inChar
)
184 const i18nutil::Mapping map
= i18nutil::casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
186 throw MultipleCharsOutputException();
191 Transliteration_body::foldingImpl( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
192 Sequence
< sal_Int32
>* pOffset
)
194 return transliterateImpl(inStr
, startPos
, nCount
, pOffset
);
197 Transliteration_casemapping::Transliteration_casemapping()
199 nMappingType
= MappingType::NONE
;
200 transliterationName
= "casemapping(generic)";
201 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
204 Transliteration_u2l::Transliteration_u2l()
206 nMappingType
= MappingType::UpperToLower
;
207 transliterationName
= "upper_to_lower(generic)";
208 implementationName
= "com.sun.star.i18n.Transliteration.UPPERCASE_LOWERCASE";
211 Transliteration_l2u::Transliteration_l2u()
213 nMappingType
= MappingType::LowerToUpper
;
214 transliterationName
= "lower_to_upper(generic)";
215 implementationName
= "com.sun.star.i18n.Transliteration.LOWERCASE_UPPERCASE";
218 Transliteration_togglecase::Transliteration_togglecase()
220 // usually nMappingType must NOT be a combination of different flags here,
221 // but we take care of that problem in Transliteration_body::transliterate above
222 // before that value is used. There we will decide which of both is to be used on
223 // a per character basis.
224 nMappingType
= MappingType::LowerToUpper
| MappingType::UpperToLower
;
225 transliterationName
= "toggle(generic)";
226 implementationName
= "com.sun.star.i18n.Transliteration.TOGGLE_CASE";
229 Transliteration_titlecase::Transliteration_titlecase()
231 nMappingType
= MappingType::ToTitle
;
232 transliterationName
= "title(generic)";
233 implementationName
= "com.sun.star.i18n.Transliteration.TITLE_CASE";
236 /// @throws RuntimeException
237 static OUString
transliterate_titlecase_Impl(
238 std::u16string_view inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
239 const Locale
&rLocale
,
240 Sequence
< sal_Int32
>* pOffset
)
242 const OUString
aText( inStr
.substr( startPos
, nCount
) );
245 if (!aText
.isEmpty())
247 const Reference
< XComponentContext
>& xContext
= ::comphelper::getProcessComponentContext();
248 rtl::Reference
< CharacterClassificationImpl
> xCharClassImpl( new CharacterClassificationImpl( xContext
) );
250 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
251 // an exception we need to handle the first chara manually...
253 // we don't want to change surrogates by accident, thuse we use proper code point iteration
254 sal_uInt32 cFirstChar
= aText
.iterateCodePoints( &o3tl::temporary(sal_Int32(0)) );
255 OUString
aResolvedLigature( &cFirstChar
, 1 );
256 // toUpper can be used to properly resolve ligatures and characters like Beta
257 aResolvedLigature
= xCharClassImpl
->toUpper( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
258 // since toTitle will leave all-uppercase text unchanged we first need to
259 // use toLower to bring possible 2nd and following chars in lowercase
260 aResolvedLigature
= xCharClassImpl
->toLower( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
261 sal_Int32 nResolvedLen
= aResolvedLigature
.getLength();
263 // now we can properly use toTitle to get the expected result for the resolved string.
264 // The rest of the text should just become lowercase.
265 aRes
= xCharClassImpl
->toTitle( aResolvedLigature
, 0, nResolvedLen
, rLocale
) +
266 xCharClassImpl
->toLower( aText
, 1, aText
.getLength() - 1, rLocale
);
269 pOffset
->realloc( aRes
.getLength() );
271 auto [begin
, end
] = asNonConstRange(*pOffset
);
272 sal_Int32
* pOffsetInt
= std::fill_n(begin
, nResolvedLen
, 0);
273 std::iota(pOffsetInt
, end
, 1);
279 // this function expects to be called on a word-by-word basis,
280 // namely that startPos points to the first char of the word
281 OUString
Transliteration_titlecase::transliterateImpl(
282 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
283 Sequence
< sal_Int32
>* pOffset
)
285 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, pOffset
);
288 Transliteration_sentencecase::Transliteration_sentencecase()
290 nMappingType
= MappingType::ToTitle
; // though only to be applied to the first word...
291 transliterationName
= "sentence(generic)";
292 implementationName
= "com.sun.star.i18n.Transliteration.SENTENCE_CASE";
295 // this function expects to be called on a sentence-by-sentence basis,
296 // namely that startPos points to the first word (NOT first char!) in the sentence
297 OUString
Transliteration_sentencecase::transliterateImpl(
298 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
299 Sequence
< sal_Int32
>* pOffset
)
301 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, pOffset
);
306 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */