tdf#130857 qt weld: Implement QtInstanceWidget::get_text_height
[LibreOffice.git] / i18npool / source / transliteration / transliteration_body.cxx
blobf4679571d51011565ffefe04bf307ee07bde9a00
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 // Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
20 #if defined __GNUC__ && !defined __clang__
21 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
22 #endif
24 #include <rtl/ref.hxx>
25 #include <i18nutil/casefolding.hxx>
26 #include <i18nutil/unicode.hxx>
27 #include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
28 #include <com/sun/star/i18n/TransliterationType.hpp>
29 #include <comphelper/processfactory.hxx>
30 #include <comphelper/sequence.hxx>
31 #include <o3tl/temporary.hxx>
33 #include <characterclassificationImpl.hxx>
35 #include <transliteration_body.hxx>
36 #include <memory>
37 #include <numeric>
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::i18n;
41 using namespace ::com::sun::star::lang;
43 namespace i18npool {
45 Transliteration_body::Transliteration_body()
47 nMappingType = MappingType::NONE;
48 transliterationName = "Transliteration_body";
49 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
52 sal_Int16 SAL_CALL Transliteration_body::getType()
54 return TransliterationType::ONE_TO_ONE;
57 sal_Bool SAL_CALL Transliteration_body::equals(
58 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
59 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
61 throw RuntimeException();
64 Sequence< OUString > SAL_CALL
65 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
67 return { str1, str2 };
70 static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar )
72 MappingType nRes = nMappingType;
74 // take care of TOGGLE_CASE transliteration:
75 // nMappingType should not be a combination of flags, thuse we decide now
76 // which one to use.
77 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
79 const sal_Int16 nType = unicode::getUnicodeType( cChar );
80 if (nType & 0x02 /* lower case*/)
81 nRes = MappingType::LowerToUpper;
82 else
84 // should also work properly for non-upper characters like white spaces, numbers, ...
85 nRes = MappingType::UpperToLower;
89 return nRes;
92 OUString
93 Transliteration_body::transliterateImpl(
94 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
95 Sequence< sal_Int32 >* pOffset)
97 const sal_Unicode *in = inStr.getStr() + startPos;
99 // We could assume that most calls result in identical string lengths,
100 // thus using a preallocated OUStringBuffer could be an easy way
101 // to assemble the return string without too much hassle. However,
102 // for single characters the OUStringBuffer::append() method is quite
103 // expensive compared to a simple array operation, so it pays here
104 // to copy the final result instead.
106 // Allocate the max possible buffer. Try to use stack instead of heap,
107 // which would have to be reallocated most times anyways.
108 constexpr sal_Int32 nLocalBuf = 2048;
109 sal_Unicode* out;
110 std::unique_ptr<sal_Unicode[]> pHeapBuf;
111 if (nCount <= nLocalBuf)
112 out = static_cast<sal_Unicode*>(alloca(nCount * NMAPPINGMAX * sizeof(sal_Unicode)));
113 else
115 pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
116 out = pHeapBuf.get();
119 sal_Int32 j = 0;
120 // Two different blocks to eliminate the if(useOffset) condition inside the loop.
121 // Yes, on massive use even such small things do count.
122 if ( pOffset )
124 sal_Int32* offsetData;
125 std::unique_ptr<sal_Int32[]> pOffsetHeapBuf;
126 sal_Int32 nOffsetCount = std::max<sal_Int32>(nLocalBuf, nCount);
127 if (nOffsetCount <= nLocalBuf)
128 offsetData = static_cast<sal_Int32*>(alloca(nOffsetCount * NMAPPINGMAX * sizeof(sal_Int32)));
129 else
131 pOffsetHeapBuf.reset(new sal_Int32[ nOffsetCount * NMAPPINGMAX ]);
132 offsetData = pOffsetHeapBuf.get();
134 sal_Int32* offsetDataEnd = offsetData;
136 for (sal_Int32 i = 0; i < nCount; i++)
138 // take care of TOGGLE_CASE transliteration:
139 MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
141 const i18nutil::Mapping map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
142 std::fill_n(offsetDataEnd, map.nmap, i + startPos);
143 offsetDataEnd += map.nmap;
144 std::copy_n(map.map, map.nmap, out + j);
145 j += map.nmap;
148 *pOffset = css::uno::Sequence< sal_Int32 >(offsetData, offsetDataEnd - offsetData);
150 else
152 for ( sal_Int32 i = 0; i < nCount; i++)
154 // take care of TOGGLE_CASE transliteration:
155 MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
157 const i18nutil::Mapping map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
158 std::copy_n(map.map, map.nmap, out + j);
159 j += map.nmap;
163 return OUString(out, j);
166 OUString SAL_CALL
167 Transliteration_body::transliterateChar2String( sal_Unicode inChar )
169 const i18nutil::Mapping map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
170 rtl_uString* pStr = rtl_uString_alloc(map.nmap);
171 sal_Unicode* out = pStr->buffer;
172 sal_Int32 i;
174 for (i = 0; i < map.nmap; i++)
175 out[i] = map.map[i];
176 out[i] = 0;
178 return OUString( pStr, SAL_NO_ACQUIRE );
181 sal_Unicode SAL_CALL
182 Transliteration_body::transliterateChar2Char( sal_Unicode inChar )
184 const i18nutil::Mapping map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
185 if (map.nmap > 1)
186 throw MultipleCharsOutputException();
187 return map.map[0];
190 OUString
191 Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
192 Sequence< sal_Int32 >* pOffset)
194 return transliterateImpl(inStr, startPos, nCount, pOffset);
197 Transliteration_casemapping::Transliteration_casemapping()
199 nMappingType = MappingType::NONE;
200 transliterationName = "casemapping(generic)";
201 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
204 Transliteration_u2l::Transliteration_u2l()
206 nMappingType = MappingType::UpperToLower;
207 transliterationName = "upper_to_lower(generic)";
208 implementationName = "com.sun.star.i18n.Transliteration.UPPERCASE_LOWERCASE";
211 Transliteration_l2u::Transliteration_l2u()
213 nMappingType = MappingType::LowerToUpper;
214 transliterationName = "lower_to_upper(generic)";
215 implementationName = "com.sun.star.i18n.Transliteration.LOWERCASE_UPPERCASE";
218 Transliteration_togglecase::Transliteration_togglecase()
220 // usually nMappingType must NOT be a combination of different flags here,
221 // but we take care of that problem in Transliteration_body::transliterate above
222 // before that value is used. There we will decide which of both is to be used on
223 // a per character basis.
224 nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
225 transliterationName = "toggle(generic)";
226 implementationName = "com.sun.star.i18n.Transliteration.TOGGLE_CASE";
229 Transliteration_titlecase::Transliteration_titlecase()
231 nMappingType = MappingType::ToTitle;
232 transliterationName = "title(generic)";
233 implementationName = "com.sun.star.i18n.Transliteration.TITLE_CASE";
236 /// @throws RuntimeException
237 static OUString transliterate_titlecase_Impl(
238 std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount,
239 const Locale &rLocale,
240 Sequence< sal_Int32 >* pOffset )
242 const OUString aText( inStr.substr( startPos, nCount ) );
244 OUString aRes;
245 if (!aText.isEmpty())
247 const Reference< XComponentContext >& xContext = ::comphelper::getProcessComponentContext();
248 rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) );
250 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
251 // an exception we need to handle the first chara manually...
253 // we don't want to change surrogates by accident, thuse we use proper code point iteration
254 sal_uInt32 cFirstChar = aText.iterateCodePoints( &o3tl::temporary(sal_Int32(0)) );
255 OUString aResolvedLigature( &cFirstChar, 1 );
256 // toUpper can be used to properly resolve ligatures and characters like Beta
257 aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
258 // since toTitle will leave all-uppercase text unchanged we first need to
259 // use toLower to bring possible 2nd and following chars in lowercase
260 aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
261 sal_Int32 nResolvedLen = aResolvedLigature.getLength();
263 // now we can properly use toTitle to get the expected result for the resolved string.
264 // The rest of the text should just become lowercase.
265 aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
266 xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
267 if (pOffset)
269 pOffset->realloc( aRes.getLength() );
271 auto [begin, end] = asNonConstRange(*pOffset);
272 sal_Int32* pOffsetInt = std::fill_n(begin, nResolvedLen, 0);
273 std::iota(pOffsetInt, end, 1);
276 return aRes;
279 // this function expects to be called on a word-by-word basis,
280 // namely that startPos points to the first char of the word
281 OUString Transliteration_titlecase::transliterateImpl(
282 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
283 Sequence< sal_Int32 >* pOffset )
285 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
288 Transliteration_sentencecase::Transliteration_sentencecase()
290 nMappingType = MappingType::ToTitle; // though only to be applied to the first word...
291 transliterationName = "sentence(generic)";
292 implementationName = "com.sun.star.i18n.Transliteration.SENTENCE_CASE";
295 // this function expects to be called on a sentence-by-sentence basis,
296 // namely that startPos points to the first word (NOT first char!) in the sentence
297 OUString Transliteration_sentencecase::transliterateImpl(
298 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
299 Sequence< sal_Int32 >* pOffset )
301 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
306 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */