Version 6.4.0.0.beta1, tag libreoffice-6.4.0.0.beta1
[LibreOffice.git] / i18npool / source / transliteration / transliteration_body.cxx
bloba7eae7243835c304c9df9d3ead9b04a0fd7080a1
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ref.hxx>
21 #include <i18nutil/casefolding.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
24 #include <com/sun/star/i18n/TransliterationType.hpp>
25 #include <comphelper/processfactory.hxx>
26 #include <comphelper/sequence.hxx>
28 #include <characterclassificationImpl.hxx>
30 #include <transliteration_body.hxx>
31 #include <memory>
32 #include <numeric>
34 using namespace ::com::sun::star::uno;
35 using namespace ::com::sun::star::i18n;
36 using namespace ::com::sun::star::lang;
38 namespace i18npool {
40 Transliteration_body::Transliteration_body()
42 nMappingType = MappingType::NONE;
43 transliterationName = "Transliteration_body";
44 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
47 sal_Int16 SAL_CALL Transliteration_body::getType()
49 return TransliterationType::ONE_TO_ONE;
52 sal_Bool SAL_CALL Transliteration_body::equals(
53 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
54 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
56 throw RuntimeException();
59 Sequence< OUString > SAL_CALL
60 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
62 Sequence< OUString > ostr(2);
63 ostr[0] = str1;
64 ostr[1] = str2;
65 return ostr;
68 static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar )
70 MappingType nRes = nMappingType;
72 // take care of TOGGLE_CASE transliteration:
73 // nMappingType should not be a combination of flags, thuse we decide now
74 // which one to use.
75 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
77 const sal_Int16 nType = unicode::getUnicodeType( cChar );
78 if (nType & 0x02 /* lower case*/)
79 nRes = MappingType::LowerToUpper;
80 else
82 // should also work properly for non-upper characters like white spaces, numbers, ...
83 nRes = MappingType::UpperToLower;
87 return nRes;
90 OUString
91 Transliteration_body::transliterateImpl(
92 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
93 Sequence< sal_Int32 >& offset, bool useOffset)
95 const sal_Unicode *in = inStr.getStr() + startPos;
97 // We could assume that most calls result in identical string lengths,
98 // thus using a preallocated OUStringBuffer could be an easy way
99 // to assemble the return string without too much hassle. However,
100 // for single characters the OUStringBuffer::append() method is quite
101 // expensive compared to a simple array operation, so it pays here
102 // to copy the final result instead.
104 // Allocate the max possible buffer. Try to use stack instead of heap,
105 // which would have to be reallocated most times anyways.
106 constexpr sal_Int32 nLocalBuf = 2048;
107 sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf;
108 std::unique_ptr<sal_Unicode[]> pHeapBuf;
109 if (nCount > nLocalBuf)
111 pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
112 out = pHeapBuf.get();
115 sal_Int32 j = 0;
116 // Two different blocks to eliminate the if(useOffset) condition inside the loop.
117 // Yes, on massive use even such small things do count.
118 if ( useOffset )
120 std::vector<sal_Int32> aVec;
121 aVec.reserve(std::max<sal_Int32>(nLocalBuf, nCount) * NMAPPINGMAX);
123 for (sal_Int32 i = 0; i < nCount; i++)
125 // take care of TOGGLE_CASE transliteration:
126 MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
128 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
129 std::fill_n(std::back_inserter(aVec), map.nmap, i + startPos);
130 std::copy_n(map.map, map.nmap, out + j);
131 j += map.nmap;
134 offset = comphelper::containerToSequence(aVec);
136 else
138 for ( sal_Int32 i = 0; i < nCount; i++)
140 // take care of TOGGLE_CASE transliteration:
141 MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
143 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
144 std::copy_n(map.map, map.nmap, out + j);
145 j += map.nmap;
149 return OUString(out, j);
152 OUString SAL_CALL
153 Transliteration_body::transliterateChar2String( sal_Unicode inChar )
155 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
156 rtl_uString* pStr = rtl_uString_alloc(map.nmap);
157 sal_Unicode* out = pStr->buffer;
158 sal_Int32 i;
160 for (i = 0; i < map.nmap; i++)
161 out[i] = map.map[i];
162 out[i] = 0;
164 return OUString( pStr, SAL_NO_ACQUIRE );
167 sal_Unicode SAL_CALL
168 Transliteration_body::transliterateChar2Char( sal_Unicode inChar )
170 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
171 if (map.nmap > 1)
172 throw MultipleCharsOutputException();
173 return map.map[0];
176 OUString
177 Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
178 Sequence< sal_Int32 >& offset, bool useOffset)
180 return transliterateImpl(inStr, startPos, nCount, offset, useOffset);
183 Transliteration_casemapping::Transliteration_casemapping()
185 nMappingType = MappingType::NONE;
186 transliterationName = "casemapping(generic)";
187 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
190 void
191 Transliteration_casemapping::setMappingType( const MappingType rMappingType, const Locale& rLocale )
193 nMappingType = rMappingType;
194 aLocale = rLocale;
197 Transliteration_u2l::Transliteration_u2l()
199 nMappingType = MappingType::UpperToLower;
200 transliterationName = "upper_to_lower(generic)";
201 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
204 Transliteration_l2u::Transliteration_l2u()
206 nMappingType = MappingType::LowerToUpper;
207 transliterationName = "lower_to_upper(generic)";
208 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
211 Transliteration_togglecase::Transliteration_togglecase()
213 // usually nMappingType must NOT be a combination of different flags here,
214 // but we take care of that problem in Transliteration_body::transliterate above
215 // before that value is used. There we will decide which of both is to be used on
216 // a per character basis.
217 nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
218 transliterationName = "toggle(generic)";
219 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
222 Transliteration_titlecase::Transliteration_titlecase()
224 nMappingType = MappingType::ToTitle;
225 transliterationName = "title(generic)";
226 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
229 /// @throws RuntimeException
230 static OUString transliterate_titlecase_Impl(
231 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
232 const Locale &rLocale,
233 Sequence< sal_Int32 >& offset )
235 const OUString aText( inStr.copy( startPos, nCount ) );
237 OUString aRes;
238 if (!aText.isEmpty())
240 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
241 rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) );
243 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
244 // an exception we need to handle the first chara manually...
246 // we don't want to change surrogates by accident, thuse we use proper code point iteration
247 sal_Int32 nPos = 0;
248 sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
249 OUString aResolvedLigature( &cFirstChar, 1 );
250 // toUpper can be used to properly resolve ligatures and characters like Beta
251 aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
252 // since toTitle will leave all-uppercase text unchanged we first need to
253 // use toLower to bring possible 2nd and following chars in lowercase
254 aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
255 sal_Int32 nResolvedLen = aResolvedLigature.getLength();
257 // now we can properly use toTitle to get the expected result for the resolved string.
258 // The rest of the text should just become lowercase.
259 aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
260 xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
261 offset.realloc( aRes.getLength() );
263 sal_Int32* pOffset = std::fill_n(offset.begin(), nResolvedLen, 0);
264 std::iota(pOffset, offset.end(), 1);
266 return aRes;
269 // this function expects to be called on a word-by-word basis,
270 // namely that startPos points to the first char of the word
271 OUString Transliteration_titlecase::transliterateImpl(
272 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
273 Sequence< sal_Int32 >& offset, bool )
275 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
278 Transliteration_sentencecase::Transliteration_sentencecase()
280 nMappingType = MappingType::ToTitle; // though only to be applied to the first word...
281 transliterationName = "sentence(generic)";
282 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
285 // this function expects to be called on a sentence-by-sentence basis,
286 // namely that startPos points to the first word (NOT first char!) in the sentence
287 OUString Transliteration_sentencecase::transliterateImpl(
288 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
289 Sequence< sal_Int32 >& offset, bool )
291 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
296 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */