bump product version to 6.3.0.0.beta1
[LibreOffice.git] / i18npool / source / transliteration / transliteration_body.cxx
bloba320b46d36aa0fe8d56cd0e79800d4c7e80eddad
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ref.hxx>
21 #include <i18nutil/casefolding.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
24 #include <com/sun/star/i18n/TransliterationType.hpp>
25 #include <comphelper/processfactory.hxx>
27 #include <characterclassificationImpl.hxx>
29 #include <transliteration_body.hxx>
30 #include <memory>
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::i18n;
34 using namespace ::com::sun::star::lang;
36 namespace i18npool {
38 Transliteration_body::Transliteration_body()
40 nMappingType = MappingType::NONE;
41 transliterationName = "Transliteration_body";
42 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
45 sal_Int16 SAL_CALL Transliteration_body::getType()
47 return TransliterationType::ONE_TO_ONE;
50 sal_Bool SAL_CALL Transliteration_body::equals(
51 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
52 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
54 throw RuntimeException();
57 Sequence< OUString > SAL_CALL
58 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
60 Sequence< OUString > ostr(2);
61 ostr[0] = str1;
62 ostr[1] = str2;
63 return ostr;
66 static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar )
68 MappingType nRes = nMappingType;
70 // take care of TOGGLE_CASE transliteration:
71 // nMappingType should not be a combination of flags, thuse we decide now
72 // which one to use.
73 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
75 const sal_Int16 nType = unicode::getUnicodeType( cChar );
76 if (nType & 0x02 /* lower case*/)
77 nRes = MappingType::LowerToUpper;
78 else
80 // should also work properly for non-upper characters like white spaces, numbers, ...
81 nRes = MappingType::UpperToLower;
85 return nRes;
88 OUString
89 Transliteration_body::transliterateImpl(
90 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
91 Sequence< sal_Int32 >& offset, bool useOffset)
93 const sal_Unicode *in = inStr.getStr() + startPos;
95 // Two different blocks to eliminate the if(useOffset) condition inside the
96 // inner k loop. Yes, on massive use even such small things do count.
97 if ( useOffset )
99 sal_Int32 nOffCount = 0, i;
100 for (i = 0; i < nCount; i++)
102 // take care of TOGGLE_CASE transliteration:
103 MappingType nTmpMappingType = nMappingType;
104 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
105 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
107 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
108 nOffCount += map.nmap;
110 rtl_uString* pStr = rtl_uString_alloc(nOffCount);
111 sal_Unicode* out = pStr->buffer;
113 if ( nOffCount != offset.getLength() )
114 offset.realloc( nOffCount );
116 sal_Int32 j = 0;
117 sal_Int32 * pArr = offset.getArray();
118 for (i = 0; i < nCount; i++)
120 // take care of TOGGLE_CASE transliteration:
121 MappingType nTmpMappingType = nMappingType;
122 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
123 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
125 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
126 for (sal_Int32 k = 0; k < map.nmap; k++)
128 pArr[j] = i + startPos;
129 out[j++] = map.map[k];
132 out[j] = 0;
134 return OUString( pStr, SAL_NO_ACQUIRE );
136 else
138 // In the simple case of no offset sequence used we can eliminate the
139 // first getValue() loop. We could also assume that most calls result
140 // in identical string lengths, thus using a preallocated
141 // OUStringBuffer could be an easy way to assemble the return string
142 // without too much hassle. However, for single characters the
143 // OUStringBuffer::append() method is quite expensive compared to a
144 // simple array operation, so it pays here to copy the final result
145 // instead.
147 // Allocate the max possible buffer. Try to use stack instead of heap,
148 // which would have to be reallocated most times anyways.
149 const sal_Int32 nLocalBuf = 2048;
150 sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf;
151 std::unique_ptr<sal_Unicode[]> pHeapBuf;
152 if ( nCount > nLocalBuf ) {
153 pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
154 out = pHeapBuf.get();
157 sal_Int32 j = 0;
158 for ( sal_Int32 i = 0; i < nCount; i++)
160 // take care of TOGGLE_CASE transliteration:
161 MappingType nTmpMappingType = nMappingType;
162 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
163 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
165 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
166 for (sal_Int32 k = 0; k < map.nmap; k++)
168 out[j++] = map.map[k];
172 OUString aRet( out, j );
173 return aRet;
177 OUString SAL_CALL
178 Transliteration_body::transliterateChar2String( sal_Unicode inChar )
180 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
181 rtl_uString* pStr = rtl_uString_alloc(map.nmap);
182 sal_Unicode* out = pStr->buffer;
183 sal_Int32 i;
185 for (i = 0; i < map.nmap; i++)
186 out[i] = map.map[i];
187 out[i] = 0;
189 return OUString( pStr, SAL_NO_ACQUIRE );
192 sal_Unicode SAL_CALL
193 Transliteration_body::transliterateChar2Char( sal_Unicode inChar )
195 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
196 if (map.nmap > 1)
197 throw MultipleCharsOutputException();
198 return map.map[0];
201 OUString
202 Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
203 Sequence< sal_Int32 >& offset, bool useOffset)
205 return transliterateImpl(inStr, startPos, nCount, offset, useOffset);
208 Transliteration_casemapping::Transliteration_casemapping()
210 nMappingType = MappingType::NONE;
211 transliterationName = "casemapping(generic)";
212 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
215 void
216 Transliteration_casemapping::setMappingType( const MappingType rMappingType, const Locale& rLocale )
218 nMappingType = rMappingType;
219 aLocale = rLocale;
222 Transliteration_u2l::Transliteration_u2l()
224 nMappingType = MappingType::UpperToLower;
225 transliterationName = "upper_to_lower(generic)";
226 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
229 Transliteration_l2u::Transliteration_l2u()
231 nMappingType = MappingType::LowerToUpper;
232 transliterationName = "lower_to_upper(generic)";
233 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
236 Transliteration_togglecase::Transliteration_togglecase()
238 // usually nMappingType must NOT be a combination of different flags here,
239 // but we take care of that problem in Transliteration_body::transliterate above
240 // before that value is used. There we will decide which of both is to be used on
241 // a per character basis.
242 nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
243 transliterationName = "toggle(generic)";
244 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
247 Transliteration_titlecase::Transliteration_titlecase()
249 nMappingType = MappingType::ToTitle;
250 transliterationName = "title(generic)";
251 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
254 /// @throws RuntimeException
255 static OUString transliterate_titlecase_Impl(
256 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
257 const Locale &rLocale,
258 Sequence< sal_Int32 >& offset )
260 const OUString aText( inStr.copy( startPos, nCount ) );
262 OUString aRes;
263 if (!aText.isEmpty())
265 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
266 rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) );
268 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
269 // an exception we need to handle the first chara manually...
271 // we don't want to change surrogates by accident, thuse we use proper code point iteration
272 sal_Int32 nPos = 0;
273 sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
274 OUString aResolvedLigature( &cFirstChar, 1 );
275 // toUpper can be used to properly resolve ligatures and characters like Beta
276 aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
277 // since toTitle will leave all-uppercase text unchanged we first need to
278 // use toLower to bring possible 2nd and following chars in lowercase
279 aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
280 sal_Int32 nResolvedLen = aResolvedLigature.getLength();
282 // now we can properly use toTitle to get the expected result for the resolved string.
283 // The rest of the text should just become lowercase.
284 aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale );
285 aRes += xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
286 offset.realloc( aRes.getLength() );
288 sal_Int32 *pOffset = offset.getArray();
289 sal_Int32 nLen = offset.getLength();
290 for (sal_Int32 i = 0; i < nLen; ++i)
292 sal_Int32 nIdx = 0;
293 if (i >= nResolvedLen)
294 nIdx = i - nResolvedLen + 1;
295 pOffset[i] = nIdx;
298 return aRes;
301 // this function expects to be called on a word-by-word basis,
302 // namely that startPos points to the first char of the word
303 OUString Transliteration_titlecase::transliterateImpl(
304 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
305 Sequence< sal_Int32 >& offset, bool )
307 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
310 Transliteration_sentencecase::Transliteration_sentencecase()
312 nMappingType = MappingType::ToTitle; // though only to be applied to the first word...
313 transliterationName = "sentence(generic)";
314 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
317 // this function expects to be called on a sentence-by-sentence basis,
318 // namely that startPos points to the first word (NOT first char!) in the sentence
319 OUString Transliteration_sentencecase::transliterateImpl(
320 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
321 Sequence< sal_Int32 >& offset, bool )
323 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
328 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */