Bump version to 4.3-4
[LibreOffice.git] / i18npool / source / transliteration / transliteration_body.cxx
blobec3f6fc9bfc420cc2bb94aaa74ff4f61b8635017
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ustrbuf.hxx>
21 #include <i18nutil/casefolding.hxx>
22 #include <i18nutil/unicode.hxx>
24 #include <comphelper/processfactory.hxx>
25 #include <comphelper/string.hxx>
26 #include <osl/diagnose.h>
28 #include <string.h>
30 #include "characterclassificationImpl.hxx"
31 #include "breakiteratorImpl.hxx"
33 #include "transliteration_body.hxx"
34 #include <boost/scoped_array.hpp>
36 using namespace ::com::sun::star::uno;
37 using namespace ::com::sun::star::lang;
38 using namespace ::rtl;
40 namespace com { namespace sun { namespace star { namespace i18n {
42 Transliteration_body::Transliteration_body()
44 nMappingType = 0;
45 transliterationName = "Transliteration_body";
46 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
49 sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException, std::exception)
51 return TransliterationType::ONE_TO_ONE;
54 sal_Bool SAL_CALL Transliteration_body::equals(
55 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
56 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
57 throw(RuntimeException, std::exception)
59 throw RuntimeException();
62 Sequence< OUString > SAL_CALL
63 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
64 throw( RuntimeException, std::exception)
66 Sequence< OUString > ostr(2);
67 ostr[0] = str1;
68 ostr[1] = str2;
69 return ostr;
72 static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar )
74 sal_uInt8 nRes = nMappingType;
76 // take care of TOGGLE_CASE transliteration:
77 // nMappingType should not be a combination of flags, thuse we decide now
78 // which one to use.
79 if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
81 const sal_Int16 nType = unicode::getUnicodeType( cChar );
82 if (nType & 0x02 /* lower case*/)
83 nRes = MappingTypeLowerToUpper;
84 else
86 // should also work properly for non-upper characters like white spacs, numbers, ...
87 nRes = MappingTypeUpperToLower;
91 return nRes;
94 OUString SAL_CALL
95 Transliteration_body::transliterate(
96 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
97 Sequence< sal_Int32 >& offset)
98 throw(RuntimeException, std::exception)
100 const sal_Unicode *in = inStr.getStr() + startPos;
102 // Two different blocks to eliminate the if(useOffset) condition inside the
103 // inner k loop. Yes, on massive use even such small things do count.
104 if ( useOffset )
106 sal_Int32 nOffCount = 0, i;
107 for (i = 0; i < nCount; i++)
109 // take care of TOGGLE_CASE transliteration:
110 sal_uInt8 nTmpMappingType = nMappingType;
111 if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
112 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
114 const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
115 nOffCount += map.nmap;
117 rtl_uString* pStr = rtl_uString_alloc(nOffCount);
118 sal_Unicode* out = pStr->buffer;
120 if ( nOffCount != offset.getLength() )
121 offset.realloc( nOffCount );
123 sal_Int32 j = 0;
124 sal_Int32 * pArr = offset.getArray();
125 for (i = 0; i < nCount; i++)
127 // take care of TOGGLE_CASE transliteration:
128 sal_uInt8 nTmpMappingType = nMappingType;
129 if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
130 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
132 const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
133 for (sal_Int32 k = 0; k < map.nmap; k++)
135 pArr[j] = i + startPos;
136 out[j++] = map.map[k];
139 out[j] = 0;
141 return OUString( pStr, SAL_NO_ACQUIRE );
143 else
145 // In the simple case of no offset sequence used we can eliminate the
146 // first getValue() loop. We could also assume that most calls result
147 // in identical string lengths, thus using a preallocated
148 // OUStringBuffer could be an easy way to assemble the return string
149 // without too much hassle. However, for single characters the
150 // OUStringBuffer::append() method is quite expensive compared to a
151 // simple array operation, so it pays here to copy the final result
152 // instead.
154 // Allocate the max possible buffer. Try to use stack instead of heap,
155 // which would have to be reallocated most times anyways.
156 const sal_Int32 nLocalBuf = 2048;
157 sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf;
158 boost::scoped_array<sal_Unicode> pHeapBuf;
159 if ( nCount > nLocalBuf ) {
160 pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
161 out = pHeapBuf.get();
164 sal_Int32 j = 0;
165 for ( sal_Int32 i = 0; i < nCount; i++)
167 // take care of TOGGLE_CASE transliteration:
168 sal_uInt8 nTmpMappingType = nMappingType;
169 if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
170 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
172 const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
173 for (sal_Int32 k = 0; k < map.nmap; k++)
175 out[j++] = map.map[k];
179 OUString aRet( out, j );
180 return aRet;
184 OUString SAL_CALL
185 Transliteration_body::transliterateChar2String( sal_Unicode inChar ) throw(RuntimeException, std::exception)
187 const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
188 rtl_uString* pStr = rtl_uString_alloc(map.nmap);
189 sal_Unicode* out = pStr->buffer;
190 sal_Int32 i;
192 for (i = 0; i < map.nmap; i++)
193 out[i] = map.map[i];
194 out[i] = 0;
196 return OUString( pStr, SAL_NO_ACQUIRE );
199 sal_Unicode SAL_CALL
200 Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) throw(MultipleCharsOutputException, RuntimeException, std::exception)
202 const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
203 if (map.nmap > 1)
204 throw MultipleCharsOutputException();
205 return map.map[0];
208 OUString SAL_CALL
209 Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
210 Sequence< sal_Int32 >& offset) throw(RuntimeException, std::exception)
212 return this->transliterate(inStr, startPos, nCount, offset);
215 Transliteration_casemapping::Transliteration_casemapping()
217 nMappingType = 0;
218 transliterationName = "casemapping(generic)";
219 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
222 void SAL_CALL
223 Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale )
225 nMappingType = rMappingType;
226 aLocale = rLocale;
229 Transliteration_u2l::Transliteration_u2l()
231 nMappingType = MappingTypeUpperToLower;
232 transliterationName = "upper_to_lower(generic)";
233 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
236 Transliteration_l2u::Transliteration_l2u()
238 nMappingType = MappingTypeLowerToUpper;
239 transliterationName = "lower_to_upper(generic)";
240 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
243 Transliteration_togglecase::Transliteration_togglecase()
245 // usually nMappingType must NOT be a combiantion of different flages here,
246 // but we take care of that problem in Transliteration_body::transliterate above
247 // before that value is used. There we will decide which of both is to be used on
248 // a per character basis.
249 nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower;
250 transliterationName = "toggle(generic)";
251 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
254 Transliteration_titlecase::Transliteration_titlecase()
256 nMappingType = MappingTypeToTitle;
257 transliterationName = "title(generic)";
258 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
261 static OUString transliterate_titlecase_Impl(
262 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
263 const Locale &rLocale,
264 Sequence< sal_Int32 >& offset )
265 throw(RuntimeException)
267 const OUString aText( inStr.copy( startPos, nCount ) );
269 OUString aRes;
270 if (!aText.isEmpty())
272 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
273 CharacterClassificationImpl aCharClassImpl( xContext );
275 // because aCharClassImpl.toTitle does not handle ligatures or Beta but will raise
276 // an exception we need to handle the first chara manually...
278 // we don't want to change surrogates by accident, thuse we use proper code point iteration
279 sal_Int32 nPos = 0;
280 sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
281 OUString aResolvedLigature( &cFirstChar, 1 );
282 // toUpper can be used to properly resolve ligatures and characters like Beta
283 aResolvedLigature = aCharClassImpl.toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
284 // since toTitle will leave all-uppercase text unchanged we first need to
285 // use toLower to bring possible 2nd and following charas in lowercase
286 aResolvedLigature = aCharClassImpl.toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
287 sal_Int32 nResolvedLen = aResolvedLigature.getLength();
289 // now we can properly use toTitle to get the expected result for the resolved string.
290 // The rest of the text should just become lowercase.
291 aRes = aCharClassImpl.toTitle( aResolvedLigature, 0, nResolvedLen, rLocale );
292 aRes += aCharClassImpl.toLower( aText, 1, aText.getLength() - 1, rLocale );
293 offset.realloc( aRes.getLength() );
295 sal_Int32 *pOffset = offset.getArray();
296 sal_Int32 nLen = offset.getLength();
297 for (sal_Int32 i = 0; i < nLen; ++i)
299 sal_Int32 nIdx = 0;
300 if (i >= nResolvedLen)
301 nIdx = i - nResolvedLen + 1;
302 pOffset[i] = nIdx;
305 #if OSL_DEBUG_LEVEL > 1
306 const sal_Int32 *pCOffset = offset.getConstArray();
307 (void) pCOffset;
308 #endif
310 return aRes;
313 // this function expects to be called on a word-by-word basis,
314 // namely that startPos points to the first char of the word
315 OUString SAL_CALL Transliteration_titlecase::transliterate(
316 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
317 Sequence< sal_Int32 >& offset )
318 throw(RuntimeException, std::exception)
320 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
323 Transliteration_sentencecase::Transliteration_sentencecase()
325 nMappingType = MappingTypeToTitle; // though only to be applied to the first word...
326 transliterationName = "sentence(generic)";
327 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
330 // this function expects to be called on a sentence-by-sentence basis,
331 // namely that startPos points to the first word (NOT first char!) in the sentence
332 OUString SAL_CALL Transliteration_sentencecase::transliterate(
333 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
334 Sequence< sal_Int32 >& offset )
335 throw(RuntimeException, std::exception)
337 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
340 } } } }
342 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */