Update ooo320-m1
[ooovba.git] / i18npool / source / transliteration / transliteration_body.cxx
blobf46cf5f942a5d0d9eff5e75ec2865b45084a7d83
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: transliteration_body.cxx,v $
10 * $Revision: 1.9 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <i18nutil/casefolding.hxx>
35 #define TRANSLITERATION_ALL
36 #include "transliteration_body.hxx"
38 using namespace ::com::sun::star::uno;
39 using namespace ::com::sun::star::lang;
40 using namespace ::rtl;
42 namespace com { namespace sun { namespace star { namespace i18n {
44 Transliteration_body::Transliteration_body()
46 nMappingType = 0;
47 transliterationName = "Transliteration_body";
48 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
51 sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException)
53 return TransliterationType::ONE_TO_ONE;
56 sal_Bool SAL_CALL Transliteration_body::equals(
57 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
58 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
59 throw(RuntimeException)
61 throw RuntimeException();
64 Sequence< OUString > SAL_CALL
65 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
66 throw( RuntimeException)
68 Sequence< OUString > ostr(2);
69 ostr[0] = str1;
70 ostr[1] = str2;
71 return ostr;
74 OUString SAL_CALL
75 Transliteration_body::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
76 Sequence< sal_Int32 >& offset) throw(RuntimeException)
78 #if 0
79 /* Performance optimization:
80 * The two realloc() consume 48% (32% grow, 16% shrink) runtime of this method!
81 * getValue() needs about 15%, so there is equal balance if we trade the second
82 * (shrinking) realloc() for a getValue(). But if the caller initializes the
83 * sequence to nCount elements there isn't any change in size necessary in most
84 * cases (one-to-one mapping) and we gain 33%.
86 * Of that constellation the getValue() method takes 20% upon each call, so 40%
87 * for both. By remembering the first calls' results we could gain some extra
88 * percentage again, but unfortunately getValue() may return a reference to a
89 * static buffer, so we can't store the pointer directly but would have to
90 * copy-construct an array, which doesn't give us any advantage.
92 * Much more is accomplished by working directly on the sequence buffer
93 * returned by getArray() instead of using operator[] for each and every
94 * access.
96 * And while we're at it: now that we know the size in advance we don't need to
97 * copy the buffer anymore, just create the real string buffer and let the
98 * return value take ownership.
100 * All together these changes result in the new implementation needing only 62%
101 * of the time of the old implementation (in other words: that one was 1.61
102 * times slower ...)
105 // Allocate the max possible buffer. Try to use stack instead of heap which
106 // would have to be reallocated most times anyway.
107 const sal_Int32 nLocalBuf = 512 * NMAPPINGMAX;
108 sal_Unicode aLocalBuf[nLocalBuf], *out = aLocalBuf, *aHeapBuf = NULL;
110 const sal_Unicode *in = inStr.getStr() + startPos;
112 if (nCount > 512)
113 out = aHeapBuf = (sal_Unicode*) malloc((nCount * NMAPPINGMAX) * sizeof(sal_Unicode));
115 if (useOffset)
116 offset.realloc(nCount * NMAPPINGMAX);
117 sal_Int32 j = 0;
118 for (sal_Int32 i = 0; i < nCount; i++) {
119 Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
120 for (sal_Int32 k = 0; k < map.nmap; k++) {
121 if (useOffset)
122 offset[j] = i + startPos;
123 out[j++] = map.map[k];
126 if (useOffset)
127 offset.realloc(j);
129 OUString r(out, j);
131 if (aHeapBuf)
132 free(aHeapBuf);
134 return r;
135 #else
136 const sal_Unicode *in = inStr.getStr() + startPos;
138 // Two different blocks to eliminate the if(useOffset) condition inside the
139 // inner k loop. Yes, on massive use even such small things do count.
140 if ( useOffset )
142 sal_Int32 nOffCount = 0, i;
143 for (i = 0; i < nCount; i++)
145 const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
146 nOffCount += map.nmap;
148 rtl_uString* pStr = x_rtl_uString_new_WithLength( nOffCount, 1 ); // our x_rtl_ustring.h
149 sal_Unicode* out = pStr->buffer;
151 if ( nOffCount != offset.getLength() )
152 offset.realloc( nOffCount );
154 sal_Int32 j = 0;
155 sal_Int32 * pArr = offset.getArray();
156 for (i = 0; i < nCount; i++)
158 const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
159 for (sal_Int32 k = 0; k < map.nmap; k++)
161 pArr[j] = i + startPos;
162 out[j++] = map.map[k];
165 out[j] = 0;
167 return OUString( pStr, SAL_NO_ACQUIRE );
169 else
171 // In the simple case of no offset sequence used we can eliminate the
172 // first getValue() loop. We could also assume that most calls result
173 // in identical string lengths, thus using a preallocated
174 // OUStringBuffer could be an easy way to assemble the return string
175 // without too much hassle. However, for single characters the
176 // OUStringBuffer::append() method is quite expensive compared to a
177 // simple array operation, so it pays here to copy the final result
178 // instead.
180 // Allocate the max possible buffer. Try to use stack instead of heap,
181 // which would have to be reallocated most times anyways.
182 const sal_Int32 nLocalBuf = 2048;
183 sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf, *pHeapBuf = NULL;
184 if ( nCount > nLocalBuf )
185 out = pHeapBuf = new sal_Unicode[ nCount * NMAPPINGMAX ];
187 sal_Int32 j = 0;
188 for ( sal_Int32 i = 0; i < nCount; i++)
190 const Mapping &map = casefolding::getValue(in, i, nCount, aLocale, nMappingType);
191 for (sal_Int32 k = 0; k < map.nmap; k++)
193 out[j++] = map.map[k];
197 OUString aRet( out, j );
198 if ( pHeapBuf )
199 delete [] pHeapBuf;
200 return aRet;
202 #endif
205 OUString SAL_CALL
206 Transliteration_body::transliterateChar2String( sal_Unicode inChar ) throw(RuntimeException)
208 const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
209 rtl_uString* pStr = x_rtl_uString_new_WithLength( map.nmap, 1 ); // our x_rtl_ustring.h
210 sal_Unicode* out = pStr->buffer;
211 sal_Int32 i;
213 for (i = 0; i < map.nmap; i++)
214 out[i] = map.map[i];
215 out[i] = 0;
217 return OUString( pStr, SAL_NO_ACQUIRE );
220 sal_Unicode SAL_CALL
221 Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) throw(MultipleCharsOutputException, RuntimeException)
223 const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
224 if (map.nmap > 1)
225 throw MultipleCharsOutputException();
226 return map.map[0];
229 OUString SAL_CALL
230 Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
231 Sequence< sal_Int32 >& offset) throw(RuntimeException)
233 return this->transliterate(inStr, startPos, nCount, offset);
236 Transliteration_casemapping::Transliteration_casemapping()
238 nMappingType = 0;
239 transliterationName = "casemapping(generic)";
240 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
243 void SAL_CALL
244 Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale )
246 nMappingType = rMappingType;
247 aLocale = rLocale;
250 Transliteration_u2l::Transliteration_u2l()
252 nMappingType = MappingTypeUpperToLower;
253 transliterationName = "upper_to_lower(generic)";
254 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
257 Transliteration_l2u::Transliteration_l2u()
259 nMappingType = MappingTypeLowerToUpper;
260 transliterationName = "lower_to_upper(generic)";
261 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
264 } } } }