1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: transliteration_body.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <i18nutil/casefolding.hxx>
35 #define TRANSLITERATION_ALL
36 #include "transliteration_body.hxx"
38 using namespace ::com::sun::star::uno
;
39 using namespace ::com::sun::star::lang
;
40 using namespace ::rtl
;
42 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
// Constructor: assigns the transliterationName and implementationName strings
// for the base transliteration body.
// NOTE(review): the embedded listing numbers jump from 44 to 47, so the lines
// in between (braces and possibly further statements) are not visible here.
44 Transliteration_body::Transliteration_body()
47 transliterationName
= "Transliteration_body";
48 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_body";
// Reports the kind of this transliteration: the visible body returns
// TransliterationType::ONE_TO_ONE. Declared as possibly throwing
// RuntimeException.
51 sal_Int16 SAL_CALL
Transliteration_body::getType() throw(RuntimeException
)
53 return TransliterationType::ONE_TO_ONE
;
// Comparison entry point. All eight parameters are left unnamed (their names
// are commented out), and the only statement visible in this listing throws
// RuntimeException, so no comparison is performed here.
// NOTE(review): listing line 60 is missing; presumably just the opening brace.
56 sal_Bool SAL_CALL
Transliteration_body::equals(
57 const OUString
& /*str1*/, sal_Int32
/*pos1*/, sal_Int32
/*nCount1*/, sal_Int32
& /*nMatch1*/,
58 const OUString
& /*str2*/, sal_Int32
/*pos2*/, sal_Int32
/*nCount2*/, sal_Int32
& /*nMatch2*/)
59 throw(RuntimeException
)
61 throw RuntimeException();
// Takes two strings (str1, str2) and returns a sequence of strings.
// NOTE(review): only the first body statement survives in this listing (the
// numbers stop at 68 and resume at 75 with the next function); how str1/str2
// are used and what is returned cannot be told from here.
64 Sequence
< OUString
> SAL_CALL
65 Transliteration_body::transliterateRange( const OUString
& str1
, const OUString
& str2
)
66 throw( RuntimeException
)
// Result sequence created with two elements.
68 Sequence
< OUString
> ostr(2);
// Transliterates nCount code units of inStr beginning at startPos: every input
// position i is looked up with casefolding::getValue() and all characters of
// the returned mapping are appended to the output. Where offsets are recorded,
// each output character's offset entry is set to i + startPos.
// NOTE(review): the return-type line, all braces, the conditions selecting the
// branches, and the declaration of j are missing from this listing (the
// embedded numbers skip). `in` is declared twice below, so the two halves are
// presumably alternative implementations selected by preprocessor lines that
// are not visible here; confirm against the complete file.
75 Transliteration_body::transliterate( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
76 Sequence
< sal_Int32
>& offset
) throw(RuntimeException
)
// NOTE(review): the block comment opened on the next line has no visible
// terminator in this listing.
79 /* Performance optimization:
80 * The two realloc() consume 48% (32% grow, 16% shrink) runtime of this method!
81 * getValue() needs about 15%, so there is equal balance if we trade the second
82 * (shrinking) realloc() for a getValue(). But if the caller initializes the
83 * sequence to nCount elements there isn't any change in size necessary in most
84 * cases (one-to-one mapping) and we gain 33%.
86 * Of that constellation the getValue() method takes 20% upon each call, so 40%
87 * for both. By remembering the first calls' results we could gain some extra
88 * percentage again, but unfortunately getValue() may return a reference to a
89 * static buffer, so we can't store the pointer directly but would have to
90 * copy-construct an array, which doesn't give us any advantage.
92 * Much more is accomplished by working directly on the sequence buffer
93 * returned by getArray() instead of using operator[] for each and every element.
96 * And while we're at it: now that we know the size in advance we don't need to
97 * copy the buffer anymore, just create the real string buffer and let the
98 * return value take ownership.
100 * All together these changes result in the new implementation needing only 62%
101 * of the time of the old implementation (in other words: that one was 1.61 times slower).
105 // Allocate the max possible buffer. Try to use stack instead of heap which
106 // would have to be reallocated most times anyway.
107 const sal_Int32 nLocalBuf
= 512 * NMAPPINGMAX
;
108 sal_Unicode aLocalBuf
[nLocalBuf
], *out
= aLocalBuf
, *aHeapBuf
= NULL
;
110 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
// Heap buffer sized for nCount * NMAPPINGMAX output characters.
// NOTE(review): presumably guarded by a size check on a line not shown here.
113 out
= aHeapBuf
= (sal_Unicode
*) malloc((nCount
* NMAPPINGMAX
) * sizeof(sal_Unicode
));
// Grow the offset sequence to the largest possible output length.
116 offset
.realloc(nCount
* NMAPPINGMAX
);
// Append every mapped character to out and record its source index in offset.
118 for (sal_Int32 i
= 0; i
< nCount
; i
++) {
119 Mapping
&map
= casefolding::getValue(in
, i
, nCount
, aLocale
, nMappingType
);
120 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++) {
122 offset
[j
] = i
+ startPos
;
123 out
[j
++] = map
.map
[k
];
// Second visible declaration of `in` (see the review note at the top).
136 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
138 // Two different blocks to eliminate the if(useOffset) condition inside the
139 // inner k loop. Yes, on massive use even such small things do count.
// First pass: sum map.nmap over all input positions to obtain the exact
// number of output characters.
142 sal_Int32 nOffCount
= 0, i
;
143 for (i
= 0; i
< nCount
; i
++)
145 const Mapping
&map
= casefolding::getValue(in
, i
, nCount
, aLocale
, nMappingType
);
146 nOffCount
+= map
.nmap
;
// Create the result string with that length and write into its buffer directly.
148 rtl_uString
* pStr
= x_rtl_uString_new_WithLength( nOffCount
, 1 ); // our x_rtl_ustring.h
149 sal_Unicode
* out
= pStr
->buffer
;
// Resize the offset sequence only when its length differs from the output length.
151 if ( nOffCount
!= offset
.getLength() )
152 offset
.realloc( nOffCount
);
// Second pass: fill characters and offsets through raw pointers (pArr, out).
155 sal_Int32
* pArr
= offset
.getArray();
156 for (i
= 0; i
< nCount
; i
++)
158 const Mapping
&map
= casefolding::getValue(in
, i
, nCount
, aLocale
, nMappingType
);
159 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
161 pArr
[j
] = i
+ startPos
;
162 out
[j
++] = map
.map
[k
];
// The returned OUString is constructed from pStr with SAL_NO_ACQUIRE.
167 return OUString( pStr
, SAL_NO_ACQUIRE
);
171 // In the simple case of no offset sequence used we can eliminate the
172 // first getValue() loop. We could also assume that most calls result
173 // in identical string lengths, thus using a preallocated
174 // OUStringBuffer could be an easy way to assemble the return string
175 // without too much hassle. However, for single characters the
176 // OUStringBuffer::append() method is quite expensive compared to a
177 // simple array operation, so it pays here to copy the final result
180 // Allocate the max possible buffer. Try to use stack instead of heap,
181 // which would have to be reallocated most times anyways.
182 const sal_Int32 nLocalBuf
= 2048;
183 sal_Unicode aLocalBuf
[ nLocalBuf
* NMAPPINGMAX
], *out
= aLocalBuf
, *pHeapBuf
= NULL
;
// Switch to a heap buffer when nCount exceeds nLocalBuf input characters.
184 if ( nCount
> nLocalBuf
)
185 out
= pHeapBuf
= new sal_Unicode
[ nCount
* NMAPPINGMAX
];
// Single pass: append every mapped character to out.
188 for ( sal_Int32 i
= 0; i
< nCount
; i
++)
190 const Mapping
&map
= casefolding::getValue(in
, i
, nCount
, aLocale
, nMappingType
);
191 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
193 out
[j
++] = map
.map
[k
];
// Build the result string from the first j characters of out.
// NOTE(review): the release of pHeapBuf and the return statement are not
// visible in this listing.
197 OUString
aRet( out
, j
);
// Transliterates a single character into a string: looks up the mapping for
// inChar, creates a string of map.nmap characters and returns it as an
// OUString constructed with SAL_NO_ACQUIRE.
// NOTE(review): the return-type line, the declaration of i and the body of the
// loop are missing from this listing.
206 Transliteration_body::transliterateChar2String( sal_Unicode inChar
) throw(RuntimeException
)
// Treat inChar as a one-element input (position 0, count 1).
208 const Mapping
&map
= casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
209 rtl_uString
* pStr
= x_rtl_uString_new_WithLength( map
.nmap
, 1 ); // our x_rtl_ustring.h
210 sal_Unicode
* out
= pStr
->buffer
;
// Iterates over the map.nmap mapped characters (loop body not visible).
213 for (i
= 0; i
< map
.nmap
; i
++)
217 return OUString( pStr
, SAL_NO_ACQUIRE
);
// Transliterates a single character to a single character. Looks up the
// mapping for inChar; may throw MultipleCharsOutputException or
// RuntimeException according to its exception specification.
// NOTE(review): the condition guarding the throw (presumably a mapping of more
// than one character) and the return statement are not visible in this listing.
221 Transliteration_body::transliterateChar2Char( sal_Unicode inChar
) throw(MultipleCharsOutputException
, RuntimeException
)
// Treat inChar as a one-element input (position 0, count 1).
223 const Mapping
&map
= casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
225 throw MultipleCharsOutputException();
// Folding is implemented by delegation: forwards inStr, startPos, nCount and
// offset unchanged to transliterate() and returns its result.
230 Transliteration_body::folding( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
231 Sequence
< sal_Int32
>& offset
) throw(RuntimeException
)
233 return this->transliterate(inStr
, startPos
, nCount
, offset
);
// Constructor: assigns the transliterationName and implementationName strings
// for the generic case-mapping transliteration.
// NOTE(review): listing numbers jump from 236 to 239, so intermediate lines
// (braces and possibly further statements) are not visible here.
236 Transliteration_casemapping::Transliteration_casemapping()
239 transliterationName
= "casemapping(generic)";
240 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
// Stores rMappingType in the nMappingType member.
// NOTE(review): the return-type line and whatever is done with rLocale
// (listing line 247) are not visible in this listing.
244 Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType
, const Locale
& rLocale
)
246 nMappingType
= rMappingType
;
// Constructor: selects the upper-to-lower mapping type
// (MappingTypeUpperToLower) and assigns the matching transliteration and
// implementation names.
250 Transliteration_u2l::Transliteration_u2l()
252 nMappingType
= MappingTypeUpperToLower
;
253 transliterationName
= "upper_to_lower(generic)";
254 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_u2l";
// Constructor: selects the lower-to-upper mapping type
// (MappingTypeLowerToUpper) and assigns the matching transliteration and
// implementation names.
// NOTE(review): the listing ends here; any closing lines are not visible.
257 Transliteration_l2u::Transliteration_l2u()
259 nMappingType
= MappingTypeLowerToUpper
;
260 transliterationName
= "lower_to_upper(generic)";
261 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_l2u";