1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ustrbuf.hxx>
21 #include <i18nutil/casefolding.hxx>
22 #include <i18nutil/unicode.hxx>
24 #include <comphelper/processfactory.hxx>
25 #include <comphelper/string.hxx>
26 #include <osl/diagnose.h>
30 #include "characterclassificationImpl.hxx"
31 #include "breakiteratorImpl.hxx"
33 #include "transliteration_body.hxx"
34 #include <boost/scoped_array.hpp>
36 using namespace ::com::sun::star::uno
;
37 using namespace ::com::sun::star::lang
;
38 using namespace ::rtl
;
40 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
// Constructor of the generic transliteration body: records the human-readable
// transliteration name and the UNO implementation name of this class.
42 Transliteration_body::Transliteration_body()
45 transliterationName
= "Transliteration_body";
46 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_body";
// Reports the kind of transliteration performed by this implementation.
// The visible return statement yields TransliterationType::ONE_TO_ONE.
49 sal_Int16 SAL_CALL
Transliteration_body::getType() throw(RuntimeException
, std::exception
)
51 return TransliterationType::ONE_TO_ONE
;
// Comparison entry point of the transliteration interface.
// All parameters are left unnamed (their names are commented out) and the only
// visible statement throws RuntimeException, i.e. comparison does not appear to
// be implemented for this class.
54 sal_Bool SAL_CALL
Transliteration_body::equals(
55 const OUString
& /*str1*/, sal_Int32
/*pos1*/, sal_Int32
/*nCount1*/, sal_Int32
& /*nMatch1*/,
56 const OUString
& /*str2*/, sal_Int32
/*pos2*/, sal_Int32
/*nCount2*/, sal_Int32
& /*nMatch2*/)
57 throw(RuntimeException
, std::exception
)
59 throw RuntimeException();
// Range transliteration for the pair (str1, str2).
// The visible part only creates a two-element result sequence; how the two
// elements are filled and returned is not visible in this extract.
// NOTE(review): presumably each input string is stored or transliterated into
// one slot of ostr -- confirm against the complete source.
62 Sequence
< OUString
> SAL_CALL
63 Transliteration_body::transliterateRange( const OUString
& str1
, const OUString
& str2
)
64 throw( RuntimeException
, std::exception
)
66 Sequence
< OUString
> ostr(2);
// File-local helper that resolves the combined toggle-case flag into a single
// mapping direction for one character.
//   nMappingType  mapping type as configured on the transliteration object
//   cChar         the character about to be mapped
// The result starts out as nMappingType and is only reconsidered when
// nMappingType equals (MappingTypeLowerToUpper | MappingTypeUpperToLower).
// NOTE(review): the branch structure (else / braces) and the final return are
// not visible in this extract.
72 static sal_uInt8
lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType
, sal_Unicode cChar
)
74 sal_uInt8 nRes
= nMappingType
;
76 // take care of TOGGLE_CASE transliteration:
77 // nMappingType should not be a combination of flags, thus we decide now
79 if (nMappingType
== (MappingTypeLowerToUpper
| MappingTypeUpperToLower
))
// Classify the character; bit 0x02 of the returned type is tested as the
// "lower case" marker, in which case the character is mapped to upper case.
81 const sal_Int16 nType
= unicode::getUnicodeType( cChar
);
82 if (nType
& 0x02 /* lower case*/)
83 nRes
= MappingTypeLowerToUpper
;
86 // should also work properly for non-upper characters like white spaces, numbers, ...
87 nRes
= MappingTypeUpperToLower
;
// Maps nCount sal_Unicode units of inStr, starting at startPos, through
// casefolding::getValue() using the member locale and mapping type, and
// returns the mapped text.
//   inStr     source string
//   startPos  index of the first unit to process
//   nCount    number of units to process
//   offset    in the offset-tracking path, resized to the output length and
//             filled with the source index of every output unit
// The toggle-case combination of flags is resolved per character through
// lcl_getMappingTypeForToggleCase() before each getValue() call.
// NOTE(review): no validation of startPos / nCount against the length of inStr
// is visible in this extract -- confirm that callers guarantee a valid range.
95 Transliteration_body::transliterate(
96 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
97 Sequence
< sal_Int32
>& offset
)
98 throw(RuntimeException
, std::exception
)
100 const sal_Unicode
*in
= inStr
.getStr() + startPos
;
102 // Two different blocks to eliminate the if(useOffset) condition inside the
103 // inner k loop. Yes, on massive use even such small things do count.
// --- Path with offsets, pass 1: add up map.nmap over all input positions to
// obtain the exact length of the result.
106 sal_Int32 nOffCount
= 0, i
;
107 for (i
= 0; i
< nCount
; i
++)
109 // take care of TOGGLE_CASE transliteration:
110 sal_uInt8 nTmpMappingType
= nMappingType
;
111 if (nMappingType
== (MappingTypeLowerToUpper
| MappingTypeUpperToLower
))
112 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
114 const Mapping
&map
= casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
115 nOffCount
+= map
.nmap
;
// Allocate the result string with exactly the computed length and write
// directly into its buffer.
117 rtl_uString
* pStr
= rtl_uString_alloc(nOffCount
);
118 sal_Unicode
* out
= pStr
->buffer
;
// Resize the caller's offset sequence only when its length differs.
120 if ( nOffCount
!= offset
.getLength() )
121 offset
.realloc( nOffCount
);
// --- Pass 2: repeat the mapping and emit every mapped unit together with the
// index (relative to inStr) of the source position it came from.
// NOTE(review): the declaration/initialisation of j is not visible here.
124 sal_Int32
* pArr
= offset
.getArray();
125 for (i
= 0; i
< nCount
; i
++)
127 // take care of TOGGLE_CASE transliteration:
128 sal_uInt8 nTmpMappingType
= nMappingType
;
129 if (nMappingType
== (MappingTypeLowerToUpper
| MappingTypeUpperToLower
))
130 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
132 const Mapping
&map
= casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
133 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
135 pArr
[j
] = i
+ startPos
;
136 out
[j
++] = map
.map
[k
];
// Wrap the filled buffer in an OUString. SAL_NO_ACQUIRE presumably makes the
// OUString adopt pStr without taking an additional reference -- confirm
// against the rtl string documentation.
141 return OUString( pStr
, SAL_NO_ACQUIRE
);
145 // In the simple case of no offset sequence used we can eliminate the
146 // first getValue() loop. We could also assume that most calls result
147 // in identical string lengths, thus using a preallocated
148 // OUStringBuffer could be an easy way to assemble the return string
149 // without too much hassle. However, for single characters the
150 // OUStringBuffer::append() method is quite expensive compared to a
151 // simple array operation, so it pays here to copy the final result
154 // Allocate the max possible buffer. Try to use stack instead of heap,
155 // which would have to be reallocated most times anyways.
// --- Path without offsets: a local array of nLocalBuf * NMAPPINGMAX units is
// used unless nCount exceeds nLocalBuf, in which case a heap buffer of
// nCount * NMAPPINGMAX units (owned by pHeapBuf) replaces it.
// NOTE(review): nCount * NMAPPINGMAX is computed without a visible overflow
// check -- confirm that nCount is bounded by the caller.
156 const sal_Int32 nLocalBuf
= 2048;
157 sal_Unicode aLocalBuf
[ nLocalBuf
* NMAPPINGMAX
], *out
= aLocalBuf
;
158 boost::scoped_array
<sal_Unicode
> pHeapBuf
;
159 if ( nCount
> nLocalBuf
) {
160 pHeapBuf
.reset(new sal_Unicode
[ nCount
* NMAPPINGMAX
]);
161 out
= pHeapBuf
.get();
// Single pass: map each position and append the mapped units to the buffer.
165 for ( sal_Int32 i
= 0; i
< nCount
; i
++)
167 // take care of TOGGLE_CASE transliteration:
168 sal_uInt8 nTmpMappingType
= nMappingType
;
169 if (nMappingType
== (MappingTypeLowerToUpper
| MappingTypeUpperToLower
))
170 nTmpMappingType
= lcl_getMappingTypeForToggleCase( nMappingType
, in
[i
] );
172 const Mapping
&map
= casefolding::getValue( in
, i
, nCount
, aLocale
, nTmpMappingType
);
173 for (sal_Int32 k
= 0; k
< map
.nmap
; k
++)
175 out
[j
++] = map
.map
[k
];
// Build the result from the first j units of the temporary buffer.
179 OUString
aRet( out
, j
);
// Maps the single character inChar with the member mapping type and locale
// and returns the mapping as a string of map.nmap units.
185 Transliteration_body::transliterateChar2String( sal_Unicode inChar
) throw(RuntimeException
, std::exception
)
// Look up the mapping for a one-unit input (position 0, length 1).
187 const Mapping
&map
= casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
// Allocate a result string of exactly map.nmap units and fill its buffer.
188 rtl_uString
* pStr
= rtl_uString_alloc(map
.nmap
);
189 sal_Unicode
* out
= pStr
->buffer
;
// NOTE(review): the declaration of i and the loop body are not visible in this
// extract; presumably each map.map[i] is copied to out[i] -- confirm.
192 for (i
= 0; i
< map
.nmap
; i
++)
196 return OUString( pStr
, SAL_NO_ACQUIRE
);
// Maps the single character inChar with the member mapping type and locale.
// Declared to throw MultipleCharsOutputException; the condition guarding the
// visible throw and the normal return statement are not visible in this
// extract.
// NOTE(review): presumably the exception is raised when the mapping consists
// of more than one unit -- confirm against the complete source.
200 Transliteration_body::transliterateChar2Char( sal_Unicode inChar
) throw(MultipleCharsOutputException
, RuntimeException
, std::exception
)
202 const Mapping
&map
= casefolding::getValue(&inChar
, 0, 1, aLocale
, nMappingType
);
204 throw MultipleCharsOutputException();
// Folding entry point: forwards all arguments unchanged to transliterate()
// on this object and returns its result.
209 Transliteration_body::folding( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
210 Sequence
< sal_Int32
>& offset
) throw(RuntimeException
, std::exception
)
212 return this->transliterate(inStr
, startPos
, nCount
, offset
);
// Constructor of the generic case-mapping transliteration: sets the
// transliteration name and the UNO implementation name.
215 Transliteration_casemapping::Transliteration_casemapping()
218 transliterationName
= "casemapping(generic)";
219 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
// Configures the mapping type used by subsequent transliterations.
//   rMappingType  value stored into the member nMappingType
//   rLocale       locale argument; its use is not visible in this extract
// NOTE(review): presumably rLocale is stored in aLocale -- confirm.
223 Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType
, const Locale
& rLocale
)
225 nMappingType
= rMappingType
;
// Constructor of the upper-to-lower transliteration: selects
// MappingTypeUpperToLower and sets the transliteration / implementation names.
229 Transliteration_u2l::Transliteration_u2l()
231 nMappingType
= MappingTypeUpperToLower
;
232 transliterationName
= "upper_to_lower(generic)";
233 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_u2l";
// Constructor of the lower-to-upper transliteration: selects
// MappingTypeLowerToUpper and sets the transliteration / implementation names.
236 Transliteration_l2u::Transliteration_l2u()
238 nMappingType
= MappingTypeLowerToUpper
;
239 transliterationName
= "lower_to_upper(generic)";
240 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_l2u";
// Constructor of the toggle-case transliteration: stores both direction flags
// combined as the mapping type and sets the transliteration / implementation
// names.
243 Transliteration_togglecase::Transliteration_togglecase()
245 // usually nMappingType must NOT be a combination of different flags here,
246 // but we take care of that problem in Transliteration_body::transliterate above
247 // before that value is used. There we will decide which of both is to be used on
248 // a per character basis.
249 nMappingType
= MappingTypeLowerToUpper
| MappingTypeUpperToLower
;
250 transliterationName
= "toggle(generic)";
251 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
// Constructor of the title-case transliteration: selects MappingTypeToTitle
// and sets the transliteration / implementation names.
254 Transliteration_titlecase::Transliteration_titlecase()
256 nMappingType
= MappingTypeToTitle
;
257 transliterationName
= "title(generic)";
258 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
// File-local helper shared by the title-case and sentence-case
// transliterations.
//   inStr     source string
//   startPos  start of the range to convert
//   nCount    length of the range to convert
//   rLocale   locale passed to the character classification calls
//   offset    resized to the length of the result
// The first code point of the range is converted with toUpper, toLower and
// then toTitle; the remainder of the range is converted with toLower and
// appended.
// NOTE(review): the declarations of aRes, nPos and nIdx, the statement that
// writes into pOffset, and the final return are not visible in this extract.
261 static OUString
transliterate_titlecase_Impl(
262 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
263 const Locale
&rLocale
,
264 Sequence
< sal_Int32
>& offset
)
265 throw(RuntimeException
)
// Work on a copy of just the requested range.
267 const OUString
aText( inStr
.copy( startPos
, nCount
) );
270 if (!aText
.isEmpty())
272 Reference
< XComponentContext
> xContext
= ::comphelper::getProcessComponentContext();
273 CharacterClassificationImpl
aCharClassImpl( xContext
);
275 // because aCharClassImpl.toTitle does not handle ligatures or Beta but will raise
276 // an exception we need to handle the first character manually...
278 // we don't want to change surrogates by accident, thus we use proper code point iteration
280 sal_uInt32 cFirstChar
= aText
.iterateCodePoints( &nPos
);
281 OUString
aResolvedLigature( &cFirstChar
, 1 );
282 // toUpper can be used to properly resolve ligatures and characters like Beta
283 aResolvedLigature
= aCharClassImpl
.toUpper( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
284 // since toTitle will leave all-uppercase text unchanged we first need to
285 // use toLower to bring possible 2nd and following characters in lowercase
286 aResolvedLigature
= aCharClassImpl
.toLower( aResolvedLigature
, 0, aResolvedLigature
.getLength(), rLocale
);
287 sal_Int32 nResolvedLen
= aResolvedLigature
.getLength();
289 // now we can properly use toTitle to get the expected result for the resolved string.
290 // The rest of the text should just become lowercase.
291 aRes
= aCharClassImpl
.toTitle( aResolvedLigature
, 0, nResolvedLen
, rLocale
);
// NOTE(review): the remainder is taken from index 1 with length
// getLength() - 1 although the first code point was read via
// iterateCodePoints(&nPos); if that code point can occupy two UTF-16 units the
// second unit would be processed again here -- confirm whether nPos should be
// used instead of the literal 1 (the same applies to the "+ 1" below).
292 aRes
+= aCharClassImpl
.toLower( aText
, 1, aText
.getLength() - 1, rLocale
);
// Make the offset sequence as long as the result string.
293 offset
.realloc( aRes
.getLength() );
295 sal_Int32
*pOffset
= offset
.getArray();
296 sal_Int32 nLen
= offset
.getLength();
297 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
// Output positions at or beyond the resolved first character are shifted by
// the length difference between the resolved prefix and the single source
// character.
300 if (i
>= nResolvedLen
)
301 nIdx
= i
- nResolvedLen
+ 1;
// Debug-only section (contents beyond the pointer declaration are not visible
// in this extract).
305 #if OSL_DEBUG_LEVEL > 1
306 const sal_Int32
*pCOffset
= offset
.getConstArray();
// Title-case transliteration of one word: delegates to
// transliterate_titlecase_Impl() with the member locale and returns its result.
313 // this function expects to be called on a word-by-word basis,
314 // namely that startPos points to the first char of the word
315 OUString SAL_CALL
Transliteration_titlecase::transliterate(
316 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
317 Sequence
< sal_Int32
>& offset
)
318 throw(RuntimeException
, std::exception
)
320 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
// Constructor of the sentence-case transliteration: selects MappingTypeToTitle
// and sets the transliteration / implementation names.
323 Transliteration_sentencecase::Transliteration_sentencecase()
325 nMappingType
= MappingTypeToTitle
; // though only to be applied to the first word...
326 transliterationName
= "sentence(generic)";
327 implementationName
= "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
// Sentence-case transliteration of one sentence: delegates to the same
// transliterate_titlecase_Impl() helper that title case uses, passing the
// member locale, and returns its result.
330 // this function expects to be called on a sentence-by-sentence basis,
331 // namely that startPos points to the first word (NOT first char!) in the sentence
332 OUString SAL_CALL
Transliteration_sentencecase::transliterate(
333 const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
,
334 Sequence
< sal_Int32
>& offset
)
335 throw(RuntimeException
, std::exception
)
337 return transliterate_titlecase_Impl( inStr
, startPos
, nCount
, aLocale
, offset
);
342 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */