1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: textconversion_zh.cxx,v $
10 * $Revision: 1.11.22.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
35 #include <textconversion.hxx>
36 #include <com/sun/star/i18n/TextConversionType.hpp>
37 #include <com/sun/star/i18n/TextConversionOption.hpp>
38 #include <com/sun/star/linguistic2/ConversionDirection.hpp>
39 #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
40 #include <i18nutil/x_rtl_ustring.h>
42 using namespace com::sun::star::lang
;
43 using namespace com::sun::star::i18n
;
44 using namespace com::sun::star::linguistic2
;
45 using namespace com::sun::star::uno
;
48 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
50 TextConversion_zh::TextConversion_zh( const Reference
< XMultiServiceFactory
>& xMSF
)
52 Reference
< XInterface
> xI
;
53 xI
= xMSF
->createInstance(
54 OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" ));
56 xI
->queryInterface( getCppuType((const Reference
< XConversionDictionaryList
>*)0) ) >>= xCDL
;
58 implementationName
= "com.sun.star.i18n.TextConversion_zh";
61 sal_Unicode SAL_CALL
getOneCharConversion(sal_Unicode ch
, const sal_Unicode
* Data
, const sal_uInt16
* Index
)
64 sal_Unicode address
= Index
[ch
>>8];
65 if (address
!= 0xFFFF)
66 address
= Data
[address
+ (ch
& 0xFF)];
67 return (address
!= 0xFFFF) ? address
: ch
;
74 TextConversion_zh::getCharConversion(const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
, sal_Bool toSChinese
, sal_Int32 nConversionOptions
)
76 const sal_Unicode
*Data
;
77 const sal_uInt16
*Index
;
80 Data
= ((const sal_Unicode
* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))();
81 Index
= ((const sal_uInt16
* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))();
82 } else if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
83 Data
= ((const sal_Unicode
* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))();
84 Index
= ((const sal_uInt16
* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))();
86 Data
= ((const sal_Unicode
* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))();
87 Index
= ((const sal_uInt16
* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))();
90 rtl_uString
* newStr
= x_rtl_uString_new_WithLength( nLength
); // defined in x_rtl_ustring.h
91 for (sal_Int32 i
= 0; i
< nLength
; i
++)
93 getOneCharConversion(aText
[nStartPos
+i
], Data
, Index
);
94 return OUString( newStr
->buffer
, nLength
);
98 TextConversion_zh::getWordConversion(const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
, sal_Bool toSChinese
, sal_Int32 nConversionOptions
, Sequence
<sal_Int32
>& offset
)
100 sal_Int32 dictLen
= 0;
101 sal_Int32 maxLen
= 0;
102 const sal_uInt16
*index
;
103 const sal_uInt16
*entry
;
104 const sal_Unicode
*charData
;
105 const sal_uInt16
*charIndex
;
106 sal_Bool one2one
=sal_True
;
108 const sal_Unicode
*wordData
= ((const sal_Unicode
* (*)(sal_Int32
&)) getFunctionBySymbol("getSTC_WordData"))(dictLen
);
110 index
= ((const sal_uInt16
* (*)(sal_Int32
&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen
);
111 entry
= ((const sal_uInt16
* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))();
112 charData
= ((const sal_Unicode
* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))();
113 charIndex
= ((const sal_uInt16
* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))();
115 index
= ((const sal_uInt16
* (*)(sal_Int32
&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen
);
116 entry
= ((const sal_uInt16
* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))();
117 if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
118 charData
= ((const sal_Unicode
* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))();
119 charIndex
= ((const sal_uInt16
* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))();
121 charData
= ((const sal_Unicode
* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))();
122 charIndex
= ((const sal_uInt16
* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))();
126 if ((!wordData
|| !index
|| !entry
) && !xCDL
.is()) // no word mapping defined, do char2char conversion.
127 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
129 rtl_uString
* newStr
= x_rtl_uString_new_WithLength( nLength
* 2 ); // defined in x_rtl_ustring.h
130 sal_Int32 currPos
= 0, count
= 0;
131 while (currPos
< nLength
) {
132 sal_Int32 len
= nLength
- currPos
;
133 sal_Bool found
= sal_False
;
136 for (; len
> 0 && ! found
; len
--) {
137 OUString word
= aText
.copy(nStartPos
+ currPos
, len
);
138 sal_Int32 current
= 0;
141 Sequence
< OUString
> conversions
;
143 conversions
= xCDL
->queryConversions(word
, 0, len
,
144 aLocale
, ConversionDictionaryType::SCHINESE_TCHINESE
,
145 /*toSChinese ?*/ ConversionDirection_FROM_LEFT
/*: ConversionDirection_FROM_RIGHT*/,
148 catch ( NoSupportException
& ) {
149 // clear reference (when there is no user dictionary) in order
150 // to not always have to catch this exception again
151 // in further calls. (save time)
155 // catch all other exceptions to allow
156 // querying the system dictionary in the next line
158 if (conversions
.getLength() > 0) {
159 if (offset
.getLength() > 0) {
160 if (word
.getLength() != conversions
[0].getLength())
162 while (current
< conversions
[0].getLength()) {
163 offset
[count
] = nStartPos
+ currPos
+ (current
*
164 word
.getLength() / conversions
[0].getLength());
165 newStr
->buffer
[count
++] = conversions
[0][current
++];
167 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
169 while (current
< conversions
[0].getLength())
170 newStr
->buffer
[count
++] = conversions
[0][current
++];
172 currPos
+= word
.getLength();
177 if (!found
&& index
[len
+1] - index
[len
] > 0) {
178 sal_Int32 bottom
= (sal_Int32
) index
[len
];
179 sal_Int32 top
= (sal_Int32
) index
[len
+1] - 1;
181 while (bottom
<= top
&& !found
) {
182 current
= (top
+ bottom
) / 2;
183 const sal_Int32 result
= word
.compareTo(wordData
+ entry
[current
]);
187 bottom
= current
+ 1;
189 if (toSChinese
) // Traditionary/Simplified conversion,
190 for (current
= entry
[current
]-1; current
> 0 && wordData
[current
-1]; current
--) ;
191 else // Simplified/Traditionary conversion, forwards search for next word
192 current
= entry
[current
] + word
.getLength() + 1;
193 sal_Int32 start
=current
;
194 if (offset
.getLength() > 0) {
195 if (word
.getLength() != OUString(&wordData
[current
]).getLength())
197 sal_Int32 convertedLength
=OUString(&wordData
[current
]).getLength();
198 while (wordData
[current
]) {
199 offset
[count
]=nStartPos
+ currPos
+ ((current
-start
) *
200 word
.getLength() / convertedLength
);
201 newStr
->buffer
[count
++] = wordData
[current
++];
203 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
205 while (wordData
[current
])
206 newStr
->buffer
[count
++] = wordData
[current
++];
208 currPos
+= word
.getLength();
215 if (offset
.getLength() > 0)
216 offset
[count
]=nStartPos
+currPos
;
217 newStr
->buffer
[count
++] =
218 getOneCharConversion(aText
[nStartPos
+currPos
], charData
, charIndex
);
222 if (offset
.getLength() > 0)
223 offset
.realloc(one2one
? 0 : count
);
224 return OUString( newStr
->buffer
, count
);
227 TextConversionResult SAL_CALL
228 TextConversion_zh::getConversions( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
229 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
230 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
232 TextConversionResult result
;
234 result
.Candidates
.realloc(1);
235 result
.Candidates
[0] = getConversion( aText
, nStartPos
, nLength
, rLocale
, nConversionType
, nConversionOptions
);
236 result
.Boundary
.startPos
= nStartPos
;
237 result
.Boundary
.endPos
= nStartPos
+ nLength
;
243 TextConversion_zh::getConversion( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
244 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
245 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
247 if (rLocale
.Language
.equalsAscii("zh") &&
248 ( nConversionType
== TextConversionType::TO_SCHINESE
||
249 nConversionType
== TextConversionType::TO_TCHINESE
) ) {
252 sal_Bool toSChinese
= nConversionType
== TextConversionType::TO_SCHINESE
;
254 if (nConversionOptions
& TextConversionOption::CHARACTER_BY_CHARACTER
)
255 // char to char dictionary
256 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
258 Sequence
<sal_Int32
> offset
;
259 // word to word dictionary
260 return getWordConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
, offset
);
263 throw NoSupportException(); // Conversion type is not supported in this service.
267 TextConversion_zh::getConversionWithOffset( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
268 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
, Sequence
<sal_Int32
>& offset
)
269 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
271 if (rLocale
.Language
.equalsAscii("zh") &&
272 ( nConversionType
== TextConversionType::TO_SCHINESE
||
273 nConversionType
== TextConversionType::TO_TCHINESE
) ) {
276 sal_Bool toSChinese
= nConversionType
== TextConversionType::TO_SCHINESE
;
278 if (nConversionOptions
& TextConversionOption::CHARACTER_BY_CHARACTER
) {
280 // char to char dictionary
281 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
283 if (offset
.getLength() < 2*nLength
)
284 offset
.realloc(2*nLength
);
285 // word to word dictionary
286 return getWordConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
, offset
);
289 throw NoSupportException(); // Conversion type is not supported in this service.
293 TextConversion_zh::interactiveConversion( const Locale
& /*rLocale*/, sal_Int16
/*nTextConversionType*/, sal_Int32
/*nTextConversionOptions*/ )
294 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)