Bump version to 4.3-4
[LibreOffice.git] / i18npool / source / textconversion / textconversion_zh.cxx
blob74ecb1906f7388dba4b4f1990e7806b9841bf692
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <assert.h>
22 #include <textconversion.hxx>
23 #include <com/sun/star/i18n/TextConversionType.hpp>
24 #include <com/sun/star/i18n/TextConversionOption.hpp>
25 #include <com/sun/star/linguistic2/ConversionDirection.hpp>
26 #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
27 #include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
28 #include <comphelper/string.hxx>
29 #include <boost/scoped_array.hpp>
31 using namespace com::sun::star::lang;
32 using namespace com::sun::star::i18n;
33 using namespace com::sun::star::linguistic2;
34 using namespace com::sun::star::uno;
37 namespace com { namespace sun { namespace star { namespace i18n {
39 TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
40 : TextConversion("com.sun.star.i18n.TextConversion_zh")
42 xCDL = ConversionDictionaryList::create(xContext);
45 sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
47 if (Data && Index) {
48 sal_Unicode address = Index[ch>>8];
49 if (address != 0xFFFF)
50 address = Data[address + (ch & 0xFF)];
51 return (address != 0xFFFF) ? address : ch;
52 } else {
53 return ch;
#ifdef DISABLE_DYNLOADING

// When dynamic loading is disabled the conversion tables are linked in
// directly, so their accessor functions are declared here instead of being
// resolved by name through getFunctionBySymbol().
extern "C" {

// Character tables (data + index pair), Traditional -> Simplified.
const sal_Unicode* getSTC_CharData_T2S();
const sal_uInt16*  getSTC_CharIndex_T2S();

// Character tables, Simplified -> variant characters
// (selected with TextConversionOption::USE_CHARACTER_VARIANTS).
const sal_Unicode* getSTC_CharData_S2V();
const sal_uInt16*  getSTC_CharIndex_S2V();

// Character tables, Simplified -> Traditional.
const sal_Unicode* getSTC_CharData_S2T();
const sal_uInt16*  getSTC_CharIndex_S2T();

// Word dictionary data; a length is reported through the reference argument.
const sal_Unicode* getSTC_WordData(sal_Int32&);

// Word index/entry tables, Traditional -> Simplified.  The reference argument
// of the index getter is used by getWordConversion() as the longest word
// length to try.
const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_T2S();

// Word index/entry tables, Simplified -> Traditional.
const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_S2T();

}

#endif
79 OUString SAL_CALL
80 TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
82 const sal_Unicode *Data;
83 const sal_uInt16 *Index;
85 #ifndef DISABLE_DYNLOADING
86 if (toSChinese) {
87 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))();
88 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))();
89 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
90 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))();
91 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))();
92 } else {
93 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))();
94 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))();
96 #else
97 if (toSChinese) {
98 Data = getSTC_CharData_T2S();
99 Index = getSTC_CharIndex_T2S();
100 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
101 Data = getSTC_CharData_S2V();
102 Index = getSTC_CharIndex_S2V();
103 } else {
104 Data = getSTC_CharData_S2T();
105 Index = getSTC_CharIndex_S2T();
107 #endif
109 rtl_uString * newStr = rtl_uString_alloc(nLength);
110 for (sal_Int32 i = 0; i < nLength; i++)
111 newStr->buffer[i] =
112 getOneCharConversion(aText[nStartPos+i], Data, Index);
113 return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
116 OUString SAL_CALL
117 TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
119 sal_Int32 dictLen = 0;
120 sal_Int32 maxLen = 0;
121 const sal_uInt16 *index;
122 const sal_uInt16 *entry;
123 const sal_Unicode *charData;
124 const sal_uInt16 *charIndex;
125 bool one2one=true;
127 #ifndef DISABLE_DYNLOADING
128 const sal_Unicode *wordData = ((const sal_Unicode* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordData"))(dictLen);
129 if (toSChinese) {
130 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
131 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))();
132 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))();
133 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))();
134 } else {
135 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
136 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))();
137 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
138 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))();
139 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))();
140 } else {
141 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))();
142 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))();
145 #else
146 const sal_Unicode *wordData = getSTC_WordData(dictLen);
147 if (toSChinese) {
148 index = getSTC_WordIndex_T2S(maxLen);
149 entry = getSTC_WordEntry_T2S();
150 charData = getSTC_CharData_T2S();
151 charIndex = getSTC_CharIndex_T2S();
152 } else {
153 index = getSTC_WordIndex_S2T(maxLen);
154 entry = getSTC_WordEntry_S2T();
155 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
156 charData = getSTC_CharData_S2V();
157 charIndex = getSTC_CharIndex_S2V();
158 } else {
159 charData = getSTC_CharData_S2T();
160 charIndex = getSTC_CharIndex_S2T();
163 #endif
165 if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
166 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
168 boost::scoped_array<sal_Unicode> newStr(new sal_Unicode[nLength * 2 + 1]);
169 sal_Int32 currPos = 0, count = 0;
170 while (currPos < nLength) {
171 sal_Int32 len = nLength - currPos;
172 bool found = false;
173 if (len > maxLen)
174 len = maxLen;
175 for (; len > 0 && ! found; len--) {
176 OUString word = aText.copy(nStartPos + currPos, len);
177 sal_Int32 current = 0;
178 // user dictionary
179 if (xCDL.is()) {
180 Sequence < OUString > conversions;
181 try {
182 conversions = xCDL->queryConversions(word, 0, len,
183 aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
184 /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
185 nConversionOptions);
187 catch ( NoSupportException & ) {
188 // clear reference (when there is no user dictionary) in order
189 // to not always have to catch this exception again
190 // in further calls. (save time)
191 xCDL = 0;
193 catch (...) {
194 // catch all other exceptions to allow
195 // querying the system dictionary in the next line
197 if (conversions.getLength() > 0) {
198 if (offset.getLength() > 0) {
199 if (word.getLength() != conversions[0].getLength())
200 one2one=false;
201 while (current < conversions[0].getLength()) {
202 offset[count] = nStartPos + currPos + (current *
203 word.getLength() / conversions[0].getLength());
204 newStr[count++] = conversions[0][current++];
206 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
207 } else {
208 while (current < conversions[0].getLength())
209 newStr[count++] = conversions[0][current++];
211 currPos += word.getLength();
212 found = true;
216 if (!found && index[len+1] - index[len] > 0) {
217 sal_Int32 bottom = (sal_Int32) index[len];
218 sal_Int32 top = (sal_Int32) index[len+1] - 1;
220 while (bottom <= top && !found) {
221 current = (top + bottom) / 2;
222 const sal_Int32 result = word.compareTo(wordData + entry[current]);
223 if (result < 0)
224 top = current - 1;
225 else if (result > 0)
226 bottom = current + 1;
227 else {
228 if (toSChinese) // Traditionary/Simplified conversion,
229 for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
230 else // Simplified/Traditionary conversion, forwards search for next word
231 current = entry[current] + word.getLength() + 1;
232 sal_Int32 start=current;
233 if (offset.getLength() > 0) {
234 if (word.getLength() != OUString(&wordData[current]).getLength())
235 one2one=false;
236 sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
237 while (wordData[current]) {
238 offset[count]=nStartPos + currPos + ((current-start) *
239 word.getLength() / convertedLength);
240 newStr[count++] = wordData[current++];
242 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
243 } else {
244 while (wordData[current])
245 newStr[count++] = wordData[current++];
247 currPos += word.getLength();
248 found = true;
253 if (!found) {
254 if (offset.getLength() > 0)
255 offset[count]=nStartPos+currPos;
256 newStr[count++] =
257 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
258 currPos++;
261 if (offset.getLength() > 0)
262 offset.realloc(one2one ? 0 : count);
263 OUString aRet(newStr.get(), count);
264 return aRet;
267 TextConversionResult SAL_CALL
268 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
269 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
270 throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
272 TextConversionResult result;
274 result.Candidates.realloc(1);
275 result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
276 result.Boundary.startPos = nStartPos;
277 result.Boundary.endPos = nStartPos + nLength;
279 return result;
282 OUString SAL_CALL
283 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
284 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
285 throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
287 if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
289 aLocale=rLocale;
290 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
292 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
293 // char to char dictionary
294 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
295 else {
296 Sequence <sal_Int32> offset;
297 // word to word dictionary
298 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
300 } else
301 throw NoSupportException(); // Conversion type is not supported in this service.
304 OUString SAL_CALL
305 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
306 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
307 throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
309 if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
311 aLocale=rLocale;
312 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
314 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
315 offset.realloc(0);
316 // char to char dictionary
317 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
318 } else {
319 if (offset.getLength() < 2*nLength)
320 offset.realloc(2*nLength);
321 // word to word dictionary
322 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
324 } else
325 throw NoSupportException(); // Conversion type is not supported in this service.
328 sal_Bool SAL_CALL
329 TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
330 throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
332 return sal_False;
335 } } } }
337 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */