/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/lang/NoSupportException.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
#include <comphelper/string.hxx>
#include <memory>
#include <vector>

using namespace com::sun::star::lang;
using namespace com::sun::star::i18n;
using namespace com::sun::star::linguistic2;
using namespace com::sun::star::uno;

40 TextConversion_zh::TextConversion_zh( const Reference
< XComponentContext
>& xContext
)
41 : TextConversionService("com.sun.star.i18n.TextConversion_zh")
43 xCDL
= ConversionDictionaryList::create(xContext
);
46 sal_Unicode
getOneCharConversion(sal_Unicode ch
, const sal_Unicode
* Data
, const sal_uInt16
* Index
)
49 sal_Unicode address
= Index
[ch
>>8];
50 if (address
!= 0xFFFF)
51 address
= Data
[address
+ (ch
& 0xFF)];
52 return (address
!= 0xFFFF) ? address
: ch
;
#ifdef DISABLE_DYNLOADING

// When dynamic loading is disabled the conversion tables are linked in
// statically instead of being resolved by name through getFunctionBySymbol().
// The dynamic path looks these up by their plain symbol names, so they are
// declared with C linkage here.
extern "C" {

// Character tables: Traditional->Simplified, Simplified->Variant,
// Simplified->Traditional.
const sal_Unicode* getSTC_CharData_T2S();
const sal_uInt16* getSTC_CharIndex_T2S();
const sal_Unicode* getSTC_CharData_S2V();
const sal_uInt16* getSTC_CharIndex_S2V();
const sal_Unicode* getSTC_CharData_S2T();
const sal_uInt16* getSTC_CharIndex_S2T();

// Word dictionary data; the reference argument is an out-parameter.
const sal_Unicode *getSTC_WordData(sal_Int32&);

// Per-direction word index/entry tables; the reference argument of the
// index getters is an out-parameter (used by the caller as the longest
// word length to try).
const sal_uInt16 *getSTC_WordIndex_T2S(sal_Int32&);
const sal_uInt16 *getSTC_WordEntry_T2S();
const sal_uInt16 *getSTC_WordIndex_S2T(sal_Int32&);
const sal_uInt16 *getSTC_WordEntry_S2T();

}

#endif

81 TextConversion_zh::getCharConversion(const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
, bool toSChinese
, sal_Int32 nConversionOptions
)
83 const sal_Unicode
*Data
;
84 const sal_uInt16
*Index
;
86 #ifndef DISABLE_DYNLOADING
88 Data
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
89 Index
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
90 } else if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
91 Data
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
92 Index
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
94 Data
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
95 Index
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
99 Data
= getSTC_CharData_T2S();
100 Index
= getSTC_CharIndex_T2S();
101 } else if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
102 Data
= getSTC_CharData_S2V();
103 Index
= getSTC_CharIndex_S2V();
105 Data
= getSTC_CharData_S2T();
106 Index
= getSTC_CharIndex_S2T();
110 rtl_uString
* newStr
= rtl_uString_alloc(nLength
);
111 for (sal_Int32 i
= 0; i
< nLength
; i
++)
113 getOneCharConversion(aText
[nStartPos
+i
], Data
, Index
);
114 return OUString(newStr
, SAL_NO_ACQUIRE
); //take ownership
118 TextConversion_zh::getWordConversion(const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
, bool toSChinese
, sal_Int32 nConversionOptions
, Sequence
<sal_Int32
>& offset
)
120 sal_Int32 dictLen
= 0;
121 sal_Int32 maxLen
= 0;
122 const sal_uInt16
*index
;
123 const sal_uInt16
*entry
;
124 const sal_Unicode
*charData
;
125 const sal_uInt16
*charIndex
;
128 #ifndef DISABLE_DYNLOADING
129 const sal_Unicode
*wordData
= reinterpret_cast<const sal_Unicode
* (*)(sal_Int32
&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen
);
131 index
= reinterpret_cast<const sal_uInt16
* (*)(sal_Int32
&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen
);
132 entry
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
133 charData
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
134 charIndex
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
136 index
= reinterpret_cast<const sal_uInt16
* (*)(sal_Int32
&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen
);
137 entry
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
138 if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
139 charData
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
140 charIndex
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
142 charData
= reinterpret_cast<const sal_Unicode
* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
143 charIndex
= reinterpret_cast<const sal_uInt16
* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
147 const sal_Unicode
*wordData
= getSTC_WordData(dictLen
);
149 index
= getSTC_WordIndex_T2S(maxLen
);
150 entry
= getSTC_WordEntry_T2S();
151 charData
= getSTC_CharData_T2S();
152 charIndex
= getSTC_CharIndex_T2S();
154 index
= getSTC_WordIndex_S2T(maxLen
);
155 entry
= getSTC_WordEntry_S2T();
156 if (nConversionOptions
& TextConversionOption::USE_CHARACTER_VARIANTS
) {
157 charData
= getSTC_CharData_S2V();
158 charIndex
= getSTC_CharIndex_S2V();
160 charData
= getSTC_CharData_S2T();
161 charIndex
= getSTC_CharIndex_S2T();
166 if ((!wordData
|| !index
|| !entry
) && !xCDL
.is()) // no word mapping defined, do char2char conversion.
167 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
169 std::unique_ptr
<sal_Unicode
[]> newStr(new sal_Unicode
[nLength
* 2 + 1]);
170 sal_Int32 currPos
= 0, count
= 0;
171 while (currPos
< nLength
) {
172 sal_Int32 len
= nLength
- currPos
;
176 for (; len
> 0 && ! found
; len
--) {
177 OUString word
= aText
.copy(nStartPos
+ currPos
, len
);
178 sal_Int32 current
= 0;
181 Sequence
< OUString
> conversions
;
183 conversions
= xCDL
->queryConversions(word
, 0, len
,
184 aLocale
, ConversionDictionaryType::SCHINESE_TCHINESE
,
185 /*toSChinese ?*/ ConversionDirection_FROM_LEFT
/*: ConversionDirection_FROM_RIGHT*/,
188 catch ( NoSupportException
& ) {
189 // clear reference (when there is no user dictionary) in order
190 // to not always have to catch this exception again
191 // in further calls. (save time)
195 // catch all other exceptions to allow
196 // querying the system dictionary in the next line
198 if (conversions
.getLength() > 0) {
199 if (offset
.getLength() > 0) {
200 if (word
.getLength() != conversions
[0].getLength())
202 while (current
< conversions
[0].getLength()) {
203 offset
[count
] = nStartPos
+ currPos
+ (current
*
204 word
.getLength() / conversions
[0].getLength());
205 newStr
[count
++] = conversions
[0][current
++];
207 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
209 while (current
< conversions
[0].getLength())
210 newStr
[count
++] = conversions
[0][current
++];
212 currPos
+= word
.getLength();
217 if (!found
&& index
[len
+1] - index
[len
] > 0) {
218 sal_Int32 bottom
= static_cast<sal_Int32
>(index
[len
]);
219 sal_Int32 top
= static_cast<sal_Int32
>(index
[len
+1]) - 1;
221 while (bottom
<= top
&& !found
) {
222 current
= (top
+ bottom
) / 2;
223 const sal_Int32 result
= word
.compareTo(wordData
+ entry
[current
]);
227 bottom
= current
+ 1;
229 if (toSChinese
) // Traditionary/Simplified conversion,
230 for (current
= entry
[current
]-1; current
> 0 && wordData
[current
-1]; current
--) ;
231 else // Simplified/Traditionary conversion, forwards search for next word
232 current
= entry
[current
] + word
.getLength() + 1;
233 sal_Int32 start
=current
;
234 if (offset
.getLength() > 0) {
235 if (word
.getLength() != OUString(&wordData
[current
]).getLength())
237 sal_Int32 convertedLength
=OUString(&wordData
[current
]).getLength();
238 while (wordData
[current
]) {
239 offset
[count
]=nStartPos
+ currPos
+ ((current
-start
) *
240 word
.getLength() / convertedLength
);
241 newStr
[count
++] = wordData
[current
++];
243 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
245 while (wordData
[current
])
246 newStr
[count
++] = wordData
[current
++];
248 currPos
+= word
.getLength();
255 if (offset
.getLength() > 0)
256 offset
[count
]=nStartPos
+currPos
;
258 getOneCharConversion(aText
[nStartPos
+currPos
], charData
, charIndex
);
262 if (offset
.getLength() > 0)
263 offset
.realloc(one2one
? 0 : count
);
264 OUString
aRet(newStr
.get(), count
);
268 TextConversionResult SAL_CALL
269 TextConversion_zh::getConversions( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
270 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
272 TextConversionResult result
;
274 result
.Candidates
.realloc(1);
275 result
.Candidates
[0] = getConversion( aText
, nStartPos
, nLength
, rLocale
, nConversionType
, nConversionOptions
);
276 result
.Boundary
.startPos
= nStartPos
;
277 result
.Boundary
.endPos
= nStartPos
+ nLength
;
283 TextConversion_zh::getConversion( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
284 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
286 if (rLocale
.Language
!= "zh" || ( nConversionType
!= TextConversionType::TO_SCHINESE
&& nConversionType
!= TextConversionType::TO_TCHINESE
) )
287 throw NoSupportException(); // Conversion type is not supported in this service.
290 bool toSChinese
= nConversionType
== TextConversionType::TO_SCHINESE
;
292 if (nConversionOptions
& TextConversionOption::CHARACTER_BY_CHARACTER
)
293 // char to char dictionary
294 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
296 Sequence
<sal_Int32
> offset
;
297 // word to word dictionary
298 return getWordConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
, offset
);
303 TextConversion_zh::getConversionWithOffset( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
304 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
, Sequence
<sal_Int32
>& offset
)
306 if (rLocale
.Language
!= "zh" || ( nConversionType
!= TextConversionType::TO_SCHINESE
&& nConversionType
!= TextConversionType::TO_TCHINESE
) )
307 throw NoSupportException(); // Conversion type is not supported in this service.
310 bool toSChinese
= nConversionType
== TextConversionType::TO_SCHINESE
;
312 if (nConversionOptions
& TextConversionOption::CHARACTER_BY_CHARACTER
) {
314 // char to char dictionary
315 return getCharConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
);
317 if (offset
.getLength() < 2*nLength
)
318 offset
.realloc(2*nLength
);
319 // word to word dictionary
320 return getWordConversion(aText
, nStartPos
, nLength
, toSChinese
, nConversionOptions
, offset
);
325 TextConversion_zh::interactiveConversion( const Locale
& /*rLocale*/, sal_Int16
/*nTextConversionType*/, sal_Int32
/*nTextConversionOptions*/ )
332 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */