cid#1640468 Dereference after null check
[LibreOffice.git] / i18npool / source / textconversion / textconversion_zh.cxx
bloba2c4429eb4cc380baaa931269efdc621dd4cffcc
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <textconversion.hxx>
22 #include <com/sun/star/i18n/TextConversionType.hpp>
23 #include <com/sun/star/i18n/TextConversionOption.hpp>
24 #include <com/sun/star/lang/NoSupportException.hpp>
25 #include <com/sun/star/linguistic2/ConversionDirection.hpp>
26 #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
27 #include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
28 #include <memory>
30 using namespace com::sun::star::lang;
31 using namespace com::sun::star::i18n;
32 using namespace com::sun::star::linguistic2;
33 using namespace com::sun::star::uno;
36 namespace i18npool {
38 TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
39 : TextConversionService("com.sun.star.i18n.TextConversion_zh")
41 xCDL = ConversionDictionaryList::create(xContext);
44 static sal_Unicode getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
46 if (Data && Index) {
47 sal_Unicode address = Index[ch>>8];
48 if (address != 0xFFFF)
49 address = Data[address + (ch & 0xFF)];
50 return (address != 0xFFFF) ? address : ch;
51 } else {
52 return ch;
56 #ifdef DISABLE_DYNLOADING
58 extern "C" {
63 #endif
65 OUString
66 TextConversion_zh::getCharConversion(std::u16string_view aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
68 const sal_Unicode *Data;
69 const sal_uInt16 *Index;
71 if (toSChinese) {
72 Data = getSTC_CharData_T2S();
73 Index = getSTC_CharIndex_T2S();
74 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
75 Data = getSTC_CharData_S2V();
76 Index = getSTC_CharIndex_S2V();
77 } else {
78 Data = getSTC_CharData_S2T();
79 Index = getSTC_CharIndex_S2T();
82 rtl_uString * newStr = rtl_uString_alloc(nLength);
83 for (sal_Int32 i = 0; i < nLength; i++)
84 newStr->buffer[i] =
85 getOneCharConversion(aText[nStartPos+i], Data, Index);
86 return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
89 OUString
90 TextConversion_zh::getWordConversion(std::u16string_view aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
92 sal_Int32 dictLen = 0;
93 sal_Int32 maxLen = 0;
94 const sal_uInt16 *index;
95 const sal_uInt16 *entry;
96 const sal_Unicode *charData;
97 const sal_uInt16 *charIndex;
98 bool one2one=true;
100 const sal_Unicode *wordData = getSTC_WordData(dictLen);
101 if (toSChinese) {
102 index = getSTC_WordIndex_T2S(maxLen);
103 entry = getSTC_WordEntry_T2S();
104 charData = getSTC_CharData_T2S();
105 charIndex = getSTC_CharIndex_T2S();
106 } else {
107 index = getSTC_WordIndex_S2T(maxLen);
108 entry = getSTC_WordEntry_S2T();
109 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
110 charData = getSTC_CharData_S2V();
111 charIndex = getSTC_CharIndex_S2V();
112 } else {
113 charData = getSTC_CharData_S2T();
114 charIndex = getSTC_CharIndex_S2T();
118 if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
119 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
121 std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nLength * 2 + 1]);
122 sal_Int32 currPos = 0, count = 0;
123 auto offsetRange = asNonConstRange(offset);
124 while (currPos < nLength) {
125 sal_Int32 len = nLength - currPos;
126 bool found = false;
127 if (len > maxLen)
128 len = maxLen;
129 for (; len > 0 && ! found; len--) {
130 OUString word( aText.substr(nStartPos + currPos, len) );
131 sal_Int32 current = 0;
132 // user dictionary
133 if (xCDL.is()) {
134 Sequence < OUString > conversions;
135 try {
136 conversions = xCDL->queryConversions(word, 0, len,
137 aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
138 /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
139 nConversionOptions);
141 catch ( NoSupportException & ) {
142 // clear reference (when there is no user dictionary) in order
143 // to not always have to catch this exception again
144 // in further calls. (save time)
145 xCDL = nullptr;
147 catch (...) {
148 // catch all other exceptions to allow
149 // querying the system dictionary in the next line
151 if (conversions.hasElements()) {
152 if (offset.hasElements()) {
153 if (word.getLength() != conversions[0].getLength())
154 one2one=false;
155 while (current < conversions[0].getLength()) {
156 offsetRange[count] = nStartPos + currPos + (current *
157 word.getLength() / conversions[0].getLength());
158 newStr[count++] = conversions[0][current++];
160 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
161 } else {
162 while (current < conversions[0].getLength())
163 newStr[count++] = conversions[0][current++];
165 currPos += word.getLength();
166 found = true;
170 if (wordData && !found && index[len+1] - index[len] > 0) {
171 sal_Int32 bottom = static_cast<sal_Int32>(index[len]);
172 sal_Int32 top = static_cast<sal_Int32>(index[len+1]) - 1;
174 while (bottom <= top && !found) {
175 current = (top + bottom) / 2;
176 const sal_Int32 result = rtl_ustr_compare(
177 word.getStr(), wordData + entry[current]);
178 if (result < 0)
179 top = current - 1;
180 else if (result > 0)
181 bottom = current + 1;
182 else {
183 if (toSChinese) // Traditionary/Simplified conversion,
184 for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
185 else // Simplified/Traditionary conversion, forwards search for next word
186 current = entry[current] + word.getLength() + 1;
187 sal_Int32 start=current;
188 if (offset.hasElements()) {
189 if (word.getLength() != static_cast<sal_Int32>(std::u16string_view(&wordData[current]).size()))
190 one2one=false;
191 sal_Int32 convertedLength=std::u16string_view(&wordData[current]).size();
192 while (wordData[current]) {
193 offsetRange[count]=nStartPos + currPos + ((current-start) *
194 word.getLength() / convertedLength);
195 newStr[count++] = wordData[current++];
197 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
198 } else {
199 while (wordData[current])
200 newStr[count++] = wordData[current++];
202 currPos += word.getLength();
203 found = true;
208 if (!found) {
209 if (offset.hasElements())
210 offsetRange[count]=nStartPos+currPos;
211 newStr[count++] =
212 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
213 currPos++;
216 if (offset.hasElements())
217 offset.realloc(one2one ? 0 : count);
218 OUString aRet(newStr.get(), count);
219 return aRet;
222 TextConversionResult SAL_CALL
223 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
224 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
226 TextConversionResult result;
228 result.Candidates =
229 { getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions) };
230 result.Boundary.startPos = nStartPos;
231 result.Boundary.endPos = nStartPos + nLength;
233 return result;
236 OUString SAL_CALL
237 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
238 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
240 if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
241 throw NoSupportException(); // Conversion type is not supported in this service.
243 aLocale=rLocale;
244 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
246 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
247 // char to char dictionary
248 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
249 else {
250 Sequence <sal_Int32> offset;
251 // word to word dictionary
252 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
256 OUString SAL_CALL
257 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
258 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
260 if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
261 throw NoSupportException(); // Conversion type is not supported in this service.
263 aLocale=rLocale;
264 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
266 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
267 offset.realloc(0);
268 // char to char dictionary
269 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
270 } else {
271 if (offset.getLength() < 2*nLength)
272 offset.realloc(2*nLength);
273 // word to word dictionary
274 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
278 sal_Bool SAL_CALL
279 TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
281 return false;
286 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */