Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sal / textenc / converteuctw.cxx
blobedb3c07fa934de94b00daf3b39a9ac2a88766219
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sal/config.h>
22 #include <cassert>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "converter.hxx"
30 #include "converteuctw.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
34 namespace {
36 enum ImplEucTwToUnicodeState
38 IMPL_EUC_TW_TO_UNICODE_STATE_0,
39 IMPL_EUC_TW_TO_UNICODE_STATE_1,
40 IMPL_EUC_TW_TO_UNICODE_STATE_2_1,
41 IMPL_EUC_TW_TO_UNICODE_STATE_2_2,
42 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
45 struct ImplEucTwToUnicodeContext
47 ImplEucTwToUnicodeState m_eState;
48 sal_Int32 m_nPlane; // 0--15
49 sal_Int32 m_nRow; // 0--93
54 void * ImplCreateEucTwToUnicodeContext()
56 ImplEucTwToUnicodeContext * pContext = new ImplEucTwToUnicodeContext;
57 pContext->m_eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
58 return pContext;
61 void ImplResetEucTwToUnicodeContext(void * pContext)
63 if (pContext)
64 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState
65 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
68 void ImplDestroyEucTwToUnicodeContext(void * pContext)
70 delete static_cast< ImplEucTwToUnicodeContext * >(pContext);
73 sal_Size ImplConvertEucTwToUnicode(void const * pData,
74 void * pContext,
75 char const * pSrcBuf,
76 sal_Size nSrcBytes,
77 sal_Unicode * pDestBuf,
78 sal_Size nDestChars,
79 sal_uInt32 nFlags,
80 sal_uInt32 * pInfo,
81 sal_Size * pSrcCvtBytes)
83 sal_uInt16 const * pCns116431992Data
84 = static_cast< ImplEucTwConverterData const * >(pData)->
85 m_pCns116431992ToUnicodeData;
86 sal_Int32 const * pCns116431992RowOffsets
87 = static_cast< ImplEucTwConverterData const * >(pData)->
88 m_pCns116431992ToUnicodeRowOffsets;
89 sal_Int32 const * pCns116431992PlaneOffsets
90 = static_cast< ImplEucTwConverterData const * >(pData)->
91 m_pCns116431992ToUnicodePlaneOffsets;
92 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
93 sal_Int32 nPlane = 0;
94 sal_Int32 nRow = 0;
95 sal_uInt32 nInfo = 0;
96 sal_Size nConverted = 0;
97 sal_Unicode * pDestBufPtr = pDestBuf;
98 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
99 sal_Size startOfCurrentChar = 0;
101 if (pContext)
103 eState = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState;
104 nPlane = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane;
105 nRow = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow;
108 for (; nConverted < nSrcBytes; ++nConverted)
110 bool bUndefined = true;
111 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
112 switch (eState)
114 case IMPL_EUC_TW_TO_UNICODE_STATE_0:
115 if (nChar < 0x80)
116 if (pDestBufPtr != pDestBufEnd) {
117 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
118 startOfCurrentChar = nConverted + 1;
119 } else
120 goto no_output;
121 else if (nChar >= 0xA1 && nChar <= 0xFE)
123 nRow = nChar - 0xA1;
124 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
126 else if (nChar == 0x8E)
127 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
128 else
130 bUndefined = false;
131 goto bad_input;
133 break;
135 case IMPL_EUC_TW_TO_UNICODE_STATE_1:
136 if (nChar >= 0xA1 && nChar <= 0xFE)
138 nPlane = 0;
139 goto transform;
141 else
143 bUndefined = false;
144 goto bad_input;
146 break;
148 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
149 if (nChar >= 0xA1 && nChar <= 0xB0)
151 nPlane = nChar - 0xA1;
152 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
154 else
156 bUndefined = false;
157 goto bad_input;
159 break;
161 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
162 if (nChar >= 0xA1 && nChar <= 0xFE)
164 nRow = nChar - 0xA1;
165 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
167 else
169 bUndefined = false;
170 goto bad_input;
172 break;
174 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
175 if (nChar >= 0xA1 && nChar <= 0xFE)
176 goto transform;
177 else
179 bUndefined = false;
180 goto bad_input;
182 break;
184 continue;
186 transform:
188 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
189 if (nPlaneOffset == -1)
190 goto bad_input;
191 else
193 sal_Int32 nOffset
194 = pCns116431992RowOffsets[nPlaneOffset + nRow];
195 if (nOffset == -1)
196 goto bad_input;
197 else
199 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
200 sal_uInt32 nFirst = nFirstLast & 0xFF;
201 sal_uInt32 nLast = nFirstLast >> 8;
202 nChar -= 0xA0;
203 if (nChar >= nFirst && nChar <= nLast)
205 sal_uInt32 nUnicode
206 = pCns116431992Data[nOffset + (nChar - nFirst)];
207 if (nUnicode == 0xFFFF)
208 goto bad_input;
209 else if (ImplIsHighSurrogate(nUnicode))
210 if (pDestBufEnd - pDestBufPtr >= 2)
212 nOffset += nLast - nFirst + 1;
213 nFirst = pCns116431992Data[nOffset++];
214 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
215 *pDestBufPtr++
216 = static_cast<sal_Unicode>(pCns116431992Data[
217 nOffset + (nChar - nFirst)]);
218 startOfCurrentChar = nConverted + 1;
220 else
221 goto no_output;
222 else
223 if (pDestBufPtr != pDestBufEnd) {
224 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
225 startOfCurrentChar = nConverted + 1;
226 } else
227 goto no_output;
229 else
230 goto bad_input;
231 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
234 continue;
237 bad_input:
238 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
239 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
240 &nInfo))
242 case sal::detail::textenc::BAD_INPUT_STOP:
243 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
244 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
245 ++nConverted;
246 } else {
247 nConverted = startOfCurrentChar;
249 break;
251 case sal::detail::textenc::BAD_INPUT_CONTINUE:
252 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
253 startOfCurrentChar = nConverted + 1;
254 continue;
256 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
257 goto no_output;
259 break;
261 no_output:
262 --pSrcBuf;
263 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
264 break;
267 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
268 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
269 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
270 == 0)
272 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
273 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
274 else
275 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
276 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
277 &nInfo))
279 case sal::detail::textenc::BAD_INPUT_STOP:
280 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
281 nConverted = startOfCurrentChar;
283 [[fallthrough]];
284 case sal::detail::textenc::BAD_INPUT_CONTINUE:
285 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
286 break;
288 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
289 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
290 break;
294 if (pContext)
296 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState = eState;
297 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane = nPlane;
298 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow = nRow;
300 if (pInfo)
301 *pInfo = nInfo;
302 if (pSrcCvtBytes)
303 *pSrcCvtBytes = nConverted;
305 return pDestBufPtr - pDestBuf;
308 sal_Size ImplConvertUnicodeToEucTw(void const * pData,
309 void * pContext,
310 sal_Unicode const * pSrcBuf,
311 sal_Size nSrcChars,
312 char * pDestBuf,
313 sal_Size nDestBytes,
314 sal_uInt32 nFlags,
315 sal_uInt32 * pInfo,
316 sal_Size * pSrcCvtChars)
318 sal_uInt8 const * pCns116431992Data
319 = static_cast< ImplEucTwConverterData const * >(pData)->
320 m_pUnicodeToCns116431992Data;
321 sal_Int32 const * pCns116431992PageOffsets
322 = static_cast< ImplEucTwConverterData const * >(pData)->
323 m_pUnicodeToCns116431992PageOffsets;
324 sal_Int32 const * pCns116431992PlaneOffsets
325 = static_cast< ImplEucTwConverterData const * >(pData)->
326 m_pUnicodeToCns116431992PlaneOffsets;
327 sal_Unicode nHighSurrogate = 0;
328 sal_uInt32 nInfo = 0;
329 sal_Size nConverted = 0;
330 char * pDestBufPtr = pDestBuf;
331 char * pDestBufEnd = pDestBuf + nDestBytes;
333 if (pContext)
334 nHighSurrogate
335 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
337 for (; nConverted < nSrcChars; ++nConverted)
339 bool bUndefined = true;
340 sal_uInt32 nChar = *pSrcBuf++;
341 if (nHighSurrogate == 0)
343 if (ImplIsHighSurrogate(nChar))
345 nHighSurrogate = static_cast<sal_Unicode>(nChar);
346 continue;
348 else if (ImplIsLowSurrogate(nChar))
350 bUndefined = false;
351 goto bad_input;
354 else if (ImplIsLowSurrogate(nChar))
355 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
356 else
358 bUndefined = false;
359 goto bad_input;
362 assert(rtl::isUnicodeScalarValue(nChar));
364 if (nChar < 0x80)
365 if (pDestBufPtr != pDestBufEnd)
366 *pDestBufPtr++ = static_cast< char >(nChar);
367 else
368 goto no_output;
369 else
371 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
372 sal_uInt32 nFirst;
373 sal_uInt32 nLast;
374 sal_uInt32 nPlane;
375 if (nOffset == -1)
376 goto bad_input;
377 nOffset
378 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
379 if (nOffset == -1)
380 goto bad_input;
381 nFirst = pCns116431992Data[nOffset++];
382 nLast = pCns116431992Data[nOffset++];
383 nChar &= 0xFF;
384 if (nChar < nFirst || nChar > nLast)
385 goto bad_input;
386 nOffset += 3 * (nChar - nFirst);
387 nPlane = pCns116431992Data[nOffset++];
388 if (nPlane == 0)
389 goto bad_input;
390 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
391 goto no_output;
392 if (nPlane != 1)
394 *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0x8E));
395 *pDestBufPtr++ = static_cast< char >(0xA0 + nPlane);
397 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset++]);
398 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset]);
400 nHighSurrogate = 0;
401 continue;
403 bad_input:
404 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
405 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
406 &nInfo, nullptr, 0, nullptr))
408 case sal::detail::textenc::BAD_INPUT_STOP:
409 nHighSurrogate = 0;
410 break;
412 case sal::detail::textenc::BAD_INPUT_CONTINUE:
413 nHighSurrogate = 0;
414 continue;
416 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
417 goto no_output;
419 break;
421 no_output:
422 --pSrcBuf;
423 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
424 break;
427 if (nHighSurrogate != 0
428 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
429 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
430 == 0)
432 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
433 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
434 else
435 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
436 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
437 nullptr, 0, nullptr))
439 case sal::detail::textenc::BAD_INPUT_STOP:
440 case sal::detail::textenc::BAD_INPUT_CONTINUE:
441 nHighSurrogate = 0;
442 break;
444 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
445 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
446 break;
450 if (pContext)
451 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
452 = nHighSurrogate;
453 if (pInfo)
454 *pInfo = nInfo;
455 if (pSrcCvtChars)
456 *pSrcCvtChars = nConverted;
458 return pDestBufPtr - pDestBuf;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */