bump product version to 5.0.4.1
[LibreOffice.git] / sal / textenc / converteuctw.cxx
blobe871306a1b25d1955a1b02d2a994d87790df6830
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "converteuctw.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
31 namespace {
33 enum ImplEucTwToUnicodeState
35 IMPL_EUC_TW_TO_UNICODE_STATE_0,
36 IMPL_EUC_TW_TO_UNICODE_STATE_1,
37 IMPL_EUC_TW_TO_UNICODE_STATE_2_1,
38 IMPL_EUC_TW_TO_UNICODE_STATE_2_2,
39 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
42 struct ImplEucTwToUnicodeContext
44 ImplEucTwToUnicodeState m_eState;
45 sal_Int32 m_nPlane; // 0--15
46 sal_Int32 m_nRow; // 0--93
51 void * ImplCreateEucTwToUnicodeContext()
53 ImplEucTwToUnicodeContext * pContext = new ImplEucTwToUnicodeContext;
54 pContext->m_eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
55 return pContext;
58 void ImplResetEucTwToUnicodeContext(void * pContext)
60 if (pContext)
61 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState
62 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
65 void ImplDestroyEucTwToUnicodeContext(void * pContext)
67 delete static_cast< ImplEucTwToUnicodeContext * >(pContext);
70 sal_Size ImplConvertEucTwToUnicode(void const * pData,
71 void * pContext,
72 char const * pSrcBuf,
73 sal_Size nSrcBytes,
74 sal_Unicode * pDestBuf,
75 sal_Size nDestChars,
76 sal_uInt32 nFlags,
77 sal_uInt32 * pInfo,
78 sal_Size * pSrcCvtBytes)
80 sal_uInt16 const * pCns116431992Data
81 = static_cast< ImplEucTwConverterData const * >(pData)->
82 m_pCns116431992ToUnicodeData;
83 sal_Int32 const * pCns116431992RowOffsets
84 = static_cast< ImplEucTwConverterData const * >(pData)->
85 m_pCns116431992ToUnicodeRowOffsets;
86 sal_Int32 const * pCns116431992PlaneOffsets
87 = static_cast< ImplEucTwConverterData const * >(pData)->
88 m_pCns116431992ToUnicodePlaneOffsets;
89 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
90 sal_Int32 nPlane = 0;
91 sal_Int32 nRow = 0;
92 sal_uInt32 nInfo = 0;
93 sal_Size nConverted = 0;
94 sal_Unicode * pDestBufPtr = pDestBuf;
95 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
97 if (pContext)
99 eState = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState;
100 nPlane = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane;
101 nRow = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow;
104 for (; nConverted < nSrcBytes; ++nConverted)
106 bool bUndefined = true;
107 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
108 switch (eState)
110 case IMPL_EUC_TW_TO_UNICODE_STATE_0:
111 if (nChar < 0x80)
112 if (pDestBufPtr != pDestBufEnd)
113 *pDestBufPtr++ = (sal_Unicode) nChar;
114 else
115 goto no_output;
116 else if (nChar >= 0xA1 && nChar <= 0xFE)
118 nRow = nChar - 0xA1;
119 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
121 else if (nChar == 0x8E)
122 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
123 else
125 bUndefined = false;
126 goto bad_input;
128 break;
130 case IMPL_EUC_TW_TO_UNICODE_STATE_1:
131 if (nChar >= 0xA1 && nChar <= 0xFE)
133 nPlane = 0;
134 goto transform;
136 else
138 bUndefined = false;
139 goto bad_input;
141 break;
143 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
144 if (nChar >= 0xA1 && nChar <= 0xB0)
146 nPlane = nChar - 0xA1;
147 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
149 else
151 bUndefined = false;
152 goto bad_input;
154 break;
156 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
157 if (nChar >= 0xA1 && nChar <= 0xFE)
159 nRow = nChar - 0xA1;
160 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
162 else
164 bUndefined = false;
165 goto bad_input;
167 break;
169 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
170 if (nChar >= 0xA1 && nChar <= 0xFE)
171 goto transform;
172 else
174 bUndefined = false;
175 goto bad_input;
177 break;
179 continue;
181 transform:
183 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
184 if (nPlaneOffset == -1)
185 goto bad_input;
186 else
188 sal_Int32 nOffset
189 = pCns116431992RowOffsets[nPlaneOffset + nRow];
190 if (nOffset == -1)
191 goto bad_input;
192 else
194 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
195 sal_uInt32 nFirst = nFirstLast & 0xFF;
196 sal_uInt32 nLast = nFirstLast >> 8;
197 nChar -= 0xA0;
198 if (nChar >= nFirst && nChar <= nLast)
200 sal_uInt32 nUnicode
201 = pCns116431992Data[nOffset + (nChar - nFirst)];
202 if (nUnicode == 0xFFFF)
203 goto bad_input;
204 else if (ImplIsHighSurrogate(nUnicode))
205 if (pDestBufEnd - pDestBufPtr >= 2)
207 nOffset += nLast - nFirst + 1;
208 nFirst = pCns116431992Data[nOffset++];
209 *pDestBufPtr++ = (sal_Unicode) nUnicode;
210 *pDestBufPtr++
211 = (sal_Unicode)
212 pCns116431992Data[
213 nOffset + (nChar - nFirst)];
215 else
216 goto no_output;
217 else
218 if (pDestBufPtr != pDestBufEnd)
219 *pDestBufPtr++ = (sal_Unicode) nUnicode;
220 else
221 goto no_output;
223 else
224 goto bad_input;
225 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
228 continue;
231 bad_input:
232 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
233 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
234 &nInfo))
236 case sal::detail::textenc::BAD_INPUT_STOP:
237 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
238 break;
240 case sal::detail::textenc::BAD_INPUT_CONTINUE:
241 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
242 continue;
244 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
245 goto no_output;
247 break;
249 no_output:
250 --pSrcBuf;
251 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
252 break;
255 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
256 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
257 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
258 == 0)
260 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
261 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
262 else
263 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
264 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
265 &nInfo))
267 case sal::detail::textenc::BAD_INPUT_STOP:
268 case sal::detail::textenc::BAD_INPUT_CONTINUE:
269 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
270 break;
272 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
273 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
274 break;
278 if (pContext)
280 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState = eState;
281 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane = nPlane;
282 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow = nRow;
284 if (pInfo)
285 *pInfo = nInfo;
286 if (pSrcCvtBytes)
287 *pSrcCvtBytes = nConverted;
289 return pDestBufPtr - pDestBuf;
292 sal_Size ImplConvertUnicodeToEucTw(void const * pData,
293 void * pContext,
294 sal_Unicode const * pSrcBuf,
295 sal_Size nSrcChars,
296 char * pDestBuf,
297 sal_Size nDestBytes,
298 sal_uInt32 nFlags,
299 sal_uInt32 * pInfo,
300 sal_Size * pSrcCvtChars)
302 sal_uInt8 const * pCns116431992Data
303 = static_cast< ImplEucTwConverterData const * >(pData)->
304 m_pUnicodeToCns116431992Data;
305 sal_Int32 const * pCns116431992PageOffsets
306 = static_cast< ImplEucTwConverterData const * >(pData)->
307 m_pUnicodeToCns116431992PageOffsets;
308 sal_Int32 const * pCns116431992PlaneOffsets
309 = static_cast< ImplEucTwConverterData const * >(pData)->
310 m_pUnicodeToCns116431992PlaneOffsets;
311 sal_Unicode nHighSurrogate = 0;
312 sal_uInt32 nInfo = 0;
313 sal_Size nConverted = 0;
314 char * pDestBufPtr = pDestBuf;
315 char * pDestBufEnd = pDestBuf + nDestBytes;
317 if (pContext)
318 nHighSurrogate
319 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
321 for (; nConverted < nSrcChars; ++nConverted)
323 bool bUndefined = true;
324 sal_uInt32 nChar = *pSrcBuf++;
325 if (nHighSurrogate == 0)
327 if (ImplIsHighSurrogate(nChar))
329 nHighSurrogate = (sal_Unicode) nChar;
330 continue;
333 else if (ImplIsLowSurrogate(nChar))
334 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
335 else
337 bUndefined = false;
338 goto bad_input;
341 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
343 bUndefined = false;
344 goto bad_input;
347 if (nChar < 0x80)
348 if (pDestBufPtr != pDestBufEnd)
349 *pDestBufPtr++ = static_cast< char >(nChar);
350 else
351 goto no_output;
352 else
354 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
355 sal_uInt32 nFirst;
356 sal_uInt32 nLast;
357 sal_uInt32 nPlane;
358 if (nOffset == -1)
359 goto bad_input;
360 nOffset
361 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
362 if (nOffset == -1)
363 goto bad_input;
364 nFirst = pCns116431992Data[nOffset++];
365 nLast = pCns116431992Data[nOffset++];
366 nChar &= 0xFF;
367 if (nChar < nFirst || nChar > nLast)
368 goto bad_input;
369 nOffset += 3 * (nChar - nFirst);
370 nPlane = pCns116431992Data[nOffset++];
371 if (nPlane == 0)
372 goto bad_input;
373 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
374 goto no_output;
375 if (nPlane != 1)
377 *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0x8E));
378 *pDestBufPtr++ = static_cast< char >(0xA0 + nPlane);
380 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset++]);
381 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset]);
383 nHighSurrogate = 0;
384 continue;
386 bad_input:
387 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
388 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
389 &nInfo, NULL, 0, NULL))
391 case sal::detail::textenc::BAD_INPUT_STOP:
392 nHighSurrogate = 0;
393 break;
395 case sal::detail::textenc::BAD_INPUT_CONTINUE:
396 nHighSurrogate = 0;
397 continue;
399 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
400 goto no_output;
402 break;
404 no_output:
405 --pSrcBuf;
406 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
407 break;
410 if (nHighSurrogate != 0
411 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
412 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
413 == 0)
415 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
416 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
417 else
418 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
419 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
420 NULL, 0, NULL))
422 case sal::detail::textenc::BAD_INPUT_STOP:
423 case sal::detail::textenc::BAD_INPUT_CONTINUE:
424 nHighSurrogate = 0;
425 break;
427 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
428 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
429 break;
433 if (pContext)
434 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
435 = nHighSurrogate;
436 if (pInfo)
437 *pInfo = nInfo;
438 if (pSrcCvtChars)
439 *pSrcCvtChars = nConverted;
441 return pDestBufPtr - pDestBuf;
444 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */