Bump for 3.6-28
[LibreOffice.git] / sal / textenc / converteuctw.cxx
blob0274fc3f1211344147f02aff7d398a2976f637bf
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
34 #include "context.hxx"
35 #include "converter.hxx"
36 #include "converteuctw.hxx"
37 #include "tenchelp.hxx"
38 #include "unichars.hxx"
40 namespace {
42 enum ImplEucTwToUnicodeState
44 IMPL_EUC_TW_TO_UNICODE_STATE_0,
45 IMPL_EUC_TW_TO_UNICODE_STATE_1,
46 IMPL_EUC_TW_TO_UNICODE_STATE_2_1,
47 IMPL_EUC_TW_TO_UNICODE_STATE_2_2,
48 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
51 struct ImplEucTwToUnicodeContext
53 ImplEucTwToUnicodeState m_eState;
54 sal_Int32 m_nPlane; // 0--15
55 sal_Int32 m_nRow; // 0--93
60 void * ImplCreateEucTwToUnicodeContext()
62 ImplEucTwToUnicodeContext * pContext = new ImplEucTwToUnicodeContext;
63 pContext->m_eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
64 return pContext;
67 void ImplResetEucTwToUnicodeContext(void * pContext)
69 if (pContext)
70 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState
71 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
74 void ImplDestroyEucTwToUnicodeContext(void * pContext)
76 delete static_cast< ImplEucTwToUnicodeContext * >(pContext);
79 sal_Size ImplConvertEucTwToUnicode(void const * pData,
80 void * pContext,
81 char const * pSrcBuf,
82 sal_Size nSrcBytes,
83 sal_Unicode * pDestBuf,
84 sal_Size nDestChars,
85 sal_uInt32 nFlags,
86 sal_uInt32 * pInfo,
87 sal_Size * pSrcCvtBytes)
89 sal_uInt16 const * pCns116431992Data
90 = static_cast< ImplEucTwConverterData const * >(pData)->
91 m_pCns116431992ToUnicodeData;
92 sal_Int32 const * pCns116431992RowOffsets
93 = static_cast< ImplEucTwConverterData const * >(pData)->
94 m_pCns116431992ToUnicodeRowOffsets;
95 sal_Int32 const * pCns116431992PlaneOffsets
96 = static_cast< ImplEucTwConverterData const * >(pData)->
97 m_pCns116431992ToUnicodePlaneOffsets;
98 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
99 sal_Int32 nPlane = 0;
100 sal_Int32 nRow = 0;
101 sal_uInt32 nInfo = 0;
102 sal_Size nConverted = 0;
103 sal_Unicode * pDestBufPtr = pDestBuf;
104 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
106 if (pContext)
108 eState = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState;
109 nPlane = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane;
110 nRow = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow;
113 for (; nConverted < nSrcBytes; ++nConverted)
115 bool bUndefined = true;
116 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
117 switch (eState)
119 case IMPL_EUC_TW_TO_UNICODE_STATE_0:
120 if (nChar < 0x80)
121 if (pDestBufPtr != pDestBufEnd)
122 *pDestBufPtr++ = (sal_Unicode) nChar;
123 else
124 goto no_output;
125 else if (nChar >= 0xA1 && nChar <= 0xFE)
127 nRow = nChar - 0xA1;
128 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
130 else if (nChar == 0x8E)
131 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
132 else
134 bUndefined = false;
135 goto bad_input;
137 break;
139 case IMPL_EUC_TW_TO_UNICODE_STATE_1:
140 if (nChar >= 0xA1 && nChar <= 0xFE)
142 nPlane = 0;
143 goto transform;
145 else
147 bUndefined = false;
148 goto bad_input;
150 break;
152 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
153 if (nChar >= 0xA1 && nChar <= 0xB0)
155 nPlane = nChar - 0xA1;
156 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
158 else
160 bUndefined = false;
161 goto bad_input;
163 break;
165 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
166 if (nChar >= 0xA1 && nChar <= 0xFE)
168 nRow = nChar - 0xA1;
169 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
171 else
173 bUndefined = false;
174 goto bad_input;
176 break;
178 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
179 if (nChar >= 0xA1 && nChar <= 0xFE)
180 goto transform;
181 else
183 bUndefined = false;
184 goto bad_input;
186 break;
188 continue;
190 transform:
192 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
193 if (nPlaneOffset == -1)
194 goto bad_input;
195 else
197 sal_Int32 nOffset
198 = pCns116431992RowOffsets[nPlaneOffset + nRow];
199 if (nOffset == -1)
200 goto bad_input;
201 else
203 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
204 sal_uInt32 nFirst = nFirstLast & 0xFF;
205 sal_uInt32 nLast = nFirstLast >> 8;
206 nChar -= 0xA0;
207 if (nChar >= nFirst && nChar <= nLast)
209 sal_uInt32 nUnicode
210 = pCns116431992Data[nOffset + (nChar - nFirst)];
211 if (nUnicode == 0xFFFF)
212 goto bad_input;
213 else if (ImplIsHighSurrogate(nUnicode))
214 if (pDestBufEnd - pDestBufPtr >= 2)
216 nOffset += nLast - nFirst + 1;
217 nFirst = pCns116431992Data[nOffset++];
218 *pDestBufPtr++ = (sal_Unicode) nUnicode;
219 *pDestBufPtr++
220 = (sal_Unicode)
221 pCns116431992Data[
222 nOffset + (nChar - nFirst)];
224 else
225 goto no_output;
226 else
227 if (pDestBufPtr != pDestBufEnd)
228 *pDestBufPtr++ = (sal_Unicode) nUnicode;
229 else
230 goto no_output;
232 else
233 goto bad_input;
234 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
237 continue;
240 bad_input:
241 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
242 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
243 &nInfo))
245 case sal::detail::textenc::BAD_INPUT_STOP:
246 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
247 break;
249 case sal::detail::textenc::BAD_INPUT_CONTINUE:
250 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
251 continue;
253 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
254 goto no_output;
256 break;
258 no_output:
259 --pSrcBuf;
260 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
261 break;
264 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
265 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
266 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
267 == 0)
269 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
270 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
271 else
272 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
273 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
274 &nInfo))
276 case sal::detail::textenc::BAD_INPUT_STOP:
277 case sal::detail::textenc::BAD_INPUT_CONTINUE:
278 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
279 break;
281 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
282 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
283 break;
287 if (pContext)
289 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState = eState;
290 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane = nPlane;
291 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow = nRow;
293 if (pInfo)
294 *pInfo = nInfo;
295 if (pSrcCvtBytes)
296 *pSrcCvtBytes = nConverted;
298 return pDestBufPtr - pDestBuf;
301 sal_Size ImplConvertUnicodeToEucTw(void const * pData,
302 void * pContext,
303 sal_Unicode const * pSrcBuf,
304 sal_Size nSrcChars,
305 char * pDestBuf,
306 sal_Size nDestBytes,
307 sal_uInt32 nFlags,
308 sal_uInt32 * pInfo,
309 sal_Size * pSrcCvtChars)
311 sal_uInt8 const * pCns116431992Data
312 = static_cast< ImplEucTwConverterData const * >(pData)->
313 m_pUnicodeToCns116431992Data;
314 sal_Int32 const * pCns116431992PageOffsets
315 = static_cast< ImplEucTwConverterData const * >(pData)->
316 m_pUnicodeToCns116431992PageOffsets;
317 sal_Int32 const * pCns116431992PlaneOffsets
318 = static_cast< ImplEucTwConverterData const * >(pData)->
319 m_pUnicodeToCns116431992PlaneOffsets;
320 sal_Unicode nHighSurrogate = 0;
321 sal_uInt32 nInfo = 0;
322 sal_Size nConverted = 0;
323 char * pDestBufPtr = pDestBuf;
324 char * pDestBufEnd = pDestBuf + nDestBytes;
326 if (pContext)
327 nHighSurrogate
328 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
330 for (; nConverted < nSrcChars; ++nConverted)
332 bool bUndefined = true;
333 sal_uInt32 nChar = *pSrcBuf++;
334 if (nHighSurrogate == 0)
336 if (ImplIsHighSurrogate(nChar))
338 nHighSurrogate = (sal_Unicode) nChar;
339 continue;
342 else if (ImplIsLowSurrogate(nChar))
343 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
344 else
346 bUndefined = false;
347 goto bad_input;
350 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
352 bUndefined = false;
353 goto bad_input;
356 if (nChar < 0x80)
357 if (pDestBufPtr != pDestBufEnd)
358 *pDestBufPtr++ = static_cast< char >(nChar);
359 else
360 goto no_output;
361 else
363 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
364 sal_uInt32 nFirst;
365 sal_uInt32 nLast;
366 sal_uInt32 nPlane;
367 if (nOffset == -1)
368 goto bad_input;
369 nOffset
370 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
371 if (nOffset == -1)
372 goto bad_input;
373 nFirst = pCns116431992Data[nOffset++];
374 nLast = pCns116431992Data[nOffset++];
375 nChar &= 0xFF;
376 if (nChar < nFirst || nChar > nLast)
377 goto bad_input;
378 nOffset += 3 * (nChar - nFirst);
379 nPlane = pCns116431992Data[nOffset++];
380 if (nPlane == 0)
381 goto bad_input;
382 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
383 goto no_output;
384 if (nPlane != 1)
386 *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0x8E));
387 *pDestBufPtr++ = static_cast< char >(0xA0 + nPlane);
389 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset++]);
390 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset]);
392 nHighSurrogate = 0;
393 continue;
395 bad_input:
396 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
397 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
398 &nInfo, NULL, 0, NULL))
400 case sal::detail::textenc::BAD_INPUT_STOP:
401 nHighSurrogate = 0;
402 break;
404 case sal::detail::textenc::BAD_INPUT_CONTINUE:
405 nHighSurrogate = 0;
406 continue;
408 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
409 goto no_output;
411 break;
413 no_output:
414 --pSrcBuf;
415 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
416 break;
419 if (nHighSurrogate != 0
420 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
421 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
422 == 0)
424 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
425 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
426 else
427 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
428 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
429 NULL, 0, NULL))
431 case sal::detail::textenc::BAD_INPUT_STOP:
432 case sal::detail::textenc::BAD_INPUT_CONTINUE:
433 nHighSurrogate = 0;
434 break;
436 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
437 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
438 break;
442 if (pContext)
443 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
444 = nHighSurrogate;
445 if (pInfo)
446 *pInfo = nInfo;
447 if (pSrcCvtChars)
448 *pSrcCvtChars = nConverted;
450 return pDestBufPtr - pDestBuf;
453 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */