Update ooo320-m1
[ooovba.git] / sal / textenc / converteuctw.c
blob177637d7bf94ecbb392c6433a06d9c8e20dff7e9
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: converteuctw.c,v $
10 * $Revision: 1.9 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "converteuctw.h"
32 #include "context.h"
33 #include "converter.h"
34 #include "tenchelp.h"
35 #include "unichars.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/*
 * States of the EUC-TW to Unicode decoder.
 *
 * STATE_0    no bytes of a multi-byte sequence are pending.
 * STATE_1    the first byte (0xA1--0xFE) of a two-byte sequence was read.
 * STATE_2_1  the lead byte 0x8E of a four-byte sequence was read.
 * STATE_2_2  additionally, the plane byte of a four-byte sequence was read.
 * STATE_2_3  additionally, the row byte of a four-byte sequence was read.
 *
 * The three STATE_2_x values are consecutive on purpose: the decoder may
 * advance through a four-byte sequence by incrementing the state.
 */
typedef enum
{
    IMPL_EUC_TW_TO_UNICODE_STATE_0 = 0,
    IMPL_EUC_TW_TO_UNICODE_STATE_1 = 1,
    IMPL_EUC_TW_TO_UNICODE_STATE_2_1 = 2,
    IMPL_EUC_TW_TO_UNICODE_STATE_2_2 = 3,
    IMPL_EUC_TW_TO_UNICODE_STATE_2_3 = 4
} ImplEucTwToUnicodeState;
49 typedef struct
51 ImplEucTwToUnicodeState m_eState;
52 sal_Int32 m_nPlane; /* 0--15 */
53 sal_Int32 m_nRow; /* 0--93 */
54 } ImplEucTwToUnicodeContext;
56 void * ImplCreateEucTwToUnicodeContext(void)
58 void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext));
59 ((ImplEucTwToUnicodeContext *) pContext)->m_eState
60 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
61 return pContext;
64 void ImplResetEucTwToUnicodeContext(void * pContext)
66 if (pContext)
67 ((ImplEucTwToUnicodeContext *) pContext)->m_eState
68 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
71 sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData,
72 void * pContext,
73 sal_Char const * pSrcBuf,
74 sal_Size nSrcBytes,
75 sal_Unicode * pDestBuf,
76 sal_Size nDestChars,
77 sal_uInt32 nFlags,
78 sal_uInt32 * pInfo,
79 sal_Size * pSrcCvtBytes)
81 sal_uInt16 const * pCns116431992Data
82 = ((ImplEucTwConverterData const *) pData)->
83 m_pCns116431992ToUnicodeData;
84 sal_Int32 const * pCns116431992RowOffsets
85 = ((ImplEucTwConverterData const *) pData)->
86 m_pCns116431992ToUnicodeRowOffsets;
87 sal_Int32 const * pCns116431992PlaneOffsets
88 = ((ImplEucTwConverterData const *) pData)->
89 m_pCns116431992ToUnicodePlaneOffsets;
90 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
91 sal_Int32 nPlane = 0;
92 sal_Int32 nRow = 0;
93 sal_uInt32 nInfo = 0;
94 sal_Size nConverted = 0;
95 sal_Unicode * pDestBufPtr = pDestBuf;
96 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
98 if (pContext)
100 eState = ((ImplEucTwToUnicodeContext *) pContext)->m_eState;
101 nPlane = ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane;
102 nRow = ((ImplEucTwToUnicodeContext *) pContext)->m_nRow;
105 for (; nConverted < nSrcBytes; ++nConverted)
107 sal_Bool bUndefined = sal_True;
108 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
109 switch (eState)
111 case IMPL_EUC_TW_TO_UNICODE_STATE_0:
112 if (nChar < 0x80)
113 if (pDestBufPtr != pDestBufEnd)
114 *pDestBufPtr++ = (sal_Unicode) nChar;
115 else
116 goto no_output;
117 else if (nChar >= 0xA1 && nChar <= 0xFE)
119 nRow = nChar - 0xA1;
120 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
122 else if (nChar == 0x8E)
123 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
124 else
126 bUndefined = sal_False;
127 goto bad_input;
129 break;
131 case IMPL_EUC_TW_TO_UNICODE_STATE_1:
132 if (nChar >= 0xA1 && nChar <= 0xFE)
134 nPlane = 0;
135 goto transform;
137 else
139 bUndefined = sal_False;
140 goto bad_input;
142 break;
144 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
145 if (nChar >= 0xA1 && nChar <= 0xB0)
147 nPlane = nChar - 0xA1;
148 ++eState;
150 else
152 bUndefined = sal_False;
153 goto bad_input;
155 break;
157 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
158 if (nChar >= 0xA1 && nChar <= 0xFE)
160 nRow = nChar - 0xA1;
161 ++eState;
163 else
165 bUndefined = sal_False;
166 goto bad_input;
168 break;
170 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
171 if (nChar >= 0xA1 && nChar <= 0xFE)
172 goto transform;
173 else
175 bUndefined = sal_False;
176 goto bad_input;
178 break;
180 continue;
182 transform:
184 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
185 if (nPlaneOffset == -1)
186 goto bad_input;
187 else
189 sal_Int32 nOffset
190 = pCns116431992RowOffsets[nPlaneOffset + nRow];
191 if (nOffset == -1)
192 goto bad_input;
193 else
195 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
196 sal_uInt32 nFirst = nFirstLast & 0xFF;
197 sal_uInt32 nLast = nFirstLast >> 8;
198 nChar -= 0xA0;
199 if (nChar >= nFirst && nChar <= nLast)
201 sal_uInt32 nUnicode
202 = pCns116431992Data[nOffset + (nChar - nFirst)];
203 if (nUnicode == 0xFFFF)
204 goto bad_input;
205 else if (ImplIsHighSurrogate(nUnicode))
206 if (pDestBufEnd - pDestBufPtr >= 2)
208 nOffset += nLast - nFirst + 1;
209 nFirst = pCns116431992Data[nOffset++];
210 *pDestBufPtr++ = (sal_Unicode) nUnicode;
211 *pDestBufPtr++
212 = (sal_Unicode)
213 pCns116431992Data[
214 nOffset + (nChar - nFirst)];
216 else
217 goto no_output;
218 else
219 if (pDestBufPtr != pDestBufEnd)
220 *pDestBufPtr++ = (sal_Unicode) nUnicode;
221 else
222 goto no_output;
224 else
225 goto bad_input;
226 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
229 continue;
232 bad_input:
233 switch (ImplHandleBadInputTextToUnicodeConversion(
234 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
235 &nInfo))
237 case IMPL_BAD_INPUT_STOP:
238 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
239 break;
241 case IMPL_BAD_INPUT_CONTINUE:
242 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
243 continue;
245 case IMPL_BAD_INPUT_NO_OUTPUT:
246 goto no_output;
248 break;
250 no_output:
251 --pSrcBuf;
252 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
253 break;
256 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
257 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
258 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
259 == 0)
261 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
262 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
263 else
264 switch (ImplHandleBadInputTextToUnicodeConversion(
265 sal_False, sal_True, 0, nFlags, &pDestBufPtr,
266 pDestBufEnd, &nInfo))
268 case IMPL_BAD_INPUT_STOP:
269 case IMPL_BAD_INPUT_CONTINUE:
270 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
271 break;
273 case IMPL_BAD_INPUT_NO_OUTPUT:
274 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
275 break;
279 if (pContext)
281 ((ImplEucTwToUnicodeContext *) pContext)->m_eState = eState;
282 ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane = nPlane;
283 ((ImplEucTwToUnicodeContext *) pContext)->m_nRow = nRow;
285 if (pInfo)
286 *pInfo = nInfo;
287 if (pSrcCvtBytes)
288 *pSrcCvtBytes = nConverted;
290 return pDestBufPtr - pDestBuf;
293 sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData,
294 void * pContext,
295 sal_Unicode const * pSrcBuf,
296 sal_Size nSrcChars,
297 sal_Char * pDestBuf,
298 sal_Size nDestBytes,
299 sal_uInt32 nFlags,
300 sal_uInt32 * pInfo,
301 sal_Size * pSrcCvtChars)
303 sal_uInt8 const * pCns116431992Data
304 = ((ImplEucTwConverterData const *) pData)->
305 m_pUnicodeToCns116431992Data;
306 sal_Int32 const * pCns116431992PageOffsets
307 = ((ImplEucTwConverterData const *) pData)->
308 m_pUnicodeToCns116431992PageOffsets;
309 sal_Int32 const * pCns116431992PlaneOffsets
310 = ((ImplEucTwConverterData const *) pData)->
311 m_pUnicodeToCns116431992PlaneOffsets;
312 sal_Unicode nHighSurrogate = 0;
313 sal_uInt32 nInfo = 0;
314 sal_Size nConverted = 0;
315 sal_Char * pDestBufPtr = pDestBuf;
316 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
318 if (pContext)
319 nHighSurrogate
320 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
322 for (; nConverted < nSrcChars; ++nConverted)
324 sal_Bool bUndefined = sal_True;
325 sal_uInt32 nChar = *pSrcBuf++;
326 if (nHighSurrogate == 0)
328 if (ImplIsHighSurrogate(nChar))
330 nHighSurrogate = (sal_Unicode) nChar;
331 continue;
334 else if (ImplIsLowSurrogate(nChar))
335 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
336 else
338 bUndefined = sal_False;
339 goto bad_input;
342 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
344 bUndefined = sal_False;
345 goto bad_input;
348 if (nChar < 0x80)
349 if (pDestBufPtr != pDestBufEnd)
350 *pDestBufPtr++ = (sal_Char) nChar;
351 else
352 goto no_output;
353 else
355 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
356 sal_uInt32 nFirst;
357 sal_uInt32 nLast;
358 sal_uInt32 nPlane;
359 if (nOffset == -1)
360 goto bad_input;
361 nOffset
362 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
363 if (nOffset == -1)
364 goto bad_input;
365 nFirst = pCns116431992Data[nOffset++];
366 nLast = pCns116431992Data[nOffset++];
367 nChar &= 0xFF;
368 if (nChar < nFirst || nChar > nLast)
369 goto bad_input;
370 nOffset += 3 * (nChar - nFirst);
371 nPlane = pCns116431992Data[nOffset++];
372 if (nPlane == 0)
373 goto bad_input;
374 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
375 goto no_output;
376 if (nPlane != 1)
378 *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E;
379 *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane);
381 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]);
382 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]);
384 nHighSurrogate = 0;
385 continue;
387 bad_input:
388 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
389 nChar,
390 nFlags,
391 &pDestBufPtr,
392 pDestBufEnd,
393 &nInfo,
394 NULL,
396 NULL))
398 case IMPL_BAD_INPUT_STOP:
399 nHighSurrogate = 0;
400 break;
402 case IMPL_BAD_INPUT_CONTINUE:
403 nHighSurrogate = 0;
404 continue;
406 case IMPL_BAD_INPUT_NO_OUTPUT:
407 goto no_output;
409 break;
411 no_output:
412 --pSrcBuf;
413 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
414 break;
417 if (nHighSurrogate != 0
418 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
419 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
420 == 0)
422 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
423 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
424 else
425 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
427 nFlags,
428 &pDestBufPtr,
429 pDestBufEnd,
430 &nInfo,
431 NULL,
433 NULL))
435 case IMPL_BAD_INPUT_STOP:
436 case IMPL_BAD_INPUT_CONTINUE:
437 nHighSurrogate = 0;
438 break;
440 case IMPL_BAD_INPUT_NO_OUTPUT:
441 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
442 break;
446 if (pContext)
447 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
448 = nHighSurrogate;
449 if (pInfo)
450 *pInfo = nInfo;
451 if (pSrcCvtChars)
452 *pSrcCvtChars = nConverted;
454 return pDestBufPtr - pDestBuf;