1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: converteuctw.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "converteuctw.h"
33 #include "converter.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/* Decoder states used by ImplConvertEucTwToUnicode.
 *
 * STATE_0   - start state: set on context creation/reset and after each
 *             completed or rejected character.
 * STATE_1   - entered from STATE_0 after a byte in 0xA1..0xFE.
 * STATE_2_1 - entered from STATE_0 after the byte 0x8E; in this state a byte
 *             in 0xA1..0xB0 selects the plane (byte - 0xA1).
 * STATE_2_2, STATE_2_3 - later states that each accept a byte in 0xA1..0xFE
 *             (presumably the row and column bytes of the 0x8E-prefixed
 *             form; the transitions between them are not visible in this
 *             view -- TODO confirm). */
42 IMPL_EUC_TW_TO_UNICODE_STATE_0
,
43 IMPL_EUC_TW_TO_UNICODE_STATE_1
,
44 IMPL_EUC_TW_TO_UNICODE_STATE_2_1
,
45 IMPL_EUC_TW_TO_UNICODE_STATE_2_2
,
46 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
47 } ImplEucTwToUnicodeState
;
/* Decoder state carried between successive calls of
 * ImplConvertEucTwToUnicode: the converter loads these three fields into
 * locals before its main loop and writes them back afterwards. */
51 ImplEucTwToUnicodeState m_eState
;
52 sal_Int32 m_nPlane
; /* 0--15 */
53 sal_Int32 m_nRow
; /* 0--93 */
54 } ImplEucTwToUnicodeContext
;
/* Allocate a fresh EUC-TW-to-Unicode conversion context with
 * rtl_allocateMemory and put it into the start state
 * (IMPL_EUC_TW_TO_UNICODE_STATE_0).  Only m_eState is initialised here;
 * m_nPlane and m_nRow are left as allocated.
 *
 * Presumably the allocated pointer is returned to the caller (the return
 * statement is not visible in this view).
 *
 * NOTE(review): no NULL check on the allocation result is visible before it
 * is dereferenced -- confirm whether rtl_allocateMemory can return NULL
 * here. */
56 void * ImplCreateEucTwToUnicodeContext(void)
58 void * pContext
= rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext
));
59 ((ImplEucTwToUnicodeContext
*) pContext
)->m_eState
60 = IMPL_EUC_TW_TO_UNICODE_STATE_0
;
/* Reset an EUC-TW-to-Unicode conversion context to the start state
 * (IMPL_EUC_TW_TO_UNICODE_STATE_0), discarding any partially decoded
 * multi-byte sequence.  m_nPlane and m_nRow are not touched.
 *
 * pContext: context to reset, treated as an ImplEucTwToUnicodeContext.
 *
 * NOTE(review): any guard against a null pContext is not visible in this
 * view -- confirm before passing NULL. */
64 void ImplResetEucTwToUnicodeContext(void * pContext
)
67 ((ImplEucTwToUnicodeContext
*) pContext
)->m_eState
68 = IMPL_EUC_TW_TO_UNICODE_STATE_0
;
/* Convert a buffer of EUC-TW bytes to UTF-16 code units.
 *
 * The decoder is a byte-at-a-time state machine (ImplEucTwToUnicodeState).
 * Completed characters are looked up in the CNS 11643-1992 tables taken
 * from pData (an ImplEucTwConverterData): plane offsets, then row offsets,
 * then the per-row data.
 *
 * Parameters visible in this view:
 *   pData         converter data supplying the three lookup tables.
 *   pSrcBuf       source bytes; read as unsigned bytes, one per iteration.
 *   pDestBuf      destination buffer for sal_Unicode code units.
 *   pSrcCvtBytes  receives the number of source bytes consumed.
 * The code also uses pContext, nSrcBytes, nDestChars and nFlags (and the
 * locals nPlane, nRow, nInfo); their declarations are not visible here.
 *
 * Returns the number of sal_Unicode units written (pDestBufPtr - pDestBuf). */
71 sal_Size
ImplConvertEucTwToUnicode(ImplTextConverterData
const * pData
,
73 sal_Char
const * pSrcBuf
,
75 sal_Unicode
* pDestBuf
,
79 sal_Size
* pSrcCvtBytes
)
81 sal_uInt16
const * pCns116431992Data
82 = ((ImplEucTwConverterData
const *) pData
)->
83 m_pCns116431992ToUnicodeData
;
84 sal_Int32
const * pCns116431992RowOffsets
85 = ((ImplEucTwConverterData
const *) pData
)->
86 m_pCns116431992ToUnicodeRowOffsets
;
87 sal_Int32
const * pCns116431992PlaneOffsets
88 = ((ImplEucTwConverterData
const *) pData
)->
89 m_pCns116431992ToUnicodePlaneOffsets
;
90 ImplEucTwToUnicodeState eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
94 sal_Size nConverted
= 0;
95 sal_Unicode
* pDestBufPtr
= pDestBuf
;
96 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume from the state saved in the context by a previous call
 * (presumably only when pContext is non-null; the guard is not visible). */
100 eState
= ((ImplEucTwToUnicodeContext
*) pContext
)->m_eState
;
101 nPlane
= ((ImplEucTwToUnicodeContext
*) pContext
)->m_nPlane
;
102 nRow
= ((ImplEucTwToUnicodeContext
*) pContext
)->m_nRow
;
/* Main loop: consume one source byte per iteration; nConverted counts the
 * bytes consumed so far. */
105 for (; nConverted
< nSrcBytes
; ++nConverted
)
/* bUndefined starts out true and is cleared on the paths below that reject
 * the byte; it is later passed to the bad-input handler. */
107 sal_Bool bUndefined
= sal_True
;
108 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/* Start state: the byte is either copied straight to the output (the
 * condition selecting this branch is not visible -- presumably single-byte
 * ASCII), starts a two-byte sequence (0xA1..0xFE), or is the 0x8E prefix. */
111 case IMPL_EUC_TW_TO_UNICODE_STATE_0
:
113 if (pDestBufPtr
!= pDestBufEnd
)
114 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
117 else if (nChar
>= 0xA1 && nChar
<= 0xFE)
120 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_1
;
122 else if (nChar
== 0x8E)
123 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_1
;
126 bUndefined
= sal_False
;
/* Second byte after a 0xA1..0xFE lead byte; must itself be in 0xA1..0xFE. */
131 case IMPL_EUC_TW_TO_UNICODE_STATE_1
:
132 if (nChar
>= 0xA1 && nChar
<= 0xFE)
139 bUndefined
= sal_False
;
/* Byte after 0x8E: 0xA1..0xB0 selects the plane index 0..15. */
144 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1
:
145 if (nChar
>= 0xA1 && nChar
<= 0xB0)
147 nPlane
= nChar
- 0xA1;
152 bUndefined
= sal_False
;
157 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2
:
158 if (nChar
>= 0xA1 && nChar
<= 0xFE)
165 bUndefined
= sal_False
;
170 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3
:
171 if (nChar
>= 0xA1 && nChar
<= 0xFE)
175 bUndefined
= sal_False
;
/* Table lookup: plane offset -> row offset -> row data.  A plane offset of
 * -1 is tested for explicitly (presumably "plane not mapped"). */
184 sal_Int32 nPlaneOffset
= pCns116431992PlaneOffsets
[nPlane
];
185 if (nPlaneOffset
== -1)
190 = pCns116431992RowOffsets
[nPlaneOffset
+ nRow
];
/* The first data entry of a row packs the first mapped column in its low
 * 8 bits and the last mapped column in the bits above. */
195 sal_uInt32 nFirstLast
= pCns116431992Data
[nOffset
++];
196 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
197 sal_uInt32 nLast
= nFirstLast
>> 8;
199 if (nChar
>= nFirst
&& nChar
<= nLast
)
202 = pCns116431992Data
[nOffset
+ (nChar
- nFirst
)];
/* 0xFFFF is tested for explicitly (presumably the "no mapping" marker). */
203 if (nUnicode
== 0xFFFF)
/* High surrogate: two output units are required.  The second unit comes
 * from a further table that follows the current row's nLast - nFirst + 1
 * entries and begins with its own "first" value. */
205 else if (ImplIsHighSurrogate(nUnicode
))
206 if (pDestBufEnd
- pDestBufPtr
>= 2)
208 nOffset
+= nLast
- nFirst
+ 1;
209 nFirst
= pCns116431992Data
[nOffset
++];
210 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
214 nOffset
+ (nChar
- nFirst
)];
/* Ordinary (non-surrogate) result: needs one free output unit. */
219 if (pDestBufPtr
!= pDestBufEnd
)
220 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
226 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
/* Bad-input handling: delegate to the shared handler; the STOP and CONTINUE
 * outcomes both put the decoder back into the start state. */
233 switch (ImplHandleBadInputTextToUnicodeConversion(
234 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
237 case IMPL_BAD_INPUT_STOP
:
238 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
241 case IMPL_BAD_INPUT_CONTINUE
:
242 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
245 case IMPL_BAD_INPUT_NO_OUTPUT
:
252 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* After the loop: the decoder stopped in the middle of a multi-byte
 * sequence.  The ERROR / DESTBUFFERTOSMALL bits of nInfo are also tested
 * (the comparison that completes this condition is not visible).  Without
 * the FLUSH flag, SRCBUFFERTOSMALL is reported; the handler call below is
 * presumably the FLUSH alternative -- TODO confirm. */
256 if (eState
!= IMPL_EUC_TW_TO_UNICODE_STATE_0
257 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
258 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
261 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
262 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
264 switch (ImplHandleBadInputTextToUnicodeConversion(
265 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
,
266 pDestBufEnd
, &nInfo
))
268 case IMPL_BAD_INPUT_STOP
:
269 case IMPL_BAD_INPUT_CONTINUE
:
270 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
273 case IMPL_BAD_INPUT_NO_OUTPUT
:
274 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the decoder state so that a following call can resume from it. */
281 ((ImplEucTwToUnicodeContext
*) pContext
)->m_eState
= eState
;
282 ((ImplEucTwToUnicodeContext
*) pContext
)->m_nPlane
= nPlane
;
283 ((ImplEucTwToUnicodeContext
*) pContext
)->m_nRow
= nRow
;
/* Report the number of source bytes consumed and return the number of
 * UTF-16 units produced. */
288 *pSrcCvtBytes
= nConverted
;
290 return pDestBufPtr
- pDestBuf
;
/* Convert a buffer of UTF-16 code units to EUC-TW bytes.
 *
 * Surrogate pairs are combined into a single code point first; a pending
 * high surrogate is kept in the context (ImplUnicodeToTextContext,
 * m_nHighSurrogate) between calls.  Code points are mapped through the
 * Unicode-to-CNS 11643-1992 tables taken from pData (an
 * ImplEucTwConverterData): plane offsets, then page offsets, then the
 * per-page data.
 *
 * Parameters visible in this view:
 *   pData         converter data supplying the three lookup tables.
 *   pSrcBuf       source UTF-16 code units.
 *   pSrcCvtChars  receives the number of source units consumed.
 * The code also uses pContext, nSrcChars, pDestBuf, nDestBytes and nFlags;
 * their declarations are not visible here.
 *
 * Returns the number of bytes written (pDestBufPtr - pDestBuf). */
293 sal_Size
ImplConvertUnicodeToEucTw(ImplTextConverterData
const * pData
,
295 sal_Unicode
const * pSrcBuf
,
301 sal_Size
* pSrcCvtChars
)
303 sal_uInt8
const * pCns116431992Data
304 = ((ImplEucTwConverterData
const *) pData
)->
305 m_pUnicodeToCns116431992Data
;
306 sal_Int32
const * pCns116431992PageOffsets
307 = ((ImplEucTwConverterData
const *) pData
)->
308 m_pUnicodeToCns116431992PageOffsets
;
309 sal_Int32
const * pCns116431992PlaneOffsets
310 = ((ImplEucTwConverterData
const *) pData
)->
311 m_pUnicodeToCns116431992PlaneOffsets
;
312 sal_Unicode nHighSurrogate
= 0;
313 sal_uInt32 nInfo
= 0;
314 sal_Size nConverted
= 0;
315 sal_Char
* pDestBufPtr
= pDestBuf
;
316 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Pick up a high surrogate left pending by a previous call (presumably only
 * when pContext is non-null; the guard and assignment target are not
 * visible). */
320 = ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
;
/* Main loop: consume one UTF-16 unit per iteration. */
322 for (; nConverted
< nSrcChars
; ++nConverted
)
324 sal_Bool bUndefined
= sal_True
;
325 sal_uInt32 nChar
= *pSrcBuf
++;
/* No pending high surrogate: a high surrogate is remembered for later
 * (what else happens in that branch is not visible). */
326 if (nHighSurrogate
== 0)
328 if (ImplIsHighSurrogate(nChar
))
330 nHighSurrogate
= (sal_Unicode
) nChar
;
/* Pending high surrogate followed by a low surrogate: combine the pair into
 * one code point. */
334 else if (ImplIsLowSurrogate(nChar
))
335 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
338 bUndefined
= sal_False
;
/* A low surrogate or a noncharacter at this point clears bUndefined (the
 * statements that follow on this path are not visible). */
342 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
344 bUndefined
= sal_False
;
/* Single-byte output path (the condition selecting it is not visible --
 * presumably ASCII): copy the value as one byte if there is room. */
349 if (pDestBufPtr
!= pDestBufEnd
)
350 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* Table lookup: the Unicode plane (nChar >> 16) selects a plane offset, the
 * high byte of the low 16 bits selects a page, and the page data starts
 * with the first and last mapped values. */
355 sal_Int32 nOffset
= pCns116431992PlaneOffsets
[nChar
>> 16];
362 = pCns116431992PageOffsets
[nOffset
+ ((nChar
& 0xFF00) >> 8)];
365 nFirst
= pCns116431992Data
[nOffset
++];
366 nLast
= pCns116431992Data
[nOffset
++];
368 if (nChar
< nFirst
|| nChar
> nLast
)
/* Each mapped entry occupies three bytes; the first is the CNS plane and the
 * two bytes after it are each emitted below with 0xA0 added. */
370 nOffset
+= 3 * (nChar
- nFirst
);
371 nPlane
= pCns116431992Data
[nOffset
++];
/* Plane 1 needs 2 output bytes, any other plane 4 (0x8E, plane byte, and the
 * two table bytes). */
374 if (pDestBufEnd
- pDestBufPtr
< (nPlane
== 1 ? 2 : 4))
/* 0x8E prefix plus plane byte (presumably emitted only when nPlane != 1;
 * the guard is not visible). */
378 *pDestBufPtr
++ = (sal_Char
) (unsigned char) 0x8E;
379 *pDestBufPtr
++ = (sal_Char
) (0xA0 + nPlane
);
381 *pDestBufPtr
++ = (sal_Char
) (0xA0 + pCns116431992Data
[nOffset
++]);
382 *pDestBufPtr
++ = (sal_Char
) (0xA0 + pCns116431992Data
[nOffset
]);
/* Bad-input handling via the shared handler (remaining arguments and the
 * bodies of the cases are not visible). */
388 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined
,
398 case IMPL_BAD_INPUT_STOP
:
402 case IMPL_BAD_INPUT_CONTINUE
:
406 case IMPL_BAD_INPUT_NO_OUTPUT
:
413 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a high surrogate is still pending.  The ERROR /
 * DESTBUFFERTOSMALL bits of nInfo are also tested (the comparison that
 * completes this condition is not visible).
 * NOTE(review): SRCBUFFERTOSMALL is set here when the FLUSH flag IS set
 * ("!= 0"), whereas ImplConvertEucTwToUnicode sets the corresponding bit
 * when FLUSH is NOT set ("== 0") -- confirm which polarity is intended. */
417 if (nHighSurrogate
!= 0
418 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
419 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
422 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
423 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
425 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False
,
435 case IMPL_BAD_INPUT_STOP
:
436 case IMPL_BAD_INPUT_CONTINUE
:
440 case IMPL_BAD_INPUT_NO_OUTPUT
:
441 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Write-back to the context's pending high surrogate (the assigned value is
 * not visible; presumably nHighSurrogate). */
447 ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
/* Report the number of source units consumed and return the number of bytes
 * produced. */
452 *pSrcCvtChars
= nConverted
;
454 return pDestBufPtr
- pDestBuf
;