1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertgb18030.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertgb18030.h"
33 #include "converter.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/* Decoder states for the GB 18030 to Unicode conversion.  The four
   enumerators are the values stored in the context's m_eState member and
   the labels tested by the conversion loop (STATE_0 is the value a context
   is created with and reset to).
   NOTE(review): the opening of this typedef is not present in this
   listing; only the enumerators and the typedef name are visible. */
42 IMPL_GB_18030_TO_UNICODE_STATE_0
,
43 IMPL_GB_18030_TO_UNICODE_STATE_1
,
44 IMPL_GB_18030_TO_UNICODE_STATE_2
,
45 IMPL_GB_18030_TO_UNICODE_STATE_3
46 } ImplGb18030ToUnicodeState
;
/* Conversion context carried between calls: m_eState holds the decoder
   state that ImplConvertGb18030ToUnicode loads on entry and stores on exit.
   NOTE(review): the conversion routine also reads and writes an m_nCode
   member of this type; its declaration is not present in this listing. */
50 ImplGb18030ToUnicodeState m_eState
;
52 } ImplGb18030ToUnicodeContext
;
/* Allocates an ImplGb18030ToUnicodeContext with rtl_allocateMemory and puts
   it into the initial decoder state (IMPL_GB_18030_TO_UNICODE_STATE_0).
   Only m_eState is visibly initialised here.
   NOTE(review): no test of the allocation result is visible between the
   rtl_allocateMemory call and the write to m_eState -- confirm that a null
   result cannot reach the dereference below. */
54 void * ImplCreateGb18030ToUnicodeContext(void)
57 = rtl_allocateMemory(sizeof (ImplGb18030ToUnicodeContext
));
/* Start in the "no pending bytes" state. */
58 ((ImplGb18030ToUnicodeContext
*) pContext
)->m_eState
59 = IMPL_GB_18030_TO_UNICODE_STATE_0
;
/* Resets a context created by ImplCreateGb18030ToUnicodeContext: m_eState is
   set back to IMPL_GB_18030_TO_UNICODE_STATE_0, discarding any partially
   decoded sequence recorded in the state.
   NOTE(review): some lines of this function are not present in this listing;
   whether pContext is checked for null before the write cannot be told from
   the visible text. */
63 void ImplResetGb18030ToUnicodeContext(void * pContext
)
66 ((ImplGb18030ToUnicodeContext
*) pContext
)->m_eState
67 = IMPL_GB_18030_TO_UNICODE_STATE_0
;
/* Converts GB 18030 encoded bytes from pSrcBuf into sal_Unicode units in
   pDestBuf.

   Visible contract:
   - pData is used as an ImplGb18030ConverterData and supplies the lookup
     table (m_pGb18030ToUnicodeData) and the range list
     (m_pGb18030ToUnicodeRanges).
   - The decoder state and the partially accumulated code are loaded from
     pContext (m_eState, m_nCode) before the loop and written back after it,
     so a multi-byte sequence may span calls.
   - *pSrcCvtBytes receives the number of source bytes consumed.
   - The return value is the number of sal_Unicode units written
     (pDestBufPtr - pDestBuf).
   - Codes that map to U+10000 and above are written as a high/low
     surrogate pair.

   NOTE(review): this listing omits a number of source lines (several
   parameters, braces, some conditions and statements).  The comments below
   describe only the statements that are visible. */
70 sal_Size
ImplConvertGb18030ToUnicode(ImplTextConverterData
const * pData
,
72 sal_Char
const * pSrcBuf
,
74 sal_Unicode
* pDestBuf
,
78 sal_Size
* pSrcCvtBytes
)
/* Two-byte lookup table and four-byte range descriptions from the
   converter data. */
80 sal_Unicode
const * pGb18030Data
81 = ((ImplGb18030ConverterData
const *) pData
)->m_pGb18030ToUnicodeData
;
82 ImplGb180302000ToUnicodeRange
const * pGb18030Ranges
83 = ((ImplGb18030ConverterData
const *) pData
)->
84 m_pGb18030ToUnicodeRanges
;
85 ImplGb18030ToUnicodeState eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
88 sal_Size nConverted
= 0;
/* Write cursor and one-past-the-end of the destination buffer. */
89 sal_Unicode
* pDestBufPtr
= pDestBuf
;
90 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume from the state saved in the context by a previous call. */
94 eState
= ((ImplGb18030ToUnicodeContext
*) pContext
)->m_eState
;
95 nCode
= ((ImplGb18030ToUnicodeContext
*) pContext
)->m_nCode
;
/* One iteration per source byte; nConverted counts the bytes consumed. */
98 for (; nConverted
< nSrcBytes
; ++nConverted
)
100 sal_Bool bUndefined
= sal_True
;
/* Read the byte as unsigned so values 0x80..0xFF compare correctly. */
101 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/* STATE_0: no sequence in progress; nChar is the first byte of a
   character. */
104 case IMPL_GB_18030_TO_UNICODE_STATE_0
:
/* Single-byte case: the byte value is written unchanged, provided there is
   room.  (The guarding condition on nChar is not present in this
   listing.) */
106 if (pDestBufPtr
!= pDestBufEnd
)
107 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
110 else if (nChar
== 0x80)
/* Remaining values up to 0xFE are lead bytes: start accumulating the code,
   relative to 0x81, and expect a second byte. */
112 else if (nChar
<= 0xFE)
114 nCode
= nChar
- 0x81;
115 eState
= IMPL_GB_18030_TO_UNICODE_STATE_1
;
119 bUndefined
= sal_False
;
/* STATE_1: second byte.  A digit 0x30..0x39 continues a four-byte
   sequence; 0x40..0x7E or 0x80..0xFE completes a two-byte sequence. */
124 case IMPL_GB_18030_TO_UNICODE_STATE_1
:
125 if (nChar
>= 0x30 && nChar
<= 0x39)
127 nCode
= nCode
* 10 + (nChar
- 0x30);
128 eState
= IMPL_GB_18030_TO_UNICODE_STATE_2
;
130 else if ((nChar
>= 0x40 && nChar
<= 0x7E)
131 || (nChar
>= 0x80 && nChar
<= 0xFE))
/* Two-byte sequence: 190 possible trail bytes per lead byte; the resulting
   index selects the Unicode value from pGb18030Data. */
133 nCode
= nCode
* 190 + (nChar
<= 0x7E ? nChar
- 0x40 :
135 if (pDestBufPtr
!= pDestBufEnd
)
136 *pDestBufPtr
++ = pGb18030Data
[nCode
];
139 eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
143 bUndefined
= sal_False
;
/* STATE_2: third byte of a four-byte sequence, 0x81..0xFE (126 values). */
148 case IMPL_GB_18030_TO_UNICODE_STATE_2
:
149 if (nChar
>= 0x81 && nChar
<= 0xFE)
151 nCode
= nCode
* 126 + (nChar
- 0x81);
152 eState
= IMPL_GB_18030_TO_UNICODE_STATE_3
;
156 bUndefined
= sal_False
;
/* STATE_3: fourth byte of a four-byte sequence, a digit 0x30..0x39.  After
   this byte nCode is the complete linear four-byte code. */
161 case IMPL_GB_18030_TO_UNICODE_STATE_3
:
162 if (nChar
>= 0x30 && nChar
<= 0x39)
164 nCode
= nCode
* 10 + (nChar
- 0x30);
166 /* 90 30 81 30 to E3 32 9A 35 maps to U+10000 to U+10FFFF: */
167 if (nCode
>= 189000 && nCode
<= 1237575)
/* A supplementary character needs room for two sal_Unicode units. */
168 if (pDestBufEnd
- pDestBufPtr
>= 2)
/* Rebase the linear code so that 189000 becomes 0x10000, then split it
   into a surrogate pair. */
170 nCode
-= 189000 - 0x10000;
172 = (sal_Unicode
) ImplGetHighSurrogate(nCode
);
174 = (sal_Unicode
) ImplGetLowSurrogate(nCode
);
/* Other four-byte codes: walk the range list.  nFirstNonRange tracks the
   first linear code after the previously examined range. */
180 ImplGb180302000ToUnicodeRange
const * pRange
182 sal_uInt32 nFirstNonRange
= 0;
/* An entry with m_nNonRangeDataIndex == -1 is tested first, before any
   range comparison (presumably the end-of-list marker -- the branch body is
   not present in this listing). */
185 if (pRange
->m_nNonRangeDataIndex
== -1)
/* Code lies before this range's linear part: table lookup, indexed by
   m_nNonRangeDataIndex plus the offset from nFirstNonRange. */
187 else if (nCode
< pRange
->m_nFirstLinear
)
189 if (pDestBufPtr
!= pDestBufEnd
)
192 pRange
->m_nNonRangeDataIndex
193 + (nCode
- nFirstNonRange
)];
/* Code lies inside this range's linear part: the result is derived from
   m_nFirstUnicode (the rest of the expression is not present in this
   listing). */
198 else if (nCode
< pRange
->m_nPastLinear
)
200 if (pDestBufPtr
!= pDestBufEnd
)
203 (pRange
->m_nFirstUnicode
/* Not in this entry: remember where its linear part ended and advance to
   the next entry. */
211 nFirstNonRange
= (pRange
++)->m_nPastLinear
;
214 eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
218 bUndefined
= sal_False
;
/* Bad input: delegate to the shared handler, which may write to the
   destination and update nInfo according to nFlags.  STOP and CONTINUE both
   return the decoder to STATE_0. */
226 switch (ImplHandleBadInputTextToUnicodeConversion(
227 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
230 case IMPL_BAD_INPUT_STOP
:
231 eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
234 case IMPL_BAD_INPUT_CONTINUE
:
235 eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
238 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Destination buffer exhausted: report it through nInfo. */
245 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a sequence is still in progress (state is not STATE_0).
   The test also involves the ERROR and DESTBUFFERTOSMALL bits of nInfo
   (the comparison completing this condition is not present in this
   listing). */
249 if (eState
!= IMPL_GB_18030_TO_UNICODE_STATE_0
250 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
251 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
/* Without the FLUSH flag more input may follow, so only report that the
   source ended inside a sequence. */
254 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
255 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
/* The truncated sequence is passed to the bad-input handler (bUndefined
   argument is sal_False). */
257 switch (ImplHandleBadInputTextToUnicodeConversion(
258 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
,
259 pDestBufEnd
, &nInfo
))
261 case IMPL_BAD_INPUT_STOP
:
262 case IMPL_BAD_INPUT_CONTINUE
:
263 eState
= IMPL_GB_18030_TO_UNICODE_STATE_0
;
266 case IMPL_BAD_INPUT_NO_OUTPUT
:
267 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the decoder state and partial code for the next call. */
274 ((ImplGb18030ToUnicodeContext
*) pContext
)->m_eState
= eState
;
275 ((ImplGb18030ToUnicodeContext
*) pContext
)->m_nCode
= nCode
;
/* Report the number of source bytes consumed. */
280 *pSrcCvtBytes
= nConverted
;
/* Number of sal_Unicode units written. */
282 return pDestBufPtr
- pDestBuf
;
/* Converts sal_Unicode units from pSrcBuf into GB 18030 encoded bytes in
   pDestBuf.

   Visible contract:
   - pData is used as an ImplGb18030ConverterData and supplies the lookup
     table (m_pUnicodeToGb18030Data) and the range list
     (m_pUnicodeToGb18030Ranges).
   - A pending high surrogate is read from pContext, used as an
     ImplUnicodeToTextContext (m_nHighSurrogate), before the loop; the same
     member is accessed again after the loop.
   - *pSrcCvtChars receives the number of source units consumed.
   - The return value is the number of bytes written
     (pDestBufPtr - pDestBuf).

   NOTE(review): this listing omits a number of source lines (several
   parameters, braces, some conditions and statements).  The comments below
   describe only the statements that are visible. */
285 sal_Size
ImplConvertUnicodeToGb18030(ImplTextConverterData
const * pData
,
287 sal_Unicode
const * pSrcBuf
,
293 sal_Size
* pSrcCvtChars
)
/* Lookup table and range descriptions from the converter data. */
295 sal_uInt32
const * pGb18030Data
296 = ((ImplGb18030ConverterData
const *) pData
)->
297 m_pUnicodeToGb18030Data
;
298 ImplUnicodeToGb180302000Range
const * pGb18030Ranges
299 = ((ImplGb18030ConverterData
const *) pData
)->
300 m_pUnicodeToGb18030Ranges
;
/* 0 means "no high surrogate pending". */
301 sal_Unicode nHighSurrogate
= 0;
302 sal_uInt32 nInfo
= 0;
303 sal_Size nConverted
= 0;
/* Write cursor and one-past-the-end of the destination buffer. */
304 sal_Char
* pDestBufPtr
= pDestBuf
;
305 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Value taken from the context's pending high surrogate (the assignment
   target is not present in this listing). */
309 = ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
;
/* One iteration per source unit; nConverted counts the units consumed. */
311 for (; nConverted
< nSrcChars
; ++nConverted
)
313 sal_Bool bUndefined
= sal_True
;
314 sal_uInt32 nChar
= *pSrcBuf
++;
/* No surrogate pending: a high surrogate is remembered in nHighSurrogate
   instead of being encoded on its own. */
315 if (nHighSurrogate
== 0)
317 if (ImplIsHighSurrogate(nChar
))
319 nHighSurrogate
= (sal_Unicode
) nChar
;
/* Surrogate pending and this unit is a low surrogate: combine the pair
   into a single code point. */
323 else if (ImplIsLowSurrogate(nChar
))
324 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
327 bUndefined
= sal_False
;
/* An unpaired low surrogate or a noncharacter clears bUndefined before the
   bad-input handling. */
331 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
333 bUndefined
= sal_False
;
/* Single-byte case: the value is written as one byte, provided there is
   room.  (The guarding condition on nChar is not present in this
   listing.) */
338 if (pDestBufPtr
!= pDestBufEnd
)
339 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* Code points below U+10000: walk the range list.  nFirstNonRange tracks
   the first code point after the previously examined range and starts at
   0x80. */
342 else if (nChar
< 0x10000)
344 ImplUnicodeToGb180302000Range
const * pRange
= pGb18030Ranges
;
345 sal_Unicode nFirstNonRange
= 0x80;
/* Code point lies before this range: table lookup, indexed by
   m_nNonRangeDataIndex plus the offset from nFirstNonRange. */
348 if (nChar
< pRange
->m_nFirstUnicode
)
351 = pGb18030Data
[pRange
->m_nNonRangeDataIndex
352 + (nChar
- nFirstNonRange
)];
/* A table value up to 0xFFFF needs two bytes of room, a larger one four. */
353 if (pDestBufEnd
- pDestBufPtr
354 >= (nCode
<= 0xFFFF ? 2 : 4))
/* Bytes are written most significant first. */
358 *pDestBufPtr
++ = (sal_Char
) (nCode
>> 24);
359 *pDestBufPtr
++ = (sal_Char
) (nCode
>> 16 & 0xFF);
361 *pDestBufPtr
++ = (sal_Char
) (nCode
>> 8 & 0xFF);
362 *pDestBufPtr
++ = (sal_Char
) (nCode
& 0xFF);
/* Code point lies inside this range: always a four-byte sequence, computed
   linearly from the range's first linear code. */
368 else if (nChar
<= pRange
->m_nLastUnicode
)
370 if (pDestBufEnd
- pDestBufPtr
>= 4)
373 = pRange
->m_nFirstLinear
374 + (nChar
- pRange
->m_nFirstUnicode
);
/* Split the linear code into four bytes: bytes 1 and 3 are based at 0x81,
   bytes 2 and 4 are digits based at 0x30. */
375 *pDestBufPtr
++ = (sal_Char
) (nCode
/ 12600 + 0x81);
377 = (sal_Char
) (nCode
/ 1260 % 10 + 0x30);
378 *pDestBufPtr
++ = (sal_Char
) (nCode
/ 10 % 126 + 0x81);
379 *pDestBufPtr
++ = (sal_Char
) (nCode
% 10 + 0x30);
/* Not in this entry: the value computed here is one past the entry's last
   code point, and pRange advances to the next entry (the assignment target
   is not present in this listing). */
386 = (sal_Unicode
) ((pRange
++)->m_nLastUnicode
+ 1);
/* Remaining case: four-byte sequence computed from the offset above
   U+10000, with the first byte based at 0x90. */
390 if (pDestBufEnd
- pDestBufPtr
>= 4)
392 sal_uInt32 nCode
= nChar
- 0x10000;
393 *pDestBufPtr
++ = (sal_Char
) (nCode
/ 12600 + 0x90);
394 *pDestBufPtr
++ = (sal_Char
) (nCode
/ 1260 % 10 + 0x30);
395 *pDestBufPtr
++ = (sal_Char
) (nCode
/ 10 % 126 + 0x81);
396 *pDestBufPtr
++ = (sal_Char
) (nCode
% 10 + 0x30);
/* Bad input: delegate to the shared handler and act on its verdict. */
404 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined
,
414 case IMPL_BAD_INPUT_STOP
:
418 case IMPL_BAD_INPUT_CONTINUE
:
422 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Destination buffer exhausted: report it through nInfo. */
429 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a high surrogate is still pending.  The test also
   involves the ERROR and DESTBUFFERTOSMALL bits of nInfo (the comparison
   completing this condition is not present in this listing). */
433 if (nHighSurrogate
!= 0
434 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
435 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
/* SRCBUFFERTOSMALL is reported when the FLUSH flag IS set.
   NOTE(review): ImplConvertGb18030ToUnicode in this file reports the
   corresponding condition when its FLUSH flag is NOT set (== 0) -- confirm
   that the != 0 test here is intended. */
438 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
439 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
/* The dangling high surrogate is passed to the bad-input handler
   (bUndefined argument is sal_False). */
441 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False
,
451 case IMPL_BAD_INPUT_STOP
:
452 case IMPL_BAD_INPUT_CONTINUE
:
456 case IMPL_BAD_INPUT_NO_OUTPUT
:
457 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* The context's pending high surrogate is accessed here (the rest of the
   statement is not present in this listing). */
463 ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
/* Report the number of source units consumed. */
468 *pSrcCvtChars
= nConverted
;
/* Number of bytes written. */
470 return pDestBufPtr
- pDestBuf
;