1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertbig5hkscs.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertbig5hkscs.h"
33 #include "converter.h"
36 #include "osl/diagnose.h"
37 #include "rtl/alloc.h"
38 #include "rtl/textcvt.h"
39 #include "sal/types.h"
/* Conversion state for the Big5-HKSCS-to-Unicode direction.  The converter
   below loads m_nRow from this context before its loop and writes it back
   afterwards.
   NOTE(review): the opening line of this struct definition is not visible
   in this listing. */
43 sal_Int32 m_nRow
; /* 0--255; 0 means none */
44 } ImplBig5HkscsToUnicodeContext
;
46 void * ImplCreateBig5HkscsToUnicodeContext(void)
49 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext
));
50 ((ImplBig5HkscsToUnicodeContext
*) pContext
)->m_nRow
= 0;
/* Reset a conversion context: m_nRow is set back to 0 ("none", per the
   struct comment).
   NOTE(review): the lines between the signature and the assignment are not
   visible in this listing, so whether pContext is null-checked before the
   write cannot be confirmed from here. */
54 void ImplResetBig5HkscsToUnicodeContext(void * pContext
)
57 ((ImplBig5HkscsToUnicodeContext
*) pContext
)->m_nRow
= 0;
/* ImplConvertBig5HkscsToUnicode: converts source bytes (pSrcBuf) into
   sal_Unicode units (pDestBuf).  Returns the number of units written
   (pDestBufPtr - pDestBuf) and stores the number of consumed source bytes
   in *pSrcCvtBytes.
   NOTE(review): this listing has gaps -- several parameters, declarations,
   braces, else-branches and labels are not visible.  The comments below
   describe only what the visible lines do. */
60 sal_Size
ImplConvertBig5HkscsToUnicode(ImplTextConverterData
const * pData
,
62 sal_Char
const * pSrcBuf
,
64 sal_Unicode
* pDestBuf
,
68 sal_Size
* pSrcCvtBytes
)
/* Lookup tables taken from the converter data: the HKSCS-2001 table, its
   per-row offsets, and a Big5 lead table (member name not visible). */
70 sal_uInt16
const * pBig5Hkscs2001Data
71 = ((ImplBig5HkscsConverterData
const *) pData
)->
72 m_pBig5Hkscs2001ToUnicodeData
;
73 sal_Int32
const * pBig5Hkscs2001RowOffsets
74 = ((ImplBig5HkscsConverterData
const *) pData
)->
75 m_pBig5Hkscs2001ToUnicodeRowOffsets
;
76 ImplDBCSToUniLeadTab
const * pBig5Data
77 = ((ImplBig5HkscsConverterData
const *) pData
)->
81 sal_Size nConverted
= 0;
82 sal_Unicode
* pDestBufPtr
= pDestBuf
;
83 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume with the row value stored in the context (it is written back
   after the loop). */
86 nRow
= ((ImplBig5HkscsToUnicodeContext
*) pContext
)->m_nRow
;
/* One source byte is consumed per iteration. */
88 for (; nConverted
< nSrcBytes
; ++nConverted
)
90 sal_Bool bUndefined
= sal_True
;
91 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/* The byte is copied through unchanged when the destination has room.
   NOTE(review): the conditions guarding this branch are not visible. */
94 if (pDestBufPtr
!= pDestBufEnd
)
95 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
/* Bytes 0x81..0xFE are accepted here; the action taken is not visible
   (presumably the byte is remembered in nRow -- confirm). */
98 else if (nChar
>= 0x81 && nChar
<= 0xFE)
102 bUndefined
= sal_False
;
/* Range check on nChar: 0x40..0x7E or 0xA1..0xFE. */
106 if ((nChar
>= 0x40 && nChar
<= 0x7E)
107 || (nChar
>= 0xA1 && nChar
<= 0xFE))
/* 0xFFFF is the "no mapping found yet" sentinel for nUnicode. */
109 sal_uInt32 nUnicode
= 0xFFFF;
110 sal_Int32 nOffset
= pBig5Hkscs2001RowOffsets
[nRow
];
/* HKSCS-2001 lookup: the entry at nOffset packs the first covered value in
   its low byte and the last in its high byte; the entries after it are
   indexed by (nChar - nFirst). */
115 sal_uInt32 nFirstLast
= pBig5Hkscs2001Data
[nOffset
++];
116 nFirst
= nFirstLast
& 0xFF;
117 nLast
= nFirstLast
>> 8;
118 if (nChar
>= nFirst
&& nChar
<= nLast
)
120 = pBig5Hkscs2001Data
[nOffset
+ (nChar
- nFirst
)];
/* Still unmapped: fall back to the Big5 table entry for this row. */
122 if (nUnicode
== 0xFFFF)
124 sal_uInt32 nFirst
= pBig5Data
[nRow
].mnTrailStart
;
126 && nChar
<= pBig5Data
[nRow
].mnTrailEnd
)
129 = pBig5Data
[nRow
].mpToUniTrailTab
[nChar
- nFirst
];
132 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode
));
/* Still unmapped: walk the ImplDBCSEUDCData entries (nCount of them; the
   member names on the right-hand sides are not visible). */
135 if (nUnicode
== 0xFFFF)
137 ImplDBCSEUDCData
const * p
138 = ((ImplBig5HkscsConverterData
const *) pData
)->
141 = ((ImplBig5HkscsConverterData
const *) pData
)->
144 for (i
= 0; i
< nCount
; ++i
)
/* An entry applies when nRow lies inside its lead range.  nChar is then
   tested against up to three trail ranges in turn; the offset computed is
   (nRow - mnLeadStart) * mnTrailRangeCount plus the position of nChar
   across the preceding trail ranges.  The base value these offsets are
   added to is not visible in this listing. */
146 if (nRow
>= p
->mnLeadStart
&& nRow
<= p
->mnLeadEnd
)
148 if (nChar
< p
->mnTrail1Start
)
150 if (nChar
<= p
->mnTrail1End
)
154 + (nRow
- p
->mnLeadStart
)
155 * p
->mnTrailRangeCount
156 + (nChar
- p
->mnTrail1Start
);
/* Second trail range: only considered when mnTrailCount >= 2. */
159 if (p
->mnTrailCount
< 2
160 || nChar
< p
->mnTrail2Start
)
162 if (nChar
<= p
->mnTrail2End
)
166 + (nRow
- p
->mnLeadStart
)
167 * p
->mnTrailRangeCount
168 + (nChar
- p
->mnTrail2Start
)
169 + (p
->mnTrail1End
- p
->mnTrail1Start
/* Third trail range: only considered when mnTrailCount >= 3. */
173 if (p
->mnTrailCount
< 3
174 || nChar
< p
->mnTrail3Start
)
176 if (nChar
<= p
->mnTrail3End
)
180 + (nRow
- p
->mnLeadStart
)
181 * p
->mnTrailRangeCount
182 + (nChar
- p
->mnTrail3Start
)
183 + (p
->mnTrail1End
- p
->mnTrail1Start
185 + (p
->mnTrail2End
- p
->mnTrail2Start
193 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode
));
/* No table produced a mapping (the action taken is not visible). */
195 if (nUnicode
== 0xFFFF)
/* A high surrogate means the mapping is a surrogate pair, which needs two
   free destination units.  The second unit is read from the table segment
   that follows the current one: nOffset skips (nLast - nFirst + 1) entries
   and a new nFirst is read.  The assignment target of the second read is
   not visible. */
197 if (ImplIsHighSurrogate(nUnicode
))
198 if (pDestBufEnd
- pDestBufPtr
>= 2)
200 nOffset
+= nLast
- nFirst
+ 1;
201 nFirst
= pBig5Hkscs2001Data
[nOffset
++];
202 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
204 = (sal_Unicode
) pBig5Hkscs2001Data
[
205 nOffset
+ (nChar
- nFirst
)];
/* Ordinary single-unit result: written when the destination has room. */
210 if (pDestBufPtr
!= pDestBufEnd
)
211 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
218 bUndefined
= sal_False
;
/* Bad-input handling is delegated to
   ImplHandleBadInputTextToUnicodeConversion; its result selects one of the
   STOP / CONTINUE / NO_OUTPUT cases (case bodies not visible). */
224 switch (ImplHandleBadInputTextToUnicodeConversion(
225 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
228 case IMPL_BAD_INPUT_STOP
:
232 case IMPL_BAD_INPUT_CONTINUE
:
236 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Report that the destination buffer was too small. */
243 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* After the loop: this condition tests the ERROR / DESTBUFFERTOSMALL bits
   of nInfo (its first operand and comparison are not visible).  When the
   FLUSH flag is not set, SRCBUFFERTOSMALL is reported.  The switch that
   follows is presumably the else-branch -- the line is not visible. */
248 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
249 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
252 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
253 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
255 switch (ImplHandleBadInputTextToUnicodeConversion(
256 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
,
257 pDestBufEnd
, &nInfo
))
259 case IMPL_BAD_INPUT_STOP
:
260 case IMPL_BAD_INPUT_CONTINUE
:
264 case IMPL_BAD_INPUT_NO_OUTPUT
:
265 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the row state in the context so a later call can resume. */
271 ((ImplBig5HkscsToUnicodeContext
*) pContext
)->m_nRow
= nRow
;
/* Report how many source bytes were consumed. */
275 *pSrcCvtBytes
= nConverted
;
/* Number of sal_Unicode units written to pDestBuf. */
277 return pDestBufPtr
- pDestBuf
;
/* ImplConvertUnicodeToBig5Hkscs: converts sal_Unicode units (pSrcBuf) into
   bytes (pDestBuf).  Returns the number of bytes written
   (pDestBufPtr - pDestBuf) and stores the number of consumed source units
   in *pSrcCvtChars.
   NOTE(review): this listing has gaps -- several parameters, declarations,
   braces, else-branches and labels are not visible.  The comments below
   describe only what the visible lines do. */
280 sal_Size
ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData
const * pData
,
282 sal_Unicode
const * pSrcBuf
,
288 sal_Size
* pSrcCvtChars
)
/* Lookup tables taken from the converter data: the HKSCS-2001 table with
   its page and plane offsets, and the Unicode-to-Big5 table. */
290 sal_uInt16
const * pBig5Hkscs2001Data
291 = ((ImplBig5HkscsConverterData
const *) pData
)->
292 m_pUnicodeToBig5Hkscs2001Data
;
293 sal_Int32
const * pBig5Hkscs2001PageOffsets
294 = ((ImplBig5HkscsConverterData
const *) pData
)->
295 m_pUnicodeToBig5Hkscs2001PageOffsets
;
296 sal_Int32
const * pBig5Hkscs2001PlaneOffsets
297 = ((ImplBig5HkscsConverterData
const *) pData
)->
298 m_pUnicodeToBig5Hkscs2001PlaneOffsets
;
299 ImplUniToDBCSHighTab
const * pBig5Data
300 = ((ImplBig5HkscsConverterData
const *) pData
)->
301 m_pUnicodeToBig5Data
;
302 sal_Unicode nHighSurrogate
= 0;
303 sal_uInt32 nInfo
= 0;
304 sal_Size nConverted
= 0;
305 sal_Char
* pDestBufPtr
= pDestBuf
;
306 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Reads m_nHighSurrogate from the context; the assignment target is not
   visible (presumably nHighSurrogate -- confirm). */
310 = ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
;
/* One source unit is consumed per iteration. */
312 for (; nConverted
< nSrcChars
; ++nConverted
)
314 sal_Bool bUndefined
= sal_True
;
315 sal_uInt32 nChar
= *pSrcBuf
++;
/* No pending high surrogate: a high surrogate seen now is remembered. */
316 if (nHighSurrogate
== 0)
318 if (ImplIsHighSurrogate(nChar
))
320 nHighSurrogate
= (sal_Unicode
) nChar
;
/* Pending high surrogate: a following low surrogate is combined with it
   into one code point. */
324 else if (ImplIsLowSurrogate(nChar
))
325 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
328 bUndefined
= sal_False
;
/* bUndefined is cleared for low surrogates and noncharacters reaching this
   point (the jump that presumably follows is not visible). */
332 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
334 bUndefined
= sal_False
;
/* Single-byte output when the destination has room.
   NOTE(review): the condition selecting this branch is not visible. */
339 if (pDestBufPtr
!= pDestBufEnd
)
340 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* nBytes holds the looked-up byte pair.  HKSCS-2001 lookup: the plane
   offset is selected by nChar >> 16, then a page offset is read using
   (nChar & 0xFF00) -- the rest of that expression is not visible. */
345 sal_uInt32 nBytes
= 0;
346 sal_Int32 nOffset
= pBig5Hkscs2001PlaneOffsets
[nChar
>> 16];
350 = pBig5Hkscs2001PageOffsets
[nOffset
+ ((nChar
& 0xFF00)
/* The entry at nOffset packs the first covered index in its low byte and
   the last in its high byte; the entries after it are indexed by the low
   byte of nChar minus nFirst. */
354 sal_uInt32 nFirstLast
= pBig5Hkscs2001Data
[nOffset
++];
355 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
356 sal_uInt32 nLast
= nFirstLast
>> 8;
357 sal_uInt32 nIndex
= nChar
& 0xFF;
358 if (nIndex
>= nFirst
&& nIndex
<= nLast
)
361 = pBig5Hkscs2001Data
[nOffset
+ (nIndex
- nFirst
)];
/* Big5 table lookup: nIndex1 (nChar >> 8) selects the table entry and
   nIndex2 (low byte) indexes inside its mnLowStart..mnLowEnd window. */
367 sal_uInt32 nIndex1
= nChar
>> 8;
370 sal_uInt32 nIndex2
= nChar
& 0xFF;
371 sal_uInt32 nFirst
= pBig5Data
[nIndex1
].mnLowStart
;
372 if (nIndex2
>= nFirst
373 && nIndex2
<= pBig5Data
[nIndex1
].mnLowEnd
)
374 nBytes
= pBig5Data
[nIndex1
].
375 mpToUniTrailTab
[nIndex2
- nFirst
];
/* Walk the ImplDBCSEUDCData entries (nCount of them; the member names on
   the right-hand sides are not visible). */
380 ImplDBCSEUDCData
const * p
381 = ((ImplBig5HkscsConverterData
const *) pData
)->
384 = ((ImplBig5HkscsConverterData
const *) pData
)->
387 for (i
= 0; i
< nCount
; ++i
) {
/* nChar lies in this entry's Unicode range: its index in the range is
   split into a lead offset (index / mnTrailRangeCount) and a trail offset
   (index % mnTrailRangeCount). */
388 if (nChar
>= p
->mnUniStart
&& nChar
<= p
->mnUniEnd
)
390 sal_uInt32 nIndex
= nChar
- p
->mnUniStart
;
391 sal_uInt32 nLeadOff
= nIndex
/ p
->mnTrailRangeCount
;
392 sal_uInt32 nTrailOff
= nIndex
% p
->mnTrailRangeCount
;
/* The lead value goes into the high byte of nBytes.  The trail offset is
   then placed in trail range 1, 2 or 3 depending on each range's size.
   NOTE(review): any adjustment of nTrailOff between ranges is not visible
   in this listing. */
394 nBytes
= (p
->mnLeadStart
+ nLeadOff
) << 8;
395 nSize
= p
->mnTrail1End
- p
->mnTrail1Start
+ 1;
396 if (nTrailOff
< nSize
)
398 nBytes
|= p
->mnTrail1Start
+ nTrailOff
;
402 nSize
= p
->mnTrail2End
- p
->mnTrail2Start
+ 1;
403 if (nTrailOff
< nSize
)
405 nBytes
|= p
->mnTrail2Start
+ nTrailOff
;
409 nBytes
|= p
->mnTrail3Start
+ nTrailOff
;
/* Two-byte output needs two free destination bytes; the high byte of
   nBytes is written first, then the low byte. */
417 if (pDestBufEnd
- pDestBufPtr
>= 2)
419 *pDestBufPtr
++ = (sal_Char
) (nBytes
>> 8);
420 *pDestBufPtr
++ = (sal_Char
) (nBytes
& 0xFF);
/* Bad-input handling is delegated to
   ImplHandleBadInputUnicodeToTextConversion; its result selects one of the
   STOP / CONTINUE / NO_OUTPUT cases (arguments and case bodies are not
   fully visible). */
429 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined
,
439 case IMPL_BAD_INPUT_STOP
:
443 case IMPL_BAD_INPUT_CONTINUE
:
447 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Report that the destination buffer was too small. */
454 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a high surrogate is still pending.  The condition also
   tests the ERROR / DESTBUFFERTOSMALL bits of nInfo (the comparison is not
   visible).
   NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set (!= 0),
   whereas ImplConvertBig5HkscsToUnicode sets it when FLUSH is *not* set
   (== 0) -- confirm which polarity is intended. */
458 if (nHighSurrogate
!= 0
459 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
460 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
463 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
464 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
466 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False
,
476 case IMPL_BAD_INPUT_STOP
:
477 case IMPL_BAD_INPUT_CONTINUE
:
481 case IMPL_BAD_INPUT_NO_OUTPUT
:
482 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Writes the context's m_nHighSurrogate field; the assigned value is not
   visible (presumably nHighSurrogate -- confirm). */
488 ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
/* Report how many source units were consumed. */
493 *pSrcCvtChars
= nConverted
;
/* Number of bytes written to pDestBuf. */
495 return pDestBufPtr
- pDestBuf
;