Update ooo320-m1
[ooovba.git] / sal / textenc / convertbig5hkscs.c
blob8c7e1c8a765497a5934728ef54a7a5ccd8a17c8e
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertbig5hkscs.c,v $
10 * $Revision: 1.8 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertbig5hkscs.h"
32 #include "context.h"
33 #include "converter.h"
34 #include "tenchelp.h"
35 #include "unichars.h"
36 #include "osl/diagnose.h"
37 #include "rtl/alloc.h"
38 #include "rtl/textcvt.h"
39 #include "sal/types.h"
41 typedef struct
43 sal_Int32 m_nRow; /* 0--255; 0 means none */
44 } ImplBig5HkscsToUnicodeContext;
46 void * ImplCreateBig5HkscsToUnicodeContext(void)
48 void * pContext
49 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext));
50 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
51 return pContext;
54 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
56 if (pContext)
57 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
60 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,
61 void * pContext,
62 sal_Char const * pSrcBuf,
63 sal_Size nSrcBytes,
64 sal_Unicode * pDestBuf,
65 sal_Size nDestChars,
66 sal_uInt32 nFlags,
67 sal_uInt32 * pInfo,
68 sal_Size * pSrcCvtBytes)
70 sal_uInt16 const * pBig5Hkscs2001Data
71 = ((ImplBig5HkscsConverterData const *) pData)->
72 m_pBig5Hkscs2001ToUnicodeData;
73 sal_Int32 const * pBig5Hkscs2001RowOffsets
74 = ((ImplBig5HkscsConverterData const *) pData)->
75 m_pBig5Hkscs2001ToUnicodeRowOffsets;
76 ImplDBCSToUniLeadTab const * pBig5Data
77 = ((ImplBig5HkscsConverterData const *) pData)->
78 m_pBig5ToUnicodeData;
79 sal_Int32 nRow = 0;
80 sal_uInt32 nInfo = 0;
81 sal_Size nConverted = 0;
82 sal_Unicode * pDestBufPtr = pDestBuf;
83 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
85 if (pContext)
86 nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow;
88 for (; nConverted < nSrcBytes; ++nConverted)
90 sal_Bool bUndefined = sal_True;
91 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
92 if (nRow == 0)
93 if (nChar < 0x80)
94 if (pDestBufPtr != pDestBufEnd)
95 *pDestBufPtr++ = (sal_Unicode) nChar;
96 else
97 goto no_output;
98 else if (nChar >= 0x81 && nChar <= 0xFE)
99 nRow = nChar;
100 else
102 bUndefined = sal_False;
103 goto bad_input;
105 else
106 if ((nChar >= 0x40 && nChar <= 0x7E)
107 || (nChar >= 0xA1 && nChar <= 0xFE))
109 sal_uInt32 nUnicode = 0xFFFF;
110 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
111 sal_uInt32 nFirst=0;
112 sal_uInt32 nLast=0;
113 if (nOffset != -1)
115 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
116 nFirst = nFirstLast & 0xFF;
117 nLast = nFirstLast >> 8;
118 if (nChar >= nFirst && nChar <= nLast)
119 nUnicode
120 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
122 if (nUnicode == 0xFFFF)
124 sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart;
125 if (nChar >= nFirst
126 && nChar <= pBig5Data[nRow].mnTrailEnd)
128 nUnicode
129 = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst];
130 if (nUnicode == 0)
131 nUnicode = 0xFFFF;
132 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
135 if (nUnicode == 0xFFFF)
137 ImplDBCSEUDCData const * p
138 = ((ImplBig5HkscsConverterData const *) pData)->
139 m_pEudcData;
140 sal_uInt32 nCount
141 = ((ImplBig5HkscsConverterData const *) pData)->
142 m_nEudcCount;
143 sal_uInt32 i;
144 for (i = 0; i < nCount; ++i)
146 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
148 if (nChar < p->mnTrail1Start)
149 break;
150 if (nChar <= p->mnTrail1End)
152 nUnicode
153 = p->mnUniStart
154 + (nRow - p->mnLeadStart)
155 * p->mnTrailRangeCount
156 + (nChar - p->mnTrail1Start);
157 break;
159 if (p->mnTrailCount < 2
160 || nChar < p->mnTrail2Start)
161 break;
162 if (nChar <= p->mnTrail2End)
164 nUnicode
165 = p->mnUniStart
166 + (nRow - p->mnLeadStart)
167 * p->mnTrailRangeCount
168 + (nChar - p->mnTrail2Start)
169 + (p->mnTrail1End - p->mnTrail1Start
170 + 1);
171 break;
173 if (p->mnTrailCount < 3
174 || nChar < p->mnTrail3Start)
175 break;
176 if (nChar <= p->mnTrail3End)
178 nUnicode
179 = p->mnUniStart
180 + (nRow - p->mnLeadStart)
181 * p->mnTrailRangeCount
182 + (nChar - p->mnTrail3Start)
183 + (p->mnTrail1End - p->mnTrail1Start
184 + 1)
185 + (p->mnTrail2End - p->mnTrail2Start
186 + 1);
187 break;
189 break;
191 ++p;
193 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
195 if (nUnicode == 0xFFFF)
196 goto bad_input;
197 if (ImplIsHighSurrogate(nUnicode))
198 if (pDestBufEnd - pDestBufPtr >= 2)
200 nOffset += nLast - nFirst + 1;
201 nFirst = pBig5Hkscs2001Data[nOffset++];
202 *pDestBufPtr++ = (sal_Unicode) nUnicode;
203 *pDestBufPtr++
204 = (sal_Unicode) pBig5Hkscs2001Data[
205 nOffset + (nChar - nFirst)];
207 else
208 goto no_output;
209 else
210 if (pDestBufPtr != pDestBufEnd)
211 *pDestBufPtr++ = (sal_Unicode) nUnicode;
212 else
213 goto no_output;
214 nRow = 0;
216 else
218 bUndefined = sal_False;
219 goto bad_input;
221 continue;
223 bad_input:
224 switch (ImplHandleBadInputTextToUnicodeConversion(
225 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
226 &nInfo))
228 case IMPL_BAD_INPUT_STOP:
229 nRow = 0;
230 break;
232 case IMPL_BAD_INPUT_CONTINUE:
233 nRow = 0;
234 continue;
236 case IMPL_BAD_INPUT_NO_OUTPUT:
237 goto no_output;
239 break;
241 no_output:
242 --pSrcBuf;
243 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
244 break;
247 if (nRow != 0
248 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
249 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
250 == 0)
252 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
253 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
254 else
255 switch (ImplHandleBadInputTextToUnicodeConversion(
256 sal_False, sal_True, 0, nFlags, &pDestBufPtr,
257 pDestBufEnd, &nInfo))
259 case IMPL_BAD_INPUT_STOP:
260 case IMPL_BAD_INPUT_CONTINUE:
261 nRow = 0;
262 break;
264 case IMPL_BAD_INPUT_NO_OUTPUT:
265 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
266 break;
270 if (pContext)
271 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow;
272 if (pInfo)
273 *pInfo = nInfo;
274 if (pSrcCvtBytes)
275 *pSrcCvtBytes = nConverted;
277 return pDestBufPtr - pDestBuf;
280 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,
281 void * pContext,
282 sal_Unicode const * pSrcBuf,
283 sal_Size nSrcChars,
284 sal_Char * pDestBuf,
285 sal_Size nDestBytes,
286 sal_uInt32 nFlags,
287 sal_uInt32 * pInfo,
288 sal_Size * pSrcCvtChars)
290 sal_uInt16 const * pBig5Hkscs2001Data
291 = ((ImplBig5HkscsConverterData const *) pData)->
292 m_pUnicodeToBig5Hkscs2001Data;
293 sal_Int32 const * pBig5Hkscs2001PageOffsets
294 = ((ImplBig5HkscsConverterData const *) pData)->
295 m_pUnicodeToBig5Hkscs2001PageOffsets;
296 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
297 = ((ImplBig5HkscsConverterData const *) pData)->
298 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
299 ImplUniToDBCSHighTab const * pBig5Data
300 = ((ImplBig5HkscsConverterData const *) pData)->
301 m_pUnicodeToBig5Data;
302 sal_Unicode nHighSurrogate = 0;
303 sal_uInt32 nInfo = 0;
304 sal_Size nConverted = 0;
305 sal_Char * pDestBufPtr = pDestBuf;
306 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
308 if (pContext)
309 nHighSurrogate
310 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
312 for (; nConverted < nSrcChars; ++nConverted)
314 sal_Bool bUndefined = sal_True;
315 sal_uInt32 nChar = *pSrcBuf++;
316 if (nHighSurrogate == 0)
318 if (ImplIsHighSurrogate(nChar))
320 nHighSurrogate = (sal_Unicode) nChar;
321 continue;
324 else if (ImplIsLowSurrogate(nChar))
325 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
326 else
328 bUndefined = sal_False;
329 goto bad_input;
332 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
334 bUndefined = sal_False;
335 goto bad_input;
338 if (nChar < 0x80)
339 if (pDestBufPtr != pDestBufEnd)
340 *pDestBufPtr++ = (sal_Char) nChar;
341 else
342 goto no_output;
343 else
345 sal_uInt32 nBytes = 0;
346 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
347 if (nOffset != -1)
349 nOffset
350 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
351 >> 8)];
352 if (nOffset != -1)
354 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
355 sal_uInt32 nFirst = nFirstLast & 0xFF;
356 sal_uInt32 nLast = nFirstLast >> 8;
357 sal_uInt32 nIndex = nChar & 0xFF;
358 if (nIndex >= nFirst && nIndex <= nLast)
360 nBytes
361 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
365 if (nBytes == 0)
367 sal_uInt32 nIndex1 = nChar >> 8;
368 if (nIndex1 < 0x100)
370 sal_uInt32 nIndex2 = nChar & 0xFF;
371 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
372 if (nIndex2 >= nFirst
373 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
374 nBytes = pBig5Data[nIndex1].
375 mpToUniTrailTab[nIndex2 - nFirst];
378 if (nBytes == 0)
380 ImplDBCSEUDCData const * p
381 = ((ImplBig5HkscsConverterData const *) pData)->
382 m_pEudcData;
383 sal_uInt32 nCount
384 = ((ImplBig5HkscsConverterData const *) pData)->
385 m_nEudcCount;
386 sal_uInt32 i;
387 for (i = 0; i < nCount; ++i) {
388 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
390 sal_uInt32 nIndex = nChar - p->mnUniStart;
391 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
392 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
393 sal_uInt32 nSize;
394 nBytes = (p->mnLeadStart + nLeadOff) << 8;
395 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
396 if (nTrailOff < nSize)
398 nBytes |= p->mnTrail1Start + nTrailOff;
399 break;
401 nTrailOff -= nSize;
402 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
403 if (nTrailOff < nSize)
405 nBytes |= p->mnTrail2Start + nTrailOff;
406 break;
408 nTrailOff -= nSize;
409 nBytes |= p->mnTrail3Start + nTrailOff;
410 break;
412 ++p;
415 if (nBytes == 0)
416 goto bad_input;
417 if (pDestBufEnd - pDestBufPtr >= 2)
419 *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
420 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
422 else
423 goto no_output;
425 nHighSurrogate = 0;
426 continue;
428 bad_input:
429 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
430 nChar,
431 nFlags,
432 &pDestBufPtr,
433 pDestBufEnd,
434 &nInfo,
435 NULL,
437 NULL))
439 case IMPL_BAD_INPUT_STOP:
440 nHighSurrogate = 0;
441 break;
443 case IMPL_BAD_INPUT_CONTINUE:
444 nHighSurrogate = 0;
445 continue;
447 case IMPL_BAD_INPUT_NO_OUTPUT:
448 goto no_output;
450 break;
452 no_output:
453 --pSrcBuf;
454 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
455 break;
458 if (nHighSurrogate != 0
459 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
460 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
461 == 0)
463 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
464 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
465 else
466 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
468 nFlags,
469 &pDestBufPtr,
470 pDestBufEnd,
471 &nInfo,
472 NULL,
474 NULL))
476 case IMPL_BAD_INPUT_STOP:
477 case IMPL_BAD_INPUT_CONTINUE:
478 nHighSurrogate = 0;
479 break;
481 case IMPL_BAD_INPUT_NO_OUTPUT:
482 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
483 break;
487 if (pContext)
488 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
489 = nHighSurrogate;
490 if (pInfo)
491 *pInfo = nInfo;
492 if (pSrcCvtChars)
493 *pSrcCvtChars = nConverted;
495 return pDestBufPtr - pDestBuf;