merged tag ooo/DEV300_m102
[LibreOffice.git] / sal / textenc / convertbig5hkscs.c
blob06fabe33a9b9fd438e1f87ca2c4a53c47ae179ef
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #include "convertbig5hkscs.h"
29 #include "context.h"
30 #include "converter.h"
31 #include "tenchelp.h"
32 #include "unichars.h"
33 #include "osl/diagnose.h"
34 #include "rtl/alloc.h"
35 #include "rtl/textcvt.h"
36 #include "sal/types.h"
38 typedef struct
40 sal_Int32 m_nRow; /* 0--255; 0 means none */
41 } ImplBig5HkscsToUnicodeContext;
43 void * ImplCreateBig5HkscsToUnicodeContext(void)
45 void * pContext
46 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext));
47 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
48 return pContext;
51 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
53 if (pContext)
54 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
57 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,
58 void * pContext,
59 sal_Char const * pSrcBuf,
60 sal_Size nSrcBytes,
61 sal_Unicode * pDestBuf,
62 sal_Size nDestChars,
63 sal_uInt32 nFlags,
64 sal_uInt32 * pInfo,
65 sal_Size * pSrcCvtBytes)
67 sal_uInt16 const * pBig5Hkscs2001Data
68 = ((ImplBig5HkscsConverterData const *) pData)->
69 m_pBig5Hkscs2001ToUnicodeData;
70 sal_Int32 const * pBig5Hkscs2001RowOffsets
71 = ((ImplBig5HkscsConverterData const *) pData)->
72 m_pBig5Hkscs2001ToUnicodeRowOffsets;
73 ImplDBCSToUniLeadTab const * pBig5Data
74 = ((ImplBig5HkscsConverterData const *) pData)->
75 m_pBig5ToUnicodeData;
76 sal_Int32 nRow = 0;
77 sal_uInt32 nInfo = 0;
78 sal_Size nConverted = 0;
79 sal_Unicode * pDestBufPtr = pDestBuf;
80 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
82 if (pContext)
83 nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow;
85 for (; nConverted < nSrcBytes; ++nConverted)
87 sal_Bool bUndefined = sal_True;
88 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
89 if (nRow == 0)
90 if (nChar < 0x80)
91 if (pDestBufPtr != pDestBufEnd)
92 *pDestBufPtr++ = (sal_Unicode) nChar;
93 else
94 goto no_output;
95 else if (nChar >= 0x81 && nChar <= 0xFE)
96 nRow = nChar;
97 else
99 bUndefined = sal_False;
100 goto bad_input;
102 else
103 if ((nChar >= 0x40 && nChar <= 0x7E)
104 || (nChar >= 0xA1 && nChar <= 0xFE))
106 sal_uInt32 nUnicode = 0xFFFF;
107 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
108 sal_uInt32 nFirst=0;
109 sal_uInt32 nLast=0;
110 if (nOffset != -1)
112 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
113 nFirst = nFirstLast & 0xFF;
114 nLast = nFirstLast >> 8;
115 if (nChar >= nFirst && nChar <= nLast)
116 nUnicode
117 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
119 if (nUnicode == 0xFFFF)
121 sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart;
122 if (nChar >= nFirst
123 && nChar <= pBig5Data[nRow].mnTrailEnd)
125 nUnicode
126 = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst];
127 if (nUnicode == 0)
128 nUnicode = 0xFFFF;
129 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
132 if (nUnicode == 0xFFFF)
134 ImplDBCSEUDCData const * p
135 = ((ImplBig5HkscsConverterData const *) pData)->
136 m_pEudcData;
137 sal_uInt32 nCount
138 = ((ImplBig5HkscsConverterData const *) pData)->
139 m_nEudcCount;
140 sal_uInt32 i;
141 for (i = 0; i < nCount; ++i)
143 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
145 if (nChar < p->mnTrail1Start)
146 break;
147 if (nChar <= p->mnTrail1End)
149 nUnicode
150 = p->mnUniStart
151 + (nRow - p->mnLeadStart)
152 * p->mnTrailRangeCount
153 + (nChar - p->mnTrail1Start);
154 break;
156 if (p->mnTrailCount < 2
157 || nChar < p->mnTrail2Start)
158 break;
159 if (nChar <= p->mnTrail2End)
161 nUnicode
162 = p->mnUniStart
163 + (nRow - p->mnLeadStart)
164 * p->mnTrailRangeCount
165 + (nChar - p->mnTrail2Start)
166 + (p->mnTrail1End - p->mnTrail1Start
167 + 1);
168 break;
170 if (p->mnTrailCount < 3
171 || nChar < p->mnTrail3Start)
172 break;
173 if (nChar <= p->mnTrail3End)
175 nUnicode
176 = p->mnUniStart
177 + (nRow - p->mnLeadStart)
178 * p->mnTrailRangeCount
179 + (nChar - p->mnTrail3Start)
180 + (p->mnTrail1End - p->mnTrail1Start
181 + 1)
182 + (p->mnTrail2End - p->mnTrail2Start
183 + 1);
184 break;
186 break;
188 ++p;
190 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
192 if (nUnicode == 0xFFFF)
193 goto bad_input;
194 if (ImplIsHighSurrogate(nUnicode))
195 if (pDestBufEnd - pDestBufPtr >= 2)
197 nOffset += nLast - nFirst + 1;
198 nFirst = pBig5Hkscs2001Data[nOffset++];
199 *pDestBufPtr++ = (sal_Unicode) nUnicode;
200 *pDestBufPtr++
201 = (sal_Unicode) pBig5Hkscs2001Data[
202 nOffset + (nChar - nFirst)];
204 else
205 goto no_output;
206 else
207 if (pDestBufPtr != pDestBufEnd)
208 *pDestBufPtr++ = (sal_Unicode) nUnicode;
209 else
210 goto no_output;
211 nRow = 0;
213 else
215 bUndefined = sal_False;
216 goto bad_input;
218 continue;
220 bad_input:
221 switch (ImplHandleBadInputTextToUnicodeConversion(
222 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
223 &nInfo))
225 case IMPL_BAD_INPUT_STOP:
226 nRow = 0;
227 break;
229 case IMPL_BAD_INPUT_CONTINUE:
230 nRow = 0;
231 continue;
233 case IMPL_BAD_INPUT_NO_OUTPUT:
234 goto no_output;
236 break;
238 no_output:
239 --pSrcBuf;
240 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
241 break;
244 if (nRow != 0
245 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
246 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
247 == 0)
249 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
250 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
251 else
252 switch (ImplHandleBadInputTextToUnicodeConversion(
253 sal_False, sal_True, 0, nFlags, &pDestBufPtr,
254 pDestBufEnd, &nInfo))
256 case IMPL_BAD_INPUT_STOP:
257 case IMPL_BAD_INPUT_CONTINUE:
258 nRow = 0;
259 break;
261 case IMPL_BAD_INPUT_NO_OUTPUT:
262 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
263 break;
267 if (pContext)
268 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow;
269 if (pInfo)
270 *pInfo = nInfo;
271 if (pSrcCvtBytes)
272 *pSrcCvtBytes = nConverted;
274 return pDestBufPtr - pDestBuf;
277 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,
278 void * pContext,
279 sal_Unicode const * pSrcBuf,
280 sal_Size nSrcChars,
281 sal_Char * pDestBuf,
282 sal_Size nDestBytes,
283 sal_uInt32 nFlags,
284 sal_uInt32 * pInfo,
285 sal_Size * pSrcCvtChars)
287 sal_uInt16 const * pBig5Hkscs2001Data
288 = ((ImplBig5HkscsConverterData const *) pData)->
289 m_pUnicodeToBig5Hkscs2001Data;
290 sal_Int32 const * pBig5Hkscs2001PageOffsets
291 = ((ImplBig5HkscsConverterData const *) pData)->
292 m_pUnicodeToBig5Hkscs2001PageOffsets;
293 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
294 = ((ImplBig5HkscsConverterData const *) pData)->
295 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
296 ImplUniToDBCSHighTab const * pBig5Data
297 = ((ImplBig5HkscsConverterData const *) pData)->
298 m_pUnicodeToBig5Data;
299 sal_Unicode nHighSurrogate = 0;
300 sal_uInt32 nInfo = 0;
301 sal_Size nConverted = 0;
302 sal_Char * pDestBufPtr = pDestBuf;
303 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
305 if (pContext)
306 nHighSurrogate
307 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
309 for (; nConverted < nSrcChars; ++nConverted)
311 sal_Bool bUndefined = sal_True;
312 sal_uInt32 nChar = *pSrcBuf++;
313 if (nHighSurrogate == 0)
315 if (ImplIsHighSurrogate(nChar))
317 nHighSurrogate = (sal_Unicode) nChar;
318 continue;
321 else if (ImplIsLowSurrogate(nChar))
322 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
323 else
325 bUndefined = sal_False;
326 goto bad_input;
329 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
331 bUndefined = sal_False;
332 goto bad_input;
335 if (nChar < 0x80)
336 if (pDestBufPtr != pDestBufEnd)
337 *pDestBufPtr++ = (sal_Char) nChar;
338 else
339 goto no_output;
340 else
342 sal_uInt32 nBytes = 0;
343 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
344 if (nOffset != -1)
346 nOffset
347 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
348 >> 8)];
349 if (nOffset != -1)
351 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
352 sal_uInt32 nFirst = nFirstLast & 0xFF;
353 sal_uInt32 nLast = nFirstLast >> 8;
354 sal_uInt32 nIndex = nChar & 0xFF;
355 if (nIndex >= nFirst && nIndex <= nLast)
357 nBytes
358 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
362 if (nBytes == 0)
364 sal_uInt32 nIndex1 = nChar >> 8;
365 if (nIndex1 < 0x100)
367 sal_uInt32 nIndex2 = nChar & 0xFF;
368 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
369 if (nIndex2 >= nFirst
370 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
371 nBytes = pBig5Data[nIndex1].
372 mpToUniTrailTab[nIndex2 - nFirst];
375 if (nBytes == 0)
377 ImplDBCSEUDCData const * p
378 = ((ImplBig5HkscsConverterData const *) pData)->
379 m_pEudcData;
380 sal_uInt32 nCount
381 = ((ImplBig5HkscsConverterData const *) pData)->
382 m_nEudcCount;
383 sal_uInt32 i;
384 for (i = 0; i < nCount; ++i) {
385 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
387 sal_uInt32 nIndex = nChar - p->mnUniStart;
388 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
389 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
390 sal_uInt32 nSize;
391 nBytes = (p->mnLeadStart + nLeadOff) << 8;
392 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
393 if (nTrailOff < nSize)
395 nBytes |= p->mnTrail1Start + nTrailOff;
396 break;
398 nTrailOff -= nSize;
399 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
400 if (nTrailOff < nSize)
402 nBytes |= p->mnTrail2Start + nTrailOff;
403 break;
405 nTrailOff -= nSize;
406 nBytes |= p->mnTrail3Start + nTrailOff;
407 break;
409 ++p;
412 if (nBytes == 0)
413 goto bad_input;
414 if (pDestBufEnd - pDestBufPtr >= 2)
416 *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
417 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
419 else
420 goto no_output;
422 nHighSurrogate = 0;
423 continue;
425 bad_input:
426 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
427 nChar,
428 nFlags,
429 &pDestBufPtr,
430 pDestBufEnd,
431 &nInfo,
432 NULL,
434 NULL))
436 case IMPL_BAD_INPUT_STOP:
437 nHighSurrogate = 0;
438 break;
440 case IMPL_BAD_INPUT_CONTINUE:
441 nHighSurrogate = 0;
442 continue;
444 case IMPL_BAD_INPUT_NO_OUTPUT:
445 goto no_output;
447 break;
449 no_output:
450 --pSrcBuf;
451 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
452 break;
455 if (nHighSurrogate != 0
456 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
457 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
458 == 0)
460 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
461 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
462 else
463 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
465 nFlags,
466 &pDestBufPtr,
467 pDestBufEnd,
468 &nInfo,
469 NULL,
471 NULL))
473 case IMPL_BAD_INPUT_STOP:
474 case IMPL_BAD_INPUT_CONTINUE:
475 nHighSurrogate = 0;
476 break;
478 case IMPL_BAD_INPUT_NO_OUTPUT:
479 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
480 break;
484 if (pContext)
485 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
486 = nHighSurrogate;
487 if (pInfo)
488 *pInfo = nInfo;
489 if (pSrcCvtChars)
490 *pSrcCvtChars = nConverted;
492 return pDestBufPtr - pDestBuf;