Version 3.6.0.2, tag libreoffice-3.6.0.2
[LibreOffice.git] / sal / textenc / convertbig5hkscs.cxx
blob22bf31091cde3a3764591a0d255f969af81c35ed
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include <cassert>
33 #include "rtl/textcvt.h"
34 #include "sal/types.h"
36 #include "context.hxx"
37 #include "convertbig5hkscs.hxx"
38 #include "converter.hxx"
39 #include "tenchelp.hxx"
40 #include "unichars.hxx"
42 namespace {
44 struct ImplBig5HkscsToUnicodeContext
46 sal_Int32 m_nRow; // 0--255; 0 means none
51 void * ImplCreateBig5HkscsToUnicodeContext()
53 ImplBig5HkscsToUnicodeContext * pContext =
54 new ImplBig5HkscsToUnicodeContext;
55 pContext->m_nRow = 0;
56 return pContext;
59 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
61 if (pContext)
62 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
65 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
67 delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
70 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
71 void * pContext,
72 char const * pSrcBuf,
73 sal_Size nSrcBytes,
74 sal_Unicode * pDestBuf,
75 sal_Size nDestChars,
76 sal_uInt32 nFlags,
77 sal_uInt32 * pInfo,
78 sal_Size * pSrcCvtBytes)
80 sal_uInt16 const * pBig5Hkscs2001Data
81 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
82 m_pBig5Hkscs2001ToUnicodeData;
83 sal_Int32 const * pBig5Hkscs2001RowOffsets
84 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
85 m_pBig5Hkscs2001ToUnicodeRowOffsets;
86 ImplDBCSToUniLeadTab const * pBig5Data
87 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
88 m_pBig5ToUnicodeData;
89 sal_Int32 nRow = 0;
90 sal_uInt32 nInfo = 0;
91 sal_Size nConverted = 0;
92 sal_Unicode * pDestBufPtr = pDestBuf;
93 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
95 if (pContext)
96 nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
98 for (; nConverted < nSrcBytes; ++nConverted)
100 bool bUndefined = true;
101 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
102 if (nRow == 0)
103 if (nChar < 0x80)
104 if (pDestBufPtr != pDestBufEnd)
105 *pDestBufPtr++ = (sal_Unicode) nChar;
106 else
107 goto no_output;
108 else if (nChar >= 0x81 && nChar <= 0xFE)
109 nRow = nChar;
110 else
112 bUndefined = false;
113 goto bad_input;
115 else
116 if ((nChar >= 0x40 && nChar <= 0x7E)
117 || (nChar >= 0xA1 && nChar <= 0xFE))
119 sal_uInt32 nUnicode = 0xFFFF;
120 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
121 sal_uInt32 nFirst=0;
122 sal_uInt32 nLast=0;
123 if (nOffset != -1)
125 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
126 nFirst = nFirstLast & 0xFF;
127 nLast = nFirstLast >> 8;
128 if (nChar >= nFirst && nChar <= nLast)
129 nUnicode
130 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
132 if (nUnicode == 0xFFFF)
134 sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
135 if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
137 nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
138 if (nUnicode == 0)
139 nUnicode = 0xFFFF;
140 assert(!ImplIsHighSurrogate(nUnicode));
143 if (nUnicode == 0xFFFF)
145 ImplDBCSEUDCData const * p
146 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
147 m_pEudcData;
148 sal_uInt32 nCount
149 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
150 m_nEudcCount;
151 sal_uInt32 i;
152 for (i = 0; i < nCount; ++i)
154 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
156 if (nChar < p->mnTrail1Start)
157 break;
158 if (nChar <= p->mnTrail1End)
160 nUnicode
161 = p->mnUniStart
162 + (nRow - p->mnLeadStart)
163 * p->mnTrailRangeCount
164 + (nChar - p->mnTrail1Start);
165 break;
167 if (p->mnTrailCount < 2
168 || nChar < p->mnTrail2Start)
169 break;
170 if (nChar <= p->mnTrail2End)
172 nUnicode
173 = p->mnUniStart
174 + (nRow - p->mnLeadStart)
175 * p->mnTrailRangeCount
176 + (nChar - p->mnTrail2Start)
177 + (p->mnTrail1End - p->mnTrail1Start
178 + 1);
179 break;
181 if (p->mnTrailCount < 3
182 || nChar < p->mnTrail3Start)
183 break;
184 if (nChar <= p->mnTrail3End)
186 nUnicode
187 = p->mnUniStart
188 + (nRow - p->mnLeadStart)
189 * p->mnTrailRangeCount
190 + (nChar - p->mnTrail3Start)
191 + (p->mnTrail1End - p->mnTrail1Start
192 + 1)
193 + (p->mnTrail2End - p->mnTrail2Start
194 + 1);
195 break;
197 break;
199 ++p;
201 assert(!ImplIsHighSurrogate(nUnicode));
203 if (nUnicode == 0xFFFF)
204 goto bad_input;
205 if (ImplIsHighSurrogate(nUnicode))
206 if (pDestBufEnd - pDestBufPtr >= 2)
208 nOffset += nLast - nFirst + 1;
209 nFirst = pBig5Hkscs2001Data[nOffset++];
210 *pDestBufPtr++ = (sal_Unicode) nUnicode;
211 *pDestBufPtr++
212 = (sal_Unicode) pBig5Hkscs2001Data[
213 nOffset + (nChar - nFirst)];
215 else
216 goto no_output;
217 else
218 if (pDestBufPtr != pDestBufEnd)
219 *pDestBufPtr++ = (sal_Unicode) nUnicode;
220 else
221 goto no_output;
222 nRow = 0;
224 else
226 bUndefined = false;
227 goto bad_input;
229 continue;
231 bad_input:
232 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
233 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
234 &nInfo))
236 case sal::detail::textenc::BAD_INPUT_STOP:
237 nRow = 0;
238 break;
240 case sal::detail::textenc::BAD_INPUT_CONTINUE:
241 nRow = 0;
242 continue;
244 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
245 goto no_output;
247 break;
249 no_output:
250 --pSrcBuf;
251 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
252 break;
255 if (nRow != 0
256 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
257 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
258 == 0)
260 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
261 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
262 else
263 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
264 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
265 &nInfo))
267 case sal::detail::textenc::BAD_INPUT_STOP:
268 case sal::detail::textenc::BAD_INPUT_CONTINUE:
269 nRow = 0;
270 break;
272 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
273 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
274 break;
278 if (pContext)
279 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
280 if (pInfo)
281 *pInfo = nInfo;
282 if (pSrcCvtBytes)
283 *pSrcCvtBytes = nConverted;
285 return pDestBufPtr - pDestBuf;
288 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
289 void * pContext,
290 sal_Unicode const * pSrcBuf,
291 sal_Size nSrcChars,
292 char * pDestBuf,
293 sal_Size nDestBytes,
294 sal_uInt32 nFlags,
295 sal_uInt32 * pInfo,
296 sal_Size * pSrcCvtChars)
298 sal_uInt16 const * pBig5Hkscs2001Data
299 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
300 m_pUnicodeToBig5Hkscs2001Data;
301 sal_Int32 const * pBig5Hkscs2001PageOffsets
302 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
303 m_pUnicodeToBig5Hkscs2001PageOffsets;
304 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
305 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
306 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
307 ImplUniToDBCSHighTab const * pBig5Data
308 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
309 m_pUnicodeToBig5Data;
310 sal_Unicode nHighSurrogate = 0;
311 sal_uInt32 nInfo = 0;
312 sal_Size nConverted = 0;
313 char * pDestBufPtr = pDestBuf;
314 char * pDestBufEnd = pDestBuf + nDestBytes;
316 if (pContext)
317 nHighSurrogate
318 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
320 for (; nConverted < nSrcChars; ++nConverted)
322 bool bUndefined = true;
323 sal_uInt32 nChar = *pSrcBuf++;
324 if (nHighSurrogate == 0)
326 if (ImplIsHighSurrogate(nChar))
328 nHighSurrogate = (sal_Unicode) nChar;
329 continue;
332 else if (ImplIsLowSurrogate(nChar))
333 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
334 else
336 bUndefined = false;
337 goto bad_input;
340 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
342 bUndefined = false;
343 goto bad_input;
346 if (nChar < 0x80)
347 if (pDestBufPtr != pDestBufEnd)
348 *pDestBufPtr++ = static_cast< char >(nChar);
349 else
350 goto no_output;
351 else
353 sal_uInt32 nBytes = 0;
354 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
355 if (nOffset != -1)
357 nOffset
358 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
359 >> 8)];
360 if (nOffset != -1)
362 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
363 sal_uInt32 nFirst = nFirstLast & 0xFF;
364 sal_uInt32 nLast = nFirstLast >> 8;
365 sal_uInt32 nIndex = nChar & 0xFF;
366 if (nIndex >= nFirst && nIndex <= nLast)
368 nBytes
369 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
373 if (nBytes == 0)
375 sal_uInt32 nIndex1 = nChar >> 8;
376 if (nIndex1 < 0x100)
378 sal_uInt32 nIndex2 = nChar & 0xFF;
379 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
380 if (nIndex2 >= nFirst
381 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
382 nBytes = pBig5Data[nIndex1].
383 mpToUniTrailTab[nIndex2 - nFirst];
386 if (nBytes == 0)
388 ImplDBCSEUDCData const * p
389 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
390 m_pEudcData;
391 sal_uInt32 nCount
392 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
393 m_nEudcCount;
394 sal_uInt32 i;
395 for (i = 0; i < nCount; ++i) {
396 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
398 sal_uInt32 nIndex = nChar - p->mnUniStart;
399 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
400 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
401 sal_uInt32 nSize;
402 nBytes = (p->mnLeadStart + nLeadOff) << 8;
403 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
404 if (nTrailOff < nSize)
406 nBytes |= p->mnTrail1Start + nTrailOff;
407 break;
409 nTrailOff -= nSize;
410 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
411 if (nTrailOff < nSize)
413 nBytes |= p->mnTrail2Start + nTrailOff;
414 break;
416 nTrailOff -= nSize;
417 nBytes |= p->mnTrail3Start + nTrailOff;
418 break;
420 ++p;
423 if (nBytes == 0)
424 goto bad_input;
425 if (pDestBufEnd - pDestBufPtr >= 2)
427 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
428 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
430 else
431 goto no_output;
433 nHighSurrogate = 0;
434 continue;
436 bad_input:
437 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
438 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
439 &nInfo, NULL, 0, NULL))
441 case sal::detail::textenc::BAD_INPUT_STOP:
442 nHighSurrogate = 0;
443 break;
445 case sal::detail::textenc::BAD_INPUT_CONTINUE:
446 nHighSurrogate = 0;
447 continue;
449 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
450 goto no_output;
452 break;
454 no_output:
455 --pSrcBuf;
456 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
457 break;
460 if (nHighSurrogate != 0
461 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
462 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
463 == 0)
465 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
466 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
467 else
468 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
469 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
470 NULL, 0, NULL))
472 case sal::detail::textenc::BAD_INPUT_STOP:
473 case sal::detail::textenc::BAD_INPUT_CONTINUE:
474 nHighSurrogate = 0;
475 break;
477 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
478 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
479 break;
483 if (pContext)
484 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
485 = nHighSurrogate;
486 if (pInfo)
487 *pInfo = nInfo;
488 if (pSrcCvtChars)
489 *pSrcCvtChars = nConverted;
491 return pDestBufPtr - pDestBuf;
494 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */