bump product version to 5.0.4.1
[LibreOffice.git] / sal / textenc / convertbig5hkscs.cxx
blob034b206cb9f08b42026695dce728e1d7a43036a5
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include <cassert>
24 #include "rtl/textcvt.h"
25 #include "sal/types.h"
27 #include "context.hxx"
28 #include "convertbig5hkscs.hxx"
29 #include "converter.hxx"
30 #include "tenchelp.hxx"
31 #include "unichars.hxx"
33 namespace {
35 struct ImplBig5HkscsToUnicodeContext
37 sal_Int32 m_nRow; // 0--255; 0 means none
42 void * ImplCreateBig5HkscsToUnicodeContext()
44 ImplBig5HkscsToUnicodeContext * pContext =
45 new ImplBig5HkscsToUnicodeContext;
46 pContext->m_nRow = 0;
47 return pContext;
50 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
52 if (pContext)
53 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
56 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
58 delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
61 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
62 void * pContext,
63 char const * pSrcBuf,
64 sal_Size nSrcBytes,
65 sal_Unicode * pDestBuf,
66 sal_Size nDestChars,
67 sal_uInt32 nFlags,
68 sal_uInt32 * pInfo,
69 sal_Size * pSrcCvtBytes)
71 sal_uInt16 const * pBig5Hkscs2001Data
72 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
73 m_pBig5Hkscs2001ToUnicodeData;
74 sal_Int32 const * pBig5Hkscs2001RowOffsets
75 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
76 m_pBig5Hkscs2001ToUnicodeRowOffsets;
77 ImplDBCSToUniLeadTab const * pBig5Data
78 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
79 m_pBig5ToUnicodeData;
80 sal_Int32 nRow = 0;
81 sal_uInt32 nInfo = 0;
82 sal_Size nConverted = 0;
83 sal_Unicode * pDestBufPtr = pDestBuf;
84 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
86 if (pContext)
87 nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
89 for (; nConverted < nSrcBytes; ++nConverted)
91 bool bUndefined = true;
92 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
93 if (nRow == 0)
94 if (nChar < 0x80)
95 if (pDestBufPtr != pDestBufEnd)
96 *pDestBufPtr++ = (sal_Unicode) nChar;
97 else
98 goto no_output;
99 else if (nChar >= 0x81 && nChar <= 0xFE)
100 nRow = nChar;
101 else
103 bUndefined = false;
104 goto bad_input;
106 else
107 if ((nChar >= 0x40 && nChar <= 0x7E)
108 || (nChar >= 0xA1 && nChar <= 0xFE))
110 sal_uInt32 nUnicode = 0xFFFF;
111 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
112 sal_uInt32 nFirst=0;
113 sal_uInt32 nLast=0;
114 if (nOffset != -1)
116 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
117 nFirst = nFirstLast & 0xFF;
118 nLast = nFirstLast >> 8;
119 if (nChar >= nFirst && nChar <= nLast)
120 nUnicode
121 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
123 if (nUnicode == 0xFFFF)
125 sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
126 if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
128 nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
129 if (nUnicode == 0)
130 nUnicode = 0xFFFF;
131 assert(!ImplIsHighSurrogate(nUnicode));
134 if (nUnicode == 0xFFFF)
136 ImplDBCSEUDCData const * p
137 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
138 m_pEudcData;
139 sal_uInt32 nCount
140 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
141 m_nEudcCount;
142 sal_uInt32 i;
143 for (i = 0; i < nCount; ++i)
145 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
147 if (nChar < p->mnTrail1Start)
148 break;
149 if (nChar <= p->mnTrail1End)
151 nUnicode
152 = p->mnUniStart
153 + (nRow - p->mnLeadStart)
154 * p->mnTrailRangeCount
155 + (nChar - p->mnTrail1Start);
156 break;
158 if (p->mnTrailCount < 2
159 || nChar < p->mnTrail2Start)
160 break;
161 if (nChar <= p->mnTrail2End)
163 nUnicode
164 = p->mnUniStart
165 + (nRow - p->mnLeadStart)
166 * p->mnTrailRangeCount
167 + (nChar - p->mnTrail2Start)
168 + (p->mnTrail1End - p->mnTrail1Start
169 + 1);
170 break;
172 if (p->mnTrailCount < 3
173 || nChar < p->mnTrail3Start)
174 break;
175 if (nChar <= p->mnTrail3End)
177 nUnicode
178 = p->mnUniStart
179 + (nRow - p->mnLeadStart)
180 * p->mnTrailRangeCount
181 + (nChar - p->mnTrail3Start)
182 + (p->mnTrail1End - p->mnTrail1Start
183 + 1)
184 + (p->mnTrail2End - p->mnTrail2Start
185 + 1);
186 break;
188 break;
190 ++p;
192 assert(!ImplIsHighSurrogate(nUnicode));
194 if (nUnicode == 0xFFFF)
195 goto bad_input;
196 if (ImplIsHighSurrogate(nUnicode))
197 if (pDestBufEnd - pDestBufPtr >= 2)
199 nOffset += nLast - nFirst + 1;
200 nFirst = pBig5Hkscs2001Data[nOffset++];
201 *pDestBufPtr++ = (sal_Unicode) nUnicode;
202 *pDestBufPtr++
203 = (sal_Unicode) pBig5Hkscs2001Data[
204 nOffset + (nChar - nFirst)];
206 else
207 goto no_output;
208 else
209 if (pDestBufPtr != pDestBufEnd)
210 *pDestBufPtr++ = (sal_Unicode) nUnicode;
211 else
212 goto no_output;
213 nRow = 0;
215 else
217 bUndefined = false;
218 goto bad_input;
220 continue;
222 bad_input:
223 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
224 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
225 &nInfo))
227 case sal::detail::textenc::BAD_INPUT_STOP:
228 nRow = 0;
229 break;
231 case sal::detail::textenc::BAD_INPUT_CONTINUE:
232 nRow = 0;
233 continue;
235 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
236 goto no_output;
238 break;
240 no_output:
241 --pSrcBuf;
242 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
243 break;
246 if (nRow != 0
247 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
248 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
249 == 0)
251 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
252 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
253 else
254 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
255 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
256 &nInfo))
258 case sal::detail::textenc::BAD_INPUT_STOP:
259 case sal::detail::textenc::BAD_INPUT_CONTINUE:
260 nRow = 0;
261 break;
263 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
264 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
265 break;
269 if (pContext)
270 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
271 if (pInfo)
272 *pInfo = nInfo;
273 if (pSrcCvtBytes)
274 *pSrcCvtBytes = nConverted;
276 return pDestBufPtr - pDestBuf;
279 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
280 void * pContext,
281 sal_Unicode const * pSrcBuf,
282 sal_Size nSrcChars,
283 char * pDestBuf,
284 sal_Size nDestBytes,
285 sal_uInt32 nFlags,
286 sal_uInt32 * pInfo,
287 sal_Size * pSrcCvtChars)
289 sal_uInt16 const * pBig5Hkscs2001Data
290 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
291 m_pUnicodeToBig5Hkscs2001Data;
292 sal_Int32 const * pBig5Hkscs2001PageOffsets
293 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
294 m_pUnicodeToBig5Hkscs2001PageOffsets;
295 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
296 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
297 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
298 ImplUniToDBCSHighTab const * pBig5Data
299 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
300 m_pUnicodeToBig5Data;
301 sal_Unicode nHighSurrogate = 0;
302 sal_uInt32 nInfo = 0;
303 sal_Size nConverted = 0;
304 char * pDestBufPtr = pDestBuf;
305 char * pDestBufEnd = pDestBuf + nDestBytes;
307 if (pContext)
308 nHighSurrogate
309 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
311 for (; nConverted < nSrcChars; ++nConverted)
313 bool bUndefined = true;
314 sal_uInt32 nChar = *pSrcBuf++;
315 if (nHighSurrogate == 0)
317 if (ImplIsHighSurrogate(nChar))
319 nHighSurrogate = (sal_Unicode) nChar;
320 continue;
323 else if (ImplIsLowSurrogate(nChar))
324 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
325 else
327 bUndefined = false;
328 goto bad_input;
331 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
333 bUndefined = false;
334 goto bad_input;
337 if (nChar < 0x80)
338 if (pDestBufPtr != pDestBufEnd)
339 *pDestBufPtr++ = static_cast< char >(nChar);
340 else
341 goto no_output;
342 else
344 sal_uInt32 nBytes = 0;
345 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
346 if (nOffset != -1)
348 nOffset
349 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
350 >> 8)];
351 if (nOffset != -1)
353 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
354 sal_uInt32 nFirst = nFirstLast & 0xFF;
355 sal_uInt32 nLast = nFirstLast >> 8;
356 sal_uInt32 nIndex = nChar & 0xFF;
357 if (nIndex >= nFirst && nIndex <= nLast)
359 nBytes
360 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
364 if (nBytes == 0)
366 sal_uInt32 nIndex1 = nChar >> 8;
367 if (nIndex1 < 0x100)
369 sal_uInt32 nIndex2 = nChar & 0xFF;
370 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
371 if (nIndex2 >= nFirst
372 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
373 nBytes = pBig5Data[nIndex1].
374 mpToUniTrailTab[nIndex2 - nFirst];
377 if (nBytes == 0)
379 ImplDBCSEUDCData const * p
380 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
381 m_pEudcData;
382 sal_uInt32 nCount
383 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
384 m_nEudcCount;
385 sal_uInt32 i;
386 for (i = 0; i < nCount; ++i) {
387 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
389 sal_uInt32 nIndex = nChar - p->mnUniStart;
390 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
391 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
392 sal_uInt32 nSize;
393 nBytes = (p->mnLeadStart + nLeadOff) << 8;
394 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
395 if (nTrailOff < nSize)
397 nBytes |= p->mnTrail1Start + nTrailOff;
398 break;
400 nTrailOff -= nSize;
401 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
402 if (nTrailOff < nSize)
404 nBytes |= p->mnTrail2Start + nTrailOff;
405 break;
407 nTrailOff -= nSize;
408 nBytes |= p->mnTrail3Start + nTrailOff;
409 break;
411 ++p;
414 if (nBytes == 0)
415 goto bad_input;
416 if (pDestBufEnd - pDestBufPtr >= 2)
418 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
419 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
421 else
422 goto no_output;
424 nHighSurrogate = 0;
425 continue;
427 bad_input:
428 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
429 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
430 &nInfo, NULL, 0, NULL))
432 case sal::detail::textenc::BAD_INPUT_STOP:
433 nHighSurrogate = 0;
434 break;
436 case sal::detail::textenc::BAD_INPUT_CONTINUE:
437 nHighSurrogate = 0;
438 continue;
440 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
441 goto no_output;
443 break;
445 no_output:
446 --pSrcBuf;
447 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
448 break;
451 if (nHighSurrogate != 0
452 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
453 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
454 == 0)
456 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
457 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
458 else
459 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
460 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
461 NULL, 0, NULL))
463 case sal::detail::textenc::BAD_INPUT_STOP:
464 case sal::detail::textenc::BAD_INPUT_CONTINUE:
465 nHighSurrogate = 0;
466 break;
468 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
469 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
470 break;
474 if (pContext)
475 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
476 = nHighSurrogate;
477 if (pInfo)
478 *pInfo = nInfo;
479 if (pSrcCvtChars)
480 *pSrcCvtChars = nConverted;
482 return pDestBufPtr - pDestBuf;
485 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */