Avoid potential negative array index access to cached text.
[LibreOffice.git] / sal / textenc / convertbig5hkscs.cxx
blob 275e91d3d3b482695032310986933a15cbbc774c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <cassert>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "convertbig5hkscs.hxx"
30 #include "converter.hxx"
31 #include "tenchelp.hxx"
33 namespace {
35 struct ImplBig5HkscsToUnicodeContext
37 sal_Int32 m_nRow; // 0--255; 0 means none
42 void * ImplCreateBig5HkscsToUnicodeContext()
44 ImplBig5HkscsToUnicodeContext * pContext =
45 new ImplBig5HkscsToUnicodeContext;
46 pContext->m_nRow = 0;
47 return pContext;
50 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
52 if (pContext)
53 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
56 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
58 delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
// Converts Big5-HKSCS encoded bytes into UTF-16 code units.
//
// pData        - ImplBig5HkscsConverterData supplying the Big5-HKSCS:2001
//                table with its row offsets, the Big5 table and the EUDC
//                ranges.
// pContext     - optional ImplBig5HkscsToUnicodeContext; a pending lead byte
//                is read from it on entry and written back on exit.
// pSrcBuf      - source bytes; nSrcBytes is their count.
// pDestBuf     - destination buffer; nDestChars is its capacity in
//                sal_Unicode units.
// nFlags       - RTL_TEXTTOUNICODE_FLAGS_*; tested here for FLUSH and passed
//                on to handleBadInputTextToUnicodeConversion.
// pInfo        - if non-null, receives the accumulated
//                RTL_TEXTTOUNICODE_INFO_* bits.
// pSrcCvtBytes - if non-null, receives the number of source bytes consumed.
//
// Returns the number of sal_Unicode units written to pDestBuf.
61 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
62 void * pContext,
63 char const * pSrcBuf,
64 sal_Size nSrcBytes,
65 sal_Unicode * pDestBuf,
66 sal_Size nDestChars,
67 sal_uInt32 nFlags,
68 sal_uInt32 * pInfo,
69 sal_Size * pSrcCvtBytes)
71 sal_uInt16 const * pBig5Hkscs2001Data
72 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
73 m_pBig5Hkscs2001ToUnicodeData;
74 sal_Int32 const * pBig5Hkscs2001RowOffsets
75 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
76 m_pBig5Hkscs2001ToUnicodeRowOffsets;
77 ImplDBCSToUniLeadTab const * pBig5Data
78 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
79 m_pBig5ToUnicodeData;
// nRow is the pending lead byte (0 = none); startOfCurrentChar is the source
// index just past the last byte that was fully dealt with.
80 sal_Int32 nRow = 0;
81 sal_uInt32 nInfo = 0;
82 sal_Size nConverted = 0;
83 sal_Unicode * pDestBufPtr = pDestBuf;
84 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
85 sal_Size startOfCurrentChar = 0;
// Resume with a lead byte left pending by an earlier call, if any.
87 if (pContext)
88 nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
90 for (; nConverted < nSrcBytes; ++nConverted)
// bUndefined is handed to the bad-input handler; it is cleared on the paths
// where the byte itself is out of range, and left true where no table had a
// mapping.
92 bool bUndefined = true;
93 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
// No lead byte pending: nChar is a single-byte character or a lead byte.
94 if (nRow == 0)
// Bytes below 0x80 are copied through unchanged.
95 if (nChar < 0x80)
96 if (pDestBufPtr != pDestBufEnd) {
97 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
98 startOfCurrentChar = nConverted + 1;
99 } else
100 goto no_output;
// 0x81..0xFE starts a two-byte sequence; keep it until the trail byte comes.
101 else if (nChar >= 0x81 && nChar <= 0xFE)
102 nRow = nChar;
// Remaining values (0x80, 0xFF) cannot start a character.
103 else
105 bUndefined = false;
106 goto bad_input;
// A lead byte is pending: nChar has to be a trail byte.
108 else
109 if ((nChar >= 0x40 && nChar <= 0x7E)
110 || (nChar >= 0xA1 && nChar <= 0xFE))
// 0xFFFF serves as the "no mapping found yet" marker through the lookups.
112 sal_uInt32 nUnicode = 0xFFFF;
// 1st lookup: Big5-HKSCS:2001 table. A row offset of -1 means the row has no
// entries. The entry at the offset packs the first (low 8 bits) and last
// (high 8 bits) trail byte covered; the mappings follow it.
113 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
114 sal_uInt32 nFirst=0;
115 sal_uInt32 nLast=0;
116 if (nOffset != -1)
118 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
119 nFirst = nFirstLast & 0xFF;
120 nLast = nFirstLast >> 8;
121 if (nChar >= nFirst && nChar <= nLast)
122 nUnicode
123 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
// 2nd lookup: the Big5 table, indexed by lead byte and trail range.
125 if (nUnicode == 0xFFFF)
127 sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
128 if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
130 nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
// A 0 entry in this table is treated as "no mapping".
131 if (nUnicode == 0)
132 nUnicode = 0xFFFF;
133 assert(!rtl::isHighSurrogate(nUnicode));
// 3rd lookup: the EUDC ranges. Within a matching lead-byte range the trail
// byte is tested against up to three trail ranges in order; the result is
// mnUniStart plus the position of (lead, trail) inside the range.
136 if (nUnicode == 0xFFFF)
138 ImplDBCSEUDCData const * p
139 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
140 m_pEudcData;
141 sal_uInt32 nCount
142 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
143 m_nEudcCount;
144 sal_uInt32 i;
145 for (i = 0; i < nCount; ++i)
147 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
149 if (nChar < p->mnTrail1Start)
150 break;
151 if (nChar <= p->mnTrail1End)
153 nUnicode
154 = p->mnUniStart
155 + (nRow - p->mnLeadStart)
156 * p->mnTrailRangeCount
157 + (nChar - p->mnTrail1Start);
158 break;
160 if (p->mnTrailCount < 2
161 || nChar < p->mnTrail2Start)
162 break;
163 if (nChar <= p->mnTrail2End)
165 nUnicode
166 = p->mnUniStart
167 + (nRow - p->mnLeadStart)
168 * p->mnTrailRangeCount
169 + (nChar - p->mnTrail2Start)
170 + (p->mnTrail1End - p->mnTrail1Start
171 + 1);
172 break;
174 if (p->mnTrailCount < 3
175 || nChar < p->mnTrail3Start)
176 break;
177 if (nChar <= p->mnTrail3End)
179 nUnicode
180 = p->mnUniStart
181 + (nRow - p->mnLeadStart)
182 * p->mnTrailRangeCount
183 + (nChar - p->mnTrail3Start)
184 + (p->mnTrail1End - p->mnTrail1Start
185 + 1)
186 + (p->mnTrail2End - p->mnTrail2Start
187 + 1);
188 break;
190 break;
192 ++p;
194 assert(!rtl::isHighSurrogate(nUnicode));
// No table had a mapping: report as undefined (bUndefined is still true).
196 if (nUnicode == 0xFFFF)
197 goto bad_input;
// Per the asserts above, a high surrogate can only come from the HKSCS table,
// so nOffset/nFirst/nLast are valid here. The second code unit (presumably
// the low surrogate) is read from the data that follows the row's first
// table: skip that table's nLast - nFirst + 1 entries, take the next entry as
// the new first trail byte, then index by trail byte again.
198 if (rtl::isHighSurrogate(nUnicode))
199 if (pDestBufEnd - pDestBufPtr >= 2)
201 nOffset += nLast - nFirst + 1;
202 nFirst = pBig5Hkscs2001Data[nOffset++];
203 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
204 *pDestBufPtr++
205 = static_cast<sal_Unicode>(pBig5Hkscs2001Data[
206 nOffset + (nChar - nFirst)]);
207 startOfCurrentChar = nConverted + 1;
209 else
210 goto no_output;
211 else
212 if (pDestBufPtr != pDestBufEnd) {
213 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
214 startOfCurrentChar = nConverted + 1;
215 } else
216 goto no_output;
// The two-byte sequence has been consumed.
217 nRow = 0;
// Byte outside both trail ranges.
219 else
221 bUndefined = false;
222 goto bad_input;
224 continue;
// Let the shared handler decide, based on nFlags, what to do with bad input.
226 bad_input:
227 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
228 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
229 &nInfo))
// STOP: drop the pending lead byte and end the conversion. Without FLUSH the
// offending byte is counted as consumed; with FLUSH nConverted is rewound to
// startOfCurrentChar.
231 case sal::detail::textenc::BAD_INPUT_STOP:
232 nRow = 0;
233 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
234 ++nConverted;
235 } else {
236 nConverted = startOfCurrentChar;
238 break;
// CONTINUE: drop the pending lead byte and go on with the next source byte.
240 case sal::detail::textenc::BAD_INPUT_CONTINUE:
241 nRow = 0;
242 startOfCurrentChar = nConverted + 1;
243 continue;
245 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
246 goto no_output;
248 break;
// Destination full: step the source pointer back over the current byte, flag
// the condition and stop. nConverted is not advanced for this byte.
250 no_output:
251 --pSrcBuf;
252 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
253 break;
// The input ended after a lead byte and neither an error nor a destination
// overflow has been recorded.
256 if (nRow != 0
257 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
258 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
259 == 0)
// Without FLUSH: report SRCBUFFERTOOSMALL; nRow stays set and is stored in
// the context below.
261 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
262 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
// With FLUSH: the dangling lead byte is handled as invalid input.
263 else
264 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
265 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
266 &nInfo))
268 case sal::detail::textenc::BAD_INPUT_STOP:
// FLUSH is necessarily set on this branch, so nConverted is rewound.
269 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
270 nConverted = startOfCurrentChar;
272 [[fallthrough]];
273 case sal::detail::textenc::BAD_INPUT_CONTINUE:
274 nRow = 0;
275 break;
277 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
278 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
279 break;
// Publish the state and results through the optional out-parameters.
283 if (pContext)
284 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
285 if (pInfo)
286 *pInfo = nInfo;
287 if (pSrcCvtBytes)
288 *pSrcCvtBytes = nConverted;
290 return pDestBufPtr - pDestBuf;
// Converts UTF-16 code units into Big5-HKSCS encoded bytes.
//
// pData        - ImplBig5HkscsConverterData supplying the Unicode to
//                Big5-HKSCS:2001 table with its plane and page offsets, the
//                Unicode to Big5 table and the EUDC ranges.
// pContext     - optional ImplUnicodeToTextContext; a pending high surrogate
//                is read from it on entry and written back on exit.
// pSrcBuf      - source UTF-16 code units; nSrcChars is their count.
// pDestBuf     - destination buffer; nDestBytes is its capacity in bytes.
// nFlags       - RTL_UNICODETOTEXT_FLAGS_*; tested here for FLUSH and passed
//                on to handleBadInputUnicodeToTextConversion.
// pInfo        - if non-null, receives the accumulated
//                RTL_UNICODETOTEXT_INFO_* bits.
// pSrcCvtChars - if non-null, receives the number of source units consumed.
//
// Returns the number of bytes written to pDestBuf.
293 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
294 void * pContext,
295 sal_Unicode const * pSrcBuf,
296 sal_Size nSrcChars,
297 char * pDestBuf,
298 sal_Size nDestBytes,
299 sal_uInt32 nFlags,
300 sal_uInt32 * pInfo,
301 sal_Size * pSrcCvtChars)
303 sal_uInt16 const * pBig5Hkscs2001Data
304 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
305 m_pUnicodeToBig5Hkscs2001Data;
306 sal_Int32 const * pBig5Hkscs2001PageOffsets
307 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
308 m_pUnicodeToBig5Hkscs2001PageOffsets;
309 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
310 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
311 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
312 ImplUniToDBCSHighTab const * pBig5Data
313 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
314 m_pUnicodeToBig5Data;
// nHighSurrogate is a high surrogate waiting for its partner (0 = none).
315 sal_Unicode nHighSurrogate = 0;
316 sal_uInt32 nInfo = 0;
317 sal_Size nConverted = 0;
318 char * pDestBufPtr = pDestBuf;
319 char * pDestBufEnd = pDestBuf + nDestBytes;
// Resume with a high surrogate left pending by an earlier call, if any.
321 if (pContext)
322 nHighSurrogate
323 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
325 for (; nConverted < nSrcChars; ++nConverted)
// bUndefined is handed to the bad-input handler; it is cleared for malformed
// surrogate sequences and left true where no table had a mapping.
327 bool bUndefined = true;
328 sal_uInt32 nChar = *pSrcBuf++;
329 if (nHighSurrogate == 0)
// A high surrogate is only remembered; output happens once its partner
// arrives.
331 if (rtl::isHighSurrogate(nChar))
333 nHighSurrogate = static_cast<sal_Unicode>(nChar);
334 continue;
// A low surrogate without a preceding high surrogate is malformed.
336 else if (rtl::isLowSurrogate(nChar))
338 bUndefined = false;
339 goto bad_input;
// Pending high surrogate plus low surrogate: form the full code point.
342 else if (rtl::isLowSurrogate(nChar))
343 nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
// Pending high surrogate followed by anything else is malformed.
344 else
346 bUndefined = false;
347 goto bad_input;
350 assert(rtl::isUnicodeScalarValue(nChar));
// Code points below 0x80 are written as a single byte.
352 if (nChar < 0x80)
353 if (pDestBufPtr != pDestBufEnd)
354 *pDestBufPtr++ = static_cast< char >(nChar);
355 else
356 goto no_output;
357 else
// nBytes will hold lead byte << 8 | trail byte; 0 means "no mapping yet".
359 sal_uInt32 nBytes = 0;
// 1st lookup: Big5-HKSCS:2001 table, reached via plane (nChar >> 16) and
// page (bits 8..15) offsets; -1 at either level means "nothing there". The
// entry at the page offset packs the first (low 8 bits) and last (high 8
// bits) low-byte value covered; the mappings follow it.
360 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
361 if (nOffset != -1)
363 nOffset
364 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
365 >> 8)];
366 if (nOffset != -1)
368 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
369 sal_uInt32 nFirst = nFirstLast & 0xFF;
370 sal_uInt32 nLast = nFirstLast >> 8;
371 sal_uInt32 nIndex = nChar & 0xFF;
372 if (nIndex >= nFirst && nIndex <= nLast)
374 nBytes
375 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
// 2nd lookup: the Big5 table, indexed by the high byte of nChar; only
// consulted when nChar >> 8 is below 0x100.
379 if (nBytes == 0)
381 sal_uInt32 nIndex1 = nChar >> 8;
382 if (nIndex1 < 0x100)
384 sal_uInt32 nIndex2 = nChar & 0xFF;
385 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
386 if (nIndex2 >= nFirst
387 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
388 nBytes = pBig5Data[nIndex1].
389 mpToUniTrailTab[nIndex2 - nFirst];
// 3rd lookup: the EUDC ranges. The position of nChar inside a matching range
// is split into a lead-byte offset and a trail offset, and the trail offset
// is placed into the first, second or third trail range.
392 if (nBytes == 0)
394 ImplDBCSEUDCData const * p
395 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
396 m_pEudcData;
397 sal_uInt32 nCount
398 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
399 m_nEudcCount;
400 sal_uInt32 i;
401 for (i = 0; i < nCount; ++i) {
402 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
404 sal_uInt32 nIndex = nChar - p->mnUniStart;
405 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
406 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
407 sal_uInt32 nSize;
408 nBytes = (p->mnLeadStart + nLeadOff) << 8;
409 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
410 if (nTrailOff < nSize)
412 nBytes |= p->mnTrail1Start + nTrailOff;
413 break;
415 nTrailOff -= nSize;
416 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
417 if (nTrailOff < nSize)
419 nBytes |= p->mnTrail2Start + nTrailOff;
420 break;
422 nTrailOff -= nSize;
423 nBytes |= p->mnTrail3Start + nTrailOff;
424 break;
426 ++p;
// No table had a mapping: report as undefined (bUndefined is still true).
429 if (nBytes == 0)
430 goto bad_input;
// Write the lead byte, then the trail byte.
431 if (pDestBufEnd - pDestBufPtr >= 2)
433 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
434 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
436 else
437 goto no_output;
// The character is done; no surrogate is pending any more.
439 nHighSurrogate = 0;
440 continue;
// Let the shared handler decide, based on nFlags, what to do with bad input.
442 bad_input:
443 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
444 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
445 &nInfo, nullptr, 0, nullptr))
// STOP: drop the pending surrogate and end the conversion.
447 case sal::detail::textenc::BAD_INPUT_STOP:
448 nHighSurrogate = 0;
449 break;
// CONTINUE: drop the pending surrogate and go on with the next source unit.
451 case sal::detail::textenc::BAD_INPUT_CONTINUE:
452 nHighSurrogate = 0;
453 continue;
455 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
456 goto no_output;
458 break;
// Destination full: step the source pointer back over the current unit, flag
// the condition and stop. nConverted is not advanced for this unit.
460 no_output:
461 --pSrcBuf;
462 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
463 break;
// The input ended after a high surrogate and neither an error nor a
// destination overflow has been recorded.
466 if (nHighSurrogate != 0
467 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
468 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
469 == 0)
// NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH *is* set, and
// the bad-input handler runs when it is not. ImplConvertBig5HkscsToUnicode
// tests its FLUSH flag the other way round (== 0). Confirm that this
// orientation is intended before changing it; callers may rely on it.
471 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
472 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
473 else
474 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
475 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
476 nullptr, 0, nullptr))
478 case sal::detail::textenc::BAD_INPUT_STOP:
479 case sal::detail::textenc::BAD_INPUT_CONTINUE:
480 nHighSurrogate = 0;
481 break;
483 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
484 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
485 break;
// Publish the state and results through the optional out-parameters.
489 if (pContext)
490 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
491 = nHighSurrogate;
492 if (pInfo)
493 *pInfo = nInfo;
494 if (pSrcCvtChars)
495 *pSrcCvtChars = nConverted;
497 return pDestBufPtr - pDestBuf;
500 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */