Avoid potential negative array index access to cached text.
[LibreOffice.git] / sal / textenc / converteuctw.cxx
blobb1bfd8a67766c973200d1a2f0070a2e9f953f676
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sal/config.h>
22 #include <cassert>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "converter.hxx"
30 #include "converteuctw.hxx"
32 namespace {
34 enum ImplEucTwToUnicodeState
36 IMPL_EUC_TW_TO_UNICODE_STATE_0,
37 IMPL_EUC_TW_TO_UNICODE_STATE_1,
38 IMPL_EUC_TW_TO_UNICODE_STATE_2_1,
39 IMPL_EUC_TW_TO_UNICODE_STATE_2_2,
40 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
43 struct ImplEucTwToUnicodeContext
45 ImplEucTwToUnicodeState m_eState;
46 sal_Int32 m_nPlane; // 0--15
47 sal_Int32 m_nRow; // 0--93
52 void * ImplCreateEucTwToUnicodeContext()
54 ImplEucTwToUnicodeContext * pContext = new ImplEucTwToUnicodeContext;
55 pContext->m_eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
56 return pContext;
59 void ImplResetEucTwToUnicodeContext(void * pContext)
61 if (pContext)
62 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState
63 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
66 void ImplDestroyEucTwToUnicodeContext(void * pContext)
68 delete static_cast< ImplEucTwToUnicodeContext * >(pContext);
71 sal_Size ImplConvertEucTwToUnicode(void const * pData,
72 void * pContext,
73 char const * pSrcBuf,
74 sal_Size nSrcBytes,
75 sal_Unicode * pDestBuf,
76 sal_Size nDestChars,
77 sal_uInt32 nFlags,
78 sal_uInt32 * pInfo,
79 sal_Size * pSrcCvtBytes)
81 sal_uInt16 const * pCns116431992Data
82 = static_cast< ImplEucTwConverterData const * >(pData)->
83 m_pCns116431992ToUnicodeData;
84 sal_Int32 const * pCns116431992RowOffsets
85 = static_cast< ImplEucTwConverterData const * >(pData)->
86 m_pCns116431992ToUnicodeRowOffsets;
87 sal_Int32 const * pCns116431992PlaneOffsets
88 = static_cast< ImplEucTwConverterData const * >(pData)->
89 m_pCns116431992ToUnicodePlaneOffsets;
90 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
91 sal_Int32 nPlane = 0;
92 sal_Int32 nRow = 0;
93 sal_uInt32 nInfo = 0;
94 sal_Size nConverted = 0;
95 sal_Unicode * pDestBufPtr = pDestBuf;
96 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
97 sal_Size startOfCurrentChar = 0;
99 if (pContext)
101 eState = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState;
102 nPlane = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane;
103 nRow = static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow;
106 for (; nConverted < nSrcBytes; ++nConverted)
108 bool bUndefined = true;
109 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
110 switch (eState)
112 case IMPL_EUC_TW_TO_UNICODE_STATE_0:
113 if (nChar < 0x80)
114 if (pDestBufPtr != pDestBufEnd) {
115 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
116 startOfCurrentChar = nConverted + 1;
117 } else
118 goto no_output;
119 else if (nChar >= 0xA1 && nChar <= 0xFE)
121 nRow = nChar - 0xA1;
122 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
124 else if (nChar == 0x8E)
125 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
126 else
128 bUndefined = false;
129 goto bad_input;
131 break;
133 case IMPL_EUC_TW_TO_UNICODE_STATE_1:
134 if (nChar >= 0xA1 && nChar <= 0xFE)
136 nPlane = 0;
137 goto transform;
139 else
141 bUndefined = false;
142 goto bad_input;
144 break;
146 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
147 if (nChar >= 0xA1 && nChar <= 0xB0)
149 nPlane = nChar - 0xA1;
150 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
152 else
154 bUndefined = false;
155 goto bad_input;
157 break;
159 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
160 if (nChar >= 0xA1 && nChar <= 0xFE)
162 nRow = nChar - 0xA1;
163 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
165 else
167 bUndefined = false;
168 goto bad_input;
170 break;
172 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
173 if (nChar >= 0xA1 && nChar <= 0xFE)
174 goto transform;
175 else
177 bUndefined = false;
178 goto bad_input;
180 break;
182 continue;
184 transform:
186 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
187 if (nPlaneOffset == -1)
188 goto bad_input;
189 else
191 sal_Int32 nOffset
192 = pCns116431992RowOffsets[nPlaneOffset + nRow];
193 if (nOffset == -1)
194 goto bad_input;
195 else
197 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
198 sal_uInt32 nFirst = nFirstLast & 0xFF;
199 sal_uInt32 nLast = nFirstLast >> 8;
200 nChar -= 0xA0;
201 if (nChar >= nFirst && nChar <= nLast)
203 sal_uInt32 nUnicode
204 = pCns116431992Data[nOffset + (nChar - nFirst)];
205 if (nUnicode == 0xFFFF)
206 goto bad_input;
207 else if (rtl::isHighSurrogate(nUnicode))
208 if (pDestBufEnd - pDestBufPtr >= 2)
210 nOffset += nLast - nFirst + 1;
211 nFirst = pCns116431992Data[nOffset++];
212 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
213 *pDestBufPtr++
214 = static_cast<sal_Unicode>(pCns116431992Data[
215 nOffset + (nChar - nFirst)]);
216 startOfCurrentChar = nConverted + 1;
218 else
219 goto no_output;
220 else
221 if (pDestBufPtr != pDestBufEnd) {
222 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
223 startOfCurrentChar = nConverted + 1;
224 } else
225 goto no_output;
227 else
228 goto bad_input;
229 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
232 continue;
235 bad_input:
236 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
237 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
238 &nInfo))
240 case sal::detail::textenc::BAD_INPUT_STOP:
241 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
242 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
243 ++nConverted;
244 } else {
245 nConverted = startOfCurrentChar;
247 break;
249 case sal::detail::textenc::BAD_INPUT_CONTINUE:
250 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
251 startOfCurrentChar = nConverted + 1;
252 continue;
254 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
255 goto no_output;
257 break;
259 no_output:
260 --pSrcBuf;
261 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
262 break;
265 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
266 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
267 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
268 == 0)
270 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
271 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
272 else
273 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
274 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
275 &nInfo))
277 case sal::detail::textenc::BAD_INPUT_STOP:
278 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
279 nConverted = startOfCurrentChar;
281 [[fallthrough]];
282 case sal::detail::textenc::BAD_INPUT_CONTINUE:
283 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
284 break;
286 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
287 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
288 break;
292 if (pContext)
294 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_eState = eState;
295 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nPlane = nPlane;
296 static_cast< ImplEucTwToUnicodeContext * >(pContext)->m_nRow = nRow;
298 if (pInfo)
299 *pInfo = nInfo;
300 if (pSrcCvtBytes)
301 *pSrcCvtBytes = nConverted;
303 return pDestBufPtr - pDestBuf;
306 sal_Size ImplConvertUnicodeToEucTw(void const * pData,
307 void * pContext,
308 sal_Unicode const * pSrcBuf,
309 sal_Size nSrcChars,
310 char * pDestBuf,
311 sal_Size nDestBytes,
312 sal_uInt32 nFlags,
313 sal_uInt32 * pInfo,
314 sal_Size * pSrcCvtChars)
316 sal_uInt8 const * pCns116431992Data
317 = static_cast< ImplEucTwConverterData const * >(pData)->
318 m_pUnicodeToCns116431992Data;
319 sal_Int32 const * pCns116431992PageOffsets
320 = static_cast< ImplEucTwConverterData const * >(pData)->
321 m_pUnicodeToCns116431992PageOffsets;
322 sal_Int32 const * pCns116431992PlaneOffsets
323 = static_cast< ImplEucTwConverterData const * >(pData)->
324 m_pUnicodeToCns116431992PlaneOffsets;
325 sal_Unicode nHighSurrogate = 0;
326 sal_uInt32 nInfo = 0;
327 sal_Size nConverted = 0;
328 char * pDestBufPtr = pDestBuf;
329 char * pDestBufEnd = pDestBuf + nDestBytes;
331 if (pContext)
332 nHighSurrogate
333 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
335 for (; nConverted < nSrcChars; ++nConverted)
337 bool bUndefined = true;
338 sal_uInt32 nChar = *pSrcBuf++;
339 if (nHighSurrogate == 0)
341 if (rtl::isHighSurrogate(nChar))
343 nHighSurrogate = static_cast<sal_Unicode>(nChar);
344 continue;
346 else if (rtl::isLowSurrogate(nChar))
348 bUndefined = false;
349 goto bad_input;
352 else if (rtl::isLowSurrogate(nChar))
353 nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
354 else
356 bUndefined = false;
357 goto bad_input;
360 assert(rtl::isUnicodeScalarValue(nChar));
362 if (nChar < 0x80)
363 if (pDestBufPtr != pDestBufEnd)
364 *pDestBufPtr++ = static_cast< char >(nChar);
365 else
366 goto no_output;
367 else
369 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
370 sal_uInt32 nFirst;
371 sal_uInt32 nLast;
372 sal_uInt32 nPlane;
373 if (nOffset == -1)
374 goto bad_input;
375 nOffset
376 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
377 if (nOffset == -1)
378 goto bad_input;
379 nFirst = pCns116431992Data[nOffset++];
380 nLast = pCns116431992Data[nOffset++];
381 nChar &= 0xFF;
382 if (nChar < nFirst || nChar > nLast)
383 goto bad_input;
384 nOffset += 3 * (nChar - nFirst);
385 nPlane = pCns116431992Data[nOffset++];
386 if (nPlane == 0)
387 goto bad_input;
388 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
389 goto no_output;
390 if (nPlane != 1)
392 *pDestBufPtr++ = static_cast< char >(static_cast< unsigned char >(0x8E));
393 *pDestBufPtr++ = static_cast< char >(0xA0 + nPlane);
395 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset++]);
396 *pDestBufPtr++ = static_cast< char >(0xA0 + pCns116431992Data[nOffset]);
398 nHighSurrogate = 0;
399 continue;
401 bad_input:
402 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
403 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
404 &nInfo, nullptr, 0, nullptr))
406 case sal::detail::textenc::BAD_INPUT_STOP:
407 nHighSurrogate = 0;
408 break;
410 case sal::detail::textenc::BAD_INPUT_CONTINUE:
411 nHighSurrogate = 0;
412 continue;
414 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
415 goto no_output;
417 break;
419 no_output:
420 --pSrcBuf;
421 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
422 break;
425 if (nHighSurrogate != 0
426 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
427 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
428 == 0)
430 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
431 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
432 else
433 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
434 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
435 nullptr, 0, nullptr))
437 case sal::detail::textenc::BAD_INPUT_STOP:
438 case sal::detail::textenc::BAD_INPUT_CONTINUE:
439 nHighSurrogate = 0;
440 break;
442 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
443 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
444 break;
448 if (pContext)
449 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
450 = nHighSurrogate;
451 if (pInfo)
452 *pInfo = nInfo;
453 if (pSrcCvtChars)
454 *pSrcCvtChars = nConverted;
456 return pDestBufPtr - pDestBuf;
459 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */