Avoid potential negative array index access to cached text.
[LibreOffice.git] / sal / textenc / tcvtutf7.cxx
blob0f3ea4e7cb6037c620a411aa5ff4be4843f6f08c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <rtl/textcvt.h>
24 #include "tenchelp.hxx"
25 #include "unichars.hxx"
27 /* ======================================================================= */
/* Base64 alphabet: entry i is the ASCII code that represents the 6-bit value i */
unsigned char const aImplBase64Tab[64] =
{
    /* 0..12   'A'-'M' */
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D,
    /* 13..25  'N'-'Z' */
    0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
    /* 26..38  'a'-'m' */
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
    /* 39..51  'n'-'z' */
    0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
    /* 52..61  '0'-'9' */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    /* 62 '+', 63 '/' */
    0x2B, 0x2F
};
/* Reverse of aImplBase64Tab: for a 7-bit character code, the 6-bit value it */
/* stands for, or 0xFF when the character is not part of the base64 alphabet */
unsigned char const aImplBase64IndexTab[128] =
{
    /* 0x00-0x0F: control characters */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x10-0x1F: control characters */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x20-0x2F: only '+' (0x2B) and '/' (0x2F) are base64 characters */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,   62, 0xFF, 0xFF, 0xFF,   63,
    /* 0x30-0x3F: '0'-'9', then :;<=>? */
      52,   53,   54,   55,   56,   57,   58,   59,   60,   61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x40-0x4F: '@', then 'A'-'O' */
    0xFF,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,
    /* 0x50-0x5F: 'P'-'Z', then [\]^_ */
      15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x60-0x6F: '`', then 'a'-'o' */
    0xFF,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
    /* 0x70-0x7F: 'p'-'z', then {|}~ and DEL */
      41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
/* For every 7-bit character: 1 when the encoder has to write it inside a    */
/* shifted (base64) run, 0 when it may be written directly.                  */
/* HTAB (0x09), LF (0x0A) and CR (0x0D) are directly encodable (RFC 2152,    */
/* Rule 3). The CR entry used to sit at index 0x0C (form feed) by mistake,   */
/* so CR was shifted and FF was passed through; both entries are corrected.  */
unsigned char const aImplMustShiftTab[128] =
{
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x00-0x07 */
    1, 0, 0, 1, 1, 0, 1, 1,     /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF, 0x0D == CR */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x10-0x17 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x18-0x1F */
    0, 1, 1, 1, 1, 1, 1, 0,     /* 0x20-0x27  !"#$%&' */
    0, 0, 1, 1, 0, 1, 0, 0,     /* 0x28-0x2F ()*+,-./ */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x30-0x37 01234567 */
    0, 0, 0, 1, 1, 1, 1, 0,     /* 0x38-0x3F 89:;<=>? */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x40-0x47 @ABCDEFG */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x48-0x4F HIJKLMNO */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x50-0x57 PQRSTUVW */
    0, 0, 0, 1, 1, 1, 1, 1,     /* 0x58-0x5F XYZ[\]^_ */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x60-0x67 `abcdefg */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x68-0x6F hijklmno */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x70-0x77 pqrstuvw */
    0, 0, 0, 1, 1, 1, 1, 1      /* 0x78-0x7F xyz{|}~  */
};
/* '+' : switches from direct characters to a base64 run */
#define IMPL_SHIFT_IN_CHAR      0x2B
/* '-' : explicitly ends a base64 run; "+-" stands for a literal '+' */
#define IMPL_SHIFT_OUT_CHAR     0x2D
92 /* ----------------------------------------------------------------------- */
94 namespace {
96 struct ImplUTF7ToUCContextData
98 bool mbShifted;
99 bool mbFirst;
100 bool mbWroteOne;
101 sal_uInt32 mnBitBuffer;
102 sal_uInt32 mnBufferBits;
107 /* ----------------------------------------------------------------------- */
109 void* ImplUTF7CreateUTF7TextToUnicodeContext()
111 ImplUTF7ToUCContextData* pContextData = new ImplUTF7ToUCContextData;
112 pContextData->mbShifted = false;
113 pContextData->mbFirst = false;
114 pContextData->mbWroteOne = false;
115 pContextData->mnBitBuffer = 0;
116 pContextData->mnBufferBits = 0;
117 return pContextData;
120 /* ----------------------------------------------------------------------- */
122 void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
124 delete static_cast< ImplUTF7ToUCContextData * >(pContext);
127 /* ----------------------------------------------------------------------- */
129 void ImplUTF7ResetTextToUnicodeContext( void* pContext )
131 ImplUTF7ToUCContextData* pContextData = static_cast<ImplUTF7ToUCContextData*>(pContext);
132 pContextData->mbShifted = false;
133 pContextData->mbFirst = false;
134 pContextData->mbWroteOne = false;
135 pContextData->mnBitBuffer = 0;
136 pContextData->mnBufferBits = 0;
139 /* ----------------------------------------------------------------------- */
141 sal_Size ImplUTF7ToUnicode( SAL_UNUSED_PARAMETER const void*, void* pContext,
142 const char* pSrcBuf, sal_Size nSrcBytes,
143 sal_Unicode* pDestBuf, sal_Size nDestChars,
144 sal_uInt32 nFlags, sal_uInt32* pInfo,
145 sal_Size* pSrcCvtBytes )
147 ImplUTF7ToUCContextData* pContextData = static_cast<ImplUTF7ToUCContextData*>(pContext);
148 unsigned char c ='\0';
149 unsigned char nBase64Value = 0;
150 bool bEnd = false;
151 bool bShifted;
152 bool bFirst;
153 bool bWroteOne;
154 bool bBase64End;
155 sal_uInt32 nBitBuffer;
156 sal_uInt32 nBitBufferTemp;
157 sal_uInt32 nBufferBits;
158 sal_Unicode* pEndDestBuf;
159 const char* pEndSrcBuf;
161 /* !!! Implementation not finished !!!
162 if ( pContextData )
164 bShifted = pContextData->mbShifted;
165 bFirst = pContextData->mbFirst;
166 bWroteOne = pContextData->mbWroteOne;
167 nBitBuffer = pContextData->mnBitBuffer;
168 nBufferBits = pContextData->mnBufferBits;
170 else
173 bShifted = false;
174 bFirst = false;
175 bWroteOne = false;
176 nBitBuffer = 0;
177 nBufferBits = 0;
180 *pInfo = 0;
181 pEndDestBuf = pDestBuf+nDestChars;
182 pEndSrcBuf = pSrcBuf+nSrcBytes;
185 if ( pSrcBuf < pEndSrcBuf )
187 c = static_cast<unsigned char>(*pSrcBuf);
189 /* End, when not a base64 character */
190 bBase64End = false;
191 if ( c <= 0x7F )
193 nBase64Value = aImplBase64IndexTab[c];
194 if ( nBase64Value == 0xFF )
195 bBase64End = true;
198 else
200 bEnd = true;
201 bBase64End = true;
204 if ( bShifted )
206 if ( bBase64End )
208 bShifted = false;
210 /* If the character causing us to drop out was SHIFT_IN */
211 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
212 /* The test for SHIFT_IN is not necessary, but allows */
213 /* an alternate form of UTF-7 where SHIFT_IN is escaped */
214 /* by SHIFT_IN. This only works for some values of */
215 /* SHIFT_IN. It is so implemented, because this comes */
216 /* from the official unicode book (The Unicode Standard, */
217 /* Version 2.0) and so I think, that someone of the */
218 /* world has used this feature. */
219 if ( !bEnd )
221 if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) )
223 /* If no base64 character, and the terminating */
224 /* character of the shift sequence was the */
225 /* SHIFT_OUT_CHAR, then it't a special escape */
226 /* for SHIFT_IN_CHAR. */
227 if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) )
229 if ( pDestBuf >= pEndDestBuf )
231 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
232 break;
234 *pDestBuf = IMPL_SHIFT_IN_CHAR;
235 pDestBuf++;
236 bWroteOne = true;
239 /* Skip character */
240 pSrcBuf++;
241 if ( pSrcBuf < pEndSrcBuf )
242 c = static_cast<unsigned char>(*pSrcBuf);
243 else
244 bEnd = true;
248 /* Empty sequence not allowed, so when we don't write one */
249 /* valid char, then the sequence is corrupt */
250 if ( !bWroteOne )
252 /* When no more bytes in the source buffer, then */
253 /* this buffer may be too small */
254 if ( bEnd )
255 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
256 else
258 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
259 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
261 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
262 if (!bEnd) {
263 ++pSrcBuf;
265 } else {
266 //TODO: move pSrcBuf back to a reasonable starting place
268 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
269 break;
271 /* We insert here no default char, because I think */
272 /* this is better to ignore this */
276 else
278 /* Add 6 Bits from character to the bit buffer */
279 nBufferBits += 6;
280 nBitBuffer |= static_cast<sal_uInt32>(nBase64Value & 0x3F) << (32-nBufferBits);
281 bFirst = false;
284 /* Extract as many full 16 bit characters as possible from the */
285 /* bit buffer. */
286 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) )
288 nBitBufferTemp = nBitBuffer >> (32-16);
289 *pDestBuf = static_cast<sal_Unicode>(nBitBufferTemp & 0xFFFF);
290 pDestBuf++;
291 nBitBuffer <<= 16;
292 nBufferBits -= 16;
293 bWroteOne = true;
296 if ( nBufferBits >= 16 )
298 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
299 break;
302 if ( bBase64End )
304 /* Sequence ended and we have some bits, then the */
305 /* sequence is corrupted */
306 if ( nBufferBits && nBitBuffer )
308 /* When no more bytes in the source buffer, then */
309 /* this buffer may be too small */
310 if ( bEnd )
311 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
312 else
314 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
315 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
317 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
318 if (!bEnd) {
319 ++pSrcBuf;
321 } else {
322 //TODO: move pSrcBuf back to a reasonable starting place
324 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
325 break;
327 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
329 if ( pDestBuf >= pEndDestBuf )
331 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
332 break;
334 *pDestBuf++
335 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
341 nBitBuffer = 0;
342 nBufferBits = 0;
346 if ( !bEnd )
348 if ( !bShifted )
350 if ( c == IMPL_SHIFT_IN_CHAR )
352 bShifted = true;
353 bFirst = true;
354 bWroteOne = false;
356 else
358 /* No direct encoded character, then the buffer is */
359 /* corrupt */
360 if ( c > 0x7F )
362 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
363 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
365 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
366 ++pSrcBuf;
368 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
369 break;
371 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
373 if ( pDestBuf >= pEndDestBuf )
375 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
376 break;
378 *pDestBuf++
379 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
382 else
384 /* Write char to unicode buffer */
385 if ( pDestBuf >= pEndDestBuf )
387 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
388 break;
390 *pDestBuf = c;
391 pDestBuf++;
397 pSrcBuf++;
400 while ( !bEnd );
402 if ( pContextData )
404 pContextData->mbShifted = bShifted;
405 pContextData->mbFirst = bFirst;
406 pContextData->mbWroteOne = bWroteOne;
407 pContextData->mnBitBuffer = nBitBuffer;
408 pContextData->mnBufferBits = nBufferBits;
411 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
412 return (nDestChars - (pEndDestBuf-pDestBuf));
415 /* ======================================================================= */
417 namespace {
419 struct ImplUTF7FromUCContextData
421 bool mbShifted;
422 sal_uInt32 mnBitBuffer;
423 sal_uInt32 mnBufferBits;
428 /* ----------------------------------------------------------------------- */
430 void* ImplUTF7CreateUnicodeToTextContext()
432 ImplUTF7FromUCContextData* pContextData = new ImplUTF7FromUCContextData;
433 pContextData->mbShifted = false;
434 pContextData->mnBitBuffer = 0;
435 pContextData->mnBufferBits = 0;
436 return pContextData;
439 /* ----------------------------------------------------------------------- */
441 void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
443 delete static_cast< ImplUTF7FromUCContextData * >(pContext);
446 /* ----------------------------------------------------------------------- */
448 void ImplUTF7ResetUnicodeToTextContext( void* pContext )
450 ImplUTF7FromUCContextData* pContextData = static_cast<ImplUTF7FromUCContextData*>(pContext);
451 pContextData->mbShifted = false;
452 pContextData->mnBitBuffer = 0;
453 pContextData->mnBufferBits = 0;
456 /* ----------------------------------------------------------------------- */
458 sal_Size ImplUnicodeToUTF7( SAL_UNUSED_PARAMETER const void*, void* pContext,
459 const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
460 char* pDestBuf, sal_Size nDestBytes,
461 SAL_UNUSED_PARAMETER sal_uInt32, sal_uInt32* pInfo,
462 sal_Size* pSrcCvtChars )
464 ImplUTF7FromUCContextData* pContextData = static_cast<ImplUTF7FromUCContextData*>(pContext);
465 sal_Unicode c = '\0';
466 bool bEnd = false;
467 bool bShifted;
468 bool bNeedShift;
469 sal_uInt32 nBitBuffer;
470 sal_uInt32 nBitBufferTemp;
471 sal_uInt32 nBufferBits;
472 char* pEndDestBuf;
473 const sal_Unicode* pEndSrcBuf;
475 /* !!! Implementation not finished !!!
476 if ( pContextData )
478 bShifted = pContextData->mbShifted;
479 nBitBuffer = pContextData->mnBitBuffer;
480 nBufferBits = pContextData->mnBufferBits;
482 else
485 bShifted = false;
486 nBitBuffer = 0;
487 nBufferBits = 0;
490 *pInfo = 0;
491 pEndDestBuf = pDestBuf+nDestBytes;
492 pEndSrcBuf = pSrcBuf+nSrcChars;
495 if ( pSrcBuf < pEndSrcBuf )
497 c = *pSrcBuf;
499 bNeedShift = (c > 0x7F) || aImplMustShiftTab[c];
500 if ( bNeedShift && !bShifted )
502 if ( pDestBuf >= pEndDestBuf )
504 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
505 break;
507 *pDestBuf = IMPL_SHIFT_IN_CHAR;
508 pDestBuf++;
509 /* Special case handling for SHIFT_IN_CHAR */
510 if ( c == IMPL_SHIFT_IN_CHAR )
512 if ( pDestBuf >= pEndDestBuf )
514 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
515 break;
517 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
518 pDestBuf++;
520 else
521 bShifted = true;
524 else
526 bEnd = true;
527 bNeedShift = false;
530 if ( bShifted )
532 /* Write the character to the bit buffer, or pad the bit */
533 /* buffer out to a full base64 character */
534 if ( bNeedShift )
536 nBufferBits += 16;
537 nBitBuffer |= static_cast<sal_uInt32>(c) << (32-nBufferBits);
539 else
540 nBufferBits += (6-(nBufferBits%6))%6;
542 /* Flush out as many full base64 characters as possible */
543 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) )
545 nBitBufferTemp = nBitBuffer >> (32-6);
546 *pDestBuf = aImplBase64Tab[nBitBufferTemp];
547 pDestBuf++;
548 nBitBuffer <<= 6;
549 nBufferBits -= 6;
552 if ( nBufferBits >= 6 )
554 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
555 break;
558 /* Write SHIFT_OUT_CHAR, when needed */
559 if ( !bNeedShift )
561 if ( pDestBuf >= pEndDestBuf )
563 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
564 break;
566 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
567 pDestBuf++;
568 bShifted = false;
572 if ( !bEnd )
574 /* Character can be directly encoded */
575 if ( !bNeedShift )
577 if ( pDestBuf >= pEndDestBuf )
579 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
580 break;
582 *pDestBuf = static_cast< char >(static_cast< unsigned char >(c));
583 pDestBuf++;
586 pSrcBuf++;
589 while ( !bEnd );
591 if ( pContextData )
593 pContextData->mbShifted = bShifted;
594 pContextData->mnBitBuffer = nBitBuffer;
595 pContextData->mnBufferBits = nBufferBits;
598 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
599 return (nDestBytes - (pEndDestBuf-pDestBuf));
602 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */