Bump for 3.6-28
[LibreOffice.git] / sal / textenc / tcvtutf7.cxx
blob13c820e7ef557af02fe3b26270584599371ca29b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
33 #include "tenchelp.hxx"
34 #include "unichars.hxx"
36 /* ======================================================================= */
38 static sal_uChar const aImplBase64Tab[64] =
40 /* A-Z */
41 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
42 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
43 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
44 0x58, 0x59, 0x5A,
45 /* a-z */
46 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
47 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
48 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
49 0x78, 0x79, 0x7A,
50 /* 0-9,+,/ */
51 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
52 0x38, 0x39, 0x2B, 0x2F
55 /* Index in Base64Tab or 0xFF, when is a invalid character */
56 static sal_uChar const aImplBase64IndexTab[128] =
58 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */
59 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */
60 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */
61 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */
62 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */
63 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */
64 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */
65 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */
66 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */
67 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */
68 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */
69 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */
70 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */
71 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */
72 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */
73 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */
76 static sal_uChar const aImplMustShiftTab[128] =
78 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */
79 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */
80 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */
81 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */
82 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */
83 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */
84 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */
85 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? */
86 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */
87 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */
88 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */
89 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */
90 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */
91 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */
92 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */
93 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */
/* '+' : opens a shifted (base64) sequence */
#define IMPL_SHIFT_IN_CHAR      0x2B
/* '-' : explicitly closes a shifted sequence; "+-" stands for a literal '+' */
#define IMPL_SHIFT_OUT_CHAR     0x2D
101 /* ----------------------------------------------------------------------- */
103 struct ImplUTF7ToUCContextData
105 int mbShifted;
106 int mbFirst;
107 int mbWroteOne;
108 sal_uInt32 mnBitBuffer;
109 sal_uInt32 mnBufferBits;
112 /* ----------------------------------------------------------------------- */
114 void* ImplUTF7CreateUTF7TextToUnicodeContext()
116 ImplUTF7ToUCContextData* pContextData = new ImplUTF7ToUCContextData;
117 pContextData->mbShifted = sal_False;
118 pContextData->mbFirst = sal_False;
119 pContextData->mbWroteOne = sal_False;
120 pContextData->mnBitBuffer = 0;
121 pContextData->mnBufferBits = 0;
122 return pContextData;
125 /* ----------------------------------------------------------------------- */
127 void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
129 delete static_cast< ImplUTF7ToUCContextData * >(pContext);
132 /* ----------------------------------------------------------------------- */
134 void ImplUTF7ResetTextToUnicodeContext( void* pContext )
136 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext;
137 pContextData->mbShifted = sal_False;
138 pContextData->mbFirst = sal_False;
139 pContextData->mbWroteOne = sal_False;
140 pContextData->mnBitBuffer = 0;
141 pContextData->mnBufferBits = 0;
144 /* ----------------------------------------------------------------------- */
146 sal_Size ImplUTF7ToUnicode( SAL_UNUSED_PARAMETER const void*, void* pContext,
147 const char* pSrcBuf, sal_Size nSrcBytes,
148 sal_Unicode* pDestBuf, sal_Size nDestChars,
149 sal_uInt32 nFlags, sal_uInt32* pInfo,
150 sal_Size* pSrcCvtBytes )
152 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext;
153 sal_uChar c ='\0';
154 sal_uChar nBase64Value = 0;
155 int bEnd = sal_False;
156 int bShifted;
157 int bFirst;
158 int bWroteOne;
159 int bBase64End;
160 sal_uInt32 nBitBuffer;
161 sal_uInt32 nBitBufferTemp;
162 sal_uInt32 nBufferBits;
163 sal_Unicode* pEndDestBuf;
164 const char* pEndSrcBuf;
166 /* !!! Implementation not finnished !!!
167 if ( pContextData )
169 bShifted = pContextData->mbShifted;
170 bFirst = pContextData->mbFirst;
171 bWroteOne = pContextData->mbWroteOne;
172 nBitBuffer = pContextData->mnBitBuffer;
173 nBufferBits = pContextData->mnBufferBits;
175 else
178 bShifted = sal_False;
179 bFirst = sal_False;
180 bWroteOne = sal_False;
181 nBitBuffer = 0;
182 nBufferBits = 0;
185 *pInfo = 0;
186 pEndDestBuf = pDestBuf+nDestChars;
187 pEndSrcBuf = pSrcBuf+nSrcBytes;
190 if ( pSrcBuf < pEndSrcBuf )
192 c = (sal_uChar)*pSrcBuf;
194 /* End, when not a base64 character */
195 bBase64End = sal_False;
196 if ( c <= 0x7F )
198 nBase64Value = aImplBase64IndexTab[c];
199 if ( nBase64Value == 0xFF )
200 bBase64End = sal_True;
203 else
205 bEnd = sal_True;
206 bBase64End = sal_True;
209 if ( bShifted )
211 if ( bBase64End )
213 bShifted = sal_False;
215 /* If the character causing us to drop out was SHIFT_IN */
216 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
217 /* The test for SHIFT_IN is not necessary, but allows */
218 /* an alternate form of UTF-7 where SHIFT_IN is escaped */
219 /* by SHIFT_IN. This only works for some values of */
220 /* SHIFT_IN. It is so implemented, because this comes */
221 /* from the officel unicode book (The Unicode Standard, */
222 /* Version 2.0) and so I think, that someone of the */
223 /* world has used this feature. */
224 if ( !bEnd )
226 if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) )
228 /* If no base64 character, and the terminating */
229 /* character of the shift sequence was the */
230 /* SHIFT_OUT_CHAR, then it't a special escape */
231 /* for SHIFT_IN_CHAR. */
232 if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) )
234 if ( pDestBuf >= pEndDestBuf )
236 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
237 break;
239 *pDestBuf = IMPL_SHIFT_IN_CHAR;
240 pDestBuf++;
241 bWroteOne = sal_True;
244 /* Skip character */
245 pSrcBuf++;
246 if ( pSrcBuf < pEndSrcBuf )
247 c = (sal_uChar)*pSrcBuf;
248 else
249 bEnd = sal_True;
253 /* Empty sequence not allowed, so when we don't write one */
254 /* valid char, then the sequence is corrupt */
255 if ( !bWroteOne )
257 /* When no more bytes in the source buffer, then */
258 /* this buffer may be to small */
259 if ( bEnd )
260 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
261 else
263 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
264 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
266 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
267 break;
269 /* We insert here no default char, because I think */
270 /* this is better to ignore this */
274 else
276 /* Add 6 Bits from character to the bit buffer */
277 nBufferBits += 6;
278 nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits);
279 bFirst = sal_False;
282 /* Extract as many full 16 bit characters as possible from the */
283 /* bit buffer. */
284 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) )
286 nBitBufferTemp = nBitBuffer >> (32-16);
287 *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF);
288 pDestBuf++;
289 nBitBuffer <<= 16;
290 nBufferBits -= 16;
291 bWroteOne = sal_True;
294 if ( nBufferBits >= 16 )
296 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
297 break;
300 if ( bBase64End )
302 /* Sequence ended and we have some bits, then the */
303 /* sequence is corrupted */
304 if ( nBufferBits && nBitBuffer )
306 /* When no more bytes in the source buffer, then */
307 /* this buffer may be to small */
308 if ( bEnd )
309 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
310 else
312 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
313 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
315 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
316 break;
318 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
320 if ( pDestBuf >= pEndDestBuf )
322 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
323 break;
325 *pDestBuf++
326 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
332 nBitBuffer = 0;
333 nBufferBits = 0;
337 if ( !bEnd )
339 if ( !bShifted )
341 if ( c == IMPL_SHIFT_IN_CHAR )
343 bShifted = sal_True;
344 bFirst = sal_True;
345 bWroteOne = sal_False;
347 else
349 /* No direct encoded charcater, then the buffer is */
350 /* corrupt */
351 if ( c > 0x7F )
353 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
354 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
356 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
357 break;
359 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
361 if ( pDestBuf >= pEndDestBuf )
363 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
364 break;
366 *pDestBuf++
367 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
371 /* Write char to unicode buffer */
372 if ( pDestBuf >= pEndDestBuf )
374 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
375 break;
377 *pDestBuf = c;
378 pDestBuf++;
382 pSrcBuf++;
385 while ( !bEnd );
387 if ( pContextData )
389 pContextData->mbShifted = bShifted;
390 pContextData->mbFirst = bFirst;
391 pContextData->mbWroteOne = bWroteOne;
392 pContextData->mnBitBuffer = nBitBuffer;
393 pContextData->mnBufferBits = nBufferBits;
396 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
397 return (nDestChars - (pEndDestBuf-pDestBuf));
400 /* ======================================================================= */
402 struct ImplUTF7FromUCContextData
404 int mbShifted;
405 sal_uInt32 mnBitBuffer;
406 sal_uInt32 mnBufferBits;
409 /* ----------------------------------------------------------------------- */
411 void* ImplUTF7CreateUnicodeToTextContext()
413 ImplUTF7FromUCContextData* pContextData = new ImplUTF7FromUCContextData;
414 pContextData->mbShifted = sal_False;
415 pContextData->mnBitBuffer = 0;
416 pContextData->mnBufferBits = 0;
417 return pContextData;
420 /* ----------------------------------------------------------------------- */
422 void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
424 delete static_cast< ImplUTF7FromUCContextData * >(pContext);
427 /* ----------------------------------------------------------------------- */
429 void ImplUTF7ResetUnicodeToTextContext( void* pContext )
431 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext;
432 pContextData->mbShifted = sal_False;
433 pContextData->mnBitBuffer = 0;
434 pContextData->mnBufferBits = 0;
437 /* ----------------------------------------------------------------------- */
439 sal_Size ImplUnicodeToUTF7( SAL_UNUSED_PARAMETER const void*, void* pContext,
440 const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
441 char* pDestBuf, sal_Size nDestBytes,
442 SAL_UNUSED_PARAMETER sal_uInt32, sal_uInt32* pInfo,
443 sal_Size* pSrcCvtChars )
445 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext;
446 sal_Unicode c = '\0';
447 int bEnd = sal_False;
448 int bShifted;
449 int bNeedShift;
450 sal_uInt32 nBitBuffer;
451 sal_uInt32 nBitBufferTemp;
452 sal_uInt32 nBufferBits;
453 char* pEndDestBuf;
454 const sal_Unicode* pEndSrcBuf;
456 /* !!! Implementation not finnished !!!
457 if ( pContextData )
459 bShifted = pContextData->mbShifted;
460 nBitBuffer = pContextData->mnBitBuffer;
461 nBufferBits = pContextData->mnBufferBits;
463 else
466 bShifted = sal_False;
467 nBitBuffer = 0;
468 nBufferBits = 0;
471 *pInfo = 0;
472 pEndDestBuf = pDestBuf+nDestBytes;
473 pEndSrcBuf = pSrcBuf+nSrcChars;
476 if ( pSrcBuf < pEndSrcBuf )
478 c = *pSrcBuf;
480 bNeedShift = (c > 0x7F) || aImplMustShiftTab[c];
481 if ( bNeedShift && !bShifted )
483 if ( pDestBuf >= pEndDestBuf )
485 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
486 break;
488 *pDestBuf = IMPL_SHIFT_IN_CHAR;
489 pDestBuf++;
490 /* Special case handling for SHIFT_IN_CHAR */
491 if ( c == IMPL_SHIFT_IN_CHAR )
493 if ( pDestBuf >= pEndDestBuf )
495 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
496 break;
498 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
499 pDestBuf++;
501 else
502 bShifted = sal_True;
505 else
507 bEnd = sal_True;
508 bNeedShift = sal_False;
511 if ( bShifted )
513 /* Write the character to the bit buffer, or pad the bit */
514 /* buffer out to a full base64 character */
515 if ( bNeedShift )
517 nBufferBits += 16;
518 nBitBuffer |= ((sal_uInt32)c) << (32-nBufferBits);
520 else
521 nBufferBits += (6-(nBufferBits%6))%6;
523 /* Flush out as many full base64 characters as possible */
524 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) )
526 nBitBufferTemp = nBitBuffer >> (32-6);
527 *pDestBuf = aImplBase64Tab[nBitBufferTemp];
528 pDestBuf++;
529 nBitBuffer <<= 6;
530 nBufferBits -= 6;
533 if ( nBufferBits >= 6 )
535 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
536 break;
539 /* Write SHIFT_OUT_CHAR, when needed */
540 if ( !bNeedShift )
542 if ( pDestBuf >= pEndDestBuf )
544 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
545 break;
547 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
548 pDestBuf++;
549 bShifted = sal_False;
553 if ( !bEnd )
555 /* Character can be directly endcoded */
556 if ( !bNeedShift )
558 if ( pDestBuf >= pEndDestBuf )
560 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
561 break;
563 *pDestBuf = static_cast< char >(static_cast< unsigned char >(c));
564 pDestBuf++;
567 pSrcBuf++;
570 while ( !bEnd );
572 if ( pContextData )
574 pContextData->mbShifted = bShifted;
575 pContextData->mnBitBuffer = nBitBuffer;
576 pContextData->mnBufferBits = nBufferBits;
579 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
580 return (nDestBytes - (pEndDestBuf-pDestBuf));
583 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */