Update ooo320-m1
[ooovba.git] / sal / textenc / tcvtutf7.c
blobbc2fedaa7b5e168248c050e3be50b86765cfd75e
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: tcvtutf7.c,v $
10 * $Revision: 1.8 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "tenchelp.h"
32 #include "unichars.h"
34 #ifndef _RTL_ALLOC_H
35 #include "rtl/alloc.h"
36 #endif
37 #include "rtl/textcvt.h"
39 /* ======================================================================= */
41 static sal_uChar const aImplBase64Tab[64] =
43 /* A-Z */
44 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
45 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
46 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
47 0x58, 0x59, 0x5A,
48 /* a-z */
49 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
50 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
51 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
52 0x78, 0x79, 0x7A,
53 /* 0-9,+,/ */
54 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
55 0x38, 0x39, 0x2B, 0x2F
58 /* Index in Base64Tab or 0xFF, when is a invalid character */
59 static sal_uChar const aImplBase64IndexTab[128] =
61 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */
62 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */
63 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */
64 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */
65 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */
66 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */
67 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */
68 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */
69 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */
70 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */
71 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */
72 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */
73 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */
74 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */
75 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */
76 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */
79 static sal_uChar const aImplMustShiftTab[128] =
81 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */
82 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */
83 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */
84 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */
85 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */
86 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */
87 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */
88 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? */
89 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */
90 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */
91 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */
92 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */
93 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */
94 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */
95 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */
96 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */
/* '+' : starts a base64 encoded (shifted) run */
#define IMPL_SHIFT_IN_CHAR      0x2B
/* '-' : explicitly ends a shifted run */
#define IMPL_SHIFT_OUT_CHAR     0x2D
104 /* ----------------------------------------------------------------------- */
106 typedef struct
108 int mbShifted;
109 int mbFirst;
110 int mbWroteOne;
111 sal_uInt32 mnBitBuffer;
112 sal_uInt32 mnBufferBits;
113 } ImplUTF7ToUCContextData;
115 /* ----------------------------------------------------------------------- */
117 void* ImplUTF7CreateUTF7TextToUnicodeContext( void )
119 ImplUTF7ToUCContextData* pContextData;
120 pContextData = (ImplUTF7ToUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7ToUCContextData ) );
121 pContextData->mbShifted = sal_False;
122 pContextData->mbFirst = sal_False;
123 pContextData->mbWroteOne = sal_False;
124 pContextData->mnBitBuffer = 0;
125 pContextData->mnBufferBits = 0;
126 return (void*)pContextData;
129 /* ----------------------------------------------------------------------- */
/* Releases a context created for the UTF-7 -> Unicode direction by */
/* passing it to rtl_freeMemory(). */
void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
{
    rtl_freeMemory( pContext );
}
136 /* ----------------------------------------------------------------------- */
138 void ImplUTF7ResetTextToUnicodeContext( void* pContext )
140 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext;
141 pContextData->mbShifted = sal_False;
142 pContextData->mbFirst = sal_False;
143 pContextData->mbWroteOne = sal_False;
144 pContextData->mnBitBuffer = 0;
145 pContextData->mnBufferBits = 0;
148 /* ----------------------------------------------------------------------- */
150 sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext,
151 const sal_Char* pSrcBuf, sal_Size nSrcBytes,
152 sal_Unicode* pDestBuf, sal_Size nDestChars,
153 sal_uInt32 nFlags, sal_uInt32* pInfo,
154 sal_Size* pSrcCvtBytes )
156 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext;
157 sal_uChar c ='\0';
158 sal_uChar nBase64Value = 0;
159 int bEnd = sal_False;
160 int bShifted;
161 int bFirst;
162 int bWroteOne;
163 int bBase64End;
164 sal_uInt32 nBitBuffer;
165 sal_uInt32 nBitBufferTemp;
166 sal_uInt32 nBufferBits;
167 sal_Unicode* pEndDestBuf;
168 const sal_Char* pEndSrcBuf;
170 (void) pData; /* unused */
172 /* !!! Implementation not finnished !!!
173 if ( pContextData )
175 bShifted = pContextData->mbShifted;
176 bFirst = pContextData->mbFirst;
177 bWroteOne = pContextData->mbWroteOne;
178 nBitBuffer = pContextData->mnBitBuffer;
179 nBufferBits = pContextData->mnBufferBits;
181 else
184 bShifted = sal_False;
185 bFirst = sal_False;
186 bWroteOne = sal_False;
187 nBitBuffer = 0;
188 nBufferBits = 0;
191 *pInfo = 0;
192 pEndDestBuf = pDestBuf+nDestChars;
193 pEndSrcBuf = pSrcBuf+nSrcBytes;
196 if ( pSrcBuf < pEndSrcBuf )
198 c = (sal_uChar)*pSrcBuf;
200 /* End, when not a base64 character */
201 bBase64End = sal_False;
202 if ( c <= 0x7F )
204 nBase64Value = aImplBase64IndexTab[c];
205 if ( nBase64Value == 0xFF )
206 bBase64End = sal_True;
209 else
211 bEnd = sal_True;
212 bBase64End = sal_True;
215 if ( bShifted )
217 if ( bBase64End )
219 bShifted = sal_False;
221 /* If the character causing us to drop out was SHIFT_IN */
222 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
223 /* The test for SHIFT_IN is not necessary, but allows */
224 /* an alternate form of UTF-7 where SHIFT_IN is escaped */
225 /* by SHIFT_IN. This only works for some values of */
226 /* SHIFT_IN. It is so implemented, because this comes */
227 /* from the officel unicode book (The Unicode Standard, */
228 /* Version 2.0) and so I think, that someone of the */
229 /* world has used this feature. */
230 if ( !bEnd )
232 if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) )
234 /* If no base64 character, and the terminating */
235 /* character of the shift sequence was the */
236 /* SHIFT_OUT_CHAR, then it't a special escape */
237 /* for SHIFT_IN_CHAR. */
238 if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) )
240 if ( pDestBuf >= pEndDestBuf )
242 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
243 break;
245 *pDestBuf = IMPL_SHIFT_IN_CHAR;
246 pDestBuf++;
247 bWroteOne = sal_True;
250 /* Skip character */
251 pSrcBuf++;
252 if ( pSrcBuf < pEndSrcBuf )
253 c = (sal_uChar)*pSrcBuf;
254 else
255 bEnd = sal_True;
259 /* Empty sequence not allowed, so when we don't write one */
260 /* valid char, then the sequence is corrupt */
261 if ( !bWroteOne )
263 /* When no more bytes in the source buffer, then */
264 /* this buffer may be to small */
265 if ( bEnd )
266 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
267 else
269 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
270 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
272 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
273 break;
275 /* We insert here no default char, because I think */
276 /* this is better to ignore this */
280 else
282 /* Add 6 Bits from character to the bit buffer */
283 nBufferBits += 6;
284 nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits);
285 bFirst = sal_False;
288 /* Extract as many full 16 bit characters as possible from the */
289 /* bit buffer. */
290 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) )
292 nBitBufferTemp = nBitBuffer >> (32-16);
293 *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF);
294 pDestBuf++;
295 nBitBuffer <<= 16;
296 nBufferBits -= 16;
297 bWroteOne = sal_True;
300 if ( nBufferBits >= 16 )
302 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
303 break;
306 if ( bBase64End )
308 /* Sequence ended and we have some bits, then the */
309 /* sequence is corrupted */
310 if ( nBufferBits && nBitBuffer )
312 /* When no more bytes in the source buffer, then */
313 /* this buffer may be to small */
314 if ( bEnd )
315 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
316 else
318 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
319 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
321 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
322 break;
324 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
326 if ( pDestBuf >= pEndDestBuf )
328 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
329 break;
331 *pDestBuf++
332 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
338 nBitBuffer = 0;
339 nBufferBits = 0;
343 if ( !bEnd )
345 if ( !bShifted )
347 if ( c == IMPL_SHIFT_IN_CHAR )
349 bShifted = sal_True;
350 bFirst = sal_True;
351 bWroteOne = sal_False;
353 else
355 /* No direct encoded charcater, then the buffer is */
356 /* corrupt */
357 if ( c > 0x7F )
359 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
360 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
362 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
363 break;
365 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
367 if ( pDestBuf >= pEndDestBuf )
369 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
370 break;
372 *pDestBuf++
373 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
377 /* Write char to unicode buffer */
378 if ( pDestBuf >= pEndDestBuf )
380 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
381 break;
383 *pDestBuf = c;
384 pDestBuf++;
388 pSrcBuf++;
391 while ( !bEnd );
393 if ( pContextData )
395 pContextData->mbShifted = bShifted;
396 pContextData->mbFirst = bFirst;
397 pContextData->mbWroteOne = bWroteOne;
398 pContextData->mnBitBuffer = nBitBuffer;
399 pContextData->mnBufferBits = nBufferBits;
402 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
403 return (nDestChars - (pEndDestBuf-pDestBuf));
406 /* ======================================================================= */
408 typedef struct
410 int mbShifted;
411 sal_uInt32 mnBitBuffer;
412 sal_uInt32 mnBufferBits;
413 } ImplUTF7FromUCContextData;
415 /* ----------------------------------------------------------------------- */
417 void* ImplUTF7CreateUnicodeToTextContext( void )
419 ImplUTF7FromUCContextData* pContextData;
420 pContextData = (ImplUTF7FromUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7FromUCContextData ) );
421 pContextData->mbShifted = sal_False;
422 pContextData->mnBitBuffer = 0;
423 pContextData->mnBufferBits = 0;
424 return (void*)pContextData;
427 /* ----------------------------------------------------------------------- */
/* Releases a context created for the Unicode -> UTF-7 direction by */
/* passing it to rtl_freeMemory(). */
void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
{
    rtl_freeMemory( pContext );
}
434 /* ----------------------------------------------------------------------- */
436 void ImplUTF7ResetUnicodeToTextContext( void* pContext )
438 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext;
439 pContextData->mbShifted = sal_False;
440 pContextData->mnBitBuffer = 0;
441 pContextData->mnBufferBits = 0;
444 /* ----------------------------------------------------------------------- */
446 sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext,
447 const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
448 sal_Char* pDestBuf, sal_Size nDestBytes,
449 sal_uInt32 nFlags, sal_uInt32* pInfo,
450 sal_Size* pSrcCvtChars )
452 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext;
453 sal_Unicode c = '\0';
454 int bEnd = sal_False;
455 int bShifted;
456 int bNeedShift;
457 sal_uInt32 nBitBuffer;
458 sal_uInt32 nBitBufferTemp;
459 sal_uInt32 nBufferBits;
460 sal_Char* pEndDestBuf;
461 const sal_Unicode* pEndSrcBuf;
463 (void) pData; /* unused */
464 (void) nFlags; /* unused */
466 /* !!! Implementation not finnished !!!
467 if ( pContextData )
469 bShifted = pContextData->mbShifted;
470 nBitBuffer = pContextData->mnBitBuffer;
471 nBufferBits = pContextData->mnBufferBits;
473 else
476 bShifted = sal_False;
477 nBitBuffer = 0;
478 nBufferBits = 0;
481 *pInfo = 0;
482 pEndDestBuf = pDestBuf+nDestBytes;
483 pEndSrcBuf = pSrcBuf+nSrcChars;
486 if ( pSrcBuf < pEndSrcBuf )
488 c = *pSrcBuf;
490 bNeedShift = (c > 0x7F) || aImplMustShiftTab[c];
491 if ( bNeedShift && !bShifted )
493 if ( pDestBuf >= pEndDestBuf )
495 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
496 break;
498 *pDestBuf = IMPL_SHIFT_IN_CHAR;
499 pDestBuf++;
500 /* Special case handling for SHIFT_IN_CHAR */
501 if ( c == IMPL_SHIFT_IN_CHAR )
503 if ( pDestBuf >= pEndDestBuf )
505 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
506 break;
508 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
509 pDestBuf++;
511 else
512 bShifted = sal_True;
515 else
517 bEnd = sal_True;
518 bNeedShift = sal_False;
521 if ( bShifted )
523 /* Write the character to the bit buffer, or pad the bit */
524 /* buffer out to a full base64 character */
525 if ( bNeedShift )
527 nBufferBits += 16;
528 nBitBuffer |= ((sal_uInt32)c) << (32-nBufferBits);
530 else
531 nBufferBits += (6-(nBufferBits%6))%6;
533 /* Flush out as many full base64 characters as possible */
534 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) )
536 nBitBufferTemp = nBitBuffer >> (32-6);
537 *pDestBuf = aImplBase64Tab[nBitBufferTemp];
538 pDestBuf++;
539 nBitBuffer <<= 6;
540 nBufferBits -= 6;
543 if ( nBufferBits >= 6 )
545 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
546 break;
549 /* Write SHIFT_OUT_CHAR, when needed */
550 if ( !bNeedShift )
552 if ( pDestBuf >= pEndDestBuf )
554 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
555 break;
557 *pDestBuf = IMPL_SHIFT_OUT_CHAR;
558 pDestBuf++;
559 bShifted = sal_False;
563 if ( !bEnd )
565 /* Character can be directly endcoded */
566 if ( !bNeedShift )
568 if ( pDestBuf >= pEndDestBuf )
570 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
571 break;
573 *pDestBuf = (sal_Char)(sal_uChar)c;
574 pDestBuf++;
577 pSrcBuf++;
580 while ( !bEnd );
582 if ( pContextData )
584 pContextData->mbShifted = bShifted;
585 pContextData->mnBitBuffer = nBitBuffer;
586 pContextData->mnBufferBits = nBufferBits;
589 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
590 return (nDestBytes - (pEndDestBuf-pDestBuf));