1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: tcvtutf7.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
35 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
39 /* ======================================================================= */
41 static sal_uChar
const aImplBase64Tab
[64] =
44 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
45 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
46 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
49 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
50 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
51 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
54 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
55 0x38, 0x39, 0x2B, 0x2F
58 /* Index in Base64Tab or 0xFF, when is a invalid character */
59 static sal_uChar
const aImplBase64IndexTab
[128] =
61 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */
62 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */
63 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */
64 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */
65 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */
66 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */
67 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */
68 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */
69 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */
70 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */
71 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */
72 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */
73 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */
74 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */
75 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */
76 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */
79 static sal_uChar
const aImplMustShiftTab
[128] =
81 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */
82 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */
83 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */
84 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */
85 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */
86 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */
87 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */
88 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? */
89 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */
90 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */
91 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */
92 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */
93 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */
94 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */
95 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */
96 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */
/* '+' : the character the encoder writes when it enters a shifted    */
/*       (base64) run                                                 */
#define IMPL_SHIFT_IN_CHAR      0x2B
/* '-' : the character the encoder writes when it leaves a shifted    */
/*       run; "+-" is the escape for a literal '+'                    */
#define IMPL_SHIFT_OUT_CHAR     0x2D
104 /* ----------------------------------------------------------------------- */
111 sal_uInt32 mnBitBuffer
;
112 sal_uInt32 mnBufferBits
;
113 } ImplUTF7ToUCContextData
;
115 /* ----------------------------------------------------------------------- */
117 void* ImplUTF7CreateUTF7TextToUnicodeContext( void )
119 ImplUTF7ToUCContextData
* pContextData
;
120 pContextData
= (ImplUTF7ToUCContextData
*)rtl_allocateMemory( sizeof( ImplUTF7ToUCContextData
) );
121 pContextData
->mbShifted
= sal_False
;
122 pContextData
->mbFirst
= sal_False
;
123 pContextData
->mbWroteOne
= sal_False
;
124 pContextData
->mnBitBuffer
= 0;
125 pContextData
->mnBufferBits
= 0;
126 return (void*)pContextData
;
129 /* ----------------------------------------------------------------------- */
/* Releases a UTF-7 -> Unicode conversion context by handing it to     */
/* rtl_freeMemory().                                                   */
void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
{
    rtl_freeMemory( pContext );
}
136 /* ----------------------------------------------------------------------- */
138 void ImplUTF7ResetTextToUnicodeContext( void* pContext
)
140 ImplUTF7ToUCContextData
* pContextData
= (ImplUTF7ToUCContextData
*)pContext
;
141 pContextData
->mbShifted
= sal_False
;
142 pContextData
->mbFirst
= sal_False
;
143 pContextData
->mbWroteOne
= sal_False
;
144 pContextData
->mnBitBuffer
= 0;
145 pContextData
->mnBufferBits
= 0;
148 /* ----------------------------------------------------------------------- */
/*
 * ImplUTF7ToUnicode: UTF-7 -> Unicode conversion routine.
 *
 * Reads bytes from pSrcBuf; while in a shifted run each byte is looked up in
 * aImplBase64IndexTab, its 6 bits are added to a bit buffer, and every
 * complete 16 bit unit is written to pDestBuf.
 *
 *   pData        - not used.
 *   pContext     - an ImplUTF7ToUCContextData; the state is written back to
 *                  it before returning. Restoring the state from the context
 *                  is inside the "Implementation not finished" comment.
 *   pSrcBuf      - source bytes, nSrcBytes of them.
 *   pDestBuf     - destination buffer with room for nDestChars characters.
 *   nFlags       - the RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK bits select how a
 *                  corrupt sequence is treated (error, ignore, or replacement
 *                  character).
 *   pInfo        - receives RTL_TEXTTOUNICODE_INFO_* bits (ERROR, INVALID,
 *                  SRCBUFFERTOSMALL, DESTBUFFERTOSMALL).
 *   pSrcCvtBytes - receives nSrcBytes minus the bytes left unread.
 *
 * Returns nDestChars minus the destination space left unused.
 *
 * NOTE(review): this listing looks incomplete (no braces, and locals such as
 * c, bShifted, bFirst, bWroteOne and bBase64End are used without a visible
 * declaration) - confirm details against the complete file.
 */
150 sal_Size
ImplUTF7ToUnicode( const ImplTextConverterData
* pData
, void* pContext
,
151 const sal_Char
* pSrcBuf
, sal_Size nSrcBytes
,
152 sal_Unicode
* pDestBuf
, sal_Size nDestChars
,
153 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
154 sal_Size
* pSrcCvtBytes
)
156 ImplUTF7ToUCContextData
* pContextData
= (ImplUTF7ToUCContextData
*)pContext
;
158 sal_uChar nBase64Value
= 0;
159 int bEnd
= sal_False
;
164 sal_uInt32 nBitBuffer
;
165 sal_uInt32 nBitBufferTemp
;
166 sal_uInt32 nBufferBits
;
167 sal_Unicode
* pEndDestBuf
;
168 const sal_Char
* pEndSrcBuf
;
170 (void) pData
; /* unused */
172 /* !!! Implementation not finished !!!
175 bShifted = pContextData->mbShifted;
176 bFirst = pContextData->mbFirst;
177 bWroteOne = pContextData->mbWroteOne;
178 nBitBuffer = pContextData->mnBitBuffer;
179 nBufferBits = pContextData->mnBufferBits;
184 bShifted
= sal_False
;
186 bWroteOne
= sal_False
;
192 pEndDestBuf
= pDestBuf
+nDestChars
;
193 pEndSrcBuf
= pSrcBuf
+nSrcBytes
;
196 if ( pSrcBuf
< pEndSrcBuf
)
198 c
= (sal_uChar
)*pSrcBuf
;
200 /* End, when not a base64 character */
201 bBase64End
= sal_False
;
204 nBase64Value
= aImplBase64IndexTab
[c
];
205 if ( nBase64Value
== 0xFF )
206 bBase64End
= sal_True
;
212 bBase64End
= sal_True
;
219 bShifted
= sal_False
;
221 /* If the character causing us to drop out was SHIFT_IN */
222 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
223 /* The test for SHIFT_IN is not necessary, but allows */
224 /* an alternate form of UTF-7 where SHIFT_IN is escaped */
225 /* by SHIFT_IN. This only works for some values of */
226 /* SHIFT_IN. It is so implemented, because this comes */
227 /* from the official unicode book (The Unicode Standard, */
228 /* Version 2.0) and so I think, that someone of the */
229 /* world has used this feature. */
232 if ( (c
== IMPL_SHIFT_IN_CHAR
) || (c
== IMPL_SHIFT_OUT_CHAR
) )
234 /* If no base64 character, and the terminating */
235 /* character of the shift sequence was the */
236 /* SHIFT_OUT_CHAR, then it's a special escape */
237 /* for SHIFT_IN_CHAR. */
238 if ( bFirst
&& (c
== IMPL_SHIFT_OUT_CHAR
) )
240 if ( pDestBuf
>= pEndDestBuf
)
242 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
245 *pDestBuf
= IMPL_SHIFT_IN_CHAR
;
247 bWroteOne
= sal_True
;
252 if ( pSrcBuf
< pEndSrcBuf
)
253 c
= (sal_uChar
)*pSrcBuf
;
259 /* Empty sequence not allowed, so when we don't write one */
260 /* valid char, then the sequence is corrupt */
263 /* When no more bytes in the source buffer, then */
264 /* this buffer may be too small */
266 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
269 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
270 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
272 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
275 /* We insert here no default char, because I think */
276 /* this is better to ignore this */
282 /* Add 6 Bits from character to the bit buffer */
284 nBitBuffer
|= ((sal_uInt32
)(nBase64Value
& 0x3F)) << (32-nBufferBits
);
288 /* Extract as many full 16 bit characters as possible from the */
290 while ( (pDestBuf
< pEndDestBuf
) && (nBufferBits
>= 16) )
292 nBitBufferTemp
= nBitBuffer
>> (32-16);
293 *pDestBuf
= (sal_Unicode
)((nBitBufferTemp
) & 0xFFFF);
297 bWroteOne
= sal_True
;
300 if ( nBufferBits
>= 16 )
302 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
308 /* Sequence ended and we have some bits, then the */
309 /* sequence is corrupted */
310 if ( nBufferBits
&& nBitBuffer
)
312 /* When no more bytes in the source buffer, then */
313 /* this buffer may be too small */
315 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
318 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
319 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
321 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
324 else if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
)
326 if ( pDestBuf
>= pEndDestBuf
)
328 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
332 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
;
347 if ( c
== IMPL_SHIFT_IN_CHAR
)
351 bWroteOne
= sal_False
;
355 /* No direct encoded character, then the buffer is */
359 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
360 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
362 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
365 else if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
)
367 if ( pDestBuf
>= pEndDestBuf
)
369 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
373 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
;
377 /* Write char to unicode buffer */
378 if ( pDestBuf
>= pEndDestBuf
)
380 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Store the current conversion state in the context */
395 pContextData
->mbShifted
= bShifted
;
396 pContextData
->mbFirst
= bFirst
;
397 pContextData
->mbWroteOne
= bWroteOne
;
398 pContextData
->mnBitBuffer
= nBitBuffer
;
399 pContextData
->mnBufferBits
= nBufferBits
;
/* Report the consumed source bytes; return the used destination size */
402 *pSrcCvtBytes
= nSrcBytes
- (pEndSrcBuf
-pSrcBuf
);
403 return (nDestChars
- (pEndDestBuf
-pDestBuf
));
406 /* ======================================================================= */
411 sal_uInt32 mnBitBuffer
;
412 sal_uInt32 mnBufferBits
;
413 } ImplUTF7FromUCContextData
;
415 /* ----------------------------------------------------------------------- */
417 void* ImplUTF7CreateUnicodeToTextContext( void )
419 ImplUTF7FromUCContextData
* pContextData
;
420 pContextData
= (ImplUTF7FromUCContextData
*)rtl_allocateMemory( sizeof( ImplUTF7FromUCContextData
) );
421 pContextData
->mbShifted
= sal_False
;
422 pContextData
->mnBitBuffer
= 0;
423 pContextData
->mnBufferBits
= 0;
424 return (void*)pContextData
;
427 /* ----------------------------------------------------------------------- */
/* Releases a Unicode -> UTF-7 conversion context by handing it to     */
/* rtl_freeMemory().                                                   */
void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
{
    rtl_freeMemory( pContext );
}
434 /* ----------------------------------------------------------------------- */
436 void ImplUTF7ResetUnicodeToTextContext( void* pContext
)
438 ImplUTF7FromUCContextData
* pContextData
= (ImplUTF7FromUCContextData
*)pContext
;
439 pContextData
->mbShifted
= sal_False
;
440 pContextData
->mnBitBuffer
= 0;
441 pContextData
->mnBufferBits
= 0;
444 /* ----------------------------------------------------------------------- */
/*
 * ImplUnicodeToUTF7: Unicode -> UTF-7 conversion routine.
 *
 * A character needs a shifted run when it is above 0x7F or flagged in
 * aImplMustShiftTab. Entering a run writes IMPL_SHIFT_IN_CHAR; a source
 * character equal to IMPL_SHIFT_IN_CHAR is followed by IMPL_SHIFT_OUT_CHAR.
 * Shifted characters go through a bit buffer that is flushed 6 bits at a
 * time via aImplBase64Tab; other characters are written as a single byte.
 *
 *   pData        - not used.
 *   pContext     - an ImplUTF7FromUCContextData; the state is written back to
 *                  it before returning. Restoring the state from the context
 *                  is inside the "Implementation not finished" comment.
 *   pSrcBuf      - source characters, nSrcChars of them.
 *   pDestBuf     - destination buffer with room for nDestBytes bytes.
 *   nFlags       - not used.
 *   pInfo        - receives RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL when the
 *                  destination buffer runs out.
 *   pSrcCvtChars - receives nSrcChars minus the characters left unread.
 *
 * Returns nDestBytes minus the destination space left unused.
 *
 * NOTE(review): this listing looks incomplete (no braces, and locals such as
 * bShifted and bNeedShift are used without a visible declaration) - confirm
 * details against the complete file.
 */
446 sal_Size
ImplUnicodeToUTF7( const ImplTextConverterData
* pData
, void* pContext
,
447 const sal_Unicode
* pSrcBuf
, sal_Size nSrcChars
,
448 sal_Char
* pDestBuf
, sal_Size nDestBytes
,
449 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
450 sal_Size
* pSrcCvtChars
)
452 ImplUTF7FromUCContextData
* pContextData
= (ImplUTF7FromUCContextData
*)pContext
;
453 sal_Unicode c
= '\0';
454 int bEnd
= sal_False
;
457 sal_uInt32 nBitBuffer
;
458 sal_uInt32 nBitBufferTemp
;
459 sal_uInt32 nBufferBits
;
460 sal_Char
* pEndDestBuf
;
461 const sal_Unicode
* pEndSrcBuf
;
463 (void) pData
; /* unused */
464 (void) nFlags
; /* unused */
466 /* !!! Implementation not finished !!!
469 bShifted = pContextData->mbShifted;
470 nBitBuffer = pContextData->mnBitBuffer;
471 nBufferBits = pContextData->mnBufferBits;
476 bShifted
= sal_False
;
482 pEndDestBuf
= pDestBuf
+nDestBytes
;
483 pEndSrcBuf
= pSrcBuf
+nSrcChars
;
486 if ( pSrcBuf
< pEndSrcBuf
)
490 bNeedShift
= (c
> 0x7F) || aImplMustShiftTab
[c
];
491 if ( bNeedShift
&& !bShifted
)
493 if ( pDestBuf
>= pEndDestBuf
)
495 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
498 *pDestBuf
= IMPL_SHIFT_IN_CHAR
;
500 /* Special case handling for SHIFT_IN_CHAR */
501 if ( c
== IMPL_SHIFT_IN_CHAR
)
503 if ( pDestBuf
>= pEndDestBuf
)
505 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
508 *pDestBuf
= IMPL_SHIFT_OUT_CHAR
;
518 bNeedShift
= sal_False
;
523 /* Write the character to the bit buffer, or pad the bit */
524 /* buffer out to a full base64 character */
528 nBitBuffer
|= ((sal_uInt32
)c
) << (32-nBufferBits
);
531 nBufferBits
+= (6-(nBufferBits
%6))%6;
533 /* Flush out as many full base64 characters as possible */
534 while ( (pDestBuf
< pEndDestBuf
) && (nBufferBits
>= 6) )
536 nBitBufferTemp
= nBitBuffer
>> (32-6);
537 *pDestBuf
= aImplBase64Tab
[nBitBufferTemp
];
543 if ( nBufferBits
>= 6 )
545 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
549 /* Write SHIFT_OUT_CHAR, when needed */
552 if ( pDestBuf
>= pEndDestBuf
)
554 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
557 *pDestBuf
= IMPL_SHIFT_OUT_CHAR
;
559 bShifted
= sal_False
;
565 /* Character can be directly encoded */
568 if ( pDestBuf
>= pEndDestBuf
)
570 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
573 *pDestBuf
= (sal_Char
)(sal_uChar
)c
;
/* Store the current conversion state in the context */
584 pContextData
->mbShifted
= bShifted
;
585 pContextData
->mnBitBuffer
= nBitBuffer
;
586 pContextData
->mnBufferBits
= nBufferBits
;
/* Report the consumed source characters; return the used destination size */
589 *pSrcCvtChars
= nSrcChars
- (pEndSrcBuf
-pSrcBuf
);
590 return (nDestBytes
- (pEndDestBuf
-pDestBuf
));