1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <rtl/textcvt.h>
24 #include "tenchelp.hxx"
25 #include "unichars.hxx"
27 /* ======================================================================= */
/* Base64 alphabet: maps a 6 bit value (0-63) to the ASCII character that */
/* represents it. This is the inverse of aImplBase64IndexTab.             */
static unsigned char const aImplBase64Tab[64] =
{
    /* A-Z */
          0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A,
    /* a-z */
          0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A,
    /* 0-9,+,/ */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x2B, 0x2F
};
/* Maps an ASCII code (0x00-0x7F) to its index in aImplBase64Tab, or to */
/* 0xFF if the character is not a base64 character.                     */
static unsigned char const aImplBase64IndexTab[128] =
{
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x00-0x07 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x08-0x0F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x10-0x17 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x18-0x1F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x20-0x27  !"#$%&' */
    0xFF, 0xFF, 0xFF,   62, 0xFF, 0xFF, 0xFF,   63,     /* 0x28-0x2F ()*+,-./ */
      52,   53,   54,   55,   56,   57,   58,   59,     /* 0x30-0x37 01234567 */
      60,   61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x38-0x3F 89:;<=>? */
    0xFF,    0,    1,    2,    3,    4,    5,    6,     /* 0x40-0x47 @ABCDEFG */
       7,    8,    9,   10,   11,   12,   13,   14,     /* 0x48-0x4F HIJKLMNO */
      15,   16,   17,   18,   19,   20,   21,   22,     /* 0x50-0x57 PQRSTUVW */
      23,   24,   25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x58-0x5F XYZ[\]^_ */
    0xFF,   26,   27,   28,   29,   30,   31,   32,     /* 0x60-0x67 `abcdefg */
      33,   34,   35,   36,   37,   38,   39,   40,     /* 0x68-0x6F hijklmno */
      41,   42,   43,   44,   45,   46,   47,   48,     /* 0x70-0x77 pqrstuvw */
      49,   50,   51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF      /* 0x78-0x7F xyz{|}~  */
};
/* Indexed by ASCII code (0x00-0x7F): 1 if the character has to be written */
/* base64 encoded (shifted), 0 if it can be written directly.              */
/* The directly written control characters are HTAB, LF and CR, which      */
/* together with space are the ones RFC 2152 (rule 3) allows unencoded.    */
static unsigned char const aImplMustShiftTab[128] =
{
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x00-0x07 */
    1, 0, 0, 1, 1, 0, 1, 1,     /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF, 0x0D == CR */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x10-0x17 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x18-0x1F */
    0, 1, 1, 1, 1, 1, 1, 0,     /* 0x20-0x27  !"#$%&' */
    0, 0, 1, 1, 0, 1, 0, 0,     /* 0x28-0x2F ()*+,-./ */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x30-0x37 01234567 */
    0, 0, 0, 1, 1, 1, 1, 0,     /* 0x38-0x3F 89:;<=>? */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x40-0x47 @ABCDEFG */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x48-0x4F HIJKLMNO */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x50-0x57 PQRSTUVW */
    0, 0, 0, 1, 1, 1, 1, 1,     /* 0x58-0x5F XYZ[\]^_ */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x60-0x67 `abcdefg */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x68-0x6F hijklmno */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x70-0x77 pqrstuvw */
    0, 0, 0, 1, 1, 1, 1, 1      /* 0x78-0x7F xyz{|}~  */
};
/* 0x2B is '+': the character that starts a shifted (base64) sequence */
88 #define IMPL_SHIFT_IN_CHAR 0x2B
/* 0x2D is '-': the character written to end a shifted sequence */
90 #define IMPL_SHIFT_OUT_CHAR 0x2D
92 /* ----------------------------------------------------------------------- */
94 struct ImplUTF7ToUCContextData
99 sal_uInt32 mnBitBuffer
;
100 sal_uInt32 mnBufferBits
;
103 /* ----------------------------------------------------------------------- */
105 void* ImplUTF7CreateUTF7TextToUnicodeContext()
107 ImplUTF7ToUCContextData
* pContextData
= new ImplUTF7ToUCContextData
;
108 pContextData
->mbShifted
= false;
109 pContextData
->mbFirst
= false;
110 pContextData
->mbWroteOne
= false;
111 pContextData
->mnBitBuffer
= 0;
112 pContextData
->mnBufferBits
= 0;
116 /* ----------------------------------------------------------------------- */
118 void ImplUTF7DestroyTextToUnicodeContext( void* pContext
)
120 delete static_cast< ImplUTF7ToUCContextData
* >(pContext
);
123 /* ----------------------------------------------------------------------- */
125 void ImplUTF7ResetTextToUnicodeContext( void* pContext
)
127 ImplUTF7ToUCContextData
* pContextData
= static_cast<ImplUTF7ToUCContextData
*>(pContext
);
128 pContextData
->mbShifted
= false;
129 pContextData
->mbFirst
= false;
130 pContextData
->mbWroteOne
= false;
131 pContextData
->mnBitBuffer
= 0;
132 pContextData
->mnBufferBits
= 0;
135 /* ----------------------------------------------------------------------- */
/* Converts UTF-7 encoded bytes into sal_Unicode characters.               */
/*                                                                         */
/* pContext      conversion context (ImplUTF7ToUCContextData); the final   */
/*               state is stored into it before returning                  */
/* pSrcBuf       source bytes, nSrcBytes of them                           */
/* pDestBuf      destination buffer with room for nDestChars characters    */
/* nFlags        RTL_TEXTTOUNICODE_FLAGS_*; the INVALID_MASK part selects  */
/*               what happens on invalid input (error, ignore, or write    */
/*               RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER)                */
/* pInfo         receives RTL_TEXTTOUNICODE_INFO_* status bits             */
/* pSrcCvtBytes  receives the number of source bytes that were consumed    */
/*                                                                         */
/* Returns the number of characters written to pDestBuf.                   */
137 sal_Size
ImplUTF7ToUnicode( SAL_UNUSED_PARAMETER
const void*, void* pContext
,
138 const char* pSrcBuf
, sal_Size nSrcBytes
,
139 sal_Unicode
* pDestBuf
, sal_Size nDestChars
,
140 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
141 sal_Size
* pSrcCvtBytes
)
143 ImplUTF7ToUCContextData
* pContextData
= static_cast<ImplUTF7ToUCContextData
*>(pContext
);
144 unsigned char c
='\0';
145 unsigned char nBase64Value
= 0;
151 sal_uInt32 nBitBuffer
;
152 sal_uInt32 nBitBufferTemp
;
153 sal_uInt32 nBufferBits
;
154 sal_Unicode
* pEndDestBuf
;
155 const char* pEndSrcBuf
;
157 /* !!! Implementation not finished !!!
160 bShifted = pContextData->mbShifted;
161 bFirst = pContextData->mbFirst;
162 bWroteOne = pContextData->mbWroteOne;
163 nBitBuffer = pContextData->mnBitBuffer;
164 nBufferBits = pContextData->mnBufferBits;
177 pEndDestBuf
= pDestBuf
+nDestChars
;
178 pEndSrcBuf
= pSrcBuf
+nSrcBytes
;
181 if ( pSrcBuf
< pEndSrcBuf
)
183 c
= static_cast<unsigned char>(*pSrcBuf
);
185 /* End, when not a base64 character */
189 nBase64Value
= aImplBase64IndexTab
[c
];
190 if ( nBase64Value
== 0xFF )
206 /* If the character causing us to drop out was SHIFT_IN */
207 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
208 /* The test for SHIFT_IN is not necessary, but allows */
209 /* an alternate form of UTF-7 where SHIFT_IN is escaped */
210 /* by SHIFT_IN. This only works for some values of */
211 /* SHIFT_IN. It is implemented this way because it comes */
212 /* from the official Unicode book (The Unicode Standard, */
213 /* Version 2.0), so presumably someone out there has */
214 /* used this feature. */
217 if ( (c
== IMPL_SHIFT_IN_CHAR
) || (c
== IMPL_SHIFT_OUT_CHAR
) )
219 /* If no base64 character, and the terminating */
220 /* character of the shift sequence was the */
221 /* SHIFT_OUT_CHAR, then it's a special escape */
222 /* for SHIFT_IN_CHAR. */
223 if ( bFirst
&& (c
== IMPL_SHIFT_OUT_CHAR
) )
225 if ( pDestBuf
>= pEndDestBuf
)
227 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
230 *pDestBuf
= IMPL_SHIFT_IN_CHAR
;
237 if ( pSrcBuf
< pEndSrcBuf
)
238 c
= static_cast<unsigned char>(*pSrcBuf
);
244 /* An empty sequence is not allowed, so if we did not write at */
245 /* least one valid char, the sequence is corrupt */
248 /* If there are no more bytes in the source buffer, then */
249 /* this buffer may be too small */
251 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
254 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
255 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
257 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
260 /* No default char is inserted here, because it seems */
261 /* better to simply ignore the empty sequence */
267 /* Add 6 bits from the character to the bit buffer */
269 nBitBuffer
|= static_cast<sal_uInt32
>(nBase64Value
& 0x3F) << (32-nBufferBits
);
273 /* Extract as many full 16 bit characters as possible from the bit buffer */
275 while ( (pDestBuf
< pEndDestBuf
) && (nBufferBits
>= 16) )
277 nBitBufferTemp
= nBitBuffer
>> (32-16);
278 *pDestBuf
= static_cast<sal_Unicode
>(nBitBufferTemp
& 0xFFFF);
/* A full character is still pending, so the loop above stopped */
/* because the destination buffer is full */
285 if ( nBufferBits
>= 16 )
287 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
293 /* If the sequence ended and there are still some bits left, */
294 /* the sequence is corrupt */
295 if ( nBufferBits
&& nBitBuffer
)
297 /* If there are no more bytes in the source buffer, then */
298 /* this buffer may be too small */
300 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
303 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
304 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
306 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
309 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
)
311 if ( pDestBuf
>= pEndDestBuf
)
313 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
317 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
;
332 if ( c
== IMPL_SHIFT_IN_CHAR
)
340 /* No direct encoded character, then the buffer is */
344 *pInfo
|= RTL_TEXTTOUNICODE_INFO_INVALID
;
345 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
)
347 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
350 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK
) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
)
352 if ( pDestBuf
>= pEndDestBuf
)
354 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
358 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
;
362 /* Write char to unicode buffer */
363 if ( pDestBuf
>= pEndDestBuf
)
365 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
/* Store the conversion state in the context */
380 pContextData
->mbShifted
= bShifted
;
381 pContextData
->mbFirst
= bFirst
;
382 pContextData
->mbWroteOne
= bWroteOne
;
383 pContextData
->mnBitBuffer
= nBitBuffer
;
384 pContextData
->mnBufferBits
= nBufferBits
;
/* Source bytes consumed = total minus what is left; the return value */
/* is computed the same way for the characters written */
387 *pSrcCvtBytes
= nSrcBytes
- (pEndSrcBuf
-pSrcBuf
);
388 return (nDestChars
- (pEndDestBuf
-pDestBuf
));
391 /* ======================================================================= */
393 struct ImplUTF7FromUCContextData
396 sal_uInt32 mnBitBuffer
;
397 sal_uInt32 mnBufferBits
;
400 /* ----------------------------------------------------------------------- */
402 void* ImplUTF7CreateUnicodeToTextContext()
404 ImplUTF7FromUCContextData
* pContextData
= new ImplUTF7FromUCContextData
;
405 pContextData
->mbShifted
= false;
406 pContextData
->mnBitBuffer
= 0;
407 pContextData
->mnBufferBits
= 0;
411 /* ----------------------------------------------------------------------- */
413 void ImplUTF7DestroyUnicodeToTextContext( void* pContext
)
415 delete static_cast< ImplUTF7FromUCContextData
* >(pContext
);
418 /* ----------------------------------------------------------------------- */
420 void ImplUTF7ResetUnicodeToTextContext( void* pContext
)
422 ImplUTF7FromUCContextData
* pContextData
= static_cast<ImplUTF7FromUCContextData
*>(pContext
);
423 pContextData
->mbShifted
= false;
424 pContextData
->mnBitBuffer
= 0;
425 pContextData
->mnBufferBits
= 0;
428 /* ----------------------------------------------------------------------- */
/* Converts sal_Unicode characters into UTF-7 encoded bytes.               */
/*                                                                         */
/* pContext      conversion context (ImplUTF7FromUCContextData); the final */
/*               state is stored into it before returning                  */
/* pSrcBuf       source characters, nSrcChars of them                      */
/* pDestBuf      destination buffer with room for nDestBytes bytes         */
/* (flags)       unnamed and unused                                        */
/* pInfo         receives RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL when    */
/*               the destination buffer is too small                       */
/* pSrcCvtChars  receives the number of source characters consumed         */
/*                                                                         */
/* Returns the number of bytes written to pDestBuf.                        */
430 sal_Size
ImplUnicodeToUTF7( SAL_UNUSED_PARAMETER
const void*, void* pContext
,
431 const sal_Unicode
* pSrcBuf
, sal_Size nSrcChars
,
432 char* pDestBuf
, sal_Size nDestBytes
,
433 SAL_UNUSED_PARAMETER sal_uInt32
, sal_uInt32
* pInfo
,
434 sal_Size
* pSrcCvtChars
)
436 ImplUTF7FromUCContextData
* pContextData
= static_cast<ImplUTF7FromUCContextData
*>(pContext
);
437 sal_Unicode c
= '\0';
441 sal_uInt32 nBitBuffer
;
442 sal_uInt32 nBitBufferTemp
;
443 sal_uInt32 nBufferBits
;
445 const sal_Unicode
* pEndSrcBuf
;
447 /* !!! Implementation not finished !!!
450 bShifted = pContextData->mbShifted;
451 nBitBuffer = pContextData->mnBitBuffer;
452 nBufferBits = pContextData->mnBufferBits;
463 pEndDestBuf
= pDestBuf
+nDestBytes
;
464 pEndSrcBuf
= pSrcBuf
+nSrcChars
;
467 if ( pSrcBuf
< pEndSrcBuf
)
471 bNeedShift
= (c
> 0x7F) || aImplMustShiftTab
[c
];
472 if ( bNeedShift
&& !bShifted
)
474 if ( pDestBuf
>= pEndDestBuf
)
476 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
479 *pDestBuf
= IMPL_SHIFT_IN_CHAR
;
481 /* Special case handling for SHIFT_IN_CHAR */
482 if ( c
== IMPL_SHIFT_IN_CHAR
)
484 if ( pDestBuf
>= pEndDestBuf
)
486 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
489 *pDestBuf
= IMPL_SHIFT_OUT_CHAR
;
504 /* Write the character to the bit buffer, or pad the bit */
505 /* buffer out to a full base64 character */
509 nBitBuffer
|= static_cast<sal_uInt32
>(c
) << (32-nBufferBits
);
/* Padding: round the bit count up to the next multiple of 6 */
512 nBufferBits
+= (6-(nBufferBits
%6))%6;
514 /* Flush out as many full base64 characters as possible */
515 while ( (pDestBuf
< pEndDestBuf
) && (nBufferBits
>= 6) )
517 nBitBufferTemp
= nBitBuffer
>> (32-6);
518 *pDestBuf
= aImplBase64Tab
[nBitBufferTemp
];
/* A full base64 character is still pending, so the loop above */
/* stopped because the destination buffer is full */
524 if ( nBufferBits
>= 6 )
526 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
530 /* Write SHIFT_OUT_CHAR, when needed */
533 if ( pDestBuf
>= pEndDestBuf
)
535 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
538 *pDestBuf
= IMPL_SHIFT_OUT_CHAR
;
546 /* Character can be directly encoded */
549 if ( pDestBuf
>= pEndDestBuf
)
551 *pInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
554 *pDestBuf
= static_cast< char >(static_cast< unsigned char >(c
));
/* Store the conversion state in the context */
565 pContextData
->mbShifted
= bShifted
;
566 pContextData
->mnBitBuffer
= nBitBuffer
;
567 pContextData
->mnBufferBits
= nBufferBits
;
/* Source characters consumed = total minus what is left; the return */
/* value is computed the same way for the bytes written */
570 *pSrcCvtChars
= nSrcChars
- (pEndSrcBuf
-pSrcBuf
);
571 return (nDestBytes
- (pEndDestBuf
-pDestBuf
));
574 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */