1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: inetmime.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_tools.hxx"
37 #include "rtl/tencinfo.h"
38 #include <tools/datetime.hxx>
39 #include <tools/inetmime.hxx>
41 namespace unnamed_tools_inetmime
{} using namespace unnamed_tools_inetmime
;
42 // unnamed namespaces don't work well yet
44 //============================================================================
45 namespace unnamed_tools_inetmime
{
// Charset (the class header is not visible in this excerpt): pairs a text
// encoding with a table of character ranges.
// Encoding reported by getEncoding().
49 rtl_TextEncoding m_eEncoding
;
// Range table consulted by contains(); the constructor asserts it is set.
50 const sal_uInt32
* m_pRanges
;
// Constructor taking the encoding and the range table.
53 inline Charset(rtl_TextEncoding eTheEncoding
,
54 const sal_uInt32
* pTheRanges
);
// Returns the encoding handed to the constructor.
56 rtl_TextEncoding
getEncoding() const { return m_eEncoding
; }
// Tests nChar against the range table (defined out of line).
58 bool contains(sal_uInt32 nChar
) const;
// Constructor definition: stores the encoding and, in debug builds, asserts
// that a range table is present.
61 inline Charset::Charset(rtl_TextEncoding eTheEncoding
,
62 const sal_uInt32
* pTheRanges
):
63 m_eEncoding(eTheEncoding
),
66 DBG_ASSERT(m_pRanges
, "Charset::Charset(): Bad ranges");
69 //============================================================================
// Forward declaration: appends the bytes in [pBegin, pEnd) to rText (the
// definition widens each byte via sal_uChar to one sal_Unicode).
70 void appendISO88591(UniString
& rText
, sal_Char
const * pBegin
,
71 sal_Char
const * pEnd
);
75 //============================================================================
// Singly linked list of Charset entries, each carrying a "disabled" flag.
// Parts of the class body are not visible in this excerpt.
76 class INetMIMECharsetList_Impl
// Node constructor (declaration; remaining parameters not visible here).
84 inline Node(const Charset
& rTheCharset
, bool bTheDisabled
,
// Creates an empty list.
91 INetMIMECharsetList_Impl(): m_pFirst(0) {}
93 ~INetMIMECharsetList_Impl();
// Puts a new, enabled node for rCharset at the head of the list.
95 void prepend(const Charset
& rCharset
)
96 { m_pFirst
= new Node(rCharset
, false, m_pFirst
); }
// Disables every still-enabled charset that does not contain nChar.
98 void includes(sal_uInt32 nChar
);
// Returns the encoding of a list entry, with eDefault as the supplied
// default (selection logic is in the out-of-line definition).
100 rtl_TextEncoding
getPreferredEncoding(rtl_TextEncoding eDefault
101 = RTL_TEXTENCODING_DONTKNOW
)
// Node constructor definition: copies the charset and the disabled flag.
107 inline INetMIMECharsetList_Impl::Node::Node(const Charset
& rTheCharset
,
110 m_aCharset(rTheCharset
),
111 m_bDisabled(bTheDisabled
),
115 //============================================================================
116 namespace unnamed_tools_inetmime
{
// Parameter (struct header not visible in this excerpt): one raw parameter
// as collected by scanParameters(), linked into a ParameterList.
// Attribute name (scanParameters() lower-cases it when needed).
121 ByteString m_aAttribute
;
// Charset name of an extended value; empty if none was given.
122 ByteString m_aCharset
;
// Language tag of an extended value; empty if none was given.
123 ByteString m_aLanguage
;
// Section number of a split parameter (0 if no section was scanned).
125 sal_uInt32 m_nSection
;
// Constructor declaration (final parameter not visible in this excerpt).
128 inline Parameter(Parameter
* pTheNext
, ByteString
const & rTheAttribute
,
129 ByteString
const & rTheCharset
,
130 ByteString
const & rTheLanguage
,
131 ByteString
const & rTheValue
, sal_uInt32 nTheSection
,
// Constructor definition: copies each argument into its member.
135 inline Parameter::Parameter(Parameter
* pTheNext
,
136 ByteString
const & rTheAttribute
,
137 ByteString
const & rTheCharset
,
138 ByteString
const & rTheLanguage
,
139 ByteString
const & rTheValue
,
140 sal_uInt32 nTheSection
, bool bTheExtended
):
142 m_aAttribute(rTheAttribute
),
143 m_aCharset(rTheCharset
),
144 m_aLanguage(rTheLanguage
),
146 m_nSection(nTheSection
),
147 m_bExtended(bTheExtended
)
150 //============================================================================
// ParameterList (struct header not visible in this excerpt): head pointer of
// a linked list of Parameter nodes.
// Creates an empty list.
155 ParameterList(): m_pList(0) {}
157 inline ~ParameterList();
// Locates the position for (rAttribute, nSection); see the definition.
159 Parameter
** find(ByteString
const & rAttribute
, sal_uInt32 nSection
,
// Destructor: walks the list via m_pNext (loop and delete statements are not
// visible in this excerpt -- presumably frees every node; confirm).
163 inline ParameterList::~ParameterList()
167 Parameter
* pNext
= m_pList
->m_pNext
;
173 //============================================================================
// Forward declaration: turns the raw parameters in rInput into entries of
// pOutput; the bool result reports success.
174 bool parseParameters(ParameterList
const & rInput
,
175 INetContentTypeParameterList
* pOutput
);
179 //============================================================================
183 //============================================================================
// Reports whether nChar is covered by this charset.  The function iterates
// over m_pRanges in an open-ended loop; the loop body and its termination
// condition are not visible in this excerpt.
185 bool Charset::contains(sal_uInt32 nChar
) const
187 for (const sal_uInt32
* p
= m_pRanges
;;)
196 //============================================================================
200 //============================================================================
202 namespace unnamed_tools_inetmime
{
204 void appendISO88591(UniString
& rText
, sal_Char
const * pBegin
,
205 sal_Char
const * pEnd
)
207 xub_StrLen nLength
= static_cast< xub_StrLen
>(pEnd
- pBegin
);
208 sal_Unicode
* pBuffer
= new sal_Unicode
[nLength
];
209 for (sal_Unicode
* p
= pBuffer
; pBegin
!= pEnd
;)
210 *p
++ = sal_uChar(*pBegin
++);
211 rText
.Append(pBuffer
, nLength
);
217 //============================================================================
219 // INetMIMECharsetList_Impl
221 //============================================================================
// Destructor: repeatedly unlinks the head node (pRemove) and advances
// m_pFirst to its successor.  The loop head and the statement that disposes
// of pRemove are not visible in this excerpt -- presumably `delete pRemove`;
// confirm against the full file.
223 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl()
227 Node
* pRemove
= m_pFirst
;
228 m_pFirst
= m_pFirst
->m_pNext
;
233 //============================================================================
234 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar
)
236 for (Node
* p
= m_pFirst
; p
; p
= p
->m_pNext
)
237 if (!(p
->m_bDisabled
|| p
->m_aCharset
.contains(nChar
)))
238 p
->m_bDisabled
= true;
241 //============================================================================
// Walks the list from the head and returns the encoding of a node's
// charset.  The condition guarding the return and the fallback after the
// loop are not visible in this excerpt -- presumably the first node that is
// not disabled wins and eDefault is returned otherwise; confirm.
243 INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault
)
246 for (Node
* p
= m_pFirst
; p
; p
= p
->m_pNext
)
248 return p
->m_aCharset
.getEncoding();
252 //============================================================================
253 void INetMIMECharsetList_Impl::reset()
255 for (Node
* p
= m_pFirst
; p
; p
= p
->m_pNext
)
256 p
->m_bDisabled
= false;
259 //============================================================================
263 //============================================================================
// Searches the list for the slot belonging to (rAttribute, nSection).
// The walk compares rAttribute with each node's attribute and, on equal
// attributes, nSection with the node's section.  The actions taken in each
// branch (and how rPresent is set) are not visible in this excerpt --
// presumably the list is kept sorted and rPresent reports an exact match;
// confirm against the full file.
265 Parameter
** ParameterList::find(ByteString
const & rAttribute
,
266 sal_uInt32 nSection
, bool & rPresent
)
// p always addresses the link that points at the current node, so the
// caller can insert in front of it.
268 Parameter
** p
= &m_pList
;
269 for (; *p
; p
= &(*p
)->m_pNext
)
271 StringCompare eCompare
= rAttribute
.CompareTo((*p
)->m_aAttribute
);
272 if (eCompare
== COMPARE_GREATER
)
274 else if (eCompare
== COMPARE_EQUAL
)
276 if (nSection
> (*p
)->m_nSection
)
278 else if (nSection
== (*p
)->m_nSection
)
289 //============================================================================
293 //============================================================================
295 namespace unnamed_tools_inetmime
{
// Converts the raw parameters in rInput into INetContentTypeParameter
// entries inserted into pOutput.  Several lines of the body are not visible
// in this excerpt; the comments below describe only what is shown.
297 bool parseParameters(ParameterList
const & rInput
,
298 INetContentTypeParameterList
* pOutput
)
// First pass: check section numbering.  A node with a section number > 0 is
// compared with its predecessor (section must be exactly one higher and the
// attribute must match).
303 Parameter
* pPrev
= 0;
304 for (Parameter
* p
= rInput
.m_pList
; p
; p
= p
->m_pNext
)
306 if (p
->m_nSection
> 0
308 || pPrev
->m_nSection
!= p
->m_nSection
- 1
309 || pPrev
->m_aAttribute
!= p
->m_aAttribute
))
// Second pass: build one output value per attribute.
315 for (Parameter
* p
= rInput
.m_pList
; p
;)
// A non-empty charset on the first section selects the encoding lookup.
317 bool bCharset
= p
->m_aCharset
.Len() != 0;
318 rtl_TextEncoding eEncoding
= RTL_TEXTENCODING_DONTKNOW
;
321 = INetMIME::getCharsetEncoding(p
->m_aCharset
.GetBuffer(),
322 p
->m_aCharset
.GetBuffer()
327 bool bBadEncoding
= false;
328 Parameter
* pNext
= p
;
// Convert this section's bytes to Unicode.
332 sal_Unicode
* pUnicode
333 = INetMIME::convertToUnicode(pNext
->m_aValue
.GetBuffer(),
334 pNext
->m_aValue
.GetBuffer()
335 + pNext
->m_aValue
.Len(),
336 bCharset
&& p
->m_bExtended
?
338 RTL_TEXTENCODING_UTF8
,
// If that failed and the value is not an extended one with a charset, retry
// as ISO 8859-1.
340 if (!pUnicode
&& !(bCharset
&& p
->m_bExtended
))
341 pUnicode
= INetMIME::convertToUnicode(
342 pNext
->m_aValue
.GetBuffer(),
343 pNext
->m_aValue
.GetBuffer()
344 + pNext
->m_aValue
.Len(),
345 RTL_TEXTENCODING_ISO_8859_1
, nSize
);
351 aValue
+= UniString(pUnicode
, static_cast< xub_StrLen
>(nSize
));
353 pNext
= pNext
->m_pNext
;
// Keep going while the following node is a continuation section.
355 while (pNext
&& pNext
->m_nSection
> 0);
// Byte-wise paths: each value byte is widened through sal_uChar and
// appended to aValue (the surrounding conditions are partly not visible).
361 if (pNext
->m_bExtended
)
362 for (xub_StrLen i
= 0; i
< pNext
->m_aValue
.Len(); ++i
)
363 aValue
+= sal_Unicode(
365 sal_uChar(pNext
->m_aValue
.GetChar(i
)))
368 for (xub_StrLen i
= 0; i
< pNext
->m_aValue
.Len(); ++i
)
370 += sal_Unicode(sal_uChar
372 m_aValue
.GetChar(i
)));
373 pNext
= pNext
->m_pNext
;
374 if (!pNext
|| pNext
->m_nSection
== 0)
// Hand the assembled parameter to the output list.
378 pOutput
->Insert(new INetContentTypeParameter(p
->m_aAttribute
,
391 //============================================================================
395 //============================================================================
398 bool INetMIME::isAtomChar(sal_uInt32 nChar
)
400 static const bool aMap
[128]
401 = { false, false, false, false, false, false, false, false,
402 false, false, false, false, false, false, false, false,
403 false, false, false, false, false, false, false, false,
404 false, false, false, false, false, false, false, false,
405 false, true, false, true, true, true, true, true, // !"#$%&'
406 false, false, true, true, false, true, false, true, //()*+,-./
407 true, true, true, true, true, true, true, true, //01234567
408 true, true, false, false, false, true, false, true, //89:;<=>?
409 false, true, true, true, true, true, true, true, //@ABCDEFG
410 true, true, true, true, true, true, true, true, //HIJKLMNO
411 true, true, true, true, true, true, true, true, //PQRSTUVW
412 true, true, true, false, false, false, true, true, //XYZ[\]^_
413 true, true, true, true, true, true, true, true, //`abcdefg
414 true, true, true, true, true, true, true, true, //hijklmno
415 true, true, true, true, true, true, true, true, //pqrstuvw
416 true, true, true, true, true, true, true, false //xyz{|}~
418 return isUSASCII(nChar
) && aMap
[nChar
];
421 //============================================================================
423 bool INetMIME::isTokenChar(sal_uInt32 nChar
)
425 static const sal_Char aMap
[128]
426 = { false, false, false, false, false, false, false, false,
427 false, false, false, false, false, false, false, false,
428 false, false, false, false, false, false, false, false,
429 false, false, false, false, false, false, false, false,
430 false, true, false, true, true, true, true, true, // !"#$%&'
431 false, false, true, true, false, true, true, false, //()*+,-./
432 true, true, true, true, true, true, true, true, //01234567
433 true, true, false, false, false, false, false, false, //89:;<=>?
434 false, true, true, true, true, true, true, true, //@ABCDEFG
435 true, true, true, true, true, true, true, true, //HIJKLMNO
436 true, true, true, true, true, true, true, true, //PQRSTUVW
437 true, true, true, false, false, false, true, true, //XYZ[\]^_
438 true, true, true, true, true, true, true, true, //`abcdefg
439 true, true, true, true, true, true, true, true, //hijklmno
440 true, true, true, true, true, true, true, true, //pqrstuvw
441 true, true, true, true, true, true, true, false //xyz{|}~
443 return isUSASCII(nChar
) && aMap
[nChar
];
446 //============================================================================
448 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar
)
450 static const sal_Char aMap
[128]
451 = { false, false, false, false, false, false, false, false,
452 false, false, false, false, false, false, false, false,
453 false, false, false, false, false, false, false, false,
454 false, false, false, false, false, false, false, false,
455 false, true, false, true, true, true, true, true, // !"#$%&'
456 false, false, true, true, false, true, false, false, //()*+,-./
457 true, true, true, true, true, true, true, true, //01234567
458 true, true, false, false, false, false, false, false, //89:;<=>?
459 false, true, true, true, true, true, true, true, //@ABCDEFG
460 true, true, true, true, true, true, true, true, //HIJKLMNO
461 true, true, true, true, true, true, true, true, //PQRSTUVW
462 true, true, true, false, false, false, true, true, //XYZ[\]^_
463 true, true, true, true, true, true, true, true, //`abcdefg
464 true, true, true, true, true, true, true, true, //hijklmno
465 true, true, true, true, true, true, true, true, //pqrstuvw
466 true, true, true, true, true, true, true, false //xyz{|}~
468 return isUSASCII(nChar
) && aMap
[nChar
];
471 //============================================================================
473 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar
)
475 static const sal_Char aMap
[128]
476 = { false, false, false, false, false, false, false, false,
477 false, false, false, false, false, false, false, false,
478 false, false, false, false, false, false, false, false,
479 false, false, false, false, false, false, false, false,
480 false, true, false, true, true, false, true, true, // !"#$%&'
481 false, false, false, true, true, true, true, true, //()*+,-./
482 true, true, true, true, true, true, true, true, //01234567
483 true, true, true, true, true, true, true, true, //89:;<=>?
484 true, true, true, true, true, true, true, true, //@ABCDEFG
485 true, true, true, true, true, true, true, true, //HIJKLMNO
486 true, true, true, true, true, true, true, true, //PQRSTUVW
487 true, true, true, true, false, true, true, true, //XYZ[\]^_
488 true, true, true, true, true, true, true, true, //`abcdefg
489 true, true, true, true, true, true, true, true, //hijklmno
490 true, true, true, true, true, true, true, true, //pqrstuvw
491 true, true, true, false, true, true, true, false //xyz{|}~
493 return isUSASCII(nChar
) && aMap
[nChar
];
496 //============================================================================
498 sal_uInt32
INetMIME::getDigit(int nWeight
)
500 DBG_ASSERT(nWeight
>= 0 && nWeight
< 10,
501 "INetMIME::getDigit(): Bad weight");
503 static const sal_Char aDigits
[16]
504 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
505 return aDigits
[nWeight
];
508 //============================================================================
510 sal_uInt32
INetMIME::getHexDigit(int nWeight
)
512 DBG_ASSERT(nWeight
>= 0 && nWeight
< 16,
513 "INetMIME::getHexDigit(): Bad weight");
515 static const sal_Char aDigits
[16]
516 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
518 return aDigits
[nWeight
];
521 //============================================================================
523 sal_uInt32
INetMIME::getBase64Digit(int nWeight
)
525 DBG_ASSERT(nWeight
>= 0 && nWeight
< 64,
526 "INetMIME::getBase64Digit(): Bad weight");
528 static const sal_Char aDigits
[64]
529 = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
530 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
531 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
532 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
533 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
534 return aDigits
[nWeight
];
537 //============================================================================
539 bool INetMIME::equalIgnoreCase(const sal_Char
* pBegin1
,
540 const sal_Char
* pEnd1
,
541 const sal_Char
* pBegin2
,
542 const sal_Char
* pEnd2
)
544 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pBegin2
&& pBegin2
<= pEnd2
,
545 "INetMIME::equalIgnoreCase(): Bad sequences");
547 if (pEnd1
- pBegin1
!= pEnd2
- pBegin2
)
549 while (pBegin1
!= pEnd1
)
550 if (toUpperCase(*pBegin1
++) != toUpperCase(*pBegin2
++))
555 //============================================================================
557 bool INetMIME::equalIgnoreCase(const sal_Char
* pBegin1
,
558 const sal_Char
* pEnd1
,
559 const sal_Char
* pString2
)
561 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
562 "INetMIME::equalIgnoreCase(): Bad sequences");
564 while (*pString2
!= 0)
566 || toUpperCase(*pBegin1
++) != toUpperCase(*pString2
++))
568 return pBegin1
== pEnd1
;
571 //============================================================================
573 bool INetMIME::equalIgnoreCase(const sal_Unicode
* pBegin1
,
574 const sal_Unicode
* pEnd1
,
575 const sal_Char
* pString2
)
577 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
578 "INetMIME::equalIgnoreCase(): Bad sequences");
580 while (*pString2
!= 0)
582 || toUpperCase(*pBegin1
++) != toUpperCase(*pString2
++))
584 return pBegin1
== pEnd1
;
587 //============================================================================
// Advances over linear white space in [pBegin, pEnd).  Line folding is
// recognised through startsWithLineFolding(); the remaining loop body and
// the return statement are not visible in this excerpt.
589 const sal_Char
* INetMIME::skipLinearWhiteSpace(const sal_Char
* pBegin
,
590 const sal_Char
* pEnd
)
592 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
593 "INetMIME::skipLinearWhiteSpace(): Bad sequence");
595 while (pBegin
!= pEnd
)
604 if (startsWithLineFolding(pBegin
, pEnd
))
616 //============================================================================
// sal_Unicode overload of skipLinearWhiteSpace(): advances over linear white
// space in [pBegin, pEnd).  Line folding is recognised through
// startsWithLineFolding(); the remaining loop body and the return statement
// are not visible in this excerpt.
618 const sal_Unicode
* INetMIME::skipLinearWhiteSpace(const sal_Unicode
* pBegin
,
619 const sal_Unicode
* pEnd
)
621 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
622 "INetMIME::skipLinearWhiteSpace(): Bad sequence");
624 while (pBegin
!= pEnd
)
633 if (startsWithLineFolding(pBegin
, pEnd
))
645 //============================================================================
// Skips a parenthesised comment.  Only input that starts with '(' is
// examined; a nesting counter (nLevel) is kept while scanning.  The loop
// body and the return statements are not visible in this excerpt.
647 const sal_Char
* INetMIME::skipComment(const sal_Char
* pBegin
,
648 const sal_Char
* pEnd
)
650 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
651 "INetMIME::skipComment(): Bad sequence");
653 if (pBegin
!= pEnd
&& *pBegin
== '(')
655 sal_uInt32 nLevel
= 0;
656 for (const sal_Char
* p
= pBegin
; p
!= pEnd
;)
677 //============================================================================
// sal_Unicode overload of skipComment(): skips a parenthesised comment.
// Only input that starts with '(' is examined; a nesting counter (nLevel)
// is kept while scanning.  The loop body and the return statements are not
// visible in this excerpt.
679 const sal_Unicode
* INetMIME::skipComment(const sal_Unicode
* pBegin
,
680 const sal_Unicode
* pEnd
)
682 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
683 "INetMIME::skipComment(): Bad sequence");
685 if (pBegin
!= pEnd
&& *pBegin
== '(')
687 sal_uInt32 nLevel
= 0;
688 for (const sal_Unicode
* p
= pBegin
; p
!= pEnd
;)
709 //============================================================================
// Advances over any mix of linear white space and comments in
// [pBegin, pEnd).  Line folding is detected with startsWithLineFolding()
// and comments are delegated to skipComment(); the remaining loop body is
// not visible in this excerpt.
711 const sal_Char
* INetMIME::skipLinearWhiteSpaceComment(const sal_Char
*
713 const sal_Char
* pEnd
)
715 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
716 "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
718 while (pBegin
!= pEnd
)
727 if (startsWithLineFolding(pBegin
, pEnd
))
735 const sal_Char
* p
= skipComment(pBegin
, pEnd
);
748 //============================================================================
// sal_Unicode overload of skipLinearWhiteSpaceComment(): advances over any
// mix of linear white space and comments in [pBegin, pEnd).  Line folding is
// detected with startsWithLineFolding() and comments are delegated to
// skipComment(); the remaining loop body is not visible in this excerpt.
750 const sal_Unicode
* INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode
*
755 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
756 "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
758 while (pBegin
!= pEnd
)
767 if (startsWithLineFolding(pBegin
, pEnd
))
775 const sal_Unicode
* p
= skipComment(pBegin
, pEnd
);
788 //============================================================================
// Skips a quoted string.  Only input that starts with '"' is examined,
// scanning from the character after the opening quote.  The visible check
// requires that what it tests is followed by LF and a white space character
// (the enclosing case label is not visible -- presumably CR; confirm).
// Other cases and the return statements are not visible in this excerpt.
790 const sal_Char
* INetMIME::skipQuotedString(const sal_Char
* pBegin
,
791 const sal_Char
* pEnd
)
793 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
794 "INetMIME::skipQuotedString(): Bad sequence");
796 if (pBegin
!= pEnd
&& *pBegin
== '"')
797 for (const sal_Char
* p
= pBegin
+ 1; p
!= pEnd
;)
801 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
802 || !isWhiteSpace(*p
++))
817 //============================================================================
// sal_Unicode overload of skipQuotedString(): skips a quoted string.  Only
// input that starts with '"' is examined, scanning from the character after
// the opening quote.  The visible check requires that what it tests is
// followed by LF and a white space character (the enclosing case label is
// not visible -- presumably CR; confirm).  Other cases and the return
// statements are not visible in this excerpt.
819 const sal_Unicode
* INetMIME::skipQuotedString(const sal_Unicode
* pBegin
,
820 const sal_Unicode
* pEnd
)
822 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
823 "INetMIME::skipQuotedString(): Bad sequence");
825 if (pBegin
!= pEnd
&& *pBegin
== '"')
826 for (const sal_Unicode
* p
= pBegin
+ 1; p
!= pEnd
;)
830 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
831 || !isWhiteSpace(*p
++))
846 //============================================================================
848 const sal_Char
* INetMIME::scanAtom(const sal_Char
* pBegin
,
849 const sal_Char
* pEnd
)
851 while (pBegin
!= pEnd
&& isAtomChar(*pBegin
))
856 //============================================================================
858 const sal_Unicode
* INetMIME::scanAtom(const sal_Unicode
* pBegin
,
859 const sal_Unicode
* pEnd
)
861 while (pBegin
!= pEnd
&& isAtomChar(*pBegin
))
866 //============================================================================
// Scans an unsigned decimal number at rBegin and stores it in rValue.
// The value is accumulated in 64 bits so that exceeding the sal_uInt32
// range can be detected.  The loop's exit condition, the failure returns
// and the update of rBegin are not visible in this excerpt.
868 bool INetMIME::scanUnsigned(const sal_Char
*& rBegin
, const sal_Char
* pEnd
,
869 bool bLeadingZeroes
, sal_uInt32
& rValue
)
871 sal_uInt64 nTheValue
= 0;
872 const sal_Char
* p
= rBegin
;
873 for ( ; p
!= pEnd
; ++p
)
875 int nWeight
= getWeight(*p
);
878 nTheValue
= 10 * nTheValue
+ nWeight
;
// Overflow check against the 32 bit result type.
879 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
// A zero result is special-cased: no digits at all, or (when leading zeroes
// are not allowed) anything other than exactly one digit.
882 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
885 rValue
= sal_uInt32(nTheValue
);
889 //============================================================================
// sal_Unicode overload of scanUnsigned(): scans an unsigned decimal number
// at rBegin and stores it in rValue.  The value is accumulated in 64 bits so
// that exceeding the sal_uInt32 range can be detected.  The end of the
// parameter list, the loop's exit condition, the failure returns and the
// update of rBegin are not visible in this excerpt.
891 bool INetMIME::scanUnsigned(const sal_Unicode
*& rBegin
,
892 const sal_Unicode
* pEnd
, bool bLeadingZeroes
,
895 sal_uInt64 nTheValue
= 0;
896 const sal_Unicode
* p
= rBegin
;
897 for ( ; p
!= pEnd
; ++p
)
899 int nWeight
= getWeight(*p
);
902 nTheValue
= 10 * nTheValue
+ nWeight
;
// Overflow check against the 32 bit result type.
903 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
// A zero result is special-cased: no digits at all, or (when leading zeroes
// are not allowed) anything other than exactly one digit.
906 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
909 rValue
= sal_uInt32(nTheValue
);
913 //============================================================================
// Scans an unsigned hexadecimal number at rBegin and stores it in rValue.
// The value is accumulated in 64 bits so that exceeding the sal_uInt32
// range can be detected.  The end of the parameter list, the loop's exit
// condition, the failure returns and the update of rBegin are not visible
// in this excerpt.
915 bool INetMIME::scanUnsignedHex(const sal_Char
*& rBegin
,
916 const sal_Char
* pEnd
, bool bLeadingZeroes
,
919 sal_uInt64 nTheValue
= 0;
920 const sal_Char
* p
= rBegin
;
// Note: p is already initialised above; the loop re-assigns it redundantly.
921 for ( p
= rBegin
; p
!= pEnd
; ++p
)
923 int nWeight
= getHexWeight(*p
);
// Shift in the next nibble.
926 nTheValue
= nTheValue
<< 4 | nWeight
;
// Overflow check against the 32 bit result type.
927 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
// A zero result is special-cased: no digits at all, or (when leading zeroes
// are not allowed) anything other than exactly one digit.
930 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
933 rValue
= sal_uInt32(nTheValue
);
937 //============================================================================
// sal_Unicode overload of scanUnsignedHex(): scans an unsigned hexadecimal
// number at rBegin and stores it in rValue.  The value is accumulated in
// 64 bits so that exceeding the sal_uInt32 range can be detected.  The end
// of the parameter list, the loop's exit condition, the failure returns and
// the update of rBegin are not visible in this excerpt.
939 bool INetMIME::scanUnsignedHex(const sal_Unicode
*& rBegin
,
940 const sal_Unicode
* pEnd
, bool bLeadingZeroes
,
943 sal_uInt64 nTheValue
= 0;
944 const sal_Unicode
* p
= rBegin
;
945 for ( ; p
!= pEnd
; ++p
)
947 int nWeight
= getHexWeight(*p
);
// Shift in the next nibble.
950 nTheValue
= nTheValue
<< 4 | nWeight
;
// Overflow check against the 32 bit result type.
951 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
// A zero result is special-cased: no digits at all, or (when leading zeroes
// are not allowed) anything other than exactly one digit.
954 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
957 rValue
= sal_uInt32(nTheValue
);
961 //============================================================================
// Scans a block delimited by an opening and a closing character (nOpening /
// nClosing; their declarations are not visible in this excerpt).  Scanning
// only starts if the first character equals nOpening.  Inside the block the
// visible code treats LF, LF followed by white space, and line breaks
// detected by startsWithLineBreak() specially; the actions taken and the
// return statements are not visible here.
963 const sal_Char
* INetMIME::scanQuotedBlock(const sal_Char
* pBegin
,
964 const sal_Char
* pEnd
,
970 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
971 "INetMIME::scanQuotedBlock(): Bad sequence");
// Bytes are compared as unsigned char so values >= 0x80 are not
// sign-extended before the comparison with the delimiter.
973 if (pBegin
!= pEnd
&& static_cast< unsigned char >(*pBegin
) == nOpening
)
977 while (pBegin
!= pEnd
)
978 if (static_cast< unsigned char >(*pBegin
) == nClosing
)
985 sal_uInt32 c
= *pBegin
++;
989 if (pBegin
!= pEnd
&& *pBegin
== 0x0A) // LF
990 if (pEnd
- pBegin
>= 2 && isWhiteSpace(pBegin
[1]))
1010 if (startsWithLineBreak(pBegin
, pEnd
)
1011 && (pEnd
- pBegin
< 3
1012 || !isWhiteSpace(pBegin
[2])))
1034 //============================================================================
// sal_Unicode overload of scanQuotedBlock(): scans a block delimited by
// nOpening and nClosing.  Scanning only starts if the first character equals
// nOpening.  Inside the block the visible code treats LF, LF followed by
// white space, and line breaks detected by startsWithLineBreak() specially;
// the actions taken, the remaining parameters and the return statements are
// not visible in this excerpt.
1036 const sal_Unicode
* INetMIME::scanQuotedBlock(const sal_Unicode
* pBegin
,
1037 const sal_Unicode
* pEnd
,
1038 sal_uInt32 nOpening
,
1039 sal_uInt32 nClosing
,
1043 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
1044 "INetMIME::scanQuotedBlock(): Bad sequence");
1046 if (pBegin
!= pEnd
&& *pBegin
== nOpening
)
1050 while (pBegin
!= pEnd
)
1051 if (*pBegin
== nClosing
)
1058 sal_uInt32 c
= *pBegin
++;
1062 if (pBegin
!= pEnd
&& *pBegin
== 0x0A) // LF
1063 if (pEnd
- pBegin
>= 2 && isWhiteSpace(pBegin
[1]))
1083 if (startsWithLineBreak(pBegin
, pEnd
)
1084 && (pEnd
- pBegin
< 3
1085 || !isWhiteSpace(pBegin
[2])))
1107 //============================================================================
// Scans a sequence of ';'-introduced parameters starting at pBegin.
// For each parameter the attribute, an optional section number, an optional
// "extended" marker, and the value (extended, quoted or plain token) are
// collected as a Parameter in a local ParameterList.  The list is finally
// handed to parseParameters(); on success the position reached
// (pParameterBegin) is returned, otherwise pBegin.
// Many lines of the body are not visible in this excerpt; comments below
// describe only what is shown.
1109 sal_Char
const * INetMIME::scanParameters(sal_Char
const * pBegin
,
1110 sal_Char
const * pEnd
,
1111 INetContentTypeParameterList
*
1114 ParameterList aList
;
1115 sal_Char
const * pParameterBegin
= pBegin
;
1116 for (sal_Char
const * p
= pParameterBegin
;; pParameterBegin
= p
)
// Every parameter must be introduced by ';' (after optional white space and
// comments).
1118 pParameterBegin
= skipLinearWhiteSpaceComment(p
, pEnd
);
1119 if (pParameterBegin
== pEnd
|| *pParameterBegin
!= ';')
1121 p
= pParameterBegin
+ 1;
// Attribute: a run of token characters up to (not including) '*'.
1123 sal_Char
const * pAttributeBegin
= skipLinearWhiteSpaceComment(p
,
1125 p
= pAttributeBegin
;
1126 bool bDowncaseAttribute
= false;
1127 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '*')
1129 bDowncaseAttribute
= bDowncaseAttribute
|| isUpperCase(*p
);
1132 if (p
== pAttributeBegin
)
1134 ByteString
aAttribute(
1135 pAttributeBegin
, static_cast< xub_StrLen
>(p
- pAttributeBegin
));
// Only pay for lower-casing when an upper case letter was seen.
1136 if (bDowncaseAttribute
)
1137 aAttribute
.ToLowerAscii();
// Optional section number after '*'.
1139 sal_uInt32 nSection
= 0;
1140 if (p
!= pEnd
&& *p
== '*')
1143 if (p
!= pEnd
&& isDigit(*p
)
1144 && !scanUnsigned(p
, pEnd
, false, nSection
))
// Look up where this (attribute, section) pair belongs in the list.
1149 Parameter
** pPos
= aList
.find(aAttribute
, nSection
, bPresent
);
// Optional '*' marking an extended value.
1153 bool bExtended
= false;
1154 if (p
!= pEnd
&& *p
== '*')
1160 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
// '=' separates attribute and value.
1162 if (p
== pEnd
|| *p
!= '=')
1165 p
= skipLinearWhiteSpaceComment(p
+ 1, pEnd
);
1167 ByteString aCharset
;
1168 ByteString aLanguage
;
// Charset part: token characters up to the first apostrophe.
1174 sal_Char
const * pCharsetBegin
= p
;
1175 bool bDowncaseCharset
= false;
1176 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '\'')
1178 bDowncaseCharset
= bDowncaseCharset
|| isUpperCase(*p
);
1181 if (p
== pCharsetBegin
)
1185 aCharset
= ByteString(
1187 static_cast< xub_StrLen
>(p
- pCharsetBegin
));
1188 if (bDowncaseCharset
)
1189 aCharset
.ToLowerAscii();
1192 if (p
== pEnd
|| *p
!= '\'')
// Language part, terminated by the second apostrophe.
1196 sal_Char
const * pLanguageBegin
= p
;
1197 bool bDowncaseLanguage
= false;
1199 for (; p
!= pEnd
; ++p
)
1204 bDowncaseLanguage
= bDowncaseLanguage
1215 if (nLetters
== 0 || nLetters
> 8)
1219 aLanguage
= ByteString(
1221 static_cast< xub_StrLen
>(p
- pLanguageBegin
));
1222 if (bDowncaseLanguage
)
1223 aLanguage
.ToLowerAscii();
1226 if (p
== pEnd
|| *p
!= '\'')
// Extended value: token or non-US-ASCII characters; two hex digits are
// combined into a single byte.
1231 while (p
!= pEnd
&& (isTokenChar(*p
) || !isUSASCII(*p
)))
1237 int nWeight1
= getHexWeight(p
[1]);
1238 int nWeight2
= getHexWeight(p
[2]);
1239 if (nWeight1
>= 0 && nWeight2
>= 0)
1241 aValue
+= sal_Char(nWeight1
<< 4 | nWeight2
);
1250 while (p
!= pEnd
&& (isTokenChar(*p
) || !isUSASCII(*p
)))
// Quoted-string value.
1253 else if (p
!= pEnd
&& *p
== '"')
1256 bool bInvalid
= false;
1269 else if (*p
== 0x0D) // CR
1271 if (pEnd
- p
< 3 || p
[1] != 0x0A // LF
1272 || !isWhiteSpace(p
[2]))
1279 else if (*p
== '\\' && ++p
== pEnd
)
1291 sal_Char
const * pStringEnd
= skipQuotedString(p
, pEnd
);
1292 if (p
== pStringEnd
)
// Plain token value.
1298 sal_Char
const * pTokenBegin
= p
;
1299 while (p
!= pEnd
&& (isTokenChar(*p
) || !isUSASCII(*p
)))
1301 if (p
== pTokenBegin
)
1304 aValue
= ByteString(
1305 pTokenBegin
, static_cast< xub_StrLen
>(p
- pTokenBegin
));
// Link the new node in at the position found above.
1308 *pPos
= new Parameter(*pPos
, aAttribute
, aCharset
, aLanguage
, aValue
,
1309 nSection
, bExtended
);
1311 return parseParameters(aList
, pParameters
) ? pParameterBegin
: pBegin
;
1314 //============================================================================
// sal_Unicode overload of scanParameters(): scans a sequence of
// ';'-introduced parameters starting at pBegin.  Attribute, charset and
// language are converted to ByteString as US-ASCII; values are collected as
// UTF-8 bytes.  Each parameter becomes a Parameter in a local ParameterList
// that is finally handed to parseParameters(); on success the position
// reached (pParameterBegin) is returned, otherwise pBegin.
// Many lines of the body are not visible in this excerpt; comments below
// describe only what is shown.
1316 sal_Unicode
const * INetMIME::scanParameters(sal_Unicode
const * pBegin
,
1317 sal_Unicode
const * pEnd
,
1318 INetContentTypeParameterList
*
1321 ParameterList aList
;
1322 sal_Unicode
const * pParameterBegin
= pBegin
;
1323 for (sal_Unicode
const * p
= pParameterBegin
;; pParameterBegin
= p
)
// Every parameter must be introduced by ';' (after optional white space and
// comments).
1325 pParameterBegin
= skipLinearWhiteSpaceComment(p
, pEnd
);
1326 if (pParameterBegin
== pEnd
|| *pParameterBegin
!= ';')
1328 p
= pParameterBegin
+ 1;
// Attribute: a run of token characters up to (not including) '*'.
1330 sal_Unicode
const * pAttributeBegin
1331 = skipLinearWhiteSpaceComment(p
, pEnd
);
1332 p
= pAttributeBegin
;
1333 bool bDowncaseAttribute
= false;
1334 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '*')
1336 bDowncaseAttribute
= bDowncaseAttribute
|| isUpperCase(*p
);
1339 if (p
== pAttributeBegin
)
1341 ByteString aAttribute
= ByteString(
1342 pAttributeBegin
, static_cast< xub_StrLen
>(p
- pAttributeBegin
),
1343 RTL_TEXTENCODING_ASCII_US
);
// Only pay for lower-casing when an upper case letter was seen.
1344 if (bDowncaseAttribute
)
1345 aAttribute
.ToLowerAscii();
// Optional section number after '*'.
1347 sal_uInt32 nSection
= 0;
1348 if (p
!= pEnd
&& *p
== '*')
1351 if (p
!= pEnd
&& isDigit(*p
)
1352 && !scanUnsigned(p
, pEnd
, false, nSection
))
// Look up where this (attribute, section) pair belongs in the list.
1357 Parameter
** pPos
= aList
.find(aAttribute
, nSection
, bPresent
);
// Optional '*' marking an extended value.
1361 bool bExtended
= false;
1362 if (p
!= pEnd
&& *p
== '*')
1368 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
// '=' separates attribute and value.
1370 if (p
== pEnd
|| *p
!= '=')
1373 p
= skipLinearWhiteSpaceComment(p
+ 1, pEnd
);
1375 ByteString aCharset
;
1376 ByteString aLanguage
;
// Charset part: token characters up to the first apostrophe.
1382 sal_Unicode
const * pCharsetBegin
= p
;
1383 bool bDowncaseCharset
= false;
1384 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '\'')
1386 bDowncaseCharset
= bDowncaseCharset
|| isUpperCase(*p
);
1389 if (p
== pCharsetBegin
)
1393 aCharset
= ByteString(
1395 static_cast< xub_StrLen
>(p
- pCharsetBegin
),
1396 RTL_TEXTENCODING_ASCII_US
);
1397 if (bDowncaseCharset
)
1398 aCharset
.ToLowerAscii();
1401 if (p
== pEnd
|| *p
!= '\'')
// Language part, terminated by the second apostrophe.
1405 sal_Unicode
const * pLanguageBegin
= p
;
1406 bool bDowncaseLanguage
= false;
1408 for (; p
!= pEnd
; ++p
)
1413 bDowncaseLanguage
= bDowncaseLanguage
1424 if (nLetters
== 0 || nLetters
> 8)
1428 aLanguage
= ByteString(
1430 static_cast< xub_StrLen
>(p
- pLanguageBegin
),
1431 RTL_TEXTENCODING_ASCII_US
);
1432 if (bDowncaseLanguage
)
1433 aLanguage
.ToLowerAscii();
1436 if (p
== pEnd
|| *p
!= '\'')
// Extended value: collected through a string sink.  '%' followed by two hex
// digits yields one byte; other characters are written as UTF-8.
1442 INetMIMEStringOutputSink
1443 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT
);
1446 sal_uInt32 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
1447 if (isUSASCII(nChar
) && !isTokenChar(nChar
))
// p already points past the '%' here, so the hex digits are p[0] and p[1].
1449 if (nChar
== '%' && p
+ 1 < pEnd
)
1451 int nWeight1
= getHexWeight(p
[0]);
1452 int nWeight2
= getHexWeight(p
[1]);
1453 if (nWeight1
>= 0 && nWeight2
>= 0)
1455 aSink
<< sal_Char(nWeight1
<< 4 | nWeight2
);
1460 INetMIME::writeUTF8(aSink
, nChar
);
1462 aValue
= aSink
.takeBuffer();
1465 while (p
!= pEnd
&& (isTokenChar(*p
) || !isUSASCII(*p
)))
// Quoted-string value, re-encoded as UTF-8 through a string sink.
1468 else if (p
!= pEnd
&& *p
== '"')
1471 INetMIMEStringOutputSink
1472 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT
);
1473 bool bInvalid
= false;
1481 sal_uInt32 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
1484 else if (nChar
== 0x0D) // CR
1486 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
1487 || !isWhiteSpace(*p
))
1492 nChar
= sal_uChar(*p
++);
1494 else if (nChar
== '\\')
1501 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
1503 INetMIME::writeUTF8(aSink
, nChar
);
1507 aValue
= aSink
.takeBuffer();
1511 sal_Unicode
const * pStringEnd
= skipQuotedString(p
, pEnd
);
1512 if (p
== pStringEnd
)
// Plain token value, converted to UTF-8.
1518 sal_Unicode
const * pTokenBegin
= p
;
1519 while (p
!= pEnd
&& (isTokenChar(*p
) || !isUSASCII(*p
)))
1521 if (p
== pTokenBegin
)
1524 aValue
= ByteString(
1525 pTokenBegin
, static_cast< xub_StrLen
>(p
- pTokenBegin
),
1526 RTL_TEXTENCODING_UTF8
);
// Link the new node in at the position found above.
1529 *pPos
= new Parameter(*pPos
, aAttribute
, aCharset
, aLanguage
, aValue
,
1530 nSection
, bExtended
);
1532 return parseParameters(aList
, pParameters
) ? pParameterBegin
: pBegin
;
1535 //============================================================================
// Returns the MIME charset name for eEncoding.  Octet encodings are looked
// up with rtl_getMimeCharsetFromTextEncoding() (asserting a non-null
// result); UCS-4 and UCS-2 map to fixed names; any other encoding triggers
// a debug error.  The return statements for the octet and error paths are
// not visible in this excerpt.
1537 const sal_Char
* INetMIME::getCharsetName(rtl_TextEncoding eEncoding
)
1539 if (rtl_isOctetTextEncoding(eEncoding
))
1541 char const * p
= rtl_getMimeCharsetFromTextEncoding(eEncoding
);
1542 DBG_ASSERT(p
, "INetMIME::getCharsetName(): Unsupported encoding");
1548 case RTL_TEXTENCODING_UCS4
:
1549 return "ISO-10646-UCS-4";
1551 case RTL_TEXTENCODING_UCS2
:
1552 return "ISO-10646-UCS-2";
1555 DBG_ERROR("INetMIME::getCharsetName(): Unsupported encoding");
1560 //============================================================================
1561 namespace unnamed_tools_inetmime
{
// One row of the charset-name table below: a charset name and the text
// encoding it denotes.
1563 struct EncodingEntry
// Charset name as a C string (the table entries are string literals).
1565 sal_Char
const * m_aName
;
// The rtl text encoding associated with m_aName.
1566 rtl_TextEncoding m_eEncoding
;
1569 //============================================================================
1570 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
1571 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
// Table of charset names (primary names and aliases) and the rtl_TextEncoding
// each one stands for. getCharsetEncoding_Impl scans it from the first entry
// to the last and returns the encoding of the first name that matches, so
// several names may share one encoding.
1573 EncodingEntry
const aEncodingMap
[]
1574 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US
},
1575 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US
},
1576 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US
},
1577 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US
},
1578 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US
},
1579 { "ASCII", RTL_TEXTENCODING_ASCII_US
},
1580 { "ISO646-US", RTL_TEXTENCODING_ASCII_US
},
1581 { "US", RTL_TEXTENCODING_ASCII_US
},
1582 { "IBM367", RTL_TEXTENCODING_ASCII_US
},
1583 { "CP367", RTL_TEXTENCODING_ASCII_US
},
1584 { "CSASCII", RTL_TEXTENCODING_ASCII_US
},
1585 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1
},
1586 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1
},
1587 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1
},
1588 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1
},
1589 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1
},
1590 { "L1", RTL_TEXTENCODING_ISO_8859_1
},
1591 { "IBM819", RTL_TEXTENCODING_ISO_8859_1
},
1592 { "CP819", RTL_TEXTENCODING_ISO_8859_1
},
1593 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1
},
1594 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2
},
1595 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2
},
1596 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2
},
1597 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2
},
1598 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2
},
1599 { "L2", RTL_TEXTENCODING_ISO_8859_2
},
1600 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2
},
1601 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3
},
1602 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3
},
1603 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3
},
1604 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3
},
1605 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3
},
1606 { "L3", RTL_TEXTENCODING_ISO_8859_3
},
1607 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3
},
1608 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4
},
1609 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4
},
1610 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4
},
1611 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4
},
1612 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4
},
1613 { "L4", RTL_TEXTENCODING_ISO_8859_4
},
1614 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4
},
1615 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5
},
1616 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5
},
1617 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5
},
1618 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5
},
1619 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
1620 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
1621 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6
},
1622 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6
},
1623 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6
},
1624 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6
},
1625 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6
},
1626 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6
},
1627 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6
},
1628 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6
},
1629 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7
},
1630 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7
},
1631 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7
},
1632 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7
},
1633 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7
},
1634 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7
},
1635 { "GREEK", RTL_TEXTENCODING_ISO_8859_7
},
1636 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7
},
1637 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7
},
1638 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8
},
1639 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8
},
1640 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8
},
1641 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8
},
1642 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8
},
1643 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8
},
1644 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9
},
1645 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9
},
1646 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9
},
1647 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9
},
1648 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9
},
1649 { "L5", RTL_TEXTENCODING_ISO_8859_9
},
1650 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9
},
1651 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14
}, // RFC 2047
1652 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15
},
1653 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15
}, // RFC 2047
1654 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
1655 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN
},
1656 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
1657 { "IBM437", RTL_TEXTENCODING_IBM_437
},
1658 { "CP437", RTL_TEXTENCODING_IBM_437
},
1659 { "437", RTL_TEXTENCODING_IBM_437
},
1660 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437
},
1661 { "IBM850", RTL_TEXTENCODING_IBM_850
},
1662 { "CP850", RTL_TEXTENCODING_IBM_850
},
1663 { "850", RTL_TEXTENCODING_IBM_850
},
1664 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850
},
1665 { "IBM860", RTL_TEXTENCODING_IBM_860
},
1666 { "CP860", RTL_TEXTENCODING_IBM_860
},
1667 { "860", RTL_TEXTENCODING_IBM_860
},
1668 { "CSIBM860", RTL_TEXTENCODING_IBM_860
},
1669 { "IBM861", RTL_TEXTENCODING_IBM_861
},
1670 { "CP861", RTL_TEXTENCODING_IBM_861
},
1671 { "861", RTL_TEXTENCODING_IBM_861
},
1672 { "CP-IS", RTL_TEXTENCODING_IBM_861
},
1673 { "CSIBM861", RTL_TEXTENCODING_IBM_861
},
1674 { "IBM863", RTL_TEXTENCODING_IBM_863
},
1675 { "CP863", RTL_TEXTENCODING_IBM_863
},
1676 { "863", RTL_TEXTENCODING_IBM_863
},
1677 { "CSIBM863", RTL_TEXTENCODING_IBM_863
},
1678 { "IBM865", RTL_TEXTENCODING_IBM_865
},
1679 { "CP865", RTL_TEXTENCODING_IBM_865
},
1680 { "865", RTL_TEXTENCODING_IBM_865
},
1681 { "CSIBM865", RTL_TEXTENCODING_IBM_865
},
1682 { "IBM775", RTL_TEXTENCODING_IBM_775
},
1683 { "CP775", RTL_TEXTENCODING_IBM_775
},
1684 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775
},
1685 { "IBM852", RTL_TEXTENCODING_IBM_852
},
1686 { "CP852", RTL_TEXTENCODING_IBM_852
},
1687 { "852", RTL_TEXTENCODING_IBM_852
},
1688 { "CSPCP852", RTL_TEXTENCODING_IBM_852
},
1689 { "IBM855", RTL_TEXTENCODING_IBM_855
},
1690 { "CP855", RTL_TEXTENCODING_IBM_855
},
1691 { "855", RTL_TEXTENCODING_IBM_855
},
1692 { "CSIBM855", RTL_TEXTENCODING_IBM_855
},
1693 { "IBM857", RTL_TEXTENCODING_IBM_857
},
1694 { "CP857", RTL_TEXTENCODING_IBM_857
},
1695 { "857", RTL_TEXTENCODING_IBM_857
},
1696 { "CSIBM857", RTL_TEXTENCODING_IBM_857
},
1697 { "IBM862", RTL_TEXTENCODING_IBM_862
},
1698 { "CP862", RTL_TEXTENCODING_IBM_862
},
1699 { "862", RTL_TEXTENCODING_IBM_862
},
1700 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862
},
1701 { "IBM864", RTL_TEXTENCODING_IBM_864
},
1702 { "CP864", RTL_TEXTENCODING_IBM_864
},
1703 { "CSIBM864", RTL_TEXTENCODING_IBM_864
},
1704 { "IBM866", RTL_TEXTENCODING_IBM_866
},
1705 { "CP866", RTL_TEXTENCODING_IBM_866
},
1706 { "866", RTL_TEXTENCODING_IBM_866
},
1707 { "CSIBM866", RTL_TEXTENCODING_IBM_866
},
1708 { "IBM869", RTL_TEXTENCODING_IBM_869
},
1709 { "CP869", RTL_TEXTENCODING_IBM_869
},
1710 { "869", RTL_TEXTENCODING_IBM_869
},
1711 { "CP-GR", RTL_TEXTENCODING_IBM_869
},
1712 { "CSIBM869", RTL_TEXTENCODING_IBM_869
},
1713 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250
},
1714 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251
},
// NOTE(review): there is no "WINDOWS-1252" row, although
// RTL_TEXTENCODING_MS_1252 is handled by createPreferredCharsetList in this
// file; a lookup of that name therefore falls through to
// RTL_TEXTENCODING_DONTKNOW. Confirm whether the omission is intentional.
1715 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253
},
1716 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254
},
1717 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255
},
1718 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256
},
1719 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257
},
1720 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258
},
1721 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS
},
1722 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS
},
1723 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS
},
1724 { "GB2312", RTL_TEXTENCODING_GB_2312
},
1725 { "CSGB2312", RTL_TEXTENCODING_GB_2312
},
1726 { "BIG5", RTL_TEXTENCODING_BIG5
},
1727 { "CSBIG5", RTL_TEXTENCODING_BIG5
},
1728 { "EUC-JP", RTL_TEXTENCODING_EUC_JP
},
1729 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
1730 RTL_TEXTENCODING_EUC_JP
},
1731 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP
},
1732 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP
},
1733 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP
},
1734 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN
},
1735 { "KOI8-R", RTL_TEXTENCODING_KOI8_R
},
1736 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R
},
1737 { "UTF-7", RTL_TEXTENCODING_UTF7
},
1738 { "UTF-8", RTL_TEXTENCODING_UTF8
},
1739 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10
}, // RFC 2047
1740 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13
}, // RFC 2047
1741 { "EUC-KR", RTL_TEXTENCODING_EUC_KR
},
1742 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR
},
1743 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR
},
1744 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR
},
1745 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4
},
1746 { "CSUCS4", RTL_TEXTENCODING_UCS4
},
1747 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2
},
1748 { "CSUNICODE", RTL_TEXTENCODING_UCS2
} };
1750 //============================================================================
// Shared implementation of the INetMIME::getCharsetEncoding overloads; T is
// the character type of the name to look up (sal_Char or sal_Unicode in the
// callers visible in this file).
// Walks aEncodingMap in order and compares the name starting at pBegin
// against each entry's m_aName using INetMIME::equalIgnoreCase. Returns the
// m_eEncoding of the first entry that matches, or RTL_TEXTENCODING_DONTKNOW
// when no entry matches.
1751 template< typename T
>
1752 inline rtl_TextEncoding
getCharsetEncoding_Impl(T
const * pBegin
,
// The element count is derived from the array size, so rows added to
// aEncodingMap are picked up automatically.
1755 for (sal_Size i
= 0; i
< sizeof aEncodingMap
/ sizeof (EncodingEntry
);
1757 if (INetMIME::equalIgnoreCase(pBegin
, pEnd
, aEncodingMap
[i
].m_aName
))
1758 return aEncodingMap
[i
].m_eEncoding
;
1759 return RTL_TEXTENCODING_DONTKNOW
;
1764 //============================================================================
// Looks up the charset name given as the sal_Char sequence delimited by
// pBegin and pEnd; simply forwards to getCharsetEncoding_Impl and returns
// its result (RTL_TEXTENCODING_DONTKNOW when the name is not in the table).
1766 rtl_TextEncoding
INetMIME::getCharsetEncoding(sal_Char
const * pBegin
,
1767 sal_Char
const * pEnd
)
1769 return getCharsetEncoding_Impl(pBegin
, pEnd
);
1772 //============================================================================
// Looks up the charset name given as the sal_Unicode sequence delimited by
// pBegin and pEnd; simply forwards to getCharsetEncoding_Impl and returns
// its result (RTL_TEXTENCODING_DONTKNOW when the name is not in the table).
1774 rtl_TextEncoding
INetMIME::getCharsetEncoding(sal_Unicode
const * pBegin
,
1775 sal_Unicode
const * pEnd
)
1777 return getCharsetEncoding_Impl(pBegin
, pEnd
);
1780 //============================================================================
// Allocates an INetMIMECharsetList_Impl with 'new' and fills it with Charset
// entries chosen according to eEncoding. After the encoding-specific entries,
// ISO-8859-1 and then US-ASCII are prepended.
// NOTE(review): entries are added with prepend(), so presumably the entry
// added last (US-ASCII) ends up first in the list -- confirm against
// INetMIMECharsetList_Impl.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the caller receives pList and owns it -- confirm.
//
// Each a...Ranges table below is a flat sal_uInt32 array ending in
// sal_uInt32(-1). The values look like (first, last) pairs of Unicode code
// points taken from the mapping files cited after each table -- see
// Charset::contains to confirm how they are interpreted.
1782 INetMIMECharsetList_Impl
*
1783 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding
)
1785 static const sal_uInt32 aUSASCIIRanges
[] = { 0, 0x7F, sal_uInt32(-1) };
1787 static const sal_uInt32 aISO88591Ranges
[] = { 0, 0xFF, sal_uInt32(-1) };
1788 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version
1789 // 1.0 of 1999 July 27
1791 static const sal_uInt32 aISO88592Ranges
[]
1792 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1793 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7,
1794 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7,
1795 0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4,
1796 0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4,
1797 0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111,
1798 0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144,
1799 0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B,
1800 0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7,
1801 0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) };
1802 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version
1803 // 1.0 of 1999 July 27
1805 static const sal_uInt32 aISO88593Ranges
[]
1806 = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1807 0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4,
1808 0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2,
1809 0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC,
1810 0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131,
1811 0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C,
1812 0x2D8, 0x2D9, sal_uInt32(-1) };
1813 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version
1814 // 1.0 of 1999 July 27
1816 static const sal_uInt32 aISO88594Ranges
[]
1817 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0,
1818 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB,
1819 0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6,
1820 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC,
1821 0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113,
1822 0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F,
1823 0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D,
1824 0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173,
1825 0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB,
1827 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version
1828 // 1.0 of 1999 July 27
1830 static const sal_uInt32 aISO88595Ranges
[]
1831 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F,
1832 0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) };
1833 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version
1834 // 1.0 of 1999 July 27
1836 static const sal_uInt32 aISO88596Ranges
[]
1837 = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B,
1838 0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) };
1839 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version
1840 // 1.0 of 1999 July 27
1842 static const sal_uInt32 aISO88597Ranges
[]
1843 = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3,
1844 0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A,
1845 0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015,
1846 0x2018, 0x2019, sal_uInt32(-1) };
1847 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version
1848 // 1.0 of 1999 July 27
1850 static const sal_uInt32 aISO88598Ranges
[]
1851 = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7,
1852 0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017,
1854 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version
1855 // 1.1 of 2000-Jan-03
1857 static const sal_uInt32 aISO88599Ranges
[]
1858 = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF,
1859 0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) };
1860 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version
1861 // 1.0 of 1999 July 27
1863 static const sal_uInt32 aISO885910Ranges
[]
1864 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7,
1865 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6,
1866 0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB,
1867 0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101,
1868 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119,
1869 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138,
1870 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161,
1871 0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015,
1873 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version
1874 // 1.1 of 1999 October 11
1876 static const sal_uInt32 aISO885913Ranges
[]
1877 = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE,
1878 0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6,
1879 0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF,
1880 0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC,
1881 0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113,
1882 0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F,
1883 0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D,
1884 0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B,
1885 0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E,
1887 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version
1888 // 1.0 of 1999 July 27
1890 static const sal_uInt32 aISO885914Ranges
[]
1891 = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE,
1892 0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF,
1893 0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121,
1894 0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F,
1895 0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B,
1896 0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) };
1897 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version
1898 // 1.0 of 1999 July 27
1900 static const sal_uInt32 aISO885915Ranges
[]
1901 = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7,
1902 0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1903 0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) };
1904 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version
1905 // 1.0 of 1999 July 27
1907 static const sal_uInt32 aKOI8RRanges
[]
1908 = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2,
1909 0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451,
1910 0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321,
1911 0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510,
1912 0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524,
1913 0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C,
1914 0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C,
1915 0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) };
1916 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT>
1917 // version 1.0 of 18 August 1999
1920 static const sal_uInt32 aWindows1252Ranges
[]
1921 = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1922 0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC,
1923 0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022,
1924 0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC,
1925 0x2122, 0x2122, sal_uInt32(-1) };
1926 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
1927 // CP1252.TXT> version 2.01 of 04/15/98
// The list starts empty; the cases below add the encoding-specific entries.
1930 INetMIMECharsetList_Impl
* pList
= new INetMIMECharsetList_Impl
;
1933 case RTL_TEXTENCODING_MS_1252
:
1935 pList
->prepend(Charset(RTL_TEXTENCODING_MS_1252
,
1936 aWindows1252Ranges
));
// No encoding-specific prepend is visible for these three cases in this
// excerpt.
1938 case RTL_TEXTENCODING_ISO_8859_1
:
1939 case RTL_TEXTENCODING_UTF7
:
1940 case RTL_TEXTENCODING_UTF8
:
// Each ISO-8859-x case prepends the Charset entry of the same encoding.
1943 case RTL_TEXTENCODING_ISO_8859_2
:
1944 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2
,
1948 case RTL_TEXTENCODING_ISO_8859_3
:
1949 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3
,
1953 case RTL_TEXTENCODING_ISO_8859_4
:
1954 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4
,
1958 case RTL_TEXTENCODING_ISO_8859_5
:
1959 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5
,
1963 case RTL_TEXTENCODING_ISO_8859_6
:
1964 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6
,
1968 case RTL_TEXTENCODING_ISO_8859_7
:
1969 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7
,
1973 case RTL_TEXTENCODING_ISO_8859_8
:
1974 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8
,
1978 case RTL_TEXTENCODING_ISO_8859_9
:
1979 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9
,
1983 case RTL_TEXTENCODING_ISO_8859_10
:
1984 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10
,
1988 case RTL_TEXTENCODING_ISO_8859_13
:
1989 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13
,
1993 case RTL_TEXTENCODING_ISO_8859_14
:
1994 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14
,
1998 case RTL_TEXTENCODING_ISO_8859_15
:
1999 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15
,
// The Windows code pages below have no entry of their own; each case prepends
// an ISO-8859-x Charset instead (1250 -> 8859-2, 1251 -> 8859-5,
// 1253 -> 8859-7, 1254 -> 8859-9, 1255 -> 8859-8, 1256 -> 8859-6,
// 1257 -> 8859-4).
2003 case RTL_TEXTENCODING_MS_1250
:
2004 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2
,
2008 case RTL_TEXTENCODING_MS_1251
:
2009 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5
,
2013 case RTL_TEXTENCODING_MS_1253
:
2014 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7
,
2018 case RTL_TEXTENCODING_MS_1254
:
2019 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9
,
2023 case RTL_TEXTENCODING_MS_1255
:
2024 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8
,
2028 case RTL_TEXTENCODING_MS_1256
:
2029 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6
,
2033 case RTL_TEXTENCODING_MS_1257
:
2034 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4
,
// KOI8-R prepends two entries: ISO-8859-5 first, then KOI8-R itself.
2038 case RTL_TEXTENCODING_KOI8_R
:
2039 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5
,
2041 pList
->prepend(Charset(RTL_TEXTENCODING_KOI8_R
, aKOI8RRanges
));
2044 default: //@@@ more cases are missing!
2045 DBG_ERROR("INetMIME::createPreferredCharsetList():"
2046 " Unsupported encoding");
// Common entries prepended after the encoding-specific ones.
2049 pList
->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1
, aISO88591Ranges
));
2050 pList
->prepend(Charset(RTL_TEXTENCODING_ASCII_US
, aUSASCIIRanges
));
2054 //============================================================================
// Converts the sal_Char text between pBegin and pEnd from eEncoding to
// sal_Unicode using the rtl text-to-Unicode converter; rSize is assigned the
// value returned by rtl_convertTextToUnicode.
// RTL_TEXTENCODING_DONTKNOW is checked up front, before any converter is
// created.
// NOTE(review): the statements following that check, the handling of
// conversion errors and the final return are not visible in this excerpt.
// The buffer is allocated with new[]; presumably the caller has to release
// the returned buffer with delete[] -- confirm.
2056 sal_Unicode
* INetMIME::convertToUnicode(const sal_Char
* pBegin
,
2057 const sal_Char
* pEnd
,
2058 rtl_TextEncoding eEncoding
,
2061 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
2063 rtl_TextToUnicodeConverter hConverter
2064 = rtl_createTextToUnicodeConverter(eEncoding
);
2065 rtl_TextToUnicodeContext hContext
2066 = rtl_createTextToUnicodeContext(hConverter
);
2067 sal_Unicode
* pBuffer
;
// Start with one output unit per input byte and grow the buffer by about a
// third (plus one) on every retry.
2069 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
2070 nBufferSize
+= nBufferSize
/ 3 + 1)
2072 pBuffer
= new sal_Unicode
[nBufferSize
];
2073 sal_Size nSrcCvtBytes
;
2074 rSize
= rtl_convertTextToUnicode(
2075 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
,
// Undefined, undefined multi-byte and invalid input are all requested to be
// reported as errors.
2077 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
2078 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
2079 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
2080 &nInfo
, &nSrcCvtBytes
);
// Only a "destination buffer too small" result leads to another pass; the
// context is reset before the conversion is retried.
2081 if (nInfo
!= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
)
2084 rtl_resetTextToUnicodeContext(hConverter
, hContext
);
// Release the conversion context and the converter.
2086 rtl_destroyTextToUnicodeContext(hConverter
, hContext
);
2087 rtl_destroyTextToUnicodeConverter(hConverter
);
2096 //============================================================================
// Converts the sal_Unicode text between pBegin and pEnd to eEncoding using
// the rtl Unicode-to-text converter; rSize is assigned the value returned by
// rtl_convertUnicodeToText.
// RTL_TEXTENCODING_DONTKNOW is checked up front, before any converter is
// created.
// NOTE(review): the statements following that check, the handling of
// conversion errors and the final return are not visible in this excerpt.
// The buffer is allocated with new[]; presumably the caller has to release
// the returned buffer with delete[] -- confirm.
2098 sal_Char
* INetMIME::convertFromUnicode(const sal_Unicode
* pBegin
,
2099 const sal_Unicode
* pEnd
,
2100 rtl_TextEncoding eEncoding
,
2103 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
2105 rtl_UnicodeToTextConverter hConverter
2106 = rtl_createUnicodeToTextConverter(eEncoding
);
2107 rtl_UnicodeToTextContext hContext
2108 = rtl_createUnicodeToTextContext(hConverter
);
// Start with one output byte per input unit and grow the buffer by about a
// third (plus one) on every retry.
2111 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
2112 nBufferSize
+= nBufferSize
/ 3 + 1)
2114 pBuffer
= new sal_Char
[nBufferSize
];
2115 sal_Size nSrcCvtBytes
;
2116 rSize
= rtl_convertUnicodeToText(
2117 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
,
// NOTE(review): the UNDEFINED_ERROR flag is combined with the
// UNDEFINED_REPLACE / UNDEFINED_REPLACESTR flags here; check the rtl
// documentation for how these interact.
2119 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
2120 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
2121 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
2122 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
,
2123 &nInfo
, &nSrcCvtBytes
);
// Only a "destination buffer too small" result leads to another pass; the
// context is reset before the conversion is retried.
2124 if (nInfo
!= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
)
2127 rtl_resetUnicodeToTextContext(hConverter
, hContext
);
// Release the conversion context and the converter.
2129 rtl_destroyUnicodeToTextContext(hConverter
, hContext
);
2130 rtl_destroyUnicodeToTextConverter(hConverter
);
2139 //============================================================================
// Writes nChar to rSink as a UTF-8 octet sequence. The length is chosen by
// magnitude: two octets below 0x800, three below 0x10000, four below
// 0x200000, five below 0x4000000 and six otherwise; values of 0x80000000 and
// above trip the DBG_ASSERT.
// NOTE(review): this is the one-to-six octet scheme of RFC 2279. RFC 3629
// later restricted UTF-8 to four octets (up to U+10FFFF), so the five- and
// six-octet forms are legacy output.
2141 void INetMIME::writeUTF8(INetMIMEOutputSink
& rSink
, sal_uInt32 nChar
)
2143 // See RFC 2279 for a discussion of UTF-8.
2144 DBG_ASSERT(nChar
< 0x80000000, "INetMIME::writeUTF8(): Bad char");
// Single-octet form: the value is written unchanged (the guarding condition
// is not visible in this excerpt).
2147 rSink
<< sal_Char(nChar
);
// Two octets: 110xxxxx 10xxxxxx
2148 else if (nChar
< 0x800)
2149 rSink
<< sal_Char(nChar
>> 6 | 0xC0)
2150 << sal_Char((nChar
& 0x3F) | 0x80);
// Three octets: 1110xxxx followed by two continuation octets
2151 else if (nChar
< 0x10000)
2152 rSink
<< sal_Char(nChar
>> 12 | 0xE0)
2153 << sal_Char((nChar
>> 6 & 0x3F) | 0x80)
2154 << sal_Char((nChar
& 0x3F) | 0x80);
// Four octets: 11110xxx followed by three continuation octets
2155 else if (nChar
< 0x200000)
2156 rSink
<< sal_Char(nChar
>> 18 | 0xF0)
2157 << sal_Char((nChar
>> 12 & 0x3F) | 0x80)
2158 << sal_Char((nChar
>> 6 & 0x3F) | 0x80)
2159 << sal_Char((nChar
& 0x3F) | 0x80);
// Five octets: 111110xx followed by four continuation octets
2160 else if (nChar
< 0x4000000)
2161 rSink
<< sal_Char(nChar
>> 24 | 0xF8)
2162 << sal_Char((nChar
>> 18 & 0x3F) | 0x80)
2163 << sal_Char((nChar
>> 12 & 0x3F) | 0x80)
2164 << sal_Char((nChar
>> 6 & 0x3F) | 0x80)
2165 << sal_Char((nChar
& 0x3F) | 0x80);
// Six octets: 1111110x followed by five continuation octets
2167 rSink
<< sal_Char(nChar
>> 30 | 0xFC)
2168 << sal_Char((nChar
>> 24 & 0x3F) | 0x80)
2169 << sal_Char((nChar
>> 18 & 0x3F) | 0x80)
2170 << sal_Char((nChar
>> 12 & 0x3F) | 0x80)
2171 << sal_Char((nChar
>> 6 & 0x3F) | 0x80)
2172 << sal_Char((nChar
& 0x3F) | 0x80);
2175 //============================================================================
// Writes nValue to rSink in decimal, using nMinDigits as a minimum width.
// NOTE(review): the declaration of nMinDigits and the bodies of the two
// trailing while loops are not visible in this excerpt; presumably the first
// loop emits padding and the second emits the buffered digits in reverse --
// confirm.
2177 void INetMIME::writeUnsigned(INetMIMEOutputSink
& rSink
, sal_uInt32 nValue
,
2180 sal_Char aBuffer
[10];
2181 // max unsigned 32 bit value (4294967295) has 10 places
2182 sal_Char
* p
= aBuffer
;
// Digits are produced least significant first, so aBuffer holds them in
// reverse order; a value of 0 produces no digit at all here.
2183 for (; nValue
> 0; nValue
/= 10)
2184 *p
++ = sal_Char(getDigit(nValue
% 10));
// What remains of nMinDigits is the number of positions still missing to
// reach the minimum width.
2185 nMinDigits
-= p
- aBuffer
;
2186 while (nMinDigits
-- > 0)
2188 while (p
!= aBuffer
)
2192 //============================================================================
// Writes the fields of rUTC to rSink in this order: three-letter day name,
// day of month, three-letter month name, year, then hour, minute and second
// (each of the last three with at least two digits).
// NOTE(review): the separators written between the fields and anything
// written after the seconds are not visible in this excerpt.
2194 void INetMIME::writeDateTime(INetMIMEOutputSink
& rSink
,
2195 const DateTime
& rUTC
)
// Day names indexed by rUTC.GetDayOfWeek(); the table runs from "Mon" to
// "Sun", so index 0 is expected to mean Monday.
2197 static const sal_Char aDay
[7][3]
2198 = { { 'M', 'o', 'n' },
2204 { 'S', 'u', 'n' } };
2205 const sal_Char
* pTheDay
= aDay
[rUTC
.GetDayOfWeek()];
// The name tables are not NUL-terminated, hence the explicit 3-character
// range.
2206 rSink
.write(pTheDay
, pTheDay
+ 3);
2208 writeUnsigned(rSink
, rUTC
.GetDay());
// Month names indexed by GetMonth() - 1, i.e. GetMonth() is treated as
// 1-based.
2210 static const sal_Char aMonth
[12][3]
2211 = { { 'J', 'a', 'n' },
2222 { 'D', 'e', 'c' } };
2223 const sal_Char
* pTheMonth
= aMonth
[rUTC
.GetMonth() - 1];
2224 rSink
.write(pTheMonth
, pTheMonth
+ 3);
2226 writeUnsigned(rSink
, rUTC
.GetYear());
2228 writeUnsigned(rSink
, rUTC
.GetHour(), 2);
2230 writeUnsigned(rSink
, rUTC
.GetMin(), 2);
2232 writeUnsigned(rSink
, rUTC
.GetSec(), 2);
2236 //============================================================================
// ByteString convenience overload: interprets rBody as UTF-8, converts it to
// a UniString and forwards all arguments to the UniString overload of
// writeHeaderFieldBody.
2238 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink
& rSink
,
2239 HeaderFieldType eType
,
2240 const ByteString
& rBody
,
2241 rtl_TextEncoding ePreferredEncoding
,
2244 writeHeaderFieldBody(rSink
, eType
,
2245 UniString(rBody
, RTL_TEXTENCODING_UTF8
),
2246 ePreferredEncoding
, bInitialSpace
);
2249 //============================================================================
2251 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink
& rSink
,
2252 HeaderFieldType eType
,
2253 const UniString
& rBody
,
2254 rtl_TextEncoding ePreferredEncoding
,
2257 if (eType
== HEADER_FIELD_TEXT
)
2259 INetMIMEEncodedWordOutputSink
2260 aOutput(rSink
, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
,
2262 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS
:
2263 INetMIMEEncodedWordOutputSink::SPACE_NO
,
2264 ePreferredEncoding
);
2265 aOutput
.write(rBody
.GetBuffer(), rBody
.GetBuffer() + rBody
.Len());
2270 enum Brackets
{ BRACKETS_OUTSIDE
, BRACKETS_OPENING
, BRACKETS_INSIDE
};
2271 Brackets eBrackets
= BRACKETS_OUTSIDE
;
2273 const sal_Unicode
* pBodyPtr
= rBody
.GetBuffer();
2274 const sal_Unicode
* pBodyEnd
= pBodyPtr
+ rBody
.Len();
2275 while (pBodyPtr
!= pBodyEnd
)
2280 // A WSP adds to accumulated space:
2281 bInitialSpace
= true;
2287 // Write a pending '<' if necessary:
2288 if (eBrackets
== BRACKETS_OPENING
)
2290 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2291 >= rSink
.getLineLengthLimit())
2292 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2293 else if (bInitialSpace
)
2296 bInitialSpace
= false;
2297 eBrackets
= BRACKETS_INSIDE
;
2300 // Write the comment, introducing encoded-words where
2303 INetMIMEEncodedWordOutputSink
2306 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
,
2307 INetMIMEEncodedWordOutputSink::SPACE_NO
,
2308 ePreferredEncoding
);
2309 while (pBodyPtr
!= pBodyEnd
)
2314 if (rSink
.getColumn()
2315 + (bInitialSpace
? 1 : 0)
2316 >= rSink
.getLineLengthLimit())
2317 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2318 else if (bInitialSpace
)
2321 bInitialSpace
= false;
2328 if (rSink
.getColumn()
2329 >= rSink
.getLineLengthLimit())
2330 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2338 if (++pBodyPtr
== pBodyEnd
)
2341 aOutput
<< *pBodyPtr
++;
2349 // Write an already pending '<' if necessary:
2350 if (eBrackets
== BRACKETS_OPENING
)
2352 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2353 >= rSink
.getLineLengthLimit())
2354 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2355 else if (bInitialSpace
)
2358 bInitialSpace
= false;
2361 // Remember this '<' as pending, and open a bracketed
2363 eBrackets
= BRACKETS_OPENING
;
2368 // Write a pending '<' if necessary:
2369 if (eBrackets
== BRACKETS_OPENING
)
2371 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2372 >= rSink
.getLineLengthLimit())
2373 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2374 else if (bInitialSpace
)
2377 bInitialSpace
= false;
2380 // Write this '>', and close any bracketed block:
2381 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2382 >= rSink
.getLineLengthLimit())
2383 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2384 else if (bInitialSpace
)
2387 bInitialSpace
= false;
2388 eBrackets
= BRACKETS_OUTSIDE
;
2397 // Write a pending '<' if necessary:
2398 if (eBrackets
== BRACKETS_OPENING
)
2400 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2401 >= rSink
.getLineLengthLimit())
2402 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2403 else if (bInitialSpace
)
2406 bInitialSpace
= false;
2407 eBrackets
= BRACKETS_INSIDE
;
2410 // Write this specials:
2411 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2412 >= rSink
.getLineLengthLimit())
2413 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2414 else if (bInitialSpace
)
2416 rSink
<< sal_Char(*pBodyPtr
++);
2417 bInitialSpace
= false;
2421 // A <CRLF WSP> adds to accumulated space, a <CR> not
2422 // followed by <LF WSP> starts 'junk':
2423 if (startsWithLineFolding(pBodyPtr
, pBodyEnd
))
2425 bInitialSpace
= true;
2431 // The next token is either one of <"." / "@" / atom /
2432 // quoted-string / domain-literal>, or it's 'junk'; if it
2433 // is not 'junk', it is either a 'phrase' (i.e., it may
2434 // contain encoded-words) or a 'non-phrase' (i.e., it may
2435 // not contain encoded-words):
2436 enum Entity
{ ENTITY_JUNK
, ENTITY_NON_PHRASE
,
2438 Entity eEntity
= ENTITY_JUNK
;
2444 // A token of <"." / "@" / domain-literal> always
2445 // starts a 'non-phrase':
2446 eEntity
= ENTITY_NON_PHRASE
;
2450 if (isUSASCII(*pBodyPtr
)
2451 && !isAtomChar(*pBodyPtr
))
2453 eEntity
= ENTITY_JUNK
;
2457 // A token of <atom / quoted-string> can either be
2458 // a 'phrase' or a 'non-phrase':
2461 case HEADER_FIELD_STRUCTURED
:
2462 eEntity
= ENTITY_NON_PHRASE
;
2465 case HEADER_FIELD_PHRASE
:
2466 eEntity
= ENTITY_PHRASE
;
2469 case HEADER_FIELD_MESSAGE_ID
:
2470 // A 'phrase' if and only if outside any
2473 = eBrackets
== BRACKETS_OUTSIDE
?
2478 case HEADER_FIELD_ADDRESS
:
2480 // A 'non-phrase' if and only if, after
2481 // skipping this token and any following
2482 // <linear-white-space> and <comment>s,
2483 // there is no token left, or the next
2484 // token is any of <"." / "@" / ">" / ","
2485 // / ";">, or the next token is <":"> and
2486 // is within a bracketed block:
2487 const sal_Unicode
* pLookAhead
= pBodyPtr
;
2488 if (*pLookAhead
== '"')
2491 = skipQuotedString(pLookAhead
,
2493 if (pLookAhead
== pBodyPtr
)
2494 pLookAhead
= pBodyEnd
;
2497 while (pLookAhead
!= pBodyEnd
2498 && (isAtomChar(*pLookAhead
)
2502 while (pLookAhead
!= pBodyEnd
)
2503 switch (*pLookAhead
)
2512 const sal_Unicode
* pPast
2513 = skipComment(pLookAhead
,
2516 = pPast
== pLookAhead
?
2526 eEntity
= ENTITY_NON_PHRASE
;
2527 goto entity_determined
;
2532 == BRACKETS_OUTSIDE
?
2535 goto entity_determined
;
2538 if (startsWithLineFolding(
2539 pLookAhead
, pBodyEnd
))
2545 eEntity
= ENTITY_PHRASE
;
2546 goto entity_determined
;
2548 eEntity
= ENTITY_NON_PHRASE
;
2553 case HEADER_FIELD_TEXT
:
2558 // In a 'non-phrase', a non-US-ASCII character
2559 // cannot be part of an <atom>, but instead the
2560 // whole entity is 'junk' rather than 'non-
2562 if (eEntity
== ENTITY_NON_PHRASE
2563 && !isUSASCII(*pBodyPtr
))
2564 eEntity
= ENTITY_JUNK
;
2572 // Write a pending '<' if necessary:
2573 if (eBrackets
== BRACKETS_OPENING
)
2575 if (rSink
.getColumn()
2576 + (bInitialSpace
? 1 : 0)
2577 >= rSink
.getLineLengthLimit())
2578 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2579 else if (bInitialSpace
)
2582 bInitialSpace
= false;
2583 eBrackets
= BRACKETS_INSIDE
;
2586 // Calculate the length of in- and output:
2587 const sal_Unicode
* pStart
= pBodyPtr
;
2588 sal_Size nLength
= 0;
2589 bool bModify
= false;
2591 while (pBodyPtr
!= pBodyEnd
&& !bEnd
)
2595 if (startsWithLineFolding(pBodyPtr
,
2598 else if (startsWithLineBreak(
2599 pBodyPtr
, pBodyEnd
))
2618 if (isVisible(*pBodyPtr
))
2620 else if (isUSASCII(*pBodyPtr
))
2627 nLength
+= getUTF8OctetCount(
2634 // Write the output:
2635 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2637 > rSink
.getLineLengthLimit())
2638 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2639 else if (bInitialSpace
)
2641 bInitialSpace
= false;
2643 while (pStart
!= pBodyPtr
)
2644 if (startsWithLineBreak(pStart
, pBodyPtr
))
2646 rSink
<< "\x0D\\\x0A"; // CR, '\', LF
2650 writeUTF8(rSink
, *pStart
++);
2652 rSink
.write(pStart
, pBodyPtr
);
2656 case ENTITY_NON_PHRASE
:
2658 // Calculate the length of in- and output:
2659 const sal_Unicode
* pStart
= pBodyPtr
;
2660 sal_Size nLength
= 0;
2661 bool bBracketedBlock
= false;
2662 bool bSymbol
= *pStart
!= '.' && *pStart
!= '@';
2663 bool bModify
= false;
2665 while (pBodyPtr
!= pBodyEnd
&& !bEnd
)
2672 const sal_Unicode
* pLookAhead
2673 = skipLinearWhiteSpace(pBodyPtr
,
2675 if (pLookAhead
< pBodyEnd
2677 isAtomChar(*pLookAhead
)
2678 || *pLookAhead
== '"'
2679 || *pLookAhead
== '[' :
2681 || *pLookAhead
== '@'
2682 || (*pLookAhead
== '>'
2684 >= HEADER_FIELD_MESSAGE_ID
2686 == BRACKETS_OPENING
)))
2689 pBodyPtr
= pLookAhead
;
2700 = scanQuotedBlock(pBodyPtr
,
2715 = scanQuotedBlock(pBodyPtr
,
2739 if (eBrackets
== BRACKETS_OPENING
2741 >= HEADER_FIELD_MESSAGE_ID
)
2744 bBracketedBlock
= true;
2751 if (isAtomChar(*pBodyPtr
) && bSymbol
)
2753 while (pBodyPtr
!= pBodyEnd
2754 && isAtomChar(*pBodyPtr
))
2763 if (!isUSASCII(*pBodyPtr
))
2770 // Write a pending '<' if necessary:
2771 if (eBrackets
== BRACKETS_OPENING
2772 && !bBracketedBlock
)
2774 if (rSink
.getColumn()
2775 + (bInitialSpace
? 1 : 0)
2776 >= rSink
.getLineLengthLimit())
2777 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2778 else if (bInitialSpace
)
2781 bInitialSpace
= false;
2782 eBrackets
= BRACKETS_INSIDE
;
2785 // Write the output:
2786 if (rSink
.getColumn() + (bInitialSpace
? 1 : 0)
2788 > rSink
.getLineLengthLimit())
2789 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2790 else if (bInitialSpace
)
2792 bInitialSpace
= false;
2793 if (bBracketedBlock
)
2796 eBrackets
= BRACKETS_OUTSIDE
;
2800 enum Mode
{ MODE_PLAIN
, MODE_QUOTED_STRING
,
2801 MODE_DOMAIN_LITERAL
};
2802 Mode eMode
= MODE_PLAIN
;
2803 while (pStart
!= pBodyPtr
)
2807 if (startsWithLineFolding(
2810 if (eMode
!= MODE_PLAIN
)
2815 else if (startsWithLineBreak(
2818 rSink
<< "\x0D\\\x0A";
2824 rSink
<< '\x0D'; // CR
2831 if (eMode
!= MODE_PLAIN
)
2832 rSink
<< sal_Char(*pStart
);
2837 if (eMode
== MODE_PLAIN
)
2838 eMode
= MODE_QUOTED_STRING
;
2840 == MODE_QUOTED_STRING
)
2847 if (eMode
== MODE_PLAIN
)
2848 eMode
= MODE_DOMAIN_LITERAL
;
2854 if (eMode
== MODE_DOMAIN_LITERAL
)
2862 if (++pStart
< pBodyPtr
)
2863 writeUTF8(rSink
, *pStart
++);
2867 writeUTF8(rSink
, *pStart
++);
2872 rSink
.write(pStart
, pBodyPtr
);
2878 // Write a pending '<' if necessary:
2879 if (eBrackets
== BRACKETS_OPENING
)
2881 if (rSink
.getColumn()
2882 + (bInitialSpace
? 1 : 0)
2883 >= rSink
.getLineLengthLimit())
2884 rSink
<< INetMIMEOutputSink::endl
<< ' ';
2885 else if (bInitialSpace
)
2888 bInitialSpace
= false;
2889 eBrackets
= BRACKETS_INSIDE
;
2892 // Calculate the length of in- and output:
2893 const sal_Unicode
* pStart
= pBodyPtr
;
2894 bool bQuotedString
= false;
2896 while (pBodyPtr
!= pBodyEnd
&& !bEnd
)
2906 const sal_Unicode
* pLookAhead
2907 = skipLinearWhiteSpace(
2908 pBodyPtr
, pBodyEnd
);
2909 if (pLookAhead
!= pBodyEnd
2910 && (isAtomChar(*pLookAhead
)
2911 || !isUSASCII(*pLookAhead
)
2912 || *pLookAhead
== '"'))
2913 pBodyPtr
= pLookAhead
;
2920 bQuotedString
= !bQuotedString
;
2927 if (++pBodyPtr
!= pBodyEnd
)
2936 || isAtomChar(*pBodyPtr
)
2937 || !isUSASCII(*pBodyPtr
))
2944 // Write the phrase, introducing encoded-words
2946 INetMIMEEncodedWordOutputSink
2949 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
,
2951 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS
:
2952 INetMIMEEncodedWordOutputSink::SPACE_ENCODED
,
2953 ePreferredEncoding
);
2954 while (pStart
!= pBodyPtr
)
2962 if (++pStart
!= pBodyPtr
)
2963 aOutput
<< *pStart
++;
2968 aOutput
<< *pStart
++;
2972 aOutput
<< *pStart
++;
2975 bInitialSpace
= aOutput
.flush();
2985 //============================================================================
// Tries to read one multi-byte UTF-8 sequence starting at rBegin (bounded by
// pEnd), accumulates its code point in nUCS4 and stores the result in
// rCharacter: directly when eEncoding >= RTL_TEXTENCODING_UCS4, otherwise
// after converting the character to eEncoding (expected to yield exactly one
// octet, see the DBG_ASSERT below).
// NOTE(review): the return statements, the declarations of nCount/nMin and
// any update of rBegin are outside this excerpt -- presumably false is
// returned whenever one of the checks below fails; confirm against the
// full file.
2987 bool INetMIME::translateUTF8Char(const sal_Char
*& rBegin
,
2988 const sal_Char
* pEnd
,
2989 rtl_TextEncoding eEncoding
,
2990 sal_uInt32
& rCharacter
)
// Guard: empty input, or a first octet below 0x80 or at/above 0xFE.
2992 if (rBegin
== pEnd
|| static_cast< unsigned char >(*rBegin
) < 0x80
2993 || static_cast< unsigned char >(*rBegin
) >= 0xFE)
2999 const sal_Char
* p
= rBegin
;
// The lead octet's range selects how many payload bits it contributes
// (masks 0x1F, 0xF, 7, 3, 1 for progressively longer sequences).
3000 if (static_cast< unsigned char >(*p
) < 0xE0)
3004 nUCS4
= static_cast< unsigned char >(*p
) & 0x1F;
3006 else if (static_cast< unsigned char >(*p
) < 0xF0)
3010 nUCS4
= static_cast< unsigned char >(*p
) & 0xF;
3012 else if (static_cast< unsigned char >(*p
) < 0xF8)
3016 nUCS4
= static_cast< unsigned char >(*p
) & 7;
3018 else if (static_cast< unsigned char >(*p
) < 0xFC)
3022 nUCS4
= static_cast< unsigned char >(*p
) & 3;
3028 nUCS4
= static_cast< unsigned char >(*p
) & 1;
// Fold each continuation octet (bit pattern 10xxxxxx) into nUCS4, six bits
// at a time.
3032 for (; nCount
-- > 0; ++p
)
3033 if ((static_cast< unsigned char >(*p
) & 0xC0) == 0x80)
3034 nUCS4
= (nUCS4
<< 6) | (static_cast< unsigned char >(*p
) & 0x3F);
// Range check: the value must not be below nMin nor above 0x10FFFF.
3038 if (nUCS4
< nMin
|| nUCS4
> 0x10FFFF)
3041 if (eEncoding
>= RTL_TEXTENCODING_UCS4
)
3045 sal_Unicode aUTF16
[2];
3046 const sal_Unicode
* pUTF16End
= putUTF32Character(aUTF16
, nUCS4
);
3048 sal_Char
* pBuffer
= convertFromUnicode(aUTF16
, pUTF16End
, eEncoding
,
3052 DBG_ASSERT(nSize
== 1,
3053 "INetMIME::translateUTF8Char(): Bad conversion");
3054 rCharacter
= *pBuffer
;
3061 //============================================================================
// Builds a ByteString from rText: at each position translateUTF8Char() is
// tried with eEncoding; on success the translated character is appended as a
// single sal_Char, otherwise the current octet is copied unchanged and the
// read pointer advances by one.
// NOTE(review): the enclosing loop and the return statement are outside this
// excerpt -- presumably the loop runs until p reaches pEnd and sDecoded is
// returned; confirm against the full file.
3063 ByteString
INetMIME::decodeUTF8(const ByteString
& rText
,
3064 rtl_TextEncoding eEncoding
)
3066 const sal_Char
* p
= rText
.GetBuffer();
3067 const sal_Char
* pEnd
= p
+ rText
.Len();
3068 ByteString sDecoded
;
3071 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3072 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3073 sal_uInt32 nCharacter
=0;
3074 if (translateUTF8Char(p
, pEnd
, eEncoding
, nCharacter
))
3075 sDecoded
+= sal_Char(nCharacter
);
3077 sDecoded
+= sal_Char(*p
++);
3082 //============================================================================
// Decodes the raw body of a header field (rBody) into Unicode text.
// Encoded-words of the form "=?charset[*language]?B|Q?text?=" (grammar in the
// comment below) are recognised, their payload is base64- or
// quoted-printable-decoded into sText and converted with convertToUnicode();
// octet runs that translateUTF8Char() accepts are appended as UTF-16; all
// remaining octets are appended via appendISO88591().  eType selects the
// treatment of the decoded text: for HEADER_FIELD_TEXT it is appended
// directly, for other types quoted strings and comment nesting
// (nCommentLevel) are tracked.
// NOTE(review): many statements (braces, switch labels, the return) are
// outside this excerpt; the summary above covers only what is visible here.
3084 UniString
INetMIME::decodeHeaderFieldBody(HeaderFieldType eType
,
3085 const ByteString
& rBody
)
3087 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
3088 // versions of StarOffice send mails with header fields where encoded
3089 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
3090 // '=', ',', '.', '"', ')', without any required white space in between.
3091 // And there appear to exist some broken mailers that only encode single
3092 // letters within words, like "Appel
3093 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
3094 // detect encoded words even when not properly surrounded by white space.
3096 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
3098 // encoded-word = "=?"
3099 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
3100 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
3101 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
3102 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
3105 // base64 = ALPHA / DIGIT / "+" / "/"
3107 const sal_Char
* pBegin
= rBody
.GetBuffer();
3108 const sal_Char
* pEnd
= pBegin
+ rBody
.Len();
3111 const sal_Char
* pCopyBegin
= pBegin
;
3113 /* bool bStartEncodedWord = true; */
3114 const sal_Char
* pWSPBegin
= pBegin
;
3115 UniString sEncodedText
;
3116 bool bQuotedEncodedText
= false;
3117 sal_uInt32 nCommentLevel
= 0;
3119 for (const sal_Char
* p
= pBegin
; p
!= pEnd
;)
// A '=' may start an encoded-word; q scans ahead while bEncodedWord records
// whether the candidate is still well-formed.
3121 if (p
!= pEnd
&& *p
== '=' /* && bStartEncodedWord */)
3123 const sal_Char
* q
= p
+ 1;
3124 bool bEncodedWord
= q
!= pEnd
&& *q
++ == '?';
3126 rtl_TextEncoding eCharsetEncoding
= RTL_TEXTENCODING_DONTKNOW
;
3129 const sal_Char
* pCharsetBegin
= q
;
3130 const sal_Char
* pLanguageBegin
= 0;
3131 int nAlphaCount
= 0;
3132 for (bool bDone
= false; !bDone
;)
3135 bEncodedWord
= false;
3140 sal_Char cChar
= *q
++;
3144 pLanguageBegin
= q
- 1;
3149 if (pLanguageBegin
!= 0)
3151 if (nAlphaCount
== 0)
3159 if (pCharsetBegin
== q
- 1)
3160 bEncodedWord
= false;
3164 = getCharsetEncoding(
3167 || nAlphaCount
== 0 ?
3168 q
- 1 : pLanguageBegin
);
3169 bEncodedWord
= isMIMECharsetEncoding(
3172 = translateFromMIME(eCharsetEncoding
);
3178 if (pLanguageBegin
!= 0
3179 && (!isAlpha(cChar
) || ++nAlphaCount
> 8))
3186 bool bEncodingB
= false;
3190 bEncodedWord
= false;
3206 bEncodedWord
= false;
3212 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '?';
3219 for (bool bDone
= false; !bDone
;)
3223 bEncodedWord
= false;
3228 bool bFinal
= false;
3230 sal_uInt32 nValue
= 0;
// Four base64 digits of six bits each are packed into nValue (shifts 18, 12,
// 6, 0).
3231 for (int nShift
= 18; nShift
>= 0; nShift
-= 6)
3233 int nWeight
= getBase64Weight(*q
++);
3236 bEncodedWord
= false;
3246 bEncodedWord
= false;
3251 nCount
= nShift
== 6 ? 1 : 2;
3255 nValue
|= nWeight
<< nShift
;
3259 for (int nShift
= 16; nCount
-- > 0;
3261 sText
+= sal_Char(nValue
>> nShift
3268 if (bFinal
&& !bDone
)
3270 bEncodedWord
= false;
3279 const sal_Char
* pEncodedTextBegin
= q
;
3280 const sal_Char
* pEncodedTextCopyBegin
= q
;
3281 for (bool bDone
= false; !bDone
;)
3284 bEncodedWord
= false;
3289 sal_uInt32 nChar
= *q
++;
3296 bEncodedWord
= false;
3300 int nDigit1
= getHexWeight(q
[0]);
3301 int nDigit2
= getHexWeight(q
[1]);
3302 if (nDigit1
< 0 || nDigit2
< 0)
3304 bEncodedWord
= false;
3308 sText
+= rBody
.Copy(
3309 static_cast< xub_StrLen
>(
3310 pEncodedTextCopyBegin
- pBegin
),
3311 static_cast< xub_StrLen
>(
3312 q
- 1 - pEncodedTextCopyBegin
));
3313 sText
+= sal_Char(nDigit1
<< 4 | nDigit2
);
3315 pEncodedTextCopyBegin
= q
;
3320 if (q
- pEncodedTextBegin
> 1)
3321 sText
+= rBody
.Copy(
3322 static_cast< xub_StrLen
>(
3323 pEncodedTextCopyBegin
- pBegin
),
3324 static_cast< xub_StrLen
>(
3325 q
- 1 - pEncodedTextCopyBegin
));
3327 bEncodedWord
= false;
3332 sText
+= rBody
.Copy(
3333 static_cast< xub_StrLen
>(
3334 pEncodedTextCopyBegin
- pBegin
),
3335 static_cast< xub_StrLen
>(
3336 q
- 1 - pEncodedTextCopyBegin
));
3338 pEncodedTextCopyBegin
= q
;
3342 if (!isVisible(nChar
))
3344 bEncodedWord
= false;
3353 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '=';
3355 // if (bEncodedWord && q != pEnd)
3368 // bEncodedWord = false;
3372 sal_Unicode
* pUnicodeBuffer
= 0;
3373 sal_Size nUnicodeSize
= 0;
3377 = convertToUnicode(sText
.GetBuffer(),
3378 sText
.GetBuffer() + sText
.Len(),
3379 eCharsetEncoding
, nUnicodeSize
);
3380 if (pUnicodeBuffer
== 0)
3381 bEncodedWord
= false;
3386 appendISO88591(sDecoded
, pCopyBegin
, pWSPBegin
);
3387 if (eType
== HEADER_FIELD_TEXT
)
3390 static_cast< xub_StrLen
>(nUnicodeSize
));
3391 else if (nCommentLevel
== 0)
3393 sEncodedText
.Append(
3395 static_cast< xub_StrLen
>(nUnicodeSize
));
3396 if (!bQuotedEncodedText
)
3398 const sal_Unicode
* pTextPtr
= pUnicodeBuffer
;
3399 const sal_Unicode
* pTextEnd
= pTextPtr
3401 for (; pTextPtr
!= pTextEnd
; ++pTextPtr
)
3402 if (!isEncodedWordTokenChar(*pTextPtr
))
3404 bQuotedEncodedText
= true;
3411 const sal_Unicode
* pTextPtr
= pUnicodeBuffer
;
3412 const sal_Unicode
* pTextEnd
= pTextPtr
+ nUnicodeSize
;
3413 for (; pTextPtr
!= pTextEnd
; ++pTextPtr
)
3425 sDecoded
+= *pTextPtr
;
3428 delete[] pUnicodeBuffer
;
3433 while (p
!= pEnd
&& isWhiteSpace(*p
))
3435 /* bStartEncodedWord = p != pWSPBegin; */
// Flush decoded text collected in sEncodedText into sDecoded, then reset the
// accumulator and its quoting flag.
3440 if (sEncodedText
.Len() != 0)
3442 if (bQuotedEncodedText
)
3445 const sal_Unicode
* pTextPtr
= sEncodedText
.GetBuffer();
3446 const sal_Unicode
* pTextEnd
= pTextPtr
+ sEncodedText
.Len();
3447 for (;pTextPtr
!= pTextEnd
; ++pTextPtr
)
3457 sDecoded
+= *pTextPtr
;
3462 sDecoded
+= sEncodedText
;
3463 sEncodedText
.Erase();
3464 bQuotedEncodedText
= false;
3477 // bStartEncodedWord = true;
3481 if (eType
!= HEADER_FIELD_TEXT
&& nCommentLevel
== 0)
3483 const sal_Char
* pQuotedStringEnd
3484 = skipQuotedString(p
- 1, pEnd
);
3485 p
= pQuotedStringEnd
== p
- 1 ? pEnd
: pQuotedStringEnd
;
3487 /* bStartEncodedWord = true; */
3491 if (eType
!= HEADER_FIELD_TEXT
)
3493 /* bStartEncodedWord = true; */
3497 if (nCommentLevel
> 0)
3499 /* bStartEncodedWord = false; */
3504 const sal_Char
* pUTF8Begin
= p
- 1;
3505 const sal_Char
* pUTF8End
= pUTF8Begin
;
3506 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3507 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3508 sal_uInt32 nCharacter
=0;
3509 if (translateUTF8Char(pUTF8End
, pEnd
, RTL_TEXTENCODING_UCS4
,
3512 appendISO88591(sDecoded
, pCopyBegin
, p
- 1);
3513 sal_Unicode aUTF16Buf
[2];
3514 xub_StrLen nUTF16Len
= static_cast< xub_StrLen
>(
3515 putUTF32Character(aUTF16Buf
, nCharacter
) - aUTF16Buf
);
3516 sDecoded
.Append(aUTF16Buf
, nUTF16Len
);
3520 /* bStartEncodedWord = false; */
// Append whatever remains uncopied at the end of the body as ISO-8859-1.
3527 appendISO88591(sDecoded
, pCopyBegin
, pEnd
);
3531 //============================================================================
3533 // INetMIMEOutputSink
3535 //============================================================================
// Convenience overload for NUL-terminated strings: measures pSequence with
// rtl_str_getLength() and forwards the range to the two-pointer overload.
// NOTE(review): the return statement is outside this excerpt -- presumably
// nLength is returned; confirm against the full file.
3538 sal_Size
INetMIMEOutputSink::writeSequence(const sal_Char
* pSequence
)
3540 sal_Size nLength
= rtl_str_getLength(pSequence
);
3541 writeSequence(pSequence
, pSequence
+ nLength
);
3545 //============================================================================
// Writes a range of sal_uInt32 values as octets: each element is narrowed to
// sal_Char into a temporary heap buffer of (pEnd - pBegin) elements, which is
// then handed to the sal_Char range overload and freed.  Values >= 256 are
// flagged only by DBG_ASSERT and are still narrowed.
3547 void INetMIMEOutputSink::writeSequence(const sal_uInt32
* pBegin
,
3548 const sal_uInt32
* pEnd
)
3550 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3551 "INetMIMEOutputSink::writeSequence(): Bad sequence");
3553 sal_Char
* pBufferBegin
= new sal_Char
[pEnd
- pBegin
];
3554 sal_Char
* pBufferEnd
= pBufferBegin
;
3555 while (pBegin
!= pEnd
)
3557 DBG_ASSERT(*pBegin
< 256,
3558 "INetMIMEOutputSink::writeSequence(): Bad octet");
3559 *pBufferEnd
++ = sal_Char(*pBegin
++);
3561 writeSequence(pBufferBegin
, pBufferEnd
);
3562 delete[] pBufferBegin
;
3565 //============================================================================
// Writes a range of sal_Unicode values as octets: each element is narrowed to
// sal_Char into a temporary heap buffer of (pEnd - pBegin) elements, which is
// then handed to the sal_Char range overload and freed.  Values >= 256 are
// flagged only by DBG_ASSERT and are still narrowed.
3567 void INetMIMEOutputSink::writeSequence(const sal_Unicode
* pBegin
,
3568 const sal_Unicode
* pEnd
)
3570 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3571 "INetMIMEOutputSink::writeSequence(): Bad sequence");
3573 sal_Char
* pBufferBegin
= new sal_Char
[pEnd
- pBegin
];
3574 sal_Char
* pBufferEnd
= pBufferBegin
;
3575 while (pBegin
!= pEnd
)
3577 DBG_ASSERT(*pBegin
< 256,
3578 "INetMIMEOutputSink::writeSequence(): Bad octet");
3579 *pBufferEnd
++ = sal_Char(*pBegin
++);
3581 writeSequence(pBufferBegin
, pBufferEnd
);
3582 delete[] pBufferBegin
;
3585 //============================================================================
// Base-class error query: always reports ERRCODE_NONE.
3587 ErrCode
INetMIMEOutputSink::getError() const
3589 return ERRCODE_NONE
;
3592 //============================================================================
// Emits a line end as the two octets CR (0x0D) and LF (0x0A) through the
// sal_Char range overload of writeSequence().
3593 void INetMIMEOutputSink::writeLineEnd()
3595 static const sal_Char aCRLF
[2] = { 0x0D, 0x0A };
3596 writeSequence(aCRLF
, aCRLF
+ 2);
3600 //============================================================================
3602 // INetMIMEStringOutputSink
3604 //============================================================================
// Appends the octet range [pBegin, pEnd) to m_aBuffer.  m_bOverflow is
// sticky: it becomes true once a range is longer than the room left before
// STRING_MAXLEN and is never cleared here.
// NOTE(review): the original line between the overflow update and the
// Append() call is outside this excerpt -- presumably the append is skipped
// once m_bOverflow is set; confirm against the full file.
3607 void INetMIMEStringOutputSink::writeSequence(const sal_Char
* pBegin
,
3608 const sal_Char
* pEnd
)
3610 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3611 "INetMIMEStringOutputSink::writeSequence(): Bad sequence");
3613 m_bOverflow
= m_bOverflow
3614 || pEnd
- pBegin
> STRING_MAXLEN
- m_aBuffer
.Len();
3616 m_aBuffer
.Append(pBegin
, static_cast< xub_StrLen
>(pEnd
- pBegin
));
3619 //============================================================================
// Reports ERRCODE_IO_OUTOFMEMORY once m_bOverflow has been set, otherwise
// ERRCODE_NONE.
3621 ErrCode
INetMIMEStringOutputSink::getError() const
3623 return m_bOverflow
? ERRCODE_IO_OUTOFMEMORY
: ERRCODE_NONE
;
3626 //============================================================================
3628 // INetMIMEUnicodeOutputSink
3630 //============================================================================
// Writes an octet range to the Unicode sink: each sal_Char is converted via
// sal_uChar (so octets >= 0x80 map to 0x80..0xFF rather than being
// sign-extended) into a temporary sal_Unicode buffer, which is passed to the
// sal_Unicode range overload and then freed.
3633 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Char
* pBegin
,
3634 const sal_Char
* pEnd
)
3636 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3637 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3639 sal_Unicode
* pBufferBegin
= new sal_Unicode
[pEnd
- pBegin
];
3640 sal_Unicode
* pBufferEnd
= pBufferBegin
;
3641 while (pBegin
!= pEnd
)
3642 *pBufferEnd
++ = sal_uChar(*pBegin
++);
3643 writeSequence(pBufferBegin
, pBufferEnd
);
3644 delete[] pBufferBegin
;
3647 //============================================================================
// Writes a range of sal_uInt32 values to the Unicode sink: each element is
// narrowed to sal_Unicode into a temporary heap buffer, which is passed to
// the sal_Unicode range overload and then freed.  Values >= 256 are flagged
// only by DBG_ASSERT and are still narrowed.
3649 void INetMIMEUnicodeOutputSink::writeSequence(const sal_uInt32
* pBegin
,
3650 const sal_uInt32
* pEnd
)
3652 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3653 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3655 sal_Unicode
* pBufferBegin
= new sal_Unicode
[pEnd
- pBegin
];
3656 sal_Unicode
* pBufferEnd
= pBufferBegin
;
3657 while (pBegin
!= pEnd
)
3659 DBG_ASSERT(*pBegin
< 256,
3660 "INetMIMEOutputSink::writeSequence(): Bad octet");
3661 *pBufferEnd
++ = sal_Unicode(*pBegin
++);
3663 writeSequence(pBufferBegin
, pBufferEnd
);
3664 delete[] pBufferBegin
;
3667 //============================================================================
// Appends the sal_Unicode range [pBegin, pEnd) to m_aBuffer.  m_bOverflow is
// sticky: it becomes true once a range is longer than the room left before
// STRING_MAXLEN and is never cleared here.
// NOTE(review): the original line between the overflow update and the
// Append() call is outside this excerpt -- presumably the append is skipped
// once m_bOverflow is set; confirm against the full file.
3669 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Unicode
* pBegin
,
3670 const sal_Unicode
* pEnd
)
3672 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
3673 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3675 m_bOverflow
= m_bOverflow
3676 || pEnd
- pBegin
> STRING_MAXLEN
- m_aBuffer
.Len();
3678 m_aBuffer
.Append(pBegin
, static_cast< xub_StrLen
>(pEnd
- pBegin
));
3681 //============================================================================
// Reports ERRCODE_IO_OUTOFMEMORY once m_bOverflow has been set, otherwise
// ERRCODE_NONE.
3683 ErrCode
INetMIMEUnicodeOutputSink::getError() const
3685 return m_bOverflow
? ERRCODE_IO_OUTOFMEMORY
: ERRCODE_NONE
;
3688 //============================================================================
3690 // INetMIMEEncodedWordOutputSink
3692 //============================================================================
// Escape table indexed by US-ASCII code (0x00..0x7F).  Each entry is a bit
// mask of INetMIMEEncodedWordOutputSink::CONTEXT_* flags; it is tested
// against the sink's m_eContext (aEscape[nChar] & m_eContext) to decide
// whether that character needs escaping in the given context.
// NOTE(review): entries for characters without a trailing label here (space,
// '!', '*', '+', '-', '/', digits, letters) are outside this excerpt.
3694 static const sal_Char aEscape
[128]
3695 = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x00
3696 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x01
3697 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x02
3698 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x03
3699 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x04
3700 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x05
3701 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x06
3702 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x07
3703 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x08
3704 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x09
3705 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0A
3706 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0B
3707 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0C
3708 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0D
3709 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0E
3710 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x0F
3711 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x10
3712 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x11
3713 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x12
3714 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x13
3715 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x14
3716 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x15
3717 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x16
3718 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x17
3719 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x18
3720 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x19
3721 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1A
3722 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1B
3723 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1C
3724 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1D
3725 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1E
3726 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // 0x1F
3729 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '"'
3730 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '#'
3731 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '$'
3732 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '%'
3733 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '&'
3734 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '''
3735 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '('
3736 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // ')'
3739 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // ','
3741 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '.'
3753 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // ':'
3754 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // ';'
3755 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '<'
3756 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '='
3757 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '>'
3758 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '?'
3759 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '@'
3786 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '['
3787 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '\'
3788 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // ']'
3789 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '^'
3790 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '_'
3791 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '`'
3818 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '{'
3819 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '|'
3820 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '}'
3821 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
, // '~'
3822 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT
| INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT
| INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE
}; // DEL
// Tells whether nChar must be escaped in the sink's current context: true
// for every non-US-ASCII character, otherwise the aEscape entry for nChar is
// masked with m_eContext.  The isUSASCII() test comes first, so aEscape is
// only indexed with US-ASCII values.
3825 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar
) const
3827 return !INetMIME::isUSASCII(nChar
) || aEscape
[nChar
] & m_eContext
;
3830 //============================================================================
// Flushes the text buffered in [m_pBuffer, m_pBufferEnd) to m_rSink and
// resets the sink's per-chunk state.  What is visible in this excerpt:
//  - if the buffered text already is an encoded word
//    (m_eEncodedWordState == STATE_SECOND_EQUALS) it is written verbatim and
//    the coding becomes CODING_ENCODED_TERMINATED;
//  - otherwise m_eCoding may be upgraded to CODING_ENCODED (or CODING_QUOTED)
//    when the text would exceed the sink's line length limit;
//  - the text is then written plain, as a quoted string (escaping characters
//    for which needsQuotedStringEscape() holds), or as "=?charset?Q?...?="
//    encoded-words, breaking lines with INetMIMEOutputSink::endl;
//  - finally the buffer end, previous/current coding, quoted-escape counter
//    and encoded-word state are reset.
// bWriteTrailer is consulted after plain output when m_eCoding is
// CODING_QUOTED.
// NOTE(review): switch labels, braces and several statements are outside
// this excerpt, so the exact branch structure must be confirmed against the
// full file.
3831 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer
)
3833 if (m_eInitialSpace
== SPACE_ALWAYS
&& m_nExtraSpaces
== 0)
3836 if (m_eEncodedWordState
== STATE_SECOND_EQUALS
)
3838 // If the text is already an encoded word, copy it verbatim:
3839 sal_uInt32 nSize
= m_pBufferEnd
- m_pBuffer
;
3840 switch (m_ePrevCoding
)
3845 if (m_eInitialSpace
== SPACE_ENCODED
&& m_nExtraSpaces
== 0)
3847 for (; m_nExtraSpaces
> 1; --m_nExtraSpaces
)
3849 if (m_rSink
.getColumn() >= m_rSink
.getLineLengthLimit())
3850 m_rSink
<< INetMIMEOutputSink::endl
;
3853 if (m_nExtraSpaces
== 1)
3855 if (m_rSink
.getColumn() + nSize
3856 >= m_rSink
.getLineLengthLimit())
3857 m_rSink
<< INetMIMEOutputSink::endl
;
3862 case CODING_ENCODED
:
3864 const sal_Char
* pCharsetName
3865 = INetMIME::getCharsetName(m_ePrevMIMEEncoding
);
3866 while (m_nExtraSpaces
-- > 0)
3868 if (m_rSink
.getColumn()
3869 > m_rSink
.getLineLengthLimit() - 3)
3870 m_rSink
<< "?=" << INetMIMEOutputSink::endl
<< " =?"
3871 << pCharsetName
<< "?Q?";
3876 case CODING_ENCODED_TERMINATED
:
3877 if (m_rSink
.getColumn() + nSize
3878 > m_rSink
.getLineLengthLimit() - 1)
3879 m_rSink
<< INetMIMEOutputSink::endl
;
3883 m_rSink
.write(m_pBuffer
, m_pBufferEnd
);
3884 m_eCoding
= CODING_ENCODED_TERMINATED
;
3888 // If the text itself is too long to fit into a single line, make it
3889 // into multiple encoded words:
3893 if (m_nExtraSpaces
== 0)
3895 DBG_ASSERT(m_ePrevCoding
== CODING_NONE
3896 || m_pBuffer
== m_pBufferEnd
,
3897 "INetMIMEEncodedWordOutputSink::finish():"
3899 if (m_rSink
.getColumn() + (m_pBufferEnd
- m_pBuffer
)
3900 > m_rSink
.getLineLengthLimit())
3901 m_eCoding
= CODING_ENCODED
;
3905 OSL_ASSERT(m_pBufferEnd
>= m_pBuffer
);
3906 if (static_cast< std::size_t >(m_pBufferEnd
- m_pBuffer
)
3907 > m_rSink
.getLineLengthLimit() - 1)
3909 m_eCoding
= CODING_ENCODED
;
3915 if (m_nExtraSpaces
== 0)
3917 DBG_ASSERT(m_ePrevCoding
== CODING_NONE
,
3918 "INetMIMEEncodedWordOutputSink::finish():"
3920 if (m_rSink
.getColumn() + (m_pBufferEnd
- m_pBuffer
)
3922 > m_rSink
.getLineLengthLimit() - 2)
3923 m_eCoding
= CODING_ENCODED
;
3925 else if ((m_pBufferEnd
- m_pBuffer
) + m_nQuotedEscaped
3926 > m_rSink
.getLineLengthLimit() - 3)
3927 m_eCoding
= CODING_ENCODED
;
3937 switch (m_ePrevCoding
)
3940 if (m_rSink
.getColumn() + m_nExtraSpaces
3941 + (m_pBufferEnd
- m_pBuffer
)
3942 < m_rSink
.getLineLengthLimit())
3943 m_eCoding
= CODING_QUOTED
;
3948 case CODING_ENCODED
:
3955 for (; m_nExtraSpaces
> 1; --m_nExtraSpaces
)
3957 if (m_rSink
.getColumn() >= m_rSink
.getLineLengthLimit())
3958 m_rSink
<< INetMIMEOutputSink::endl
;
3961 if (m_nExtraSpaces
== 1)
3963 if (m_rSink
.getColumn() + (m_pBufferEnd
- m_pBuffer
)
3964 >= m_rSink
.getLineLengthLimit())
3965 m_rSink
<< INetMIMEOutputSink::endl
;
3968 m_rSink
.write(m_pBuffer
, m_pBufferEnd
);
3969 if (m_eCoding
== CODING_QUOTED
&& bWriteTrailer
)
3972 m_eCoding
= CODING_NONE
;
3978 bool bInsertLeadingQuote
= true;
3979 sal_uInt32 nSize
= (m_pBufferEnd
- m_pBuffer
)
3980 + m_nQuotedEscaped
+ 2;
3981 switch (m_ePrevCoding
)
3984 if (m_rSink
.getColumn() + m_nExtraSpaces
+ nSize
- 1
3985 < m_rSink
.getLineLengthLimit())
3987 bInsertLeadingQuote
= false;
3994 case CODING_ENCODED
:
4001 for (; m_nExtraSpaces
> 1; --m_nExtraSpaces
)
4003 if (m_rSink
.getColumn() >= m_rSink
.getLineLengthLimit())
4004 m_rSink
<< INetMIMEOutputSink::endl
;
4007 if (m_nExtraSpaces
== 1)
4009 if (m_rSink
.getColumn() + nSize
4010 >= m_rSink
.getLineLengthLimit())
4011 m_rSink
<< INetMIMEOutputSink::endl
;
4014 if (bInsertLeadingQuote
)
4016 for (const sal_Unicode
* p
= m_pBuffer
; p
!= m_pBufferEnd
;
4019 if (INetMIME::needsQuotedStringEscape(*p
))
4021 m_rSink
<< sal_Char(*p
);
4026 m_eCoding
= CODING_NONE
;
4031 case CODING_ENCODED
:
4033 rtl_TextEncoding eCharsetEncoding
4035 getPreferredEncoding(RTL_TEXTENCODING_UTF8
);
4036 rtl_TextEncoding eMIMEEncoding
4037 = INetMIME::translateToMIME(eCharsetEncoding
);
4039 // The non UTF-8 code will only work for stateless single byte
4040 // character encodings (see also below):
4041 sal_Char
* pTargetBuffer
= NULL
;
4042 sal_Size nTargetSize
= 0;
4044 if (eMIMEEncoding
== RTL_TEXTENCODING_UTF8
)
4047 for (sal_Unicode
const * p
= m_pBuffer
;
4051 = INetMIME::getUTF32Character(p
, m_pBufferEnd
);
4052 nSize
+= needsEncodedWordEscape(nUTF32
) ?
4053 3 * INetMIME::getUTF8OctetCount(nUTF32
) :
4055 // only US-ASCII characters (that are converted to
4056 // a single byte by UTF-8) need no encoded word
4062 rtl_UnicodeToTextConverter hConverter
4063 = rtl_createUnicodeToTextConverter(eCharsetEncoding
);
4064 rtl_UnicodeToTextContext hContext
4065 = rtl_createUnicodeToTextContext(hConverter
);
// The target buffer grows by about a third on each retry while the
// converter reports RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL.
4066 for (sal_Size nBufferSize
= m_pBufferEnd
- m_pBuffer
;;
4067 nBufferSize
+= nBufferSize
/ 3 + 1)
4069 pTargetBuffer
= new sal_Char
[nBufferSize
];
4071 sal_Size nSrcCvtBytes
;
4073 = rtl_convertUnicodeToText(
4074 hConverter
, hContext
, m_pBuffer
,
4075 m_pBufferEnd
- m_pBuffer
, pTargetBuffer
,
4077 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE
4078 | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE
,
4079 &nInfo
, &nSrcCvtBytes
);
4081 & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
4083 delete[] pTargetBuffer
;
4084 pTargetBuffer
= NULL
;
4085 rtl_resetUnicodeToTextContext(hConverter
, hContext
);
4087 rtl_destroyUnicodeToTextContext(hConverter
, hContext
);
4088 rtl_destroyUnicodeToTextConverter(hConverter
);
4090 nSize
= nTargetSize
;
4091 for (sal_Size k
= 0; k
< nTargetSize
; ++k
)
4092 if (needsEncodedWordEscape(sal_uChar(
4097 const sal_Char
* pCharsetName
4098 = INetMIME::getCharsetName(eMIMEEncoding
);
4099 sal_uInt32 nWrapperSize
= rtl_str_getLength(pCharsetName
) + 7;
4100 // '=?', '?Q?', '?='
4102 switch (m_ePrevCoding
)
4107 if (m_eInitialSpace
== SPACE_ENCODED
4108 && m_nExtraSpaces
== 0)
4110 nSize
+= nWrapperSize
;
4111 for (; m_nExtraSpaces
> 1; --m_nExtraSpaces
)
4113 if (m_rSink
.getColumn()
4114 >= m_rSink
.getLineLengthLimit())
4115 m_rSink
<< INetMIMEOutputSink::endl
;
4118 if (m_nExtraSpaces
== 1)
4120 if (m_rSink
.getColumn() + nSize
4121 >= m_rSink
.getLineLengthLimit())
4122 m_rSink
<< INetMIMEOutputSink::endl
;
4125 m_rSink
<< "=?" << pCharsetName
<< "?Q?";
4128 case CODING_ENCODED
:
4129 if (m_ePrevMIMEEncoding
!= eMIMEEncoding
4130 || m_rSink
.getColumn() + m_nExtraSpaces
+ nSize
4131 > m_rSink
.getLineLengthLimit() - 2)
4134 if (m_rSink
.getColumn() + nWrapperSize
4135 + m_nExtraSpaces
+ nSize
4136 > m_rSink
.getLineLengthLimit() - 1)
4137 m_rSink
<< INetMIMEOutputSink::endl
;
4138 m_rSink
<< " =?" << pCharsetName
<< "?Q?";
4140 while (m_nExtraSpaces
-- > 0)
4142 if (m_rSink
.getColumn()
4143 > m_rSink
.getLineLengthLimit() - 3)
4144 m_rSink
<< "?=" << INetMIMEOutputSink::endl
4145 << " =?" << pCharsetName
<< "?Q?";
4150 case CODING_ENCODED_TERMINATED
:
4151 if (m_rSink
.getColumn() + nWrapperSize
4152 + m_nExtraSpaces
+ nSize
4153 > m_rSink
.getLineLengthLimit() - 1)
4154 m_rSink
<< INetMIMEOutputSink::endl
;
4155 m_rSink
<< " =?" << pCharsetName
<< "?Q?";
4156 while (m_nExtraSpaces
-- > 0)
4158 if (m_rSink
.getColumn()
4159 > m_rSink
.getLineLengthLimit() - 3)
4160 m_rSink
<< "?=" << INetMIMEOutputSink::endl
4161 << " =?" << pCharsetName
<< "?Q?";
4167 // The non UTF-8 code will only work for stateless single byte
4168 // character encodings (see also above):
4169 if (eMIMEEncoding
== RTL_TEXTENCODING_UTF8
)
4171 bool bInitial
= true;
4172 for (sal_Unicode
const * p
= m_pBuffer
;
4176 = INetMIME::getUTF32Character(p
, m_pBufferEnd
);
4177 bool bEscape
= needsEncodedWordEscape(nUTF32
);
4180 3 * INetMIME::getUTF8OctetCount(nUTF32
) : 1;
4181 // only US-ASCII characters (that are converted to
4182 // a single byte by UTF-8) need no encoded word
4185 && m_rSink
.getColumn() + nWidth
+ 2
4186 > m_rSink
.getLineLengthLimit())
4187 m_rSink
<< "?=" << INetMIMEOutputSink::endl
4188 << " =?" << pCharsetName
<< "?Q?";
4193 "INetMIMEEncodedWordOutputSink::finish():"
4196 INetMIME::writeEscapeSequence(m_rSink
,
4198 else if (nUTF32
< 0x800)
4200 INetMIME::writeEscapeSequence(m_rSink
,
4203 INetMIME::writeEscapeSequence(m_rSink
,
4207 else if (nUTF32
< 0x10000)
4209 INetMIME::writeEscapeSequence(m_rSink
,
4212 INetMIME::writeEscapeSequence(m_rSink
,
4216 INetMIME::writeEscapeSequence(m_rSink
,
4222 INetMIME::writeEscapeSequence(m_rSink
,
4225 INetMIME::writeEscapeSequence(m_rSink
,
4229 INetMIME::writeEscapeSequence(m_rSink
,
4233 INetMIME::writeEscapeSequence(m_rSink
,
4239 m_rSink
<< sal_Char(nUTF32
);
4245 for (sal_Size k
= 0; k
< nTargetSize
; ++k
)
4247 sal_uInt32 nUCS4
= sal_uChar(pTargetBuffer
[k
]);
4248 bool bEscape
= needsEncodedWordEscape(nUCS4
);
4250 && m_rSink
.getColumn() + (bEscape
? 5 : 3)
4251 > m_rSink
.getLineLengthLimit())
4252 m_rSink
<< "?=" << INetMIMEOutputSink::endl
4253 << " =?" << pCharsetName
<< "?Q?";
4255 INetMIME::writeEscapeSequence(m_rSink
, nUCS4
);
4257 m_rSink
<< sal_Char(nUCS4
);
4259 delete[] pTargetBuffer
;
4265 m_eCoding
= CODING_ENCODED_TERMINATED
;
4268 m_ePrevMIMEEncoding
= eMIMEEncoding
;
4278 m_eInitialSpace
= SPACE_NO
;
4280 m_pEncodingList
->reset();
4281 m_pBufferEnd
= m_pBuffer
;
4282 m_ePrevCoding
= m_eCoding
;
4283 m_eCoding
= CODING_NONE
;
4284 m_nQuotedEscaped
= 0;
4285 m_eEncodedWordState
= STATE_INITIAL
;
4288 //============================================================================
// Destructor: releases the two resources this sink holds.
//  - m_pBuffer is handed to rtl_freeMemory(); operator << grows the same
//    buffer with rtl_reallocateMemory(), so the rtl allocator is the
//    matching deallocator.
//  - m_pEncodingList is released with a plain delete.
4289 INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink()
4291 rtl_freeMemory(m_pBuffer
);
4292 delete m_pEncodingList
;
4295 //============================================================================
// Feeds one character (nChar, a sal_uInt32 code point) into the sink.
//
// For a character that is buffered, the function
//  1. advances m_eEncodedWordState, a state machine that watches whether
//     the text collected so far already looks like an encoded word
//     (the "=?charset?encoding?text?=" form that finish() emits);
//  2. passes the character to m_pEncodingList->includes();
//  3. raises m_eCoding to the strongest coding required so far, using the
//     aMinimal lookup table for US-ASCII characters;
//  4. appends the character to the m_pBuffer array, growing the array in
//     steps of BUFFER_SIZE elements when it is full.
//
// Returns a reference to an INetMIMEEncodedWordOutputSink (presumably
// *this, to allow chaining -- TODO confirm).
4296 INetMIMEEncodedWordOutputSink
&
4297 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar
)
4301 if (m_pBufferEnd
!= m_pBuffer
)
4307 // Check for an already encoded word:
// Each state accepts only the character class that may come next in an
// encoded word; anything else moves the machine to STATE_BAD.
4308 switch (m_eEncodedWordState
)
4312 m_eEncodedWordState
= STATE_FIRST_EQUALS
;
4314 m_eEncodedWordState
= STATE_BAD
;
// NOTE(review): the accepting branch of this case assigns
// STATE_FIRST_EQUALS again, and no assignment in this function sets
// STATE_FIRST_QUESTION.  The STATE_FIRST_QUESTION case below therefore
// looks unreachable -- verify whether STATE_FIRST_QUESTION was intended.
4317 case STATE_FIRST_EQUALS
:
4319 m_eEncodedWordState
= STATE_FIRST_EQUALS
;
4321 m_eEncodedWordState
= STATE_BAD
;
// First character of the charset name: must be an encoded-word token
// character.
4324 case STATE_FIRST_QUESTION
:
4325 if (INetMIME::isEncodedWordTokenChar(nChar
))
4326 m_eEncodedWordState
= STATE_CHARSET
;
4328 m_eEncodedWordState
= STATE_BAD
;
4333 m_eEncodedWordState
= STATE_SECOND_QUESTION
;
4334 else if (!INetMIME::isEncodedWordTokenChar(nChar
))
4335 m_eEncodedWordState
= STATE_BAD
;
// Encoding designator: only 'B' or 'Q' (either case) is accepted.
4338 case STATE_SECOND_QUESTION
:
4339 if (nChar
== 'B' || nChar
== 'Q'
4340 || nChar
== 'b' || nChar
== 'q')
4341 m_eEncodedWordState
= STATE_ENCODING
;
4343 m_eEncodedWordState
= STATE_BAD
;
4346 case STATE_ENCODING
:
4348 m_eEncodedWordState
= STATE_THIRD_QUESTION
;
4350 m_eEncodedWordState
= STATE_BAD
;
// First character of the encoded text: any visible character except '?'.
4353 case STATE_THIRD_QUESTION
:
4354 if (INetMIME::isVisible(nChar
) && nChar
!= '?')
4355 m_eEncodedWordState
= STATE_ENCODED_TEXT
;
4357 m_eEncodedWordState
= STATE_BAD
;
4360 case STATE_ENCODED_TEXT
:
4362 m_eEncodedWordState
= STATE_FOURTH_QUESTION
;
4363 else if (!INetMIME::isVisible(nChar
))
4364 m_eEncodedWordState
= STATE_BAD
;
4367 case STATE_FOURTH_QUESTION
:
4369 m_eEncodedWordState
= STATE_SECOND_EQUALS
;
4371 m_eEncodedWordState
= STATE_BAD
;
// Any character following the closing "?=" invalidates the match.
4374 case STATE_SECOND_EQUALS
:
4375 m_eEncodedWordState
= STATE_BAD
;
// Report the character to the encoding list (presumably this narrows the
// set of charsets still able to represent the text -- TODO confirm).
4383 m_pEncodingList
->includes(nChar
);
// Bit flags stored in aMinimal: each bit names a (context, coding) pair
// that the character forces, as spelled out in the trailing comments.
4386 enum { TENQ
= 1, // CONTEXT_TEXT, CODING_ENCODED
4387 CENQ
= 2, // CONTEXT_COMMENT, CODING_ENCODED
4388 PQTD
= 4, // CONTEXT_PHRASE, CODING_QUOTED
4389 PENQ
= 8 }; // CONTEXT_PHRASE, CODING_ENCODED
// Flag table for the 128 US-ASCII code points; it is indexed by nChar
// below, and only after isUSASCII(nChar) has been checked.
4390 static const sal_Char aMinimal
[128]
4391 = { TENQ
| CENQ
| PENQ
, // 0x00
4392 TENQ
| CENQ
| PENQ
, // 0x01
4393 TENQ
| CENQ
| PENQ
, // 0x02
4394 TENQ
| CENQ
| PENQ
, // 0x03
4395 TENQ
| CENQ
| PENQ
, // 0x04
4396 TENQ
| CENQ
| PENQ
, // 0x05
4397 TENQ
| CENQ
| PENQ
, // 0x06
4398 TENQ
| CENQ
| PENQ
, // 0x07
4399 TENQ
| CENQ
| PENQ
, // 0x08
4400 TENQ
| CENQ
| PENQ
, // 0x09
4401 TENQ
| CENQ
| PENQ
, // 0x0A
4402 TENQ
| CENQ
| PENQ
, // 0x0B
4403 TENQ
| CENQ
| PENQ
, // 0x0C
4404 TENQ
| CENQ
| PENQ
, // 0x0D
4405 TENQ
| CENQ
| PENQ
, // 0x0E
4406 TENQ
| CENQ
| PENQ
, // 0x0F
4407 TENQ
| CENQ
| PENQ
, // 0x10
4408 TENQ
| CENQ
| PENQ
, // 0x11
4409 TENQ
| CENQ
| PENQ
, // 0x12
4410 TENQ
| CENQ
| PENQ
, // 0x13
4411 TENQ
| CENQ
| PENQ
, // 0x14
4412 TENQ
| CENQ
| PENQ
, // 0x15
4413 TENQ
| CENQ
| PENQ
, // 0x16
4414 TENQ
| CENQ
| PENQ
, // 0x17
4415 TENQ
| CENQ
| PENQ
, // 0x18
4416 TENQ
| CENQ
| PENQ
, // 0x19
4417 TENQ
| CENQ
| PENQ
, // 0x1A
4418 TENQ
| CENQ
| PENQ
, // 0x1B
4419 TENQ
| CENQ
| PENQ
, // 0x1C
4420 TENQ
| CENQ
| PENQ
, // 0x1D
4421 TENQ
| CENQ
| PENQ
, // 0x1E
4422 TENQ
| CENQ
| PENQ
, // 0x1F
4431 CENQ
| PQTD
, // '('
4432 CENQ
| PQTD
, // ')'
4483 CENQ
| PQTD
, // '\'
4518 TENQ
| CENQ
| PENQ
}; // DEL
// Coding demanded by this single character:
//  - anything outside US-ASCII always needs CODING_ENCODED;
//  - in CONTEXT_PHRASE the table entry is shifted right by two, which
//    drops the TENQ/CENQ bits and leaves PQTD as 1 and PENQ as 2
//    (presumably the numeric values of CODING_QUOTED and CODING_ENCODED
//    -- TODO confirm against the Coding enum);
//  - otherwise the entry is masked with m_eContext (presumably
//    CONTEXT_TEXT == TENQ and CONTEXT_COMMENT == CENQ -- TODO confirm).
4519 Coding eNewCoding
= !INetMIME::isUSASCII(nChar
) ? CODING_ENCODED
:
4520 m_eContext
== CONTEXT_PHRASE
?
4521 Coding(aMinimal
[nChar
] >> 2) :
4522 aMinimal
[nChar
] & m_eContext
? CODING_ENCODED
:
// The coding for the buffered text is only ever raised here, never
// lowered.
4524 if (eNewCoding
> m_eCoding
)
4525 m_eCoding
= eNewCoding
;
4526 if (m_eCoding
== CODING_QUOTED
4527 && INetMIME::needsQuotedStringEscape(nChar
))
4530 // Append to buffer:
// When the buffer is full, it is enlarged by BUFFER_SIZE elements and
// m_pBufferEnd is re-based on the (possibly moved) allocation before
// m_nBufferSize is updated.
// NOTE(review): the result of rtl_reallocateMemory() is not visibly
// checked for failure before it is used -- confirm.
4531 if (sal_uInt32(m_pBufferEnd
- m_pBuffer
) == m_nBufferSize
)
4534 = static_cast< sal_Unicode
* >(
4535 rtl_reallocateMemory(m_pBuffer
,
4536 (m_nBufferSize
+ BUFFER_SIZE
)
4537 * sizeof (sal_Unicode
)));
4538 m_pBufferEnd
= m_pBuffer
+ m_nBufferSize
;
4539 m_nBufferSize
+= BUFFER_SIZE
;
// NOTE(review): nChar (sal_uInt32) is converted to sal_Unicode here;
// confirm callers never pass a value that does not fit in sal_Unicode.
4541 *m_pBufferEnd
++ = sal_Unicode(nChar
);
4546 //============================================================================
4548 // INetContentTypeParameterList
4550 //============================================================================
// Empties the list and frees the parameters it holds: the entry returned
// by Remove(Count() - 1) (the last index) is cast back to
// INetContentTypeParameter * and deleted.
// NOTE(review): presumably this runs in a loop guarded by Count() > 0;
// confirm, because Count() - 1 on an empty list would not be a valid index.
4552 void INetContentTypeParameterList::Clear()
4555 delete static_cast< INetContentTypeParameter
* >(Remove(Count() - 1));
4558 //============================================================================
4559 const INetContentTypeParameter
*
4560 INetContentTypeParameterList::find(const ByteString
& rAttribute
) const
4562 for (ULONG i
= 0; i
< Count(); ++i
)
4564 const INetContentTypeParameter
* pParameter
= GetObject(i
);
4565 if (pParameter
->m_sAttribute
.EqualsIgnoreCaseAscii(rAttribute
))