/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
23 #include <forward_list>
26 #include <osl/diagnose.h>
27 #include <rtl/ustring.hxx>
28 #include <rtl/strbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
35 rtl_TextEncoding
getCharsetEncoding(const sal_Char
* pBegin
,
36 const sal_Char
* pEnd
);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
45 inline bool isWhiteSpace(sal_uInt32 nChar
)
47 return nChar
== '\t' || nChar
== ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 inline int getBase64Weight(sal_uInt32 nChar
)
61 return rtl::isAsciiUpperCase(nChar
) ? int(nChar
- 'A') :
62 rtl::isAsciiLowerCase(nChar
) ? int(nChar
- 'a' + 26) :
63 rtl::isAsciiDigit(nChar
) ? int(nChar
- '0' + 52) :
66 nChar
== '=' ? -1 : -2;
69 inline bool startsWithLineFolding(const sal_Unicode
* pBegin
,
70 const sal_Unicode
* pEnd
)
72 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd
- pBegin
>= 3 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A
76 && isWhiteSpace(pBegin
[2]); // CR, LF
// Translate a text encoding: in the visible return statement
// RTL_TEXTENCODING_ISO_8859_1 is replaced by RTL_TEXTENCODING_MS_1252 and any
// other encoding is returned unchanged.
// NOTE(review): the parameter list and the lines surrounding the return
// statement are not visible in this view (there may be conditional
// compilation around it) -- confirm against the complete file.
79 inline rtl_TextEncoding
translateFromMIME(rtl_TextEncoding
83 return eEncoding
== RTL_TEXTENCODING_ISO_8859_1
?
84 RTL_TEXTENCODING_MS_1252
: eEncoding
;
90 inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding
)
92 return rtl_isOctetTextEncoding(eEncoding
);
// Convert the bytes in [pBegin, pEnd) from eEncoding to UTF-16 with the rtl
// text-to-Unicode converter. The result is written into a buffer allocated
// with new[]; the number of characters produced is stored in rSize.
// Undefined, multi-byte-undefined and invalid input are flagged as errors.
// NOTE(review): the remaining parameters, the declarations of rSize/nInfo,
// the early-out for RTL_TEXTENCODING_DONTKNOW and the final return are not
// visible in this view -- presumably the caller owns the returned buffer and
// must delete[] it; confirm against the complete file.
95 sal_Unicode
* convertToUnicode(const sal_Char
* pBegin
,
96 const sal_Char
* pEnd
,
97 rtl_TextEncoding eEncoding
,
100 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding
);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter
);
106 sal_Unicode
* pBuffer
;
// Start with one output character per input byte; every further attempt
// uses a buffer enlarged by a third (plus one).
108 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
109 nBufferSize
+= nBufferSize
/ 3 + 1)
111 pBuffer
= new sal_Unicode
[nBufferSize
];
112 sal_Size nSrcCvtBytes
;
113 rSize
= rtl_convertTextToUnicode(
114 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
119 &nInfo
, &nSrcCvtBytes
);
// Anything other than "destination buffer too small" presumably ends the
// retry loop; otherwise the context is reset before trying again.
120 if (nInfo
!= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
)
123 rtl_resetTextToUnicodeContext(hConverter
, hContext
);
// Release the converter resources once the conversion loop is done.
125 rtl_destroyTextToUnicodeContext(hConverter
, hContext
);
126 rtl_destroyTextToUnicodeConverter(hConverter
);
// Convert the UTF-16 characters in [pBegin, pEnd) to eEncoding with the rtl
// Unicode-to-text converter. The result is written into a buffer allocated
// with new[]; the number of bytes produced is stored in rSize.
// NOTE(review): the remaining parameters, the declarations of
// pBuffer/rSize/nInfo, the early-out for RTL_TEXTENCODING_DONTKNOW and the
// final return are not visible in this view -- presumably the caller owns the
// returned buffer and must delete[] it; confirm against the complete file.
135 sal_Char
* convertFromUnicode(const sal_Unicode
* pBegin
,
136 const sal_Unicode
* pEnd
,
137 rtl_TextEncoding eEncoding
,
140 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
142 rtl_UnicodeToTextConverter hConverter
143 = rtl_createUnicodeToTextConverter(eEncoding
);
144 rtl_UnicodeToTextContext hContext
145 = rtl_createUnicodeToTextContext(hConverter
);
// Start with one output byte per input character; every further attempt
// uses a buffer enlarged by a third (plus one).
148 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
149 nBufferSize
+= nBufferSize
/ 3 + 1)
151 pBuffer
= new sal_Char
[nBufferSize
];
152 sal_Size nSrcCvtBytes
;
153 rSize
= rtl_convertUnicodeToText(
154 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
,
156 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
157 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
158 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
159 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
,
160 &nInfo
, &nSrcCvtBytes
);
// Anything other than "destination buffer too small" presumably ends the
// retry loop; otherwise the context is reset before trying again.
161 if (nInfo
!= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
)
164 rtl_resetUnicodeToTextContext(hConverter
, hContext
);
// Release the converter resources once the conversion loop is done.
166 rtl_destroyUnicodeToTextContext(hConverter
, hContext
);
167 rtl_destroyUnicodeToTextConverter(hConverter
);
176 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
178 @param pBuffer Points to a buffer, must not be null.
180 @param nUTF32 An UTF-32 character, must be in the range 0..0x10FFFF.
182 @return A pointer past the UTF-16 characters put into the buffer
183 (i.e., pBuffer + 1 or pBuffer + 2).
185 inline sal_Unicode
* putUTF32Character(sal_Unicode
* pBuffer
,
188 DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32
), "putUTF32Character(): Bad char");
189 if (nUTF32
< 0x10000)
190 *pBuffer
++ = sal_Unicode(nUTF32
);
194 *pBuffer
++ = sal_Unicode(0xD800 | (nUTF32
>> 10));
195 *pBuffer
++ = sal_Unicode(0xDC00 | (nUTF32
& 0x3FF));
200 void writeUTF8(OStringBuffer
& rSink
, sal_uInt32 nChar
)
202 // See RFC 2279 for a discussion of UTF-8.
203 DBG_ASSERT(nChar
< 0x80000000, "writeUTF8(): Bad char");
206 rSink
.append(sal_Char(nChar
));
207 else if (nChar
< 0x800)
208 rSink
.append(sal_Char(nChar
>> 6 | 0xC0))
209 .append(sal_Char((nChar
& 0x3F) | 0x80));
210 else if (nChar
< 0x10000)
211 rSink
.append(sal_Char(nChar
>> 12 | 0xE0))
212 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
213 .append(sal_Char((nChar
& 0x3F) | 0x80));
214 else if (nChar
< 0x200000)
215 rSink
.append(sal_Char(nChar
>> 18 | 0xF0))
216 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
217 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
218 .append(sal_Char((nChar
& 0x3F) | 0x80));
219 else if (nChar
< 0x4000000)
220 rSink
.append(sal_Char(nChar
>> 24 | 0xF8))
221 .append(sal_Char((nChar
>> 18 & 0x3F) | 0x80))
222 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
223 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
224 .append(sal_Char((nChar
& 0x3F) | 0x80));
226 rSink
.append(sal_Char(nChar
>> 30 | 0xFC))
227 .append(sal_Char((nChar
>> 24 & 0x3F) | 0x80))
228 .append(sal_Char((nChar
>> 18 & 0x3F) | 0x80))
229 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
230 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
231 .append(sal_Char((nChar
& 0x3F) | 0x80));
// Decode one multi-byte UTF-8 sequence starting at rBegin and translate the
// resulting character into eEncoding, storing it in rCharacter.
// The lead byte selects the sequence length: the payload bits kept from it
// are 0x1F, 0xF, 7, 3 or 1 for lead bytes below 0xE0, 0xF0, 0xF8, 0xFC and
// above, respectively. Each continuation byte (10xxxxxx) adds six bits.
// NOTE(review): the declarations of nCount, nMin, nUCS4 and nSize, all
// return statements, the advancing of rBegin and the release of pBuffer are
// not visible in this view -- confirm against the complete file.
234 bool translateUTF8Char(const sal_Char
*& rBegin
,
235 const sal_Char
* pEnd
,
236 rtl_TextEncoding eEncoding
,
237 sal_uInt32
& rCharacter
)
// Only non-empty input whose first byte is in 0x80..0xFD is considered.
239 if (rBegin
== pEnd
|| static_cast< unsigned char >(*rBegin
) < 0x80
240 || static_cast< unsigned char >(*rBegin
) >= 0xFE)
246 const sal_Char
* p
= rBegin
;
247 if (static_cast< unsigned char >(*p
) < 0xE0)
251 nUCS4
= static_cast< unsigned char >(*p
) & 0x1F;
253 else if (static_cast< unsigned char >(*p
) < 0xF0)
257 nUCS4
= static_cast< unsigned char >(*p
) & 0xF;
259 else if (static_cast< unsigned char >(*p
) < 0xF8)
263 nUCS4
= static_cast< unsigned char >(*p
) & 7;
265 else if (static_cast< unsigned char >(*p
) < 0xFC)
269 nUCS4
= static_cast< unsigned char >(*p
) & 3;
275 nUCS4
= static_cast< unsigned char >(*p
) & 1;
// Fold the continuation bytes into the code point, six bits at a time.
279 for (; nCount
-- > 0; ++p
)
280 if ((static_cast< unsigned char >(*p
) & 0xC0) == 0x80)
281 nUCS4
= (nUCS4
<< 6) | (static_cast< unsigned char >(*p
) & 0x3F);
// Check for values outside the Unicode range and for values below nMin
// (presumably the smallest value that needs this sequence length, i.e. an
// overlong encoding check).
285 if (!rtl::isUnicodeCodePoint(nUCS4
) || nUCS4
< nMin
)
288 if (eEncoding
>= RTL_TEXTENCODING_UCS4
)
// Otherwise go through UTF-16 and the Unicode-to-text converter.
292 sal_Unicode aUTF16
[2];
293 const sal_Unicode
* pUTF16End
= putUTF32Character(aUTF16
, nUCS4
);
295 sal_Char
* pBuffer
= convertFromUnicode(aUTF16
, pUTF16End
, eEncoding
,
299 DBG_ASSERT(nSize
== 1,
300 "translateUTF8Char(): Bad conversion");
// NOTE(review): *pBuffer is a sal_Char; if that type is signed, bytes
// >= 0x80 would be sign-extended when stored into rCharacter -- confirm.
301 rCharacter
= *pBuffer
;
308 void appendISO88591(OUString
& rText
, sal_Char
const * pBegin
,
309 sal_Char
const * pEnd
);
313 OString m_aAttribute
;
317 sal_uInt32 m_nSection
;
320 bool operator<(const Parameter
& rhs
) const // is used by std::list<Parameter>::sort
322 int nComp
= m_aAttribute
.compareTo(rhs
.m_aAttribute
);
324 (nComp
== 0 && m_nSection
< rhs
.m_nSection
);
326 struct IsSameSection
// is used to check container for duplicates with std::any_of
328 const OString
& rAttribute
;
329 const sal_uInt32 nSection
;
330 bool operator()(const Parameter
& r
) const
331 { return r
.m_aAttribute
== rAttribute
&& r
.m_nSection
== nSection
; }
335 typedef std::forward_list
<Parameter
> ParameterList
;
337 bool parseParameters(ParameterList
const & rInput
,
338 INetContentTypeParameterList
* pOutput
);
342 void appendISO88591(OUString
& rText
, sal_Char
const * pBegin
,
343 sal_Char
const * pEnd
)
345 sal_Int32 nLength
= pEnd
- pBegin
;
346 std::unique_ptr
<sal_Unicode
[]> pBuffer(new sal_Unicode
[nLength
]);
347 for (sal_Unicode
* p
= pBuffer
.get(); pBegin
!= pEnd
;)
348 *p
++ = static_cast<unsigned char>(*pBegin
++);
349 rText
+= OUString(pBuffer
.get(), nLength
);
// Turn the scanned parameter sections in rInput into entries of *pOutput.
//
// The first loop walks the list with a trailing iterator and looks for
// sections with a number > 0 that are not directly preceded by the section
// numbered one less of the same attribute (what happens then is not visible
// here -- presumably the input is rejected).
//
// The second loop handles one attribute per iteration: `it` marks its first
// section and `itNext` is advanced over all following sections until the next
// section 0 (or the end) is reached. The section values are concatenated into
// aValue and finally inserted into *pOutput together with the charset, the
// language and a flag telling whether the encoding was usable.
//
// NOTE(review): declarations of aValue/nSize, the branch conditions that pick
// between the converting path and the byte-wise path, the do/while heads, the
// release of pUnicode and the return statements are not visible in this view
// -- confirm against the complete file.
354 bool parseParameters(ParameterList
const & rInput
,
355 INetContentTypeParameterList
* pOutput
)
360 for (auto it
= rInput
.begin(), itPrev
= rInput
.end(); it
!= rInput
.end() ; itPrev
= it
++)
362 if (it
->m_nSection
> 0
363 && (itPrev
== rInput
.end()
364 || itPrev
->m_nSection
!= it
->m_nSection
- 1
365 || itPrev
->m_aAttribute
!= it
->m_aAttribute
))
370 for (auto it
= rInput
.begin(), itNext
= rInput
.begin(); it
!= rInput
.end(); it
= itNext
)
// A non-empty charset on the first section is looked up in the charset table.
372 bool bCharset
= !it
->m_aCharset
.isEmpty();
373 rtl_TextEncoding eEncoding
= RTL_TEXTENCODING_DONTKNOW
;
376 = getCharsetEncoding(it
->m_aCharset
.getStr(),
377 it
->m_aCharset
.getStr()
378 + it
->m_aCharset
.getLength());
380 bool bBadEncoding
= false;
// Converting path: each section value is converted to Unicode.
385 sal_Unicode
* pUnicode
386 = convertToUnicode(itNext
->m_aValue
.getStr(),
387 itNext
->m_aValue
.getStr()
388 + itNext
->m_aValue
.getLength(),
389 bCharset
&& it
->m_bExtended
?
391 RTL_TEXTENCODING_UTF8
,
// If that failed and no explicit charset applies, retry as ISO-8859-1.
393 if (!pUnicode
&& !(bCharset
&& it
->m_bExtended
))
394 pUnicode
= convertToUnicode(
395 itNext
->m_aValue
.getStr(),
396 itNext
->m_aValue
.getStr()
397 + itNext
->m_aValue
.getLength(),
398 RTL_TEXTENCODING_ISO_8859_1
, nSize
);
404 aValue
+= OUString(pUnicode
, static_cast<sal_Int32
>(nSize
));
408 while (itNext
!= rInput
.end() && itNext
->m_nSection
!= 0);
// Byte-wise path: the value bytes are appended one by one; bytes of extended
// sections are additionally or'ed with 0xF800.
416 if (itNext
->m_bExtended
)
418 for (sal_Int32 i
= 0; i
< itNext
->m_aValue
.getLength(); ++i
)
419 aValue
+= OUStringLiteral1(
421 static_cast<unsigned char>(itNext
->m_aValue
[i
]))
422 | 0xF800); // map to unicode corporate use sub area
426 for (sal_Int32 i
= 0; i
< itNext
->m_aValue
.getLength(); ++i
)
427 aValue
+= OUStringLiteral1( static_cast<unsigned char>(itNext
->m_aValue
[i
]) );
431 while (itNext
!= rInput
.end() && itNext
->m_nSection
!= 0);
// Store the assembled parameter; a failed insert is only logged.
433 auto const ret
= pOutput
->insert(
435 {it
->m_aCharset
, it
->m_aLanguage
, aValue
, !bBadEncoding
}});
436 SAL_INFO_IF(!ret
.second
, "tools",
437 "INetMIME: dropping duplicate parameter: " << it
->m_aAttribute
);
442 /** Check whether some character is valid within an RFC 2045 <token>.
444 @param nChar Some UCS-4 character.
446 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
447 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
448 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
450 bool isTokenChar(sal_uInt32 nChar
)
452 static const bool aMap
[128]
453 = { false, false, false, false, false, false, false, false,
454 false, false, false, false, false, false, false, false,
455 false, false, false, false, false, false, false, false,
456 false, false, false, false, false, false, false, false,
457 false, true, false, true, true, true, true, true, // !"#$%&'
458 false, false, true, true, false, true, true, false, //()*+,-./
459 true, true, true, true, true, true, true, true, //01234567
460 true, true, false, false, false, false, false, false, //89:;<=>?
461 false, true, true, true, true, true, true, true, //@ABCDEFG
462 true, true, true, true, true, true, true, true, //HIJKLMNO
463 true, true, true, true, true, true, true, true, //PQRSTUVW
464 true, true, true, false, false, false, true, true, //XYZ[\]^_
465 true, true, true, true, true, true, true, true, //`abcdefg
466 true, true, true, true, true, true, true, true, //hijklmno
467 true, true, true, true, true, true, true, true, //pqrstuvw
468 true, true, true, true, true, true, true, false //xyz{|}~
470 return rtl::isAscii(nChar
) && aMap
[nChar
];
// Skip a comment at the start of [pBegin, pEnd). Scanning only takes place
// when the sequence is non-empty and starts with '('; a nesting level counter
// is kept while walking the characters.
// NOTE(review): the loop body and the return statements are not visible in
// this view -- presumably the position after the matching ')' is returned on
// success and pBegin otherwise; confirm against the complete file.
473 const sal_Unicode
* skipComment(const sal_Unicode
* pBegin
,
474 const sal_Unicode
* pEnd
)
476 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
477 "skipComment(): Bad sequence");
479 if (pBegin
!= pEnd
&& *pBegin
== '(')
481 sal_uInt32 nLevel
= 0;
482 for (const sal_Unicode
* p
= pBegin
; p
!= pEnd
;)
// Advance pBegin over leading material in [pBegin, pEnd); the visible code
// tests for line folding (startsWithLineFolding) and for comments
// (skipComment) while pBegin has not reached pEnd.
// NOTE(review): the parameter list, the per-character dispatch and the return
// statement are not visible in this view -- presumably plain tabs/spaces are
// skipped as well and the first position that is none of these is returned;
// confirm against the complete file.
503 const sal_Unicode
* skipLinearWhiteSpaceComment(const sal_Unicode
*
508 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
509 "skipLinearWhiteSpaceComment(): Bad sequence");
511 while (pBegin
!= pEnd
)
520 if (startsWithLineFolding(pBegin
, pEnd
))
528 const sal_Unicode
* p
= skipComment(pBegin
, pEnd
);
// Skip a quoted string at the start of [pBegin, pEnd). Scanning only takes
// place when the sequence is non-empty and starts with '"'; the characters
// after the opening quote are then walked one by one.
// NOTE(review): the per-character dispatch and the return statements are not
// visible in this view -- presumably the position after the closing quote is
// returned on success and pBegin otherwise; confirm against the complete file.
541 const sal_Unicode
* skipQuotedString(const sal_Unicode
* pBegin
,
542 const sal_Unicode
* pEnd
)
544 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
545 "skipQuotedString(): Bad sequence");
547 if (pBegin
!= pEnd
&& *pBegin
== '"')
548 for (const sal_Unicode
* p
= pBegin
+ 1; p
!= pEnd
;)
// Checks that at least two characters remain and that they are LF followed
// by a tab or space (presumably reached after a CR was seen).
552 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
553 || !isWhiteSpace(*p
++))
// Scan a sequence of ';'-introduced parameters in [pBegin, pEnd).
//
// Per parameter the visible code: skips white space/comments and expects ';';
// reads the attribute name as a run of token characters up to a '*'
// (lower-casing it if it contained upper-case letters); optionally reads a
// '*' followed by a section number; checks via Parameter::IsSameSection
// whether the same attribute/section was already collected; optionally reads
// a further '*' (the "extended" marker); expects '='; and then reads the
// value. Three value forms are visible: an extended value with optional
// charset and language parts delimited by "'" and %XX escapes, a quoted
// string, and a plain token. Each result is pushed to the front of aList,
// which is finally handed to parseParameters.
//
// Returns pParameterBegin (the position reached) if parseParameters succeeds,
// pBegin otherwise.
//
// NOTE(review): the last parameter's name, the declarations of aList,
// aCharset, aLanguage, aValue, aSink, nLetters and q, most branch heads and
// all break/return statements inside the loop are not visible in this view --
// confirm details against the complete file.
568 sal_Unicode
const * scanParameters(sal_Unicode
const * pBegin
,
569 sal_Unicode
const * pEnd
,
570 INetContentTypeParameterList
*
574 sal_Unicode
const * pParameterBegin
= pBegin
;
575 for (sal_Unicode
const * p
= pParameterBegin
;;)
577 pParameterBegin
= skipLinearWhiteSpaceComment(p
, pEnd
);
578 if (pParameterBegin
== pEnd
|| *pParameterBegin
!= ';')
580 p
= pParameterBegin
+ 1;
// --- attribute name ---
582 sal_Unicode
const * pAttributeBegin
583 = skipLinearWhiteSpaceComment(p
, pEnd
);
585 bool bDowncaseAttribute
= false;
586 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '*')
588 bDowncaseAttribute
= bDowncaseAttribute
|| rtl::isAsciiUpperCase(*p
);
591 if (p
== pAttributeBegin
)
593 OString aAttribute
= OString(
594 pAttributeBegin
, p
- pAttributeBegin
,
595 RTL_TEXTENCODING_ASCII_US
);
596 if (bDowncaseAttribute
)
597 aAttribute
= aAttribute
.toAsciiLowerCase();
// --- optional "*<section number>" ---
599 sal_uInt32 nSection
= 0;
600 if (p
!= pEnd
&& *p
== '*')
603 if (p
!= pEnd
&& rtl::isAsciiDigit(*p
)
604 && !INetMIME::scanUnsigned(p
, pEnd
, false, nSection
))
// Has this attribute/section combination been collected already?
608 bool bPresent
= std::any_of(aList
.begin(), aList
.end(),
609 Parameter::IsSameSection
{aAttribute
, nSection
});
// --- optional "*" marking an extended value, then "=" ---
613 bool bExtended
= false;
614 if (p
!= pEnd
&& *p
== '*')
620 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
622 if (p
== pEnd
|| *p
!= '=')
625 p
= skipLinearWhiteSpaceComment(p
+ 1, pEnd
);
// --- extended value: charset part, terminated by "'" ---
634 sal_Unicode
const * pCharsetBegin
= p
;
635 bool bDowncaseCharset
= false;
636 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '\'')
638 bDowncaseCharset
= bDowncaseCharset
|| rtl::isAsciiUpperCase(*p
);
641 if (p
== pCharsetBegin
)
648 RTL_TEXTENCODING_ASCII_US
);
649 if (bDowncaseCharset
)
650 aCharset
= aCharset
.toAsciiLowerCase();
653 if (p
== pEnd
|| *p
!= '\'')
// --- extended value: language part, terminated by "'" ---
657 sal_Unicode
const * pLanguageBegin
= p
;
658 bool bDowncaseLanguage
= false;
660 for (; p
!= pEnd
; ++p
)
661 if (rtl::isAsciiAlpha(*p
))
665 bDowncaseLanguage
= bDowncaseLanguage
666 || rtl::isAsciiUpperCase(*p
);
// A run of letters must be 1 to 8 characters long.
676 if (nLetters
== 0 || nLetters
> 8)
683 RTL_TEXTENCODING_ASCII_US
);
684 if (bDowncaseLanguage
)
685 aLanguage
= aLanguage
.toAsciiLowerCase();
688 if (p
== pEnd
|| *p
!= '\'')
// --- extended value: the value characters themselves ---
698 sal_uInt32 nChar
= INetMIME::getUTF32Character(q
, pEnd
);
699 if (rtl::isAscii(nChar
) && !isTokenChar(nChar
))
// "%XY" with two hex digits is decoded into a single byte.
702 if (nChar
== '%' && p
+ 1 < pEnd
)
704 int nWeight1
= INetMIME::getHexWeight(p
[0]);
705 int nWeight2
= INetMIME::getHexWeight(p
[1]);
706 if (nWeight1
>= 0 && nWeight2
>= 0)
708 aSink
.append(sal_Char(nWeight1
<< 4 | nWeight2
));
713 writeUTF8(aSink
, nChar
);
715 aValue
= aSink
.makeStringAndClear();
718 while (p
!= pEnd
&& (isTokenChar(*p
) || !rtl::isAscii(*p
)))
// --- quoted-string value ---
721 else if (p
!= pEnd
&& *p
== '"')
725 bool bInvalid
= false;
733 sal_uInt32 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
736 else if (nChar
== 0x0D) // CR
738 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
739 || !isWhiteSpace(*p
))
744 nChar
= static_cast<unsigned char>(*p
++);
// A backslash makes the following character be read as-is.
746 else if (nChar
== '\\')
753 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
755 writeUTF8(aSink
, nChar
);
759 aValue
= aSink
.makeStringAndClear();
763 sal_Unicode
const * pStringEnd
= skipQuotedString(p
, pEnd
);
// --- plain token value (non-ASCII characters are accepted too) ---
770 sal_Unicode
const * pTokenBegin
= p
;
771 while (p
!= pEnd
&& (isTokenChar(*p
) || !rtl::isAscii(*p
)))
773 if (p
== pTokenBegin
)
777 pTokenBegin
, p
- pTokenBegin
,
778 RTL_TEXTENCODING_UTF8
);
// Collect the scanned section.
780 aList
.emplace_front(Parameter
{aAttribute
, aCharset
, aLanguage
, aValue
, nSection
, bExtended
});
783 return parseParameters(aList
, pParameters
) ? pParameterBegin
: pBegin
;
786 bool equalIgnoreCase(const sal_Char
* pBegin1
,
787 const sal_Char
* pEnd1
,
788 const sal_Char
* pString2
)
790 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
791 "equalIgnoreCase(): Bad sequences");
793 while (*pString2
!= 0)
795 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1
++))
796 != rtl::toAsciiUpperCase(
797 static_cast<unsigned char>(*pString2
++))))
799 return pBegin1
== pEnd1
;
804 sal_Char
const * m_aName
;
805 rtl_TextEncoding m_eEncoding
;
808 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
809 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
811 static EncodingEntry
const aEncodingMap
[]
812 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US
},
813 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US
},
814 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US
},
815 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US
},
816 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US
},
817 { "ASCII", RTL_TEXTENCODING_ASCII_US
},
818 { "ISO646-US", RTL_TEXTENCODING_ASCII_US
},
819 { "US", RTL_TEXTENCODING_ASCII_US
},
820 { "IBM367", RTL_TEXTENCODING_ASCII_US
},
821 { "CP367", RTL_TEXTENCODING_ASCII_US
},
822 { "CSASCII", RTL_TEXTENCODING_ASCII_US
},
823 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1
},
824 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1
},
825 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1
},
826 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1
},
827 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1
},
828 { "L1", RTL_TEXTENCODING_ISO_8859_1
},
829 { "IBM819", RTL_TEXTENCODING_ISO_8859_1
},
830 { "CP819", RTL_TEXTENCODING_ISO_8859_1
},
831 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1
},
832 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2
},
833 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2
},
834 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2
},
835 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2
},
836 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2
},
837 { "L2", RTL_TEXTENCODING_ISO_8859_2
},
838 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2
},
839 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3
},
840 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3
},
841 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3
},
842 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3
},
843 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3
},
844 { "L3", RTL_TEXTENCODING_ISO_8859_3
},
845 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3
},
846 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4
},
847 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4
},
848 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4
},
849 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4
},
850 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4
},
851 { "L4", RTL_TEXTENCODING_ISO_8859_4
},
852 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4
},
853 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5
},
854 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5
},
855 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5
},
856 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5
},
857 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
858 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
859 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6
},
860 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6
},
861 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6
},
862 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6
},
863 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6
},
864 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6
},
865 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6
},
866 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6
},
867 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7
},
868 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7
},
869 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7
},
870 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7
},
871 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7
},
872 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7
},
873 { "GREEK", RTL_TEXTENCODING_ISO_8859_7
},
874 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7
},
875 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7
},
876 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8
},
877 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8
},
878 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8
},
879 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8
},
880 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8
},
881 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8
},
882 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9
},
883 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9
},
884 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9
},
885 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9
},
886 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9
},
887 { "L5", RTL_TEXTENCODING_ISO_8859_9
},
888 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9
},
889 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14
}, // RFC 2047
890 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15
},
891 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15
}, // RFC 2047
892 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
893 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN
},
894 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
895 { "IBM437", RTL_TEXTENCODING_IBM_437
},
896 { "CP437", RTL_TEXTENCODING_IBM_437
},
897 { "437", RTL_TEXTENCODING_IBM_437
},
898 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437
},
899 { "IBM850", RTL_TEXTENCODING_IBM_850
},
900 { "CP850", RTL_TEXTENCODING_IBM_850
},
901 { "850", RTL_TEXTENCODING_IBM_850
},
902 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850
},
903 { "IBM860", RTL_TEXTENCODING_IBM_860
},
904 { "CP860", RTL_TEXTENCODING_IBM_860
},
905 { "860", RTL_TEXTENCODING_IBM_860
},
906 { "CSIBM860", RTL_TEXTENCODING_IBM_860
},
907 { "IBM861", RTL_TEXTENCODING_IBM_861
},
908 { "CP861", RTL_TEXTENCODING_IBM_861
},
909 { "861", RTL_TEXTENCODING_IBM_861
},
910 { "CP-IS", RTL_TEXTENCODING_IBM_861
},
911 { "CSIBM861", RTL_TEXTENCODING_IBM_861
},
912 { "IBM863", RTL_TEXTENCODING_IBM_863
},
913 { "CP863", RTL_TEXTENCODING_IBM_863
},
914 { "863", RTL_TEXTENCODING_IBM_863
},
915 { "CSIBM863", RTL_TEXTENCODING_IBM_863
},
916 { "IBM865", RTL_TEXTENCODING_IBM_865
},
917 { "CP865", RTL_TEXTENCODING_IBM_865
},
918 { "865", RTL_TEXTENCODING_IBM_865
},
919 { "CSIBM865", RTL_TEXTENCODING_IBM_865
},
920 { "IBM775", RTL_TEXTENCODING_IBM_775
},
921 { "CP775", RTL_TEXTENCODING_IBM_775
},
922 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775
},
923 { "IBM852", RTL_TEXTENCODING_IBM_852
},
924 { "CP852", RTL_TEXTENCODING_IBM_852
},
925 { "852", RTL_TEXTENCODING_IBM_852
},
926 { "CSPCP852", RTL_TEXTENCODING_IBM_852
},
927 { "IBM855", RTL_TEXTENCODING_IBM_855
},
928 { "CP855", RTL_TEXTENCODING_IBM_855
},
929 { "855", RTL_TEXTENCODING_IBM_855
},
930 { "CSIBM855", RTL_TEXTENCODING_IBM_855
},
931 { "IBM857", RTL_TEXTENCODING_IBM_857
},
932 { "CP857", RTL_TEXTENCODING_IBM_857
},
933 { "857", RTL_TEXTENCODING_IBM_857
},
934 { "CSIBM857", RTL_TEXTENCODING_IBM_857
},
935 { "IBM862", RTL_TEXTENCODING_IBM_862
},
936 { "CP862", RTL_TEXTENCODING_IBM_862
},
937 { "862", RTL_TEXTENCODING_IBM_862
},
938 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862
},
939 { "IBM864", RTL_TEXTENCODING_IBM_864
},
940 { "CP864", RTL_TEXTENCODING_IBM_864
},
941 { "CSIBM864", RTL_TEXTENCODING_IBM_864
},
942 { "IBM866", RTL_TEXTENCODING_IBM_866
},
943 { "CP866", RTL_TEXTENCODING_IBM_866
},
944 { "866", RTL_TEXTENCODING_IBM_866
},
945 { "CSIBM866", RTL_TEXTENCODING_IBM_866
},
946 { "IBM869", RTL_TEXTENCODING_IBM_869
},
947 { "CP869", RTL_TEXTENCODING_IBM_869
},
948 { "869", RTL_TEXTENCODING_IBM_869
},
949 { "CP-GR", RTL_TEXTENCODING_IBM_869
},
950 { "CSIBM869", RTL_TEXTENCODING_IBM_869
},
951 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250
},
952 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251
},
953 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253
},
954 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254
},
955 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255
},
956 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256
},
957 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257
},
958 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258
},
959 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS
},
960 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS
},
961 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS
},
962 { "GB2312", RTL_TEXTENCODING_GB_2312
},
963 { "CSGB2312", RTL_TEXTENCODING_GB_2312
},
964 { "BIG5", RTL_TEXTENCODING_BIG5
},
965 { "CSBIG5", RTL_TEXTENCODING_BIG5
},
966 { "EUC-JP", RTL_TEXTENCODING_EUC_JP
},
967 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
968 RTL_TEXTENCODING_EUC_JP
},
969 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP
},
970 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP
},
971 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP
},
972 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN
},
973 { "KOI8-R", RTL_TEXTENCODING_KOI8_R
},
974 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R
},
975 { "UTF-7", RTL_TEXTENCODING_UTF7
},
976 { "UTF-8", RTL_TEXTENCODING_UTF8
},
977 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10
}, // RFC 2047
978 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13
}, // RFC 2047
979 { "EUC-KR", RTL_TEXTENCODING_EUC_KR
},
980 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR
},
981 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR
},
982 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR
},
983 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4
},
984 { "CSUCS4", RTL_TEXTENCODING_UCS4
},
985 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2
},
986 { "CSUNICODE", RTL_TEXTENCODING_UCS2
} };
988 rtl_TextEncoding
getCharsetEncoding(sal_Char
const * pBegin
,
989 sal_Char
const * pEnd
)
991 for (const EncodingEntry
& i
: aEncodingMap
)
992 if (equalIgnoreCase(pBegin
, pEnd
, i
.m_aName
))
993 return i
.m_eEncoding
;
994 return RTL_TEXTENCODING_DONTKNOW
;
1002 bool INetMIME::isAtomChar(sal_uInt32 nChar
)
1004 static const bool aMap
[128]
1005 = { false, false, false, false, false, false, false, false,
1006 false, false, false, false, false, false, false, false,
1007 false, false, false, false, false, false, false, false,
1008 false, false, false, false, false, false, false, false,
1009 false, true, false, true, true, true, true, true, // !"#$%&'
1010 false, false, true, true, false, true, false, true, //()*+,-./
1011 true, true, true, true, true, true, true, true, //01234567
1012 true, true, false, false, false, true, false, true, //89:;<=>?
1013 false, true, true, true, true, true, true, true, //@ABCDEFG
1014 true, true, true, true, true, true, true, true, //HIJKLMNO
1015 true, true, true, true, true, true, true, true, //PQRSTUVW
1016 true, true, true, false, false, false, true, true, //XYZ[\]^_
1017 true, true, true, true, true, true, true, true, //`abcdefg
1018 true, true, true, true, true, true, true, true, //hijklmno
1019 true, true, true, true, true, true, true, true, //pqrstuvw
1020 true, true, true, true, true, true, true, false //xyz{|}~
1022 return rtl::isAscii(nChar
) && aMap
[nChar
];
1026 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar
)
1028 static const bool aMap
[128]
1029 = { false, false, false, false, false, false, false, false,
1030 false, false, false, false, false, false, false, false,
1031 false, false, false, false, false, false, false, false,
1032 false, false, false, false, false, false, false, false,
1033 false, true, false, true, true, false, true, true, // !"#$%&'
1034 false, false, false, true, true, true, true, true, //()*+,-./
1035 true, true, true, true, true, true, true, true, //01234567
1036 true, true, true, true, true, true, true, true, //89:;<=>?
1037 true, true, true, true, true, true, true, true, //@ABCDEFG
1038 true, true, true, true, true, true, true, true, //HIJKLMNO
1039 true, true, true, true, true, true, true, true, //PQRSTUVW
1040 true, true, true, true, false, true, true, true, //XYZ[\]^_
1041 true, true, true, true, true, true, true, true, //`abcdefg
1042 true, true, true, true, true, true, true, true, //hijklmno
1043 true, true, true, true, true, true, true, true, //pqrstuvw
1044 true, true, true, false, true, true, true, false //xyz{|}~
1046 return rtl::isAscii(nChar
) && aMap
[nChar
];
1050 bool INetMIME::equalIgnoreCase(const sal_Unicode
* pBegin1
,
1051 const sal_Unicode
* pEnd1
,
1052 const sal_Char
* pString2
)
1054 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
1055 "INetMIME::equalIgnoreCase(): Bad sequences");
1057 while (*pString2
!= 0)
1058 if (pBegin1
== pEnd1
1059 || (rtl::toAsciiUpperCase(*pBegin1
++)
1060 != rtl::toAsciiUpperCase(
1061 static_cast<unsigned char>(*pString2
++))))
1063 return pBegin1
== pEnd1
;
// Scan an unsigned decimal number at rBegin (bounded by pEnd) and store it in
// rValue. The value is accumulated in 64 bits so that exceeding the range of
// sal_uInt32 can be detected.
// NOTE(review): the handling of non-digit characters, the return statements
// and any update of rBegin are not visible in this view -- presumably
// scanning stops at the first non-digit, false is returned on overflow or on
// a rejected zero, and rBegin is advanced past the number on success; confirm
// against the complete file.
1067 bool INetMIME::scanUnsigned(const sal_Unicode
*& rBegin
,
1068 const sal_Unicode
* pEnd
, bool bLeadingZeroes
,
1069 sal_uInt32
& rValue
)
1071 sal_uInt64 nTheValue
= 0;
1072 const sal_Unicode
* p
= rBegin
;
1073 for ( ; p
!= pEnd
; ++p
)
1075 int nWeight
= getWeight(*p
);
1078 nTheValue
= 10 * nTheValue
+ nWeight
;
// Overflow check against the largest sal_uInt32.
1079 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
// A zero result is checked specially: either nothing was consumed at all,
// or leading zeroes are disallowed and more than one character was consumed.
1082 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
1085 rValue
= sal_uInt32(nTheValue
);
// Scan a content type of the form "type/subtype" followed by parameters.
//
// rStr is the text to scan. If pType / pSubType are non-null they receive the
// lower-cased type / subtype. pParameters is passed on to scanParameters.
//
// White space and comments are skipped before the type, around the '/' and
// before the subtype; type and subtype are runs of token characters. The
// return value is whatever scanParameters returns for the rest of the string.
// NOTE(review): the loop bodies and the early returns for an empty type, a
// missing '/' or an empty subtype are not visible in this view -- confirm
// what is returned on failure against the complete file.
1090 sal_Unicode
const * INetMIME::scanContentType(
1091 OUString
const & rStr
, OUString
* pType
,
1092 OUString
* pSubType
, INetContentTypeParameterList
* pParameters
)
1094 sal_Unicode
const * pBegin
= rStr
.getStr();
1095 sal_Unicode
const * pEnd
= pBegin
+ rStr
.getLength();
// --- type ---
1096 sal_Unicode
const * p
= skipLinearWhiteSpaceComment(pBegin
, pEnd
);
1097 sal_Unicode
const * pTypeBegin
= p
;
1098 while (p
!= pEnd
&& isTokenChar(*p
))
1102 if (p
== pTypeBegin
)
1104 sal_Unicode
const * pTypeEnd
= p
;
// --- "/" separator ---
1106 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
1107 if (p
== pEnd
|| *p
++ != '/')
// --- subtype ---
1110 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
1111 sal_Unicode
const * pSubTypeBegin
= p
;
1112 while (p
!= pEnd
&& isTokenChar(*p
))
1116 if (p
== pSubTypeBegin
)
1118 sal_Unicode
const * pSubTypeEnd
= p
;
// Hand the results back through the optional output parameters.
1120 if (pType
!= nullptr)
1122 *pType
= OUString(pTypeBegin
, pTypeEnd
- pTypeBegin
).toAsciiLowerCase();
1124 if (pSubType
!= nullptr)
1126 *pSubType
= OUString(pSubTypeBegin
, pSubTypeEnd
- pSubTypeBegin
)
1127 .toAsciiLowerCase();
1130 return scanParameters(p
, pEnd
, pParameters
);
// Decode the body of a header field given as an 8-bit string (rBody) into
// an OUString.
//
// What the visible code does:
//  - Text outside encoded words is appended via appendISO88591(), i.e. the
//    raw bytes are taken as ISO-8859-1 (see the original comment below).
//  - A run starting with "=?" is examined as an encoded word of the form
//    "=?" charset ["*" language] "?" ("B" / "Q") "?" text "?=".  Each
//    failed check clears bEncodedWord.
//  - The decoded bytes are collected in sText, converted with
//    convertToUnicode(), and appended to the result.
//  - Another branch hands a byte sequence to translateUTF8Char() and
//    appends the resulting character as UTF-16.
//
// NOTE(review): this listing does not show every statement of the function
// (e.g. the declaration of the result string and most branch conditions),
// so the comments below describe only what is visible -- confirm against
// the complete source before relying on them.
1134 OUString
INetMIME::decodeHeaderFieldBody(const OString
& rBody
)
1136 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1137 // versions of StarOffice send mails with header fields where encoded
1138 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1139 // '=', ',', '.', '"', ')', without any required white space in between.
1140 // And there appear to exist some broken mailers that only encode single
1141 // letters within words, like "Appel
1142 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1143 // detect encoded words even when not properly surrounded by white space.
1145 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1147 // encoded-word = "=?"
1148 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1149 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1150 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1151 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1154 // base64 = ALPHA / DIGIT / "+" / "/"
// [pBegin, pEnd) is the raw byte range of the header body.
1156 const sal_Char
* pBegin
= rBody
.getStr();
1157 const sal_Char
* pEnd
= pBegin
+ rBody
.getLength();
// pCopyBegin marks the start of the plain run that has not been appended
// yet; it is passed to appendISO88591() as the begin of the range to flush.
1160 const sal_Char
* pCopyBegin
= pBegin
;
1162 /* bool bStartEncodedWord = true; */
// pWSPBegin is used as the end of the flushed range when an encoded word
// has been decoded (see the appendISO88591() call further down).
1163 const sal_Char
* pWSPBegin
= pBegin
;
// Main scan over the body; p is advanced inside the loop body.
1165 for (const sal_Char
* p
= pBegin
; p
!= pEnd
;)
1167 OUString sEncodedText
;
// A '=' may start an encoded word; q looks ahead without moving p.
1168 if (p
!= pEnd
&& *p
== '=' /* && bStartEncodedWord */)
1170 const sal_Char
* q
= p
+ 1;
1171 bool bEncodedWord
= q
!= pEnd
&& *q
++ == '?';
1173 rtl_TextEncoding eCharsetEncoding
= RTL_TEXTENCODING_DONTKNOW
;
// Charset scan.  pLanguageBegin / nAlphaCount track what appears to be the
// optional "*" language suffix from the grammar above (at most 8 alphabetic
// characters per part, see the "++nAlphaCount > 8" check below).
1176 const sal_Char
* pCharsetBegin
= q
;
1177 const sal_Char
* pLanguageBegin
= nullptr;
1178 int nAlphaCount
= 0;
1179 for (bool bDone
= false; !bDone
;)
1182 bEncodedWord
= false;
1187 sal_Char cChar
= *q
++;
1191 pLanguageBegin
= q
- 1;
1196 if (pLanguageBegin
!= nullptr)
1198 if (nAlphaCount
== 0)
1199 pLanguageBegin
= nullptr;
// An empty charset name (nothing between pCharsetBegin and the current
// character) invalidates the encoded word.
1206 if (pCharsetBegin
== q
- 1)
1207 bEncodedWord
= false;
// The charset name ends either at the current character or, when a valid
// language suffix was seen, at pLanguageBegin.
1211 = getCharsetEncoding(
1213 pLanguageBegin
== nullptr
1214 || nAlphaCount
== 0 ?
1215 q
- 1 : pLanguageBegin
);
1216 bEncodedWord
= isMIMECharsetEncoding(
1219 = translateFromMIME(eCharsetEncoding
);
1225 if (pLanguageBegin
!= nullptr
1226 && (!rtl::isAsciiAlpha(
1227 static_cast<unsigned char>(cChar
))
1228 || ++nAlphaCount
> 8))
1229 pLanguageBegin
= nullptr;
// Encoding selector; presumably bEncodingB distinguishes "B" (Base 64) from
// "Q" -- the assignments are not visible in this listing.
1235 bool bEncodingB
= false;
1239 bEncodedWord
= false;
1255 bEncodedWord
= false;
// The encoding selector must be followed by '?'.
1261 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '?';
// sText collects the decoded bytes of the encoded text.
1263 OStringBuffer sText
;
1268 for (bool bDone
= false; !bDone
;)
1272 bEncodedWord
= false;
1277 bool bFinal
= false;
// Base 64: four digits are combined into nValue, six bits each (shifts 18,
// 12, 6, 0), and then emitted as up to three bytes (shifts 16, 8, 0).
1279 sal_uInt32 nValue
= 0;
1280 for (int nShift
= 18; nShift
>= 0; nShift
-= 6)
1282 int nWeight
= getBase64Weight(*q
++);
1285 bEncodedWord
= false;
1295 bEncodedWord
= false;
1300 nCount
= nShift
== 6 ? 1 : 2;
1304 nValue
|= nWeight
<< nShift
;
1308 for (int nShift
= 16; nCount
-- > 0; nShift
-= 8)
1309 sText
.append(sal_Char(nValue
>> nShift
& 0xFF));
1315 if (bFinal
&& !bDone
)
1317 bEncodedWord
= false;
// Second text loop (presumably the "Q" encoding): literal stretches are
// copied from rBody in one piece; pEncodedTextCopyBegin marks the start of
// the stretch that has not been copied yet.
1326 const sal_Char
* pEncodedTextBegin
= q
;
1327 const sal_Char
* pEncodedTextCopyBegin
= q
;
1328 for (bool bDone
= false; !bDone
;)
1331 bEncodedWord
= false;
1336 sal_uInt32 nChar
= *q
++;
1343 bEncodedWord
= false;
// Two hex digits are read and combined into a single byte; a negative
// weight from getHexWeight() invalidates the encoded word.
1347 int nDigit1
= getHexWeight(q
[0]);
1348 int nDigit2
= getHexWeight(q
[1]);
1349 if (nDigit1
< 0 || nDigit2
< 0)
1351 bEncodedWord
= false;
1355 sText
.append(rBody
.copy(
1356 (pEncodedTextCopyBegin
- pBegin
),
1357 (q
- 1 - pEncodedTextCopyBegin
)));
1358 sText
.append(sal_Char(nDigit1
<< 4 | nDigit2
));
1360 pEncodedTextCopyBegin
= q
;
1365 if (q
- pEncodedTextBegin
> 1)
1366 sText
.append(rBody
.copy(
1367 (pEncodedTextCopyBegin
- pBegin
),
1368 (q
- 1 - pEncodedTextCopyBegin
)));
1370 bEncodedWord
= false;
1375 sText
.append(rBody
.copy(
1376 (pEncodedTextCopyBegin
- pBegin
),
1377 (q
- 1 - pEncodedTextCopyBegin
)));
1379 pEncodedTextCopyBegin
= q
;
1383 if (!isVisible(nChar
))
1385 bEncodedWord
= false;
// The encoded word must be closed by '='.
1394 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '=';
// Convert the collected bytes with the detected charset; a null buffer from
// convertToUnicode() invalidates the encoded word.  The buffer is released
// with delete[] after its content has been appended.
1396 sal_Unicode
* pUnicodeBuffer
= nullptr;
1397 sal_Size nUnicodeSize
= 0;
1401 = convertToUnicode(sText
.getStr(),
1402 sText
.getStr() + sText
.getLength(),
1403 eCharsetEncoding
, nUnicodeSize
);
1404 if (pUnicodeBuffer
== nullptr)
1405 bEncodedWord
= false;
// Flush the pending plain text up to pWSPBegin, then append the decoded
// word.
1410 appendISO88591(sDecoded
, pCopyBegin
, pWSPBegin
);
1411 sDecoded
+= OUString(
1413 static_cast< sal_Int32
>(nUnicodeSize
));
1414 delete[] pUnicodeBuffer
;
// Skip US-ASCII white space (tab or space, see isWhiteSpace()).
1419 while (p
!= pEnd
&& isWhiteSpace(*p
))
1421 /* bStartEncodedWord = p != pWSPBegin; */
1426 if (!sEncodedText
.isEmpty())
1427 sDecoded
+= sEncodedText
;
1435 /* bStartEncodedWord = true; */
1439 /* bStartEncodedWord = true; */
1443 /* bStartEncodedWord = false; */
// UTF-8 handling: the sequence starting at p - 1 is handed to
// translateUTF8Char(); the resulting character is written to aUTF16Buf via
// putUTF32Character() and appended after the pending plain text has been
// flushed.
1448 const sal_Char
* pUTF8Begin
= p
- 1;
1449 const sal_Char
* pUTF8End
= pUTF8Begin
;
1450 sal_uInt32 nCharacter
= 0;
1451 if (translateUTF8Char(pUTF8End
, pEnd
, RTL_TEXTENCODING_UCS4
,
1454 appendISO88591(sDecoded
, pCopyBegin
, p
- 1);
1455 sal_Unicode aUTF16Buf
[2];
1456 sal_Int32 nUTF16Len
= putUTF32Character(aUTF16Buf
, nCharacter
) - aUTF16Buf
;
1457 sDecoded
+= OUString(aUTF16Buf
, nUTF16Len
);
1461 /* bStartEncodedWord = false; */
// Flush whatever plain text is still pending up to the end of the body.
1468 appendISO88591(sDecoded
, pCopyBegin
, pEnd
);
1472 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */