1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 #include <forward_list>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
35 rtl_TextEncoding
getCharsetEncoding(const sal_Char
* pBegin
,
36 const sal_Char
* pEnd
);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
45 bool isWhiteSpace(sal_uInt32 nChar
)
47 return nChar
== '\t' || nChar
== ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 int getBase64Weight(sal_uInt32 nChar
)
61 return rtl::isAsciiUpperCase(nChar
) ? int(nChar
- 'A') :
62 rtl::isAsciiLowerCase(nChar
) ? int(nChar
- 'a' + 26) :
63 rtl::isAsciiDigit(nChar
) ? int(nChar
- '0' + 52) :
66 nChar
== '=' ? -1 : -2;
69 bool startsWithLineFolding(const sal_Unicode
* pBegin
,
70 const sal_Unicode
* pEnd
)
72 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd
- pBegin
>= 3 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A
76 && isWhiteSpace(pBegin
[2]); // CR, LF
79 rtl_TextEncoding
translateFromMIME(rtl_TextEncoding
83 return eEncoding
== RTL_TEXTENCODING_ISO_8859_1
?
84 RTL_TEXTENCODING_MS_1252
: eEncoding
;
90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding
)
92 return rtl_isOctetTextEncoding(eEncoding
);
95 std::unique_ptr
<sal_Unicode
[]> convertToUnicode(const sal_Char
* pBegin
,
96 const sal_Char
* pEnd
,
97 rtl_TextEncoding eEncoding
,
100 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding
);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter
);
106 std::unique_ptr
<sal_Unicode
[]> pBuffer
;
108 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
109 nBufferSize
+= nBufferSize
/ 3 + 1)
111 pBuffer
.reset(new sal_Unicode
[nBufferSize
]);
112 sal_Size nSrcCvtBytes
;
113 rSize
= rtl_convertTextToUnicode(
114 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
.get(),
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
119 &nInfo
, &nSrcCvtBytes
);
120 if (nInfo
!= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
)
123 rtl_resetTextToUnicodeContext(hConverter
, hContext
);
125 rtl_destroyTextToUnicodeContext(hConverter
, hContext
);
126 rtl_destroyTextToUnicodeConverter(hConverter
);
134 std::unique_ptr
<sal_Char
[]> convertFromUnicode(const sal_Unicode
* pBegin
,
135 const sal_Unicode
* pEnd
,
136 rtl_TextEncoding eEncoding
,
139 if (eEncoding
== RTL_TEXTENCODING_DONTKNOW
)
141 rtl_UnicodeToTextConverter hConverter
142 = rtl_createUnicodeToTextConverter(eEncoding
);
143 rtl_UnicodeToTextContext hContext
144 = rtl_createUnicodeToTextContext(hConverter
);
145 std::unique_ptr
<sal_Char
[]> pBuffer
;
147 for (sal_Size nBufferSize
= pEnd
- pBegin
;;
148 nBufferSize
+= nBufferSize
/ 3 + 1)
150 pBuffer
.reset(new sal_Char
[nBufferSize
]);
151 sal_Size nSrcCvtBytes
;
152 rSize
= rtl_convertUnicodeToText(
153 hConverter
, hContext
, pBegin
, pEnd
- pBegin
, pBuffer
.get(),
155 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
156 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
157 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
158 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
,
159 &nInfo
, &nSrcCvtBytes
);
160 if (nInfo
!= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
)
163 rtl_resetUnicodeToTextContext(hConverter
, hContext
);
165 rtl_destroyUnicodeToTextContext(hConverter
, hContext
);
166 rtl_destroyUnicodeToTextConverter(hConverter
);
174 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
176 @param pBuffer Points to a buffer, must not be null.
178 @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
180 @return A pointer past the UTF-16 characters put into the buffer
181 (i.e., pBuffer + 1 or pBuffer + 2).
183 sal_Unicode
* putUTF32Character(sal_Unicode
* pBuffer
,
186 DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32
), "putUTF32Character(): Bad char");
187 if (nUTF32
< 0x10000)
188 *pBuffer
++ = sal_Unicode(nUTF32
);
192 *pBuffer
++ = sal_Unicode(0xD800 | (nUTF32
>> 10));
193 *pBuffer
++ = sal_Unicode(0xDC00 | (nUTF32
& 0x3FF));
198 void writeUTF8(OStringBuffer
& rSink
, sal_uInt32 nChar
)
200 // See RFC 2279 for a discussion of UTF-8.
201 DBG_ASSERT(nChar
< 0x80000000, "writeUTF8(): Bad char");
204 rSink
.append(sal_Char(nChar
));
205 else if (nChar
< 0x800)
206 rSink
.append(sal_Char(nChar
>> 6 | 0xC0))
207 .append(sal_Char((nChar
& 0x3F) | 0x80));
208 else if (nChar
< 0x10000)
209 rSink
.append(sal_Char(nChar
>> 12 | 0xE0))
210 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
211 .append(sal_Char((nChar
& 0x3F) | 0x80));
212 else if (nChar
< 0x200000)
213 rSink
.append(sal_Char(nChar
>> 18 | 0xF0))
214 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
215 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
216 .append(sal_Char((nChar
& 0x3F) | 0x80));
217 else if (nChar
< 0x4000000)
218 rSink
.append(sal_Char(nChar
>> 24 | 0xF8))
219 .append(sal_Char((nChar
>> 18 & 0x3F) | 0x80))
220 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
221 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
222 .append(sal_Char((nChar
& 0x3F) | 0x80));
224 rSink
.append(sal_Char(nChar
>> 30 | 0xFC))
225 .append(sal_Char((nChar
>> 24 & 0x3F) | 0x80))
226 .append(sal_Char((nChar
>> 18 & 0x3F) | 0x80))
227 .append(sal_Char((nChar
>> 12 & 0x3F) | 0x80))
228 .append(sal_Char((nChar
>> 6 & 0x3F) | 0x80))
229 .append(sal_Char((nChar
& 0x3F) | 0x80));
232 bool translateUTF8Char(const sal_Char
*& rBegin
,
233 const sal_Char
* pEnd
,
234 rtl_TextEncoding eEncoding
,
235 sal_uInt32
& rCharacter
)
237 if (rBegin
== pEnd
|| static_cast< unsigned char >(*rBegin
) < 0x80
238 || static_cast< unsigned char >(*rBegin
) >= 0xFE)
244 const sal_Char
* p
= rBegin
;
245 if (static_cast< unsigned char >(*p
) < 0xE0)
249 nUCS4
= static_cast< unsigned char >(*p
) & 0x1F;
251 else if (static_cast< unsigned char >(*p
) < 0xF0)
255 nUCS4
= static_cast< unsigned char >(*p
) & 0xF;
257 else if (static_cast< unsigned char >(*p
) < 0xF8)
261 nUCS4
= static_cast< unsigned char >(*p
) & 7;
263 else if (static_cast< unsigned char >(*p
) < 0xFC)
267 nUCS4
= static_cast< unsigned char >(*p
) & 3;
273 nUCS4
= static_cast< unsigned char >(*p
) & 1;
277 for (; nCount
-- > 0; ++p
)
278 if ((static_cast< unsigned char >(*p
) & 0xC0) == 0x80)
279 nUCS4
= (nUCS4
<< 6) | (static_cast< unsigned char >(*p
) & 0x3F);
283 if (!rtl::isUnicodeCodePoint(nUCS4
) || nUCS4
< nMin
)
286 if (eEncoding
>= RTL_TEXTENCODING_UCS4
)
290 sal_Unicode aUTF16
[2];
291 const sal_Unicode
* pUTF16End
= putUTF32Character(aUTF16
, nUCS4
);
293 std::unique_ptr
<sal_Char
[]> pBuffer
= convertFromUnicode(aUTF16
, pUTF16End
, eEncoding
,
297 DBG_ASSERT(nSize
== 1,
298 "translateUTF8Char(): Bad conversion");
299 rCharacter
= pBuffer
[0];
305 void appendISO88591(OUStringBuffer
& rText
, sal_Char
const * pBegin
,
306 sal_Char
const * pEnd
);
310 OString
const m_aAttribute
;
311 OString
const m_aCharset
;
312 OString
const m_aLanguage
;
313 OString
const m_aValue
;
314 sal_uInt32
const m_nSection
;
315 bool const m_bExtended
;
317 bool operator<(const Parameter
& rhs
) const // is used by std::list<Parameter>::sort
319 int nComp
= m_aAttribute
.compareTo(rhs
.m_aAttribute
);
321 (nComp
== 0 && m_nSection
< rhs
.m_nSection
);
323 struct IsSameSection
// is used to check container for duplicates with std::any_of
325 const OString
& rAttribute
;
326 const sal_uInt32 nSection
;
327 bool operator()(const Parameter
& r
) const
328 { return r
.m_aAttribute
== rAttribute
&& r
.m_nSection
== nSection
; }
332 typedef std::forward_list
<Parameter
> ParameterList
;
334 bool parseParameters(ParameterList
const & rInput
,
335 INetContentTypeParameterList
* pOutput
);
339 void appendISO88591(OUStringBuffer
& rText
, sal_Char
const * pBegin
,
340 sal_Char
const * pEnd
)
342 sal_Int32 nLength
= pEnd
- pBegin
;
343 std::unique_ptr
<sal_Unicode
[]> pBuffer(new sal_Unicode
[nLength
]);
344 for (sal_Unicode
* p
= pBuffer
.get(); pBegin
!= pEnd
;)
345 *p
++ = static_cast<unsigned char>(*pBegin
++);
346 rText
.append(pBuffer
.get(), nLength
);
351 bool parseParameters(ParameterList
const & rInput
,
352 INetContentTypeParameterList
* pOutput
)
357 for (auto it
= rInput
.begin(), itPrev
= rInput
.end(); it
!= rInput
.end() ; itPrev
= it
++)
359 if (it
->m_nSection
> 0
360 && (itPrev
== rInput
.end()
361 || itPrev
->m_nSection
!= it
->m_nSection
- 1
362 || itPrev
->m_aAttribute
!= it
->m_aAttribute
))
367 for (auto it
= rInput
.begin(), itNext
= rInput
.begin(); it
!= rInput
.end(); it
= itNext
)
369 bool bCharset
= !it
->m_aCharset
.isEmpty();
370 rtl_TextEncoding eEncoding
= RTL_TEXTENCODING_DONTKNOW
;
373 = getCharsetEncoding(it
->m_aCharset
.getStr(),
374 it
->m_aCharset
.getStr()
375 + it
->m_aCharset
.getLength());
376 OUStringBuffer
aValue(64);
377 bool bBadEncoding
= false;
382 std::unique_ptr
<sal_Unicode
[]> pUnicode
383 = convertToUnicode(itNext
->m_aValue
.getStr(),
384 itNext
->m_aValue
.getStr()
385 + itNext
->m_aValue
.getLength(),
386 bCharset
&& it
->m_bExtended
?
388 RTL_TEXTENCODING_UTF8
,
390 if (!pUnicode
&& !(bCharset
&& it
->m_bExtended
))
391 pUnicode
= convertToUnicode(
392 itNext
->m_aValue
.getStr(),
393 itNext
->m_aValue
.getStr()
394 + itNext
->m_aValue
.getLength(),
395 RTL_TEXTENCODING_ISO_8859_1
, nSize
);
401 aValue
.append(pUnicode
.get(), static_cast<sal_Int32
>(nSize
));
404 while (itNext
!= rInput
.end() && itNext
->m_nSection
!= 0);
412 if (itNext
->m_bExtended
)
414 for (sal_Int32 i
= 0; i
< itNext
->m_aValue
.getLength(); ++i
)
416 static_cast<sal_Unicode
>(
417 static_cast<unsigned char>(itNext
->m_aValue
[i
])
418 | 0xF800)); // map to unicode corporate use sub area
422 for (sal_Int32 i
= 0; i
< itNext
->m_aValue
.getLength(); ++i
)
423 aValue
.append( static_cast<char>(itNext
->m_aValue
[i
]) );
427 while (itNext
!= rInput
.end() && itNext
->m_nSection
!= 0);
429 auto const ret
= pOutput
->insert(
431 {it
->m_aCharset
, it
->m_aLanguage
, aValue
.makeStringAndClear(), !bBadEncoding
}});
432 SAL_INFO_IF(!ret
.second
, "tools",
433 "INetMIME: dropping duplicate parameter: " << it
->m_aAttribute
);
438 /** Check whether some character is valid within an RFC 2045 <token>.
440 @param nChar Some UCS-4 character.
442 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
443 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
444 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
446 bool isTokenChar(sal_uInt32 nChar
)
448 static const bool aMap
[128]
449 = { false, false, false, false, false, false, false, false,
450 false, false, false, false, false, false, false, false,
451 false, false, false, false, false, false, false, false,
452 false, false, false, false, false, false, false, false,
453 false, true, false, true, true, true, true, true, // !"#$%&'
454 false, false, true, true, false, true, true, false, //()*+,-./
455 true, true, true, true, true, true, true, true, //01234567
456 true, true, false, false, false, false, false, false, //89:;<=>?
457 false, true, true, true, true, true, true, true, //@ABCDEFG
458 true, true, true, true, true, true, true, true, //HIJKLMNO
459 true, true, true, true, true, true, true, true, //PQRSTUVW
460 true, true, true, false, false, false, true, true, //XYZ[\]^_
461 true, true, true, true, true, true, true, true, //`abcdefg
462 true, true, true, true, true, true, true, true, //hijklmno
463 true, true, true, true, true, true, true, true, //pqrstuvw
464 true, true, true, true, true, true, true, false //xyz{|}~
466 return rtl::isAscii(nChar
) && aMap
[nChar
];
469 const sal_Unicode
* skipComment(const sal_Unicode
* pBegin
,
470 const sal_Unicode
* pEnd
)
472 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
473 "skipComment(): Bad sequence");
475 if (pBegin
!= pEnd
&& *pBegin
== '(')
477 sal_uInt32 nLevel
= 0;
478 for (const sal_Unicode
* p
= pBegin
; p
!= pEnd
;)
499 const sal_Unicode
* skipLinearWhiteSpaceComment(const sal_Unicode
*
504 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
505 "skipLinearWhiteSpaceComment(): Bad sequence");
507 while (pBegin
!= pEnd
)
516 if (startsWithLineFolding(pBegin
, pEnd
))
524 const sal_Unicode
* p
= skipComment(pBegin
, pEnd
);
537 const sal_Unicode
* skipQuotedString(const sal_Unicode
* pBegin
,
538 const sal_Unicode
* pEnd
)
540 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
541 "skipQuotedString(): Bad sequence");
543 if (pBegin
!= pEnd
&& *pBegin
== '"')
544 for (const sal_Unicode
* p
= pBegin
+ 1; p
!= pEnd
;)
548 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
549 || !isWhiteSpace(*p
++))
564 sal_Unicode
const * scanParameters(sal_Unicode
const * pBegin
,
565 sal_Unicode
const * pEnd
,
566 INetContentTypeParameterList
*
570 sal_Unicode
const * pParameterBegin
= pBegin
;
571 for (sal_Unicode
const * p
= pParameterBegin
;;)
573 pParameterBegin
= skipLinearWhiteSpaceComment(p
, pEnd
);
574 if (pParameterBegin
== pEnd
|| *pParameterBegin
!= ';')
576 p
= pParameterBegin
+ 1;
578 sal_Unicode
const * pAttributeBegin
579 = skipLinearWhiteSpaceComment(p
, pEnd
);
581 bool bDowncaseAttribute
= false;
582 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '*')
584 bDowncaseAttribute
= bDowncaseAttribute
|| rtl::isAsciiUpperCase(*p
);
587 if (p
== pAttributeBegin
)
589 OString
aAttribute(pAttributeBegin
, p
- pAttributeBegin
, RTL_TEXTENCODING_ASCII_US
);
590 if (bDowncaseAttribute
)
591 aAttribute
= aAttribute
.toAsciiLowerCase();
593 sal_uInt32 nSection
= 0;
594 if (p
!= pEnd
&& *p
== '*')
597 if (p
!= pEnd
&& rtl::isAsciiDigit(*p
)
598 && !INetMIME::scanUnsigned(p
, pEnd
, false, nSection
))
602 bool bPresent
= std::any_of(aList
.begin(), aList
.end(),
603 Parameter::IsSameSection
{aAttribute
, nSection
});
607 bool bExtended
= false;
608 if (p
!= pEnd
&& *p
== '*')
614 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
616 if (p
== pEnd
|| *p
!= '=')
619 p
= skipLinearWhiteSpaceComment(p
+ 1, pEnd
);
628 sal_Unicode
const * pCharsetBegin
= p
;
629 bool bDowncaseCharset
= false;
630 while (p
!= pEnd
&& isTokenChar(*p
) && *p
!= '\'')
632 bDowncaseCharset
= bDowncaseCharset
|| rtl::isAsciiUpperCase(*p
);
635 if (p
== pCharsetBegin
)
642 RTL_TEXTENCODING_ASCII_US
);
643 if (bDowncaseCharset
)
644 aCharset
= aCharset
.toAsciiLowerCase();
647 if (p
== pEnd
|| *p
!= '\'')
651 sal_Unicode
const * pLanguageBegin
= p
;
652 bool bDowncaseLanguage
= false;
654 for (; p
!= pEnd
; ++p
)
655 if (rtl::isAsciiAlpha(*p
))
659 bDowncaseLanguage
= bDowncaseLanguage
660 || rtl::isAsciiUpperCase(*p
);
670 if (nLetters
== 0 || nLetters
> 8)
677 RTL_TEXTENCODING_ASCII_US
);
678 if (bDowncaseLanguage
)
679 aLanguage
= aLanguage
.toAsciiLowerCase();
682 if (p
== pEnd
|| *p
!= '\'')
692 sal_uInt32 nChar
= INetMIME::getUTF32Character(q
, pEnd
);
693 if (rtl::isAscii(nChar
) && !isTokenChar(nChar
))
696 if (nChar
== '%' && p
+ 1 < pEnd
)
698 int nWeight1
= INetMIME::getHexWeight(p
[0]);
699 int nWeight2
= INetMIME::getHexWeight(p
[1]);
700 if (nWeight1
>= 0 && nWeight2
>= 0)
702 aSink
.append(sal_Char(nWeight1
<< 4 | nWeight2
));
707 writeUTF8(aSink
, nChar
);
709 aValue
= aSink
.makeStringAndClear();
712 while (p
!= pEnd
&& (isTokenChar(*p
) || !rtl::isAscii(*p
)))
715 else if (p
!= pEnd
&& *p
== '"')
718 OStringBuffer
aSink(256);
719 bool bInvalid
= false;
727 sal_uInt32 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
730 else if (nChar
== 0x0D) // CR
732 if (pEnd
- p
< 2 || *p
++ != 0x0A // LF
733 || !isWhiteSpace(*p
))
738 nChar
= static_cast<unsigned char>(*p
++);
740 else if (nChar
== '\\')
747 nChar
= INetMIME::getUTF32Character(p
, pEnd
);
749 writeUTF8(aSink
, nChar
);
753 aValue
= aSink
.makeStringAndClear();
757 sal_Unicode
const * pStringEnd
= skipQuotedString(p
, pEnd
);
764 sal_Unicode
const * pTokenBegin
= p
;
765 while (p
!= pEnd
&& (isTokenChar(*p
) || !rtl::isAscii(*p
)))
767 if (p
== pTokenBegin
)
771 pTokenBegin
, p
- pTokenBegin
,
772 RTL_TEXTENCODING_UTF8
);
774 aList
.emplace_front(Parameter
{aAttribute
, aCharset
, aLanguage
, aValue
, nSection
, bExtended
});
777 return parseParameters(aList
, pParameters
) ? pParameterBegin
: pBegin
;
780 bool equalIgnoreCase(const sal_Char
* pBegin1
,
781 const sal_Char
* pEnd1
,
782 const sal_Char
* pString2
)
784 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
785 "equalIgnoreCase(): Bad sequences");
787 while (*pString2
!= 0)
789 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1
++))
790 != rtl::toAsciiUpperCase(
791 static_cast<unsigned char>(*pString2
++))))
793 return pBegin1
== pEnd1
;
798 sal_Char
const * m_aName
;
799 rtl_TextEncoding
const m_eEncoding
;
802 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
803 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
805 static EncodingEntry
const aEncodingMap
[]
806 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US
},
807 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US
},
808 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US
},
809 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US
},
810 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US
},
811 { "ASCII", RTL_TEXTENCODING_ASCII_US
},
812 { "ISO646-US", RTL_TEXTENCODING_ASCII_US
},
813 { "US", RTL_TEXTENCODING_ASCII_US
},
814 { "IBM367", RTL_TEXTENCODING_ASCII_US
},
815 { "CP367", RTL_TEXTENCODING_ASCII_US
},
816 { "CSASCII", RTL_TEXTENCODING_ASCII_US
},
817 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1
},
818 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1
},
819 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1
},
820 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1
},
821 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1
},
822 { "L1", RTL_TEXTENCODING_ISO_8859_1
},
823 { "IBM819", RTL_TEXTENCODING_ISO_8859_1
},
824 { "CP819", RTL_TEXTENCODING_ISO_8859_1
},
825 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1
},
826 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2
},
827 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2
},
828 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2
},
829 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2
},
830 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2
},
831 { "L2", RTL_TEXTENCODING_ISO_8859_2
},
832 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2
},
833 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3
},
834 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3
},
835 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3
},
836 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3
},
837 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3
},
838 { "L3", RTL_TEXTENCODING_ISO_8859_3
},
839 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3
},
840 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4
},
841 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4
},
842 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4
},
843 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4
},
844 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4
},
845 { "L4", RTL_TEXTENCODING_ISO_8859_4
},
846 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4
},
847 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5
},
848 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5
},
849 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5
},
850 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5
},
851 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
852 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5
},
853 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6
},
854 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6
},
855 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6
},
856 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6
},
857 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6
},
858 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6
},
859 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6
},
860 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6
},
861 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7
},
862 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7
},
863 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7
},
864 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7
},
865 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7
},
866 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7
},
867 { "GREEK", RTL_TEXTENCODING_ISO_8859_7
},
868 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7
},
869 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7
},
870 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8
},
871 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8
},
872 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8
},
873 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8
},
874 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8
},
875 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8
},
876 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9
},
877 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9
},
878 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9
},
879 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9
},
880 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9
},
881 { "L5", RTL_TEXTENCODING_ISO_8859_9
},
882 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9
},
883 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14
}, // RFC 2047
884 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15
},
885 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15
}, // RFC 2047
886 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
887 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN
},
888 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN
},
889 { "IBM437", RTL_TEXTENCODING_IBM_437
},
890 { "CP437", RTL_TEXTENCODING_IBM_437
},
891 { "437", RTL_TEXTENCODING_IBM_437
},
892 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437
},
893 { "IBM850", RTL_TEXTENCODING_IBM_850
},
894 { "CP850", RTL_TEXTENCODING_IBM_850
},
895 { "850", RTL_TEXTENCODING_IBM_850
},
896 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850
},
897 { "IBM860", RTL_TEXTENCODING_IBM_860
},
898 { "CP860", RTL_TEXTENCODING_IBM_860
},
899 { "860", RTL_TEXTENCODING_IBM_860
},
900 { "CSIBM860", RTL_TEXTENCODING_IBM_860
},
901 { "IBM861", RTL_TEXTENCODING_IBM_861
},
902 { "CP861", RTL_TEXTENCODING_IBM_861
},
903 { "861", RTL_TEXTENCODING_IBM_861
},
904 { "CP-IS", RTL_TEXTENCODING_IBM_861
},
905 { "CSIBM861", RTL_TEXTENCODING_IBM_861
},
906 { "IBM863", RTL_TEXTENCODING_IBM_863
},
907 { "CP863", RTL_TEXTENCODING_IBM_863
},
908 { "863", RTL_TEXTENCODING_IBM_863
},
909 { "CSIBM863", RTL_TEXTENCODING_IBM_863
},
910 { "IBM865", RTL_TEXTENCODING_IBM_865
},
911 { "CP865", RTL_TEXTENCODING_IBM_865
},
912 { "865", RTL_TEXTENCODING_IBM_865
},
913 { "CSIBM865", RTL_TEXTENCODING_IBM_865
},
914 { "IBM775", RTL_TEXTENCODING_IBM_775
},
915 { "CP775", RTL_TEXTENCODING_IBM_775
},
916 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775
},
917 { "IBM852", RTL_TEXTENCODING_IBM_852
},
918 { "CP852", RTL_TEXTENCODING_IBM_852
},
919 { "852", RTL_TEXTENCODING_IBM_852
},
920 { "CSPCP852", RTL_TEXTENCODING_IBM_852
},
921 { "IBM855", RTL_TEXTENCODING_IBM_855
},
922 { "CP855", RTL_TEXTENCODING_IBM_855
},
923 { "855", RTL_TEXTENCODING_IBM_855
},
924 { "CSIBM855", RTL_TEXTENCODING_IBM_855
},
925 { "IBM857", RTL_TEXTENCODING_IBM_857
},
926 { "CP857", RTL_TEXTENCODING_IBM_857
},
927 { "857", RTL_TEXTENCODING_IBM_857
},
928 { "CSIBM857", RTL_TEXTENCODING_IBM_857
},
929 { "IBM862", RTL_TEXTENCODING_IBM_862
},
930 { "CP862", RTL_TEXTENCODING_IBM_862
},
931 { "862", RTL_TEXTENCODING_IBM_862
},
932 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862
},
933 { "IBM864", RTL_TEXTENCODING_IBM_864
},
934 { "CP864", RTL_TEXTENCODING_IBM_864
},
935 { "CSIBM864", RTL_TEXTENCODING_IBM_864
},
936 { "IBM866", RTL_TEXTENCODING_IBM_866
},
937 { "CP866", RTL_TEXTENCODING_IBM_866
},
938 { "866", RTL_TEXTENCODING_IBM_866
},
939 { "CSIBM866", RTL_TEXTENCODING_IBM_866
},
940 { "IBM869", RTL_TEXTENCODING_IBM_869
},
941 { "CP869", RTL_TEXTENCODING_IBM_869
},
942 { "869", RTL_TEXTENCODING_IBM_869
},
943 { "CP-GR", RTL_TEXTENCODING_IBM_869
},
944 { "CSIBM869", RTL_TEXTENCODING_IBM_869
},
945 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250
},
946 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251
},
947 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253
},
948 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254
},
949 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255
},
950 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256
},
951 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257
},
952 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258
},
953 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS
},
954 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS
},
955 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS
},
956 { "GB2312", RTL_TEXTENCODING_GB_2312
},
957 { "CSGB2312", RTL_TEXTENCODING_GB_2312
},
958 { "BIG5", RTL_TEXTENCODING_BIG5
},
959 { "CSBIG5", RTL_TEXTENCODING_BIG5
},
960 { "EUC-JP", RTL_TEXTENCODING_EUC_JP
},
961 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
962 RTL_TEXTENCODING_EUC_JP
},
963 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP
},
964 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP
},
965 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP
},
966 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN
},
967 { "KOI8-R", RTL_TEXTENCODING_KOI8_R
},
968 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R
},
969 { "UTF-7", RTL_TEXTENCODING_UTF7
},
970 { "UTF-8", RTL_TEXTENCODING_UTF8
},
971 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10
}, // RFC 2047
972 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13
}, // RFC 2047
973 { "EUC-KR", RTL_TEXTENCODING_EUC_KR
},
974 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR
},
975 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR
},
976 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR
},
977 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4
},
978 { "CSUCS4", RTL_TEXTENCODING_UCS4
},
979 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2
},
980 { "CSUNICODE", RTL_TEXTENCODING_UCS2
} };
982 rtl_TextEncoding
getCharsetEncoding(sal_Char
const * pBegin
,
983 sal_Char
const * pEnd
)
985 for (const EncodingEntry
& i
: aEncodingMap
)
986 if (equalIgnoreCase(pBegin
, pEnd
, i
.m_aName
))
987 return i
.m_eEncoding
;
988 return RTL_TEXTENCODING_DONTKNOW
;
996 bool INetMIME::isAtomChar(sal_uInt32 nChar
)
998 static const bool aMap
[128]
999 = { false, false, false, false, false, false, false, false,
1000 false, false, false, false, false, false, false, false,
1001 false, false, false, false, false, false, false, false,
1002 false, false, false, false, false, false, false, false,
1003 false, true, false, true, true, true, true, true, // !"#$%&'
1004 false, false, true, true, false, true, false, true, //()*+,-./
1005 true, true, true, true, true, true, true, true, //01234567
1006 true, true, false, false, false, true, false, true, //89:;<=>?
1007 false, true, true, true, true, true, true, true, //@ABCDEFG
1008 true, true, true, true, true, true, true, true, //HIJKLMNO
1009 true, true, true, true, true, true, true, true, //PQRSTUVW
1010 true, true, true, false, false, false, true, true, //XYZ[\]^_
1011 true, true, true, true, true, true, true, true, //`abcdefg
1012 true, true, true, true, true, true, true, true, //hijklmno
1013 true, true, true, true, true, true, true, true, //pqrstuvw
1014 true, true, true, true, true, true, true, false //xyz{|}~
1016 return rtl::isAscii(nChar
) && aMap
[nChar
];
1020 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar
)
1022 static const bool aMap
[128]
1023 = { false, false, false, false, false, false, false, false,
1024 false, false, false, false, false, false, false, false,
1025 false, false, false, false, false, false, false, false,
1026 false, false, false, false, false, false, false, false,
1027 false, true, false, true, true, false, true, true, // !"#$%&'
1028 false, false, false, true, true, true, true, true, //()*+,-./
1029 true, true, true, true, true, true, true, true, //01234567
1030 true, true, true, true, true, true, true, true, //89:;<=>?
1031 true, true, true, true, true, true, true, true, //@ABCDEFG
1032 true, true, true, true, true, true, true, true, //HIJKLMNO
1033 true, true, true, true, true, true, true, true, //PQRSTUVW
1034 true, true, true, true, false, true, true, true, //XYZ[\]^_
1035 true, true, true, true, true, true, true, true, //`abcdefg
1036 true, true, true, true, true, true, true, true, //hijklmno
1037 true, true, true, true, true, true, true, true, //pqrstuvw
1038 true, true, true, false, true, true, true, false //xyz{|}~
1040 return rtl::isAscii(nChar
) && aMap
[nChar
];
1044 bool INetMIME::equalIgnoreCase(const sal_Unicode
* pBegin1
,
1045 const sal_Unicode
* pEnd1
,
1046 const sal_Char
* pString2
)
1048 DBG_ASSERT(pBegin1
&& pBegin1
<= pEnd1
&& pString2
,
1049 "INetMIME::equalIgnoreCase(): Bad sequences");
1051 while (*pString2
!= 0)
1052 if (pBegin1
== pEnd1
1053 || (rtl::toAsciiUpperCase(*pBegin1
++)
1054 != rtl::toAsciiUpperCase(
1055 static_cast<unsigned char>(*pString2
++))))
1057 return pBegin1
== pEnd1
;
1061 bool INetMIME::scanUnsigned(const sal_Unicode
*& rBegin
,
1062 const sal_Unicode
* pEnd
, bool bLeadingZeroes
,
1063 sal_uInt32
& rValue
)
1065 sal_uInt64 nTheValue
= 0;
1066 const sal_Unicode
* p
= rBegin
;
1067 for ( ; p
!= pEnd
; ++p
)
1069 int nWeight
= getWeight(*p
);
1072 nTheValue
= 10 * nTheValue
+ nWeight
;
1073 if (nTheValue
> std::numeric_limits
< sal_uInt32
>::max())
1076 if (nTheValue
== 0 && (p
== rBegin
|| (!bLeadingZeroes
&& p
- rBegin
!= 1)))
1079 rValue
= sal_uInt32(nTheValue
);
/** Scan a media type of the form  type "/" subtype  and hand the rest of
    the text to scanParameters().

    Linear white space and comments are skipped (via
    skipLinearWhiteSpaceComment()) before the type, on both sides of the
    '/', but not after the subtype.

    @param rStr  The text to scan.

    @param pType  If not null, receives the type token converted to ASCII
    lower case.

    @param pSubType  If not null, receives the subtype token converted to
    ASCII lower case.

    @param pParameters  Passed through unchanged to scanParameters().

    @return  On the visible success path, whatever scanParameters() returns
    for the text following the subtype.  What is returned when the type, the
    '/' or the subtype is missing is not visible in this excerpt (presumably
    a null pointer -- TODO confirm).
 */
1084 sal_Unicode
const * INetMIME::scanContentType(
1085 OUString
const & rStr
, OUString
* pType
,
1086 OUString
* pSubType
, INetContentTypeParameterList
* pParameters
)
1088 sal_Unicode
const * pBegin
= rStr
.getStr();
1089 sal_Unicode
const * pEnd
= pBegin
+ rStr
.getLength();
1090 sal_Unicode
const * p
= skipLinearWhiteSpaceComment(pBegin
, pEnd
);
// Type token: a run of token characters starting at pTypeBegin (the loop
// body is not visible; presumably it advances p).
1091 sal_Unicode
const * pTypeBegin
= p
;
1092 while (p
!= pEnd
&& isTokenChar(*p
))
// An empty type token is singled out here.
1096 if (p
== pTypeBegin
)
1098 sal_Unicode
const * pTypeEnd
= p
;
1100 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
// The type must be followed by '/'; the post-increment steps over it.
1101 if (p
== pEnd
|| *p
++ != '/')
1104 p
= skipLinearWhiteSpaceComment(p
, pEnd
);
// Subtype token, scanned the same way as the type token.
1105 sal_Unicode
const * pSubTypeBegin
= p
;
1106 while (p
!= pEnd
&& isTokenChar(*p
))
1110 if (p
== pSubTypeBegin
)
1112 sal_Unicode
const * pSubTypeEnd
= p
;
// Both outputs are optional and are only written once both tokens have been
// scanned; each is stored in ASCII lower case.
1114 if (pType
!= nullptr)
1116 *pType
= OUString(pTypeBegin
, pTypeEnd
- pTypeBegin
).toAsciiLowerCase();
1118 if (pSubType
!= nullptr)
1120 *pSubType
= OUString(pSubTypeBegin
, pSubTypeEnd
- pSubTypeBegin
)
1121 .toAsciiLowerCase();
// Everything after the subtype is treated as the parameter list.
1124 return scanParameters(p
, pEnd
, pParameters
);
/** Decode a header field body that may contain MIME encoded words of the
    form "=?charset[*language]?B|Q?encoded-text?=" (see the grammar in the
    comment below).

    Large parts of the control flow of this function are not visible in
    this excerpt; the comments added here describe only what the visible
    statements establish and mark everything else as an assumption.

    @param rBody  The raw header field body.

    @return  The contents of the sDecoded buffer: raw input flushed via
    appendISO88591(), text converted from recognised encoded words, and
    characters recognised by translateUTF8Char().
 */
1128 OUString
INetMIME::decodeHeaderFieldBody(const OString
& rBody
)
1130 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1131 // versions of StarOffice send mails with header fields where encoded
1132 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1133 // '=', ',', '.', '"', ')', without any required white space in between.
1134 // And there appear to exist some broken mailers that only encode single
1135 // letters within words, like "Appel
1136 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1137 // detect encoded words even when not properly surrounded by white space.
1139 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1141 // encoded-word = "=?"
1142 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
1143 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1144 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1145 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1148 // base64 = ALPHA / DIGIT / "+" / "/"
1150 const sal_Char
* pBegin
= rBody
.getStr();
1151 const sal_Char
* pEnd
= pBegin
+ rBody
.getLength();
1153 OUStringBuffer sDecoded
;
// Start of the raw input that has not yet been flushed to sDecoded (see the
// appendISO88591() calls below).
1154 const sal_Char
* pCopyBegin
= pBegin
;
1156 /* bool bStartEncodedWord = true; */
// Used as the end of the raw input flushed in front of a decoded encoded
// word; where it is updated is not visible here.
1157 const sal_Char
* pWSPBegin
= pBegin
;
// p is advanced inside the loop body, not in the loop header.
1159 for (const sal_Char
* p
= pBegin
; p
!= pEnd
;)
1161 OUString sEncodedText
;
// A '=' may start an encoded word.  q scans ahead from p, and bEncodedWord
// is cleared as soon as the candidate stops matching the grammar.
1162 if (*p
== '=' /* && bStartEncodedWord */)
1164 const sal_Char
* q
= p
+ 1;
1165 bool bEncodedWord
= q
!= pEnd
&& *q
++ == '?';
1167 rtl_TextEncoding eCharsetEncoding
= RTL_TEXTENCODING_DONTKNOW
;
// Charset name, optionally followed by a language tag.  pLanguageBegin is
// presumably set when the '*' of the grammar above is met -- TODO confirm;
// the case labels are not visible here.
1170 const sal_Char
* pCharsetBegin
= q
;
1171 const sal_Char
* pLanguageBegin
= nullptr;
1172 int nAlphaCount
= 0;
1173 for (bool bDone
= false; !bDone
;)
1176 bEncodedWord
= false;
1181 sal_Char cChar
= *q
++;
1185 pLanguageBegin
= q
- 1;
1190 if (pLanguageBegin
!= nullptr)
// A language tag part without any letters invalidates the language tag.
1192 if (nAlphaCount
== 0)
1193 pLanguageBegin
= nullptr;
// q - 1 is the character just consumed, so this is the empty charset name.
1200 if (pCharsetBegin
== q
- 1)
1201 bEncodedWord
= false;
// The charset name ends at the language tag if a usable one was seen,
// otherwise right before the character just consumed.
1205 = getCharsetEncoding(
1207 pLanguageBegin
== nullptr
1208 || nAlphaCount
== 0 ?
1209 q
- 1 : pLanguageBegin
);
1210 bEncodedWord
= isMIMECharsetEncoding(
1213 = translateFromMIME(eCharsetEncoding
);
// Inside a language tag, a non-letter or a run of more than eight letters
// invalidates the language tag.
1219 if (pLanguageBegin
!= nullptr
1220 && (!rtl::isAsciiAlpha(
1221 static_cast<unsigned char>(cChar
))
1222 || ++nAlphaCount
> 8))
1223 pLanguageBegin
= nullptr;
// Selects between the "B" and "Q" encodings of the grammar above
// (presumably; the code that sets it is not visible here).
1229 bool bEncodingB
= false;
1233 bEncodedWord
= false;
1249 bEncodedWord
= false;
// The encoding letter has to be followed by '?'.
1255 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '?';
// Raw bytes of the decoded payload, before conversion to Unicode.
1257 OStringBuffer sText
;
1262 for (bool bDone
= false; !bDone
;)
1266 bEncodedWord
= false;
1271 bool bFinal
= false;
// One Base 64 quantum: up to four 6-bit digits are packed into the low 24
// bits of nValue, most significant digit first.
1273 sal_uInt32 nValue
= 0;
1274 for (int nShift
= 18; nShift
>= 0; nShift
-= 6)
1276 int nWeight
= getBase64Weight(*q
++);
1279 bEncodedWord
= false;
1289 bEncodedWord
= false;
// nCount is the number of bytes emitted for this quantum by the loop below;
// this assignment is presumably the '=' padding case -- TODO confirm.
1294 nCount
= nShift
== 6 ? 1 : 2;
1298 nValue
|= nWeight
<< nShift
;
// Emit nCount bytes of the 24-bit quantum, most significant byte first.
1302 for (int nShift
= 16; nCount
-- > 0; nShift
-= 8)
1303 sText
.append(sal_Char(nValue
>> nShift
& 0xFF));
1309 if (bFinal
&& !bDone
)
1311 bEncodedWord
= false;
// "Q" payload (presumably; the branch itself is not visible here).
// pEncodedTextCopyBegin marks the start of the literal run that has not yet
// been copied to sText.
1320 const sal_Char
* pEncodedTextBegin
= q
;
1321 const sal_Char
* pEncodedTextCopyBegin
= q
;
1322 for (bool bDone
= false; !bDone
;)
1325 bEncodedWord
= false;
1330 sal_uInt32 nChar
= *q
++;
1337 bEncodedWord
= false;
// Two hexadecimal digits encode one byte (the "=" 2HEXDIG form of the
// grammar above).
1341 int nDigit1
= getHexWeight(q
[0]);
1342 int nDigit2
= getHexWeight(q
[1]);
1343 if (nDigit1
< 0 || nDigit2
< 0)
1345 bEncodedWord
= false;
// Flush the literal run in front of the character just consumed (q - 1),
// then append the decoded byte.
1349 sText
.append(rBody
.copy(
1350 (pEncodedTextCopyBegin
- pBegin
),
1351 (q
- 1 - pEncodedTextCopyBegin
)));
1352 sText
.append(sal_Char(nDigit1
<< 4 | nDigit2
));
1354 pEncodedTextCopyBegin
= q
;
1359 if (q
- pEncodedTextBegin
> 1)
1360 sText
.append(rBody
.copy(
1361 (pEncodedTextCopyBegin
- pBegin
),
1362 (q
- 1 - pEncodedTextCopyBegin
)));
1364 bEncodedWord
= false;
1369 sText
.append(rBody
.copy(
1370 (pEncodedTextCopyBegin
- pBegin
),
1371 (q
- 1 - pEncodedTextCopyBegin
)));
1373 pEncodedTextCopyBegin
= q
;
1377 if (!isVisible(nChar
))
1379 bEncodedWord
= false;
// The encoded word has to be closed by '='.
1388 bEncodedWord
= bEncodedWord
&& q
!= pEnd
&& *q
++ == '=';
// Convert the payload bytes from the announced charset to Unicode; a null
// result buffer means the candidate is not treated as an encoded word.
1390 std::unique_ptr
<sal_Unicode
[]> pUnicodeBuffer
;
1391 sal_Size nUnicodeSize
= 0;
1395 = convertToUnicode(sText
.getStr(),
1396 sText
.getStr() + sText
.getLength(),
1397 eCharsetEncoding
, nUnicodeSize
);
1398 if (!pUnicodeBuffer
)
1399 bEncodedWord
= false;
// Flush the pending raw input up to pWSPBegin before the converted text is
// used.
1404 appendISO88591(sDecoded
, pCopyBegin
, pWSPBegin
);
1406 pUnicodeBuffer
.get(),
1407 static_cast< sal_Int32
>(nUnicodeSize
));
1408 pUnicodeBuffer
.reset();
// Skip a run of white space.
1413 while (p
!= pEnd
&& isWhiteSpace(*p
))
1415 /* bStartEncodedWord = p != pWSPBegin; */
1420 if (!sEncodedText
.isEmpty())
1421 sDecoded
.append(sEncodedText
);
1429 /* bStartEncodedWord = true; */
1433 /* bStartEncodedWord = true; */
1437 /* bStartEncodedWord = false; */
// Try to read a UTF-8 sequence starting at the character just consumed
// (p - 1).  On success the pending raw input in front of it is flushed and
// the character is appended as one or two UTF-16 code units.
// NOTE(review): this looks like UTF-8 is tried before the ISO-8859-1
// fallback mentioned in the comment at the top -- confirm.
1442 const sal_Char
* pUTF8Begin
= p
- 1;
1443 const sal_Char
* pUTF8End
= pUTF8Begin
;
1444 sal_uInt32 nCharacter
= 0;
1445 if (translateUTF8Char(pUTF8End
, pEnd
, RTL_TEXTENCODING_UCS4
,
1448 appendISO88591(sDecoded
, pCopyBegin
, p
- 1);
1449 sal_Unicode aUTF16Buf
[2];
1450 sal_Int32 nUTF16Len
= putUTF32Character(aUTF16Buf
, nCharacter
) - aUTF16Buf
;
1451 sDecoded
.append(aUTF16Buf
, nUTF16Len
);
1455 /* bStartEncodedWord = false; */
// Flush whatever raw input is still pending and hand out the result.
1462 appendISO88591(sDecoded
, pCopyBegin
, pEnd
);
1463 return sDecoded
.makeStringAndClear();
1466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */