Bump version to 6.4-15
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blobf974c911013c26e0608bd4f581375aeab31066bf
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
33 namespace {
35 rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
36 const sal_Char * pEnd);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
43 0x09 or 0x20).
45 bool isWhiteSpace(sal_uInt32 nChar)
47 return nChar == '\t' || nChar == ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 int getBase64Weight(sal_uInt32 nChar)
61 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64 nChar == '+' ? 62 :
65 nChar == '/' ? 63 :
66 nChar == '=' ? -1 : -2;
69 bool startsWithLineFolding(const sal_Unicode * pBegin,
70 const sal_Unicode * pEnd)
72 DBG_ASSERT(pBegin && pBegin <= pEnd,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
82 #if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86 return eEncoding;
87 #endif
90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const sal_Char * pBegin,
96 const sal_Char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
111 pBuffer.reset(new sal_Unicode[nBufferSize]);
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 pBuffer.reset();
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
129 pBuffer.reset();
131 return pBuffer;
134 std::unique_ptr<sal_Char[]> convertFromUnicode(const sal_Unicode * pBegin,
135 const sal_Unicode * pEnd,
136 rtl_TextEncoding eEncoding,
137 sal_Size & rSize)
139 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
140 return nullptr;
141 rtl_UnicodeToTextConverter hConverter
142 = rtl_createUnicodeToTextConverter(eEncoding);
143 rtl_UnicodeToTextContext hContext
144 = rtl_createUnicodeToTextContext(hConverter);
145 std::unique_ptr<sal_Char[]> pBuffer;
146 sal_uInt32 nInfo;
147 for (sal_Size nBufferSize = pEnd - pBegin;;
148 nBufferSize += nBufferSize / 3 + 1)
150 pBuffer.reset(new sal_Char[nBufferSize]);
151 sal_Size nSrcCvtBytes;
152 rSize = rtl_convertUnicodeToText(
153 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
154 nBufferSize,
155 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
156 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
157 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
158 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
159 &nInfo, &nSrcCvtBytes);
160 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
161 break;
162 pBuffer.reset();
163 rtl_resetUnicodeToTextContext(hConverter, hContext);
165 rtl_destroyUnicodeToTextContext(hConverter, hContext);
166 rtl_destroyUnicodeToTextConverter(hConverter);
167 if (nInfo != 0)
169 pBuffer.reset();
171 return pBuffer;
174 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
176 @param pBuffer Points to a buffer, must not be null.
178 @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
180 @return A pointer past the UTF-16 characters put into the buffer
181 (i.e., pBuffer + 1 or pBuffer + 2).
183 sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
184 sal_uInt32 nUTF32)
186 DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
187 if (nUTF32 < 0x10000)
188 *pBuffer++ = sal_Unicode(nUTF32);
189 else
191 nUTF32 -= 0x10000;
192 *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
193 *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
195 return pBuffer;
198 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
200 // See RFC 2279 for a discussion of UTF-8.
201 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
203 if (nChar < 0x80)
204 rSink.append(sal_Char(nChar));
205 else if (nChar < 0x800)
206 rSink.append(sal_Char(nChar >> 6 | 0xC0))
207 .append(sal_Char((nChar & 0x3F) | 0x80));
208 else if (nChar < 0x10000)
209 rSink.append(sal_Char(nChar >> 12 | 0xE0))
210 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
211 .append(sal_Char((nChar & 0x3F) | 0x80));
212 else if (nChar < 0x200000)
213 rSink.append(sal_Char(nChar >> 18 | 0xF0))
214 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
215 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
216 .append(sal_Char((nChar & 0x3F) | 0x80));
217 else if (nChar < 0x4000000)
218 rSink.append(sal_Char(nChar >> 24 | 0xF8))
219 .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
220 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
221 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
222 .append(sal_Char((nChar & 0x3F) | 0x80));
223 else
224 rSink.append(sal_Char(nChar >> 30 | 0xFC))
225 .append(sal_Char((nChar >> 24 & 0x3F) | 0x80))
226 .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
227 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
228 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
229 .append(sal_Char((nChar & 0x3F) | 0x80));
232 bool translateUTF8Char(const sal_Char *& rBegin,
233 const sal_Char * pEnd,
234 rtl_TextEncoding eEncoding,
235 sal_uInt32 & rCharacter)
237 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
238 || static_cast< unsigned char >(*rBegin) >= 0xFE)
239 return false;
241 int nCount;
242 sal_uInt32 nMin;
243 sal_uInt32 nUCS4;
244 const sal_Char * p = rBegin;
245 if (static_cast< unsigned char >(*p) < 0xE0)
247 nCount = 1;
248 nMin = 0x80;
249 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
251 else if (static_cast< unsigned char >(*p) < 0xF0)
253 nCount = 2;
254 nMin = 0x800;
255 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
257 else if (static_cast< unsigned char >(*p) < 0xF8)
259 nCount = 3;
260 nMin = 0x10000;
261 nUCS4 = static_cast< unsigned char >(*p) & 7;
263 else if (static_cast< unsigned char >(*p) < 0xFC)
265 nCount = 4;
266 nMin = 0x200000;
267 nUCS4 = static_cast< unsigned char >(*p) & 3;
269 else
271 nCount = 5;
272 nMin = 0x4000000;
273 nUCS4 = static_cast< unsigned char >(*p) & 1;
275 ++p;
277 for (; nCount-- > 0; ++p)
278 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
279 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
280 else
281 return false;
283 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
284 return false;
286 if (eEncoding >= RTL_TEXTENCODING_UCS4)
287 rCharacter = nUCS4;
288 else
290 sal_Unicode aUTF16[2];
291 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
292 sal_Size nSize;
293 std::unique_ptr<sal_Char[]> pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
294 nSize);
295 if (!pBuffer)
296 return false;
297 DBG_ASSERT(nSize == 1,
298 "translateUTF8Char(): Bad conversion");
299 rCharacter = pBuffer[0];
301 rBegin = p;
302 return true;
305 void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
306 sal_Char const * pEnd);
308 struct Parameter
310 OString const m_aAttribute;
311 OString const m_aCharset;
312 OString const m_aLanguage;
313 OString const m_aValue;
314 sal_uInt32 const m_nSection;
315 bool const m_bExtended;
317 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
319 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
320 return nComp < 0 ||
321 (nComp == 0 && m_nSection < rhs.m_nSection);
323 struct IsSameSection // is used to check container for duplicates with std::any_of
325 const OString& rAttribute;
326 const sal_uInt32 nSection;
327 bool operator()(const Parameter& r) const
328 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
332 typedef std::forward_list<Parameter> ParameterList;
334 bool parseParameters(ParameterList const & rInput,
335 INetContentTypeParameterList * pOutput);
337 // appendISO88591
339 void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
340 sal_Char const * pEnd)
342 sal_Int32 nLength = pEnd - pBegin;
343 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
344 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
345 *p++ = static_cast<unsigned char>(*pBegin++);
346 rText.append(pBuffer.get(), nLength);
349 // parseParameters
351 bool parseParameters(ParameterList const & rInput,
352 INetContentTypeParameterList * pOutput)
354 if (pOutput)
355 pOutput->clear();
357 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
359 if (it->m_nSection > 0
360 && (itPrev == rInput.end()
361 || itPrev->m_nSection != it->m_nSection - 1
362 || itPrev->m_aAttribute != it->m_aAttribute))
363 return false;
366 if (pOutput)
367 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
369 bool bCharset = !it->m_aCharset.isEmpty();
370 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
371 if (bCharset)
372 eEncoding
373 = getCharsetEncoding(it->m_aCharset.getStr(),
374 it->m_aCharset.getStr()
375 + it->m_aCharset.getLength());
376 OUStringBuffer aValue(64);
377 bool bBadEncoding = false;
378 itNext = it;
381 sal_Size nSize;
382 std::unique_ptr<sal_Unicode[]> pUnicode
383 = convertToUnicode(itNext->m_aValue.getStr(),
384 itNext->m_aValue.getStr()
385 + itNext->m_aValue.getLength(),
386 bCharset && it->m_bExtended ?
387 eEncoding :
388 RTL_TEXTENCODING_UTF8,
389 nSize);
390 if (!pUnicode && !(bCharset && it->m_bExtended))
391 pUnicode = convertToUnicode(
392 itNext->m_aValue.getStr(),
393 itNext->m_aValue.getStr()
394 + itNext->m_aValue.getLength(),
395 RTL_TEXTENCODING_ISO_8859_1, nSize);
396 if (!pUnicode)
398 bBadEncoding = true;
399 break;
401 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
402 ++itNext;
404 while (itNext != rInput.end() && itNext->m_nSection != 0);
406 if (bBadEncoding)
408 aValue.setLength(0);
409 itNext = it;
412 if (itNext->m_bExtended)
414 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
415 aValue.append(
416 static_cast<sal_Unicode>(
417 static_cast<unsigned char>(itNext->m_aValue[i])
418 | 0xF800)); // map to unicode corporate use sub area
420 else
422 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
423 aValue.append( static_cast<char>(itNext->m_aValue[i]) );
425 ++itNext;
427 while (itNext != rInput.end() && itNext->m_nSection != 0);
429 auto const ret = pOutput->insert(
430 {it->m_aAttribute,
431 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
432 SAL_INFO_IF(!ret.second, "tools",
433 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
435 return true;
438 /** Check whether some character is valid within an RFC 2045 <token>.
440 @param nChar Some UCS-4 character.
442 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
443 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
444 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
446 bool isTokenChar(sal_uInt32 nChar)
448 static const bool aMap[128]
449 = { false, false, false, false, false, false, false, false,
450 false, false, false, false, false, false, false, false,
451 false, false, false, false, false, false, false, false,
452 false, false, false, false, false, false, false, false,
453 false, true, false, true, true, true, true, true, // !"#$%&'
454 false, false, true, true, false, true, true, false, //()*+,-./
455 true, true, true, true, true, true, true, true, //01234567
456 true, true, false, false, false, false, false, false, //89:;<=>?
457 false, true, true, true, true, true, true, true, //@ABCDEFG
458 true, true, true, true, true, true, true, true, //HIJKLMNO
459 true, true, true, true, true, true, true, true, //PQRSTUVW
460 true, true, true, false, false, false, true, true, //XYZ[\]^_
461 true, true, true, true, true, true, true, true, //`abcdefg
462 true, true, true, true, true, true, true, true, //hijklmno
463 true, true, true, true, true, true, true, true, //pqrstuvw
464 true, true, true, true, true, true, true, false //xyz{|}~
466 return rtl::isAscii(nChar) && aMap[nChar];
469 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
470 const sal_Unicode * pEnd)
472 DBG_ASSERT(pBegin && pBegin <= pEnd,
473 "skipComment(): Bad sequence");
475 if (pBegin != pEnd && *pBegin == '(')
477 sal_uInt32 nLevel = 0;
478 for (const sal_Unicode * p = pBegin; p != pEnd;)
479 switch (*p++)
481 case '(':
482 ++nLevel;
483 break;
485 case ')':
486 if (--nLevel == 0)
487 return p;
488 break;
490 case '\\':
491 if (p != pEnd)
492 ++p;
493 break;
496 return pBegin;
499 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
500 pBegin,
501 const sal_Unicode *
502 pEnd)
504 DBG_ASSERT(pBegin && pBegin <= pEnd,
505 "skipLinearWhiteSpaceComment(): Bad sequence");
507 while (pBegin != pEnd)
508 switch (*pBegin)
510 case '\t':
511 case ' ':
512 ++pBegin;
513 break;
515 case 0x0D: // CR
516 if (startsWithLineFolding(pBegin, pEnd))
517 pBegin += 3;
518 else
519 return pBegin;
520 break;
522 case '(':
524 const sal_Unicode * p = skipComment(pBegin, pEnd);
525 if (p == pBegin)
526 return pBegin;
527 pBegin = p;
528 break;
531 default:
532 return pBegin;
534 return pBegin;
537 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
538 const sal_Unicode * pEnd)
540 DBG_ASSERT(pBegin && pBegin <= pEnd,
541 "skipQuotedString(): Bad sequence");
543 if (pBegin != pEnd && *pBegin == '"')
544 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
545 switch (*p++)
547 case 0x0D: // CR
548 if (pEnd - p < 2 || *p++ != 0x0A // LF
549 || !isWhiteSpace(*p++))
550 return pBegin;
551 break;
553 case '"':
554 return p;
556 case '\\':
557 if (p != pEnd)
558 ++p;
559 break;
561 return pBegin;
564 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
565 sal_Unicode const * pEnd,
566 INetContentTypeParameterList *
567 pParameters)
569 ParameterList aList;
570 sal_Unicode const * pParameterBegin = pBegin;
571 for (sal_Unicode const * p = pParameterBegin;;)
573 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
574 if (pParameterBegin == pEnd || *pParameterBegin != ';')
575 break;
576 p = pParameterBegin + 1;
578 sal_Unicode const * pAttributeBegin
579 = skipLinearWhiteSpaceComment(p, pEnd);
580 p = pAttributeBegin;
581 bool bDowncaseAttribute = false;
582 while (p != pEnd && isTokenChar(*p) && *p != '*')
584 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
585 ++p;
587 if (p == pAttributeBegin)
588 break;
589 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
590 if (bDowncaseAttribute)
591 aAttribute = aAttribute.toAsciiLowerCase();
593 sal_uInt32 nSection = 0;
594 if (p != pEnd && *p == '*')
596 ++p;
597 if (p != pEnd && rtl::isAsciiDigit(*p)
598 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
599 break;
602 bool bPresent = std::any_of(aList.begin(), aList.end(),
603 Parameter::IsSameSection{aAttribute, nSection});
604 if (bPresent)
605 break;
607 bool bExtended = false;
608 if (p != pEnd && *p == '*')
610 ++p;
611 bExtended = true;
614 p = skipLinearWhiteSpaceComment(p, pEnd);
616 if (p == pEnd || *p != '=')
617 break;
619 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
621 OString aCharset;
622 OString aLanguage;
623 OString aValue;
624 if (bExtended)
626 if (nSection == 0)
628 sal_Unicode const * pCharsetBegin = p;
629 bool bDowncaseCharset = false;
630 while (p != pEnd && isTokenChar(*p) && *p != '\'')
632 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
633 ++p;
635 if (p == pCharsetBegin)
636 break;
637 if (pParameters)
639 aCharset = OString(
640 pCharsetBegin,
641 p - pCharsetBegin,
642 RTL_TEXTENCODING_ASCII_US);
643 if (bDowncaseCharset)
644 aCharset = aCharset.toAsciiLowerCase();
647 if (p == pEnd || *p != '\'')
648 break;
649 ++p;
651 sal_Unicode const * pLanguageBegin = p;
652 bool bDowncaseLanguage = false;
653 int nLetters = 0;
654 for (; p != pEnd; ++p)
655 if (rtl::isAsciiAlpha(*p))
657 if (++nLetters > 8)
658 break;
659 bDowncaseLanguage = bDowncaseLanguage
660 || rtl::isAsciiUpperCase(*p);
662 else if (*p == '-')
664 if (nLetters == 0)
665 break;
666 nLetters = 0;
668 else
669 break;
670 if (nLetters == 0 || nLetters > 8)
671 break;
672 if (pParameters)
674 aLanguage = OString(
675 pLanguageBegin,
676 p - pLanguageBegin,
677 RTL_TEXTENCODING_ASCII_US);
678 if (bDowncaseLanguage)
679 aLanguage = aLanguage.toAsciiLowerCase();
682 if (p == pEnd || *p != '\'')
683 break;
684 ++p;
686 if (pParameters)
688 OStringBuffer aSink;
689 while (p != pEnd)
691 auto q = p;
692 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
693 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
694 break;
695 p = q;
696 if (nChar == '%' && p + 1 < pEnd)
698 int nWeight1 = INetMIME::getHexWeight(p[0]);
699 int nWeight2 = INetMIME::getHexWeight(p[1]);
700 if (nWeight1 >= 0 && nWeight2 >= 0)
702 aSink.append(sal_Char(nWeight1 << 4 | nWeight2));
703 p += 2;
704 continue;
707 writeUTF8(aSink, nChar);
709 aValue = aSink.makeStringAndClear();
711 else
712 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
713 ++p;
715 else if (p != pEnd && *p == '"')
716 if (pParameters)
718 OStringBuffer aSink(256);
719 bool bInvalid = false;
720 for (++p;;)
722 if (p == pEnd)
724 bInvalid = true;
725 break;
727 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
728 if (nChar == '"')
729 break;
730 else if (nChar == 0x0D) // CR
732 if (pEnd - p < 2 || *p++ != 0x0A // LF
733 || !isWhiteSpace(*p))
735 bInvalid = true;
736 break;
738 nChar = static_cast<unsigned char>(*p++);
740 else if (nChar == '\\')
742 if (p == pEnd)
744 bInvalid = true;
745 break;
747 nChar = INetMIME::getUTF32Character(p, pEnd);
749 writeUTF8(aSink, nChar);
751 if (bInvalid)
752 break;
753 aValue = aSink.makeStringAndClear();
755 else
757 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
758 if (p == pStringEnd)
759 break;
760 p = pStringEnd;
762 else
764 sal_Unicode const * pTokenBegin = p;
765 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
766 ++p;
767 if (p == pTokenBegin)
768 break;
769 if (pParameters)
770 aValue = OString(
771 pTokenBegin, p - pTokenBegin,
772 RTL_TEXTENCODING_UTF8);
774 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
776 aList.sort();
777 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
780 bool equalIgnoreCase(const sal_Char * pBegin1,
781 const sal_Char * pEnd1,
782 const sal_Char * pString2)
784 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
785 "equalIgnoreCase(): Bad sequences");
787 while (*pString2 != 0)
788 if (pBegin1 == pEnd1
789 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
790 != rtl::toAsciiUpperCase(
791 static_cast<unsigned char>(*pString2++))))
792 return false;
793 return pBegin1 == pEnd1;
796 struct EncodingEntry
798 sal_Char const * m_aName;
799 rtl_TextEncoding const m_eEncoding;
802 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
803 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
804 // noted:
805 static EncodingEntry const aEncodingMap[]
806 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
807 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
808 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
809 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
810 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
811 { "ASCII", RTL_TEXTENCODING_ASCII_US },
812 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
813 { "US", RTL_TEXTENCODING_ASCII_US },
814 { "IBM367", RTL_TEXTENCODING_ASCII_US },
815 { "CP367", RTL_TEXTENCODING_ASCII_US },
816 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
817 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
818 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
819 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
820 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
821 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
822 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
823 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
824 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
825 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
826 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
827 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
828 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
829 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
830 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
831 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
832 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
833 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
834 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
835 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
836 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
837 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
838 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
839 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
840 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
841 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
842 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
843 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
844 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
845 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
846 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
847 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
848 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
849 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
850 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
851 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
852 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
853 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
854 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
855 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
856 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
857 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
858 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
859 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
860 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
861 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
862 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
863 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
864 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
865 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
866 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
867 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
868 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
869 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
870 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
871 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
872 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
873 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
874 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
875 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
876 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
877 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
878 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
879 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
880 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
881 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
882 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
883 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
884 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
885 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
886 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
887 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
888 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
889 { "IBM437", RTL_TEXTENCODING_IBM_437 },
890 { "CP437", RTL_TEXTENCODING_IBM_437 },
891 { "437", RTL_TEXTENCODING_IBM_437 },
892 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
893 { "IBM850", RTL_TEXTENCODING_IBM_850 },
894 { "CP850", RTL_TEXTENCODING_IBM_850 },
895 { "850", RTL_TEXTENCODING_IBM_850 },
896 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
897 { "IBM860", RTL_TEXTENCODING_IBM_860 },
898 { "CP860", RTL_TEXTENCODING_IBM_860 },
899 { "860", RTL_TEXTENCODING_IBM_860 },
900 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
901 { "IBM861", RTL_TEXTENCODING_IBM_861 },
902 { "CP861", RTL_TEXTENCODING_IBM_861 },
903 { "861", RTL_TEXTENCODING_IBM_861 },
904 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
905 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
906 { "IBM863", RTL_TEXTENCODING_IBM_863 },
907 { "CP863", RTL_TEXTENCODING_IBM_863 },
908 { "863", RTL_TEXTENCODING_IBM_863 },
909 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
910 { "IBM865", RTL_TEXTENCODING_IBM_865 },
911 { "CP865", RTL_TEXTENCODING_IBM_865 },
912 { "865", RTL_TEXTENCODING_IBM_865 },
913 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
914 { "IBM775", RTL_TEXTENCODING_IBM_775 },
915 { "CP775", RTL_TEXTENCODING_IBM_775 },
916 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
917 { "IBM852", RTL_TEXTENCODING_IBM_852 },
918 { "CP852", RTL_TEXTENCODING_IBM_852 },
919 { "852", RTL_TEXTENCODING_IBM_852 },
920 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
921 { "IBM855", RTL_TEXTENCODING_IBM_855 },
922 { "CP855", RTL_TEXTENCODING_IBM_855 },
923 { "855", RTL_TEXTENCODING_IBM_855 },
924 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
925 { "IBM857", RTL_TEXTENCODING_IBM_857 },
926 { "CP857", RTL_TEXTENCODING_IBM_857 },
927 { "857", RTL_TEXTENCODING_IBM_857 },
928 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
929 { "IBM862", RTL_TEXTENCODING_IBM_862 },
930 { "CP862", RTL_TEXTENCODING_IBM_862 },
931 { "862", RTL_TEXTENCODING_IBM_862 },
932 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
933 { "IBM864", RTL_TEXTENCODING_IBM_864 },
934 { "CP864", RTL_TEXTENCODING_IBM_864 },
935 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
936 { "IBM866", RTL_TEXTENCODING_IBM_866 },
937 { "CP866", RTL_TEXTENCODING_IBM_866 },
938 { "866", RTL_TEXTENCODING_IBM_866 },
939 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
940 { "IBM869", RTL_TEXTENCODING_IBM_869 },
941 { "CP869", RTL_TEXTENCODING_IBM_869 },
942 { "869", RTL_TEXTENCODING_IBM_869 },
943 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
944 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
945 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
946 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
947 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
948 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
949 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
950 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
951 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
952 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
953 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
954 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
955 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
956 { "GB2312", RTL_TEXTENCODING_GB_2312 },
957 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
958 { "BIG5", RTL_TEXTENCODING_BIG5 },
959 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
960 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
961 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
962 RTL_TEXTENCODING_EUC_JP },
963 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
964 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
965 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
966 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
967 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
968 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
969 { "UTF-7", RTL_TEXTENCODING_UTF7 },
970 { "UTF-8", RTL_TEXTENCODING_UTF8 },
971 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
972 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
973 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
974 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
975 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
976 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
977 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
978 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
979 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
980 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
982 rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
983 sal_Char const * pEnd)
985 for (const EncodingEntry& i : aEncodingMap)
986 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
987 return i.m_eEncoding;
988 return RTL_TEXTENCODING_DONTKNOW;
993 // INetMIME
995 // static
996 bool INetMIME::isAtomChar(sal_uInt32 nChar)
998 static const bool aMap[128]
999 = { false, false, false, false, false, false, false, false,
1000 false, false, false, false, false, false, false, false,
1001 false, false, false, false, false, false, false, false,
1002 false, false, false, false, false, false, false, false,
1003 false, true, false, true, true, true, true, true, // !"#$%&'
1004 false, false, true, true, false, true, false, true, //()*+,-./
1005 true, true, true, true, true, true, true, true, //01234567
1006 true, true, false, false, false, true, false, true, //89:;<=>?
1007 false, true, true, true, true, true, true, true, //@ABCDEFG
1008 true, true, true, true, true, true, true, true, //HIJKLMNO
1009 true, true, true, true, true, true, true, true, //PQRSTUVW
1010 true, true, true, false, false, false, true, true, //XYZ[\]^_
1011 true, true, true, true, true, true, true, true, //`abcdefg
1012 true, true, true, true, true, true, true, true, //hijklmno
1013 true, true, true, true, true, true, true, true, //pqrstuvw
1014 true, true, true, true, true, true, true, false //xyz{|}~
1016 return rtl::isAscii(nChar) && aMap[nChar];
// static
/** Check whether a character may be part of an IMAP atom.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is a visible US-ASCII character (US-ASCII
    0x21--0x7E) other than '"', '%', '(', ')', '*', '\\', or '{'; false
    for everything else, including all non US-ASCII characters.
 */
bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
{
    // Control characters, space, DEL and everything beyond US-ASCII are
    // never IMAP atom characters.
    if (nChar <= ' ' || nChar >= 0x7F)
        return false;

    // Of the remaining visible characters only these are excluded.
    switch (nChar)
    {
        case '"':
        case '%':
        case '(':
        case ')':
        case '*':
        case '\\':
        case '{':
            return false;

        default:
            return true;
    }
}
1043 // static
1044 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
1045 const sal_Unicode * pEnd1,
1046 const sal_Char * pString2)
1048 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
1049 "INetMIME::equalIgnoreCase(): Bad sequences");
1051 while (*pString2 != 0)
1052 if (pBegin1 == pEnd1
1053 || (rtl::toAsciiUpperCase(*pBegin1++)
1054 != rtl::toAsciiUpperCase(
1055 static_cast<unsigned char>(*pString2++))))
1056 return false;
1057 return pBegin1 == pEnd1;
1060 // static
1061 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1062 const sal_Unicode * pEnd, bool bLeadingZeroes,
1063 sal_uInt32 & rValue)
1065 sal_uInt64 nTheValue = 0;
1066 const sal_Unicode * p = rBegin;
1067 for ( ; p != pEnd; ++p)
1069 int nWeight = getWeight(*p);
1070 if (nWeight < 0)
1071 break;
1072 nTheValue = 10 * nTheValue + nWeight;
1073 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1074 return false;
1076 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1077 return false;
1078 rBegin = p;
1079 rValue = sal_uInt32(nTheValue);
1080 return true;
// static
/** Scan a content type of the form "type/subtype" followed by optional
    parameters.

    Linear white space and comments are skipped (via
    skipLinearWhiteSpaceComment) before the type, around the '/', and
    before the subtype.

    @param rStr  The string to scan.

    @param pType  If not null, receives the type, converted to US-ASCII
    lower case.

    @param pSubType  If not null, receives the subtype, converted to
    US-ASCII lower case.

    @param pParameters  Passed on unchanged to scanParameters.

    @return  Null if type, '/', or subtype is missing; otherwise whatever
    scanParameters returns for the rest of the string.
 */
sal_Unicode const * INetMIME::scanContentType(
    OUString const & rStr, OUString * pType,
    OUString * pSubType, INetContentTypeParameterList * pParameters)
{
    sal_Unicode const * const pStrBegin = rStr.getStr();
    sal_Unicode const * const pEnd = pStrBegin + rStr.getLength();

    // type
    sal_Unicode const * p = skipLinearWhiteSpaceComment(pStrBegin, pEnd);
    sal_Unicode const * const pTypeBegin = p;
    for (; p != pEnd && isTokenChar(*p); ++p)
    {
    }
    sal_Unicode const * const pTypeEnd = p;
    if (pTypeEnd == pTypeBegin)
        return nullptr;

    // "/"
    p = skipLinearWhiteSpaceComment(p, pEnd);
    if (p == pEnd || *p != '/')
        return nullptr;
    ++p;

    // subtype
    p = skipLinearWhiteSpaceComment(p, pEnd);
    sal_Unicode const * const pSubTypeBegin = p;
    for (; p != pEnd && isTokenChar(*p); ++p)
    {
    }
    sal_Unicode const * const pSubTypeEnd = p;
    if (pSubTypeEnd == pSubTypeBegin)
        return nullptr;

    if (pType != nullptr)
    {
        OUString aType(pTypeBegin, pTypeEnd - pTypeBegin);
        *pType = aType.toAsciiLowerCase();
    }
    if (pSubType != nullptr)
    {
        OUString aSubType(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin);
        *pSubType = aSubType.toAsciiLowerCase();
    }

    return scanParameters(p, pEnd, pParameters);
}
// static
/** Decode the MIME encoded words ("=?charset?B|Q?text?=") contained in a
    header field body.

    Every syntactically valid encoded word whose charset is known is
    replaced by its decoded text.  White space between two directly
    adjacent encoded words is dropped.  All other bytes are copied: bytes
    for which translateUTF8Char succeeds are appended as the resulting
    character, everything else is appended via appendISO88591.  Anything
    that looks like the start of an encoded word but turns out to be
    malformed is copied verbatim.

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1.

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const sal_Char * pBegin = rBody.getStr();
    const sal_Char * pEnd = pBegin + rBody.getLength();

    OUStringBuffer sDecoded;
    // Start of the raw bytes that have not yet been appended to sDecoded.
    const sal_Char * pCopyBegin = pBegin;

    /* bool bStartEncodedWord = true; */
    // Start of the white space run directly preceding p (equal to p if
    // there is none); the raw copy stops here when an encoded word follows.
    const sal_Char * pWSPBegin = pBegin;

    for (const sal_Char * p = pBegin; p != pEnd;)
    {
        if (*p == '=' /* && bStartEncodedWord */)
        {
            // q is the look-ahead cursor; p is only moved once the whole
            // encoded word has been accepted.
            const sal_Char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            // Scan the charset and the optional "*language" suffix.
            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                const sal_Char * pCharsetBegin = q;
                const sal_Char * pLanguageBegin = nullptr;
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                {
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        sal_Char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                if (pLanguageBegin != nullptr)
                                {
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // The charset name ends at a well-formed
                                    // language suffix if there is one, and
                                    // at the '?' otherwise.
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                            pCharsetBegin,
                                            (pLanguageBegin == nullptr
                                             || nAlphaCount == 0) ?
                                                q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                        eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(
                                            static_cast<unsigned char>(cChar))
                                        || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
                }
            }

            // Scan the encoding letter.
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Decode the encoded text into raw bytes of the given charset.
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    // Base 64: consume groups of four characters.
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            bool bFinal = false;
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                // Widen via unsigned char so that bytes
                                // >= 0x80 are not sign-extended into values
                                // that are no UCS-4 characters.
                                int nWeight = getBase64Weight(
                                    static_cast<unsigned char>(*q++));
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is not allowed in the
                                        // first two positions of a group.
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(sal_Char(nValue >> nShift & 0xFF));
                                // q may equal pEnd here; this relies on the
                                // buffer returned by rBody.getStr() being
                                // NUL-terminated.
                                // NOTE(review): confirm.
                                if (*q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded group must be the last one.
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    // "Q" encoding: literal runs are copied in bulk, "=XX"
                    // and '_' are translated individually.
                    const sal_Char * pEncodedTextBegin = q;
                    const sal_Char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                    {
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            // Widen via unsigned char so that bytes >= 0x80
                            // stay within 0x80--0xFF.
                            sal_uInt32 nChar
                                = static_cast<unsigned char>(*q++);
                            switch (nChar)
                            {
                                case '=':
                                {
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(
                                        static_cast<unsigned char>(q[0]));
                                    int nDigit2 = getHexWeight(
                                        static_cast<unsigned char>(q[1]));
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(sal_Char(nDigit1 << 4 | nDigit2));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // The encoded text must not be empty.
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.copy(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(' ');
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                    }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer
                    = convertToUnicode(sText.getStr(),
                                       sText.getStr() + sText.getLength(),
                                       eCharsetEncoding, nUnicodeSize);
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Flush the pending raw bytes up to (but excluding) the
                // white space in front of this encoded word, then the
                // decoded text itself.
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded.append(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                pUnicodeBuffer.reset();
                p = q;
                pCopyBegin = p;

                // Skip following white space, remembering where it starts
                // so that it can be dropped if another encoded word follows.
                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        if (p == pEnd)
            break;

        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                const sal_Char * pUTF8Begin = p - 1;
                const sal_Char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
                                      nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sal_Unicode aUTF16Buf[2];
                    sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
                    sDecoded.append(aUTF16Buf, nUTF16Len);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        pWSPBegin = p;
    }

    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded.makeStringAndClear();
}
1466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */