Version 6.1.4.1, tag libreoffice-6.1.4.1
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blob0ec6bcdbb70ed5f35c99b3a30d4cc89c0d59cbeb
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <cstddef>
22 #include <limits>
23 #include <forward_list>
24 #include <memory>
26 #include <osl/diagnose.h>
27 #include <rtl/ustring.hxx>
28 #include <rtl/strbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
33 namespace {
35 rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
36 const sal_Char * pEnd);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
43 0x09 or 0x20).
45 inline bool isWhiteSpace(sal_uInt32 nChar)
47 return nChar == '\t' || nChar == ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 inline int getBase64Weight(sal_uInt32 nChar)
61 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64 nChar == '+' ? 62 :
65 nChar == '/' ? 63 :
66 nChar == '=' ? -1 : -2;
69 inline bool startsWithLineFolding(const sal_Unicode * pBegin,
70 const sal_Unicode * pEnd)
72 DBG_ASSERT(pBegin && pBegin <= pEnd,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
79 inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
82 #if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86 return eEncoding;
87 #endif
90 inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
95 sal_Unicode * convertToUnicode(const sal_Char * pBegin,
96 const sal_Char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 sal_Unicode * pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
111 pBuffer = new sal_Unicode[nBufferSize];
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 delete[] pBuffer;
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
129 delete[] pBuffer;
130 pBuffer = nullptr;
132 return pBuffer;
135 sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
136 const sal_Unicode * pEnd,
137 rtl_TextEncoding eEncoding,
138 sal_Size & rSize)
140 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
141 return nullptr;
142 rtl_UnicodeToTextConverter hConverter
143 = rtl_createUnicodeToTextConverter(eEncoding);
144 rtl_UnicodeToTextContext hContext
145 = rtl_createUnicodeToTextContext(hConverter);
146 sal_Char * pBuffer;
147 sal_uInt32 nInfo;
148 for (sal_Size nBufferSize = pEnd - pBegin;;
149 nBufferSize += nBufferSize / 3 + 1)
151 pBuffer = new sal_Char[nBufferSize];
152 sal_Size nSrcCvtBytes;
153 rSize = rtl_convertUnicodeToText(
154 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
155 nBufferSize,
156 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
157 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
158 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
159 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
160 &nInfo, &nSrcCvtBytes);
161 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
162 break;
163 delete[] pBuffer;
164 rtl_resetUnicodeToTextContext(hConverter, hContext);
166 rtl_destroyUnicodeToTextContext(hConverter, hContext);
167 rtl_destroyUnicodeToTextConverter(hConverter);
168 if (nInfo != 0)
170 delete[] pBuffer;
171 pBuffer = nullptr;
173 return pBuffer;
176 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
178 @param pBuffer Points to a buffer, must not be null.
180 @param nUTF32 An UTF-32 character, must be in the range 0..0x10FFFF.
182 @return A pointer past the UTF-16 characters put into the buffer
183 (i.e., pBuffer + 1 or pBuffer + 2).
185 inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
186 sal_uInt32 nUTF32)
188 DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
189 if (nUTF32 < 0x10000)
190 *pBuffer++ = sal_Unicode(nUTF32);
191 else
193 nUTF32 -= 0x10000;
194 *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
195 *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
197 return pBuffer;
200 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
202 // See RFC 2279 for a discussion of UTF-8.
203 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
205 if (nChar < 0x80)
206 rSink.append(sal_Char(nChar));
207 else if (nChar < 0x800)
208 rSink.append(sal_Char(nChar >> 6 | 0xC0))
209 .append(sal_Char((nChar & 0x3F) | 0x80));
210 else if (nChar < 0x10000)
211 rSink.append(sal_Char(nChar >> 12 | 0xE0))
212 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
213 .append(sal_Char((nChar & 0x3F) | 0x80));
214 else if (nChar < 0x200000)
215 rSink.append(sal_Char(nChar >> 18 | 0xF0))
216 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
217 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
218 .append(sal_Char((nChar & 0x3F) | 0x80));
219 else if (nChar < 0x4000000)
220 rSink.append(sal_Char(nChar >> 24 | 0xF8))
221 .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
222 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
223 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
224 .append(sal_Char((nChar & 0x3F) | 0x80));
225 else
226 rSink.append(sal_Char(nChar >> 30 | 0xFC))
227 .append(sal_Char((nChar >> 24 & 0x3F) | 0x80))
228 .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
229 .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
230 .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
231 .append(sal_Char((nChar & 0x3F) | 0x80));
234 bool translateUTF8Char(const sal_Char *& rBegin,
235 const sal_Char * pEnd,
236 rtl_TextEncoding eEncoding,
237 sal_uInt32 & rCharacter)
239 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
240 || static_cast< unsigned char >(*rBegin) >= 0xFE)
241 return false;
243 int nCount;
244 sal_uInt32 nMin;
245 sal_uInt32 nUCS4;
246 const sal_Char * p = rBegin;
247 if (static_cast< unsigned char >(*p) < 0xE0)
249 nCount = 1;
250 nMin = 0x80;
251 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
253 else if (static_cast< unsigned char >(*p) < 0xF0)
255 nCount = 2;
256 nMin = 0x800;
257 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
259 else if (static_cast< unsigned char >(*p) < 0xF8)
261 nCount = 3;
262 nMin = 0x10000;
263 nUCS4 = static_cast< unsigned char >(*p) & 7;
265 else if (static_cast< unsigned char >(*p) < 0xFC)
267 nCount = 4;
268 nMin = 0x200000;
269 nUCS4 = static_cast< unsigned char >(*p) & 3;
271 else
273 nCount = 5;
274 nMin = 0x4000000;
275 nUCS4 = static_cast< unsigned char >(*p) & 1;
277 ++p;
279 for (; nCount-- > 0; ++p)
280 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
281 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
282 else
283 return false;
285 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
286 return false;
288 if (eEncoding >= RTL_TEXTENCODING_UCS4)
289 rCharacter = nUCS4;
290 else
292 sal_Unicode aUTF16[2];
293 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
294 sal_Size nSize;
295 sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
296 nSize);
297 if (!pBuffer)
298 return false;
299 DBG_ASSERT(nSize == 1,
300 "translateUTF8Char(): Bad conversion");
301 rCharacter = *pBuffer;
302 delete[] pBuffer;
304 rBegin = p;
305 return true;
308 void appendISO88591(OUString & rText, sal_Char const * pBegin,
309 sal_Char const * pEnd);
311 struct Parameter
313 OString m_aAttribute;
314 OString m_aCharset;
315 OString m_aLanguage;
316 OString m_aValue;
317 sal_uInt32 m_nSection;
318 bool m_bExtended;
320 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
322 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
323 return nComp < 0 ||
324 (nComp == 0 && m_nSection < rhs.m_nSection);
326 struct IsSameSection // is used to check container for duplicates with std::any_of
328 const OString& rAttribute;
329 const sal_uInt32 nSection;
330 bool operator()(const Parameter& r) const
331 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
335 typedef std::forward_list<Parameter> ParameterList;
337 bool parseParameters(ParameterList const & rInput,
338 INetContentTypeParameterList * pOutput);
340 // appendISO88591
342 void appendISO88591(OUString & rText, sal_Char const * pBegin,
343 sal_Char const * pEnd)
345 sal_Int32 nLength = pEnd - pBegin;
346 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
347 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
348 *p++ = static_cast<unsigned char>(*pBegin++);
349 rText += OUString(pBuffer.get(), nLength);
352 // parseParameters
354 bool parseParameters(ParameterList const & rInput,
355 INetContentTypeParameterList * pOutput)
357 if (pOutput)
358 pOutput->clear();
360 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
362 if (it->m_nSection > 0
363 && (itPrev == rInput.end()
364 || itPrev->m_nSection != it->m_nSection - 1
365 || itPrev->m_aAttribute != it->m_aAttribute))
366 return false;
369 if (pOutput)
370 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
372 bool bCharset = !it->m_aCharset.isEmpty();
373 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
374 if (bCharset)
375 eEncoding
376 = getCharsetEncoding(it->m_aCharset.getStr(),
377 it->m_aCharset.getStr()
378 + it->m_aCharset.getLength());
379 OUString aValue;
380 bool bBadEncoding = false;
381 itNext = it;
384 sal_Size nSize;
385 sal_Unicode * pUnicode
386 = convertToUnicode(itNext->m_aValue.getStr(),
387 itNext->m_aValue.getStr()
388 + itNext->m_aValue.getLength(),
389 bCharset && it->m_bExtended ?
390 eEncoding :
391 RTL_TEXTENCODING_UTF8,
392 nSize);
393 if (!pUnicode && !(bCharset && it->m_bExtended))
394 pUnicode = convertToUnicode(
395 itNext->m_aValue.getStr(),
396 itNext->m_aValue.getStr()
397 + itNext->m_aValue.getLength(),
398 RTL_TEXTENCODING_ISO_8859_1, nSize);
399 if (!pUnicode)
401 bBadEncoding = true;
402 break;
404 aValue += OUString(pUnicode, static_cast<sal_Int32>(nSize));
405 delete[] pUnicode;
406 ++itNext;
408 while (itNext != rInput.end() && itNext->m_nSection != 0);
410 if (bBadEncoding)
412 aValue.clear();
413 itNext = it;
416 if (itNext->m_bExtended)
418 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
419 aValue += OUStringLiteral1(
420 sal_Unicode(
421 static_cast<unsigned char>(itNext->m_aValue[i]))
422 | 0xF800); // map to unicode corporate use sub area
424 else
426 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
427 aValue += OUStringLiteral1( static_cast<unsigned char>(itNext->m_aValue[i]) );
429 ++itNext;
431 while (itNext != rInput.end() && itNext->m_nSection != 0);
433 auto const ret = pOutput->insert(
434 {it->m_aAttribute,
435 {it->m_aCharset, it->m_aLanguage, aValue, !bBadEncoding}});
436 SAL_INFO_IF(!ret.second, "tools",
437 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
439 return true;
442 /** Check whether some character is valid within an RFC 2045 <token>.
444 @param nChar Some UCS-4 character.
446 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
447 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
448 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
450 bool isTokenChar(sal_uInt32 nChar)
452 static const bool aMap[128]
453 = { false, false, false, false, false, false, false, false,
454 false, false, false, false, false, false, false, false,
455 false, false, false, false, false, false, false, false,
456 false, false, false, false, false, false, false, false,
457 false, true, false, true, true, true, true, true, // !"#$%&'
458 false, false, true, true, false, true, true, false, //()*+,-./
459 true, true, true, true, true, true, true, true, //01234567
460 true, true, false, false, false, false, false, false, //89:;<=>?
461 false, true, true, true, true, true, true, true, //@ABCDEFG
462 true, true, true, true, true, true, true, true, //HIJKLMNO
463 true, true, true, true, true, true, true, true, //PQRSTUVW
464 true, true, true, false, false, false, true, true, //XYZ[\]^_
465 true, true, true, true, true, true, true, true, //`abcdefg
466 true, true, true, true, true, true, true, true, //hijklmno
467 true, true, true, true, true, true, true, true, //pqrstuvw
468 true, true, true, true, true, true, true, false //xyz{|}~
470 return rtl::isAscii(nChar) && aMap[nChar];
473 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
474 const sal_Unicode * pEnd)
476 DBG_ASSERT(pBegin && pBegin <= pEnd,
477 "skipComment(): Bad sequence");
479 if (pBegin != pEnd && *pBegin == '(')
481 sal_uInt32 nLevel = 0;
482 for (const sal_Unicode * p = pBegin; p != pEnd;)
483 switch (*p++)
485 case '(':
486 ++nLevel;
487 break;
489 case ')':
490 if (--nLevel == 0)
491 return p;
492 break;
494 case '\\':
495 if (p != pEnd)
496 ++p;
497 break;
500 return pBegin;
503 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
504 pBegin,
505 const sal_Unicode *
506 pEnd)
508 DBG_ASSERT(pBegin && pBegin <= pEnd,
509 "skipLinearWhiteSpaceComment(): Bad sequence");
511 while (pBegin != pEnd)
512 switch (*pBegin)
514 case '\t':
515 case ' ':
516 ++pBegin;
517 break;
519 case 0x0D: // CR
520 if (startsWithLineFolding(pBegin, pEnd))
521 pBegin += 3;
522 else
523 return pBegin;
524 break;
526 case '(':
528 const sal_Unicode * p = skipComment(pBegin, pEnd);
529 if (p == pBegin)
530 return pBegin;
531 pBegin = p;
532 break;
535 default:
536 return pBegin;
538 return pBegin;
541 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
542 const sal_Unicode * pEnd)
544 DBG_ASSERT(pBegin && pBegin <= pEnd,
545 "skipQuotedString(): Bad sequence");
547 if (pBegin != pEnd && *pBegin == '"')
548 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
549 switch (*p++)
551 case 0x0D: // CR
552 if (pEnd - p < 2 || *p++ != 0x0A // LF
553 || !isWhiteSpace(*p++))
554 return pBegin;
555 break;
557 case '"':
558 return p;
560 case '\\':
561 if (p != pEnd)
562 ++p;
563 break;
565 return pBegin;
568 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
569 sal_Unicode const * pEnd,
570 INetContentTypeParameterList *
571 pParameters)
573 ParameterList aList;
574 sal_Unicode const * pParameterBegin = pBegin;
575 for (sal_Unicode const * p = pParameterBegin;;)
577 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
578 if (pParameterBegin == pEnd || *pParameterBegin != ';')
579 break;
580 p = pParameterBegin + 1;
582 sal_Unicode const * pAttributeBegin
583 = skipLinearWhiteSpaceComment(p, pEnd);
584 p = pAttributeBegin;
585 bool bDowncaseAttribute = false;
586 while (p != pEnd && isTokenChar(*p) && *p != '*')
588 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
589 ++p;
591 if (p == pAttributeBegin)
592 break;
593 OString aAttribute = OString(
594 pAttributeBegin, p - pAttributeBegin,
595 RTL_TEXTENCODING_ASCII_US);
596 if (bDowncaseAttribute)
597 aAttribute = aAttribute.toAsciiLowerCase();
599 sal_uInt32 nSection = 0;
600 if (p != pEnd && *p == '*')
602 ++p;
603 if (p != pEnd && rtl::isAsciiDigit(*p)
604 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
605 break;
608 bool bPresent = std::any_of(aList.begin(), aList.end(),
609 Parameter::IsSameSection{aAttribute, nSection});
610 if (bPresent)
611 break;
613 bool bExtended = false;
614 if (p != pEnd && *p == '*')
616 ++p;
617 bExtended = true;
620 p = skipLinearWhiteSpaceComment(p, pEnd);
622 if (p == pEnd || *p != '=')
623 break;
625 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
627 OString aCharset;
628 OString aLanguage;
629 OString aValue;
630 if (bExtended)
632 if (nSection == 0)
634 sal_Unicode const * pCharsetBegin = p;
635 bool bDowncaseCharset = false;
636 while (p != pEnd && isTokenChar(*p) && *p != '\'')
638 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
639 ++p;
641 if (p == pCharsetBegin)
642 break;
643 if (pParameters)
645 aCharset = OString(
646 pCharsetBegin,
647 p - pCharsetBegin,
648 RTL_TEXTENCODING_ASCII_US);
649 if (bDowncaseCharset)
650 aCharset = aCharset.toAsciiLowerCase();
653 if (p == pEnd || *p != '\'')
654 break;
655 ++p;
657 sal_Unicode const * pLanguageBegin = p;
658 bool bDowncaseLanguage = false;
659 int nLetters = 0;
660 for (; p != pEnd; ++p)
661 if (rtl::isAsciiAlpha(*p))
663 if (++nLetters > 8)
664 break;
665 bDowncaseLanguage = bDowncaseLanguage
666 || rtl::isAsciiUpperCase(*p);
668 else if (*p == '-')
670 if (nLetters == 0)
671 break;
672 nLetters = 0;
674 else
675 break;
676 if (nLetters == 0 || nLetters > 8)
677 break;
678 if (pParameters)
680 aLanguage = OString(
681 pLanguageBegin,
682 p - pLanguageBegin,
683 RTL_TEXTENCODING_ASCII_US);
684 if (bDowncaseLanguage)
685 aLanguage = aLanguage.toAsciiLowerCase();
688 if (p == pEnd || *p != '\'')
689 break;
690 ++p;
692 if (pParameters)
694 OStringBuffer aSink;
695 while (p != pEnd)
697 auto q = p;
698 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
699 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
700 break;
701 p = q;
702 if (nChar == '%' && p + 1 < pEnd)
704 int nWeight1 = INetMIME::getHexWeight(p[0]);
705 int nWeight2 = INetMIME::getHexWeight(p[1]);
706 if (nWeight1 >= 0 && nWeight2 >= 0)
708 aSink.append(sal_Char(nWeight1 << 4 | nWeight2));
709 p += 2;
710 continue;
713 writeUTF8(aSink, nChar);
715 aValue = aSink.makeStringAndClear();
717 else
718 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
719 ++p;
721 else if (p != pEnd && *p == '"')
722 if (pParameters)
724 OStringBuffer aSink;
725 bool bInvalid = false;
726 for (++p;;)
728 if (p == pEnd)
730 bInvalid = true;
731 break;
733 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
734 if (nChar == '"')
735 break;
736 else if (nChar == 0x0D) // CR
738 if (pEnd - p < 2 || *p++ != 0x0A // LF
739 || !isWhiteSpace(*p))
741 bInvalid = true;
742 break;
744 nChar = static_cast<unsigned char>(*p++);
746 else if (nChar == '\\')
748 if (p == pEnd)
750 bInvalid = true;
751 break;
753 nChar = INetMIME::getUTF32Character(p, pEnd);
755 writeUTF8(aSink, nChar);
757 if (bInvalid)
758 break;
759 aValue = aSink.makeStringAndClear();
761 else
763 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
764 if (p == pStringEnd)
765 break;
766 p = pStringEnd;
768 else
770 sal_Unicode const * pTokenBegin = p;
771 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
772 ++p;
773 if (p == pTokenBegin)
774 break;
775 if (pParameters)
776 aValue = OString(
777 pTokenBegin, p - pTokenBegin,
778 RTL_TEXTENCODING_UTF8);
780 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
782 aList.sort();
783 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
786 bool equalIgnoreCase(const sal_Char * pBegin1,
787 const sal_Char * pEnd1,
788 const sal_Char * pString2)
790 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
791 "equalIgnoreCase(): Bad sequences");
793 while (*pString2 != 0)
794 if (pBegin1 == pEnd1
795 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
796 != rtl::toAsciiUpperCase(
797 static_cast<unsigned char>(*pString2++))))
798 return false;
799 return pBegin1 == pEnd1;
802 struct EncodingEntry
804 sal_Char const * m_aName;
805 rtl_TextEncoding m_eEncoding;
808 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
809 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
810 // noted:
811 static EncodingEntry const aEncodingMap[]
812 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
813 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
814 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
815 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
816 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
817 { "ASCII", RTL_TEXTENCODING_ASCII_US },
818 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
819 { "US", RTL_TEXTENCODING_ASCII_US },
820 { "IBM367", RTL_TEXTENCODING_ASCII_US },
821 { "CP367", RTL_TEXTENCODING_ASCII_US },
822 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
823 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
824 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
825 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
826 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
827 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
828 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
829 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
830 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
831 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
832 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
833 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
834 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
835 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
836 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
837 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
838 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
839 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
840 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
841 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
842 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
843 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
844 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
845 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
846 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
847 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
848 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
849 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
850 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
851 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
852 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
853 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
854 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
855 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
856 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
857 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
858 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
859 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
860 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
861 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
862 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
863 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
864 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
865 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
866 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
867 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
868 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
869 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
870 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
871 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
872 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
873 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
874 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
875 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
876 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
877 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
878 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
879 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
880 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
881 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
882 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
883 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
884 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
885 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
886 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
887 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
888 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
889 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
890 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
891 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
892 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
893 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
894 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
895 { "IBM437", RTL_TEXTENCODING_IBM_437 },
896 { "CP437", RTL_TEXTENCODING_IBM_437 },
897 { "437", RTL_TEXTENCODING_IBM_437 },
898 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
899 { "IBM850", RTL_TEXTENCODING_IBM_850 },
900 { "CP850", RTL_TEXTENCODING_IBM_850 },
901 { "850", RTL_TEXTENCODING_IBM_850 },
902 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
903 { "IBM860", RTL_TEXTENCODING_IBM_860 },
904 { "CP860", RTL_TEXTENCODING_IBM_860 },
905 { "860", RTL_TEXTENCODING_IBM_860 },
906 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
907 { "IBM861", RTL_TEXTENCODING_IBM_861 },
908 { "CP861", RTL_TEXTENCODING_IBM_861 },
909 { "861", RTL_TEXTENCODING_IBM_861 },
910 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
911 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
912 { "IBM863", RTL_TEXTENCODING_IBM_863 },
913 { "CP863", RTL_TEXTENCODING_IBM_863 },
914 { "863", RTL_TEXTENCODING_IBM_863 },
915 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
916 { "IBM865", RTL_TEXTENCODING_IBM_865 },
917 { "CP865", RTL_TEXTENCODING_IBM_865 },
918 { "865", RTL_TEXTENCODING_IBM_865 },
919 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
920 { "IBM775", RTL_TEXTENCODING_IBM_775 },
921 { "CP775", RTL_TEXTENCODING_IBM_775 },
922 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
923 { "IBM852", RTL_TEXTENCODING_IBM_852 },
924 { "CP852", RTL_TEXTENCODING_IBM_852 },
925 { "852", RTL_TEXTENCODING_IBM_852 },
926 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
927 { "IBM855", RTL_TEXTENCODING_IBM_855 },
928 { "CP855", RTL_TEXTENCODING_IBM_855 },
929 { "855", RTL_TEXTENCODING_IBM_855 },
930 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
931 { "IBM857", RTL_TEXTENCODING_IBM_857 },
932 { "CP857", RTL_TEXTENCODING_IBM_857 },
933 { "857", RTL_TEXTENCODING_IBM_857 },
934 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
935 { "IBM862", RTL_TEXTENCODING_IBM_862 },
936 { "CP862", RTL_TEXTENCODING_IBM_862 },
937 { "862", RTL_TEXTENCODING_IBM_862 },
938 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
939 { "IBM864", RTL_TEXTENCODING_IBM_864 },
940 { "CP864", RTL_TEXTENCODING_IBM_864 },
941 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
942 { "IBM866", RTL_TEXTENCODING_IBM_866 },
943 { "CP866", RTL_TEXTENCODING_IBM_866 },
944 { "866", RTL_TEXTENCODING_IBM_866 },
945 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
946 { "IBM869", RTL_TEXTENCODING_IBM_869 },
947 { "CP869", RTL_TEXTENCODING_IBM_869 },
948 { "869", RTL_TEXTENCODING_IBM_869 },
949 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
950 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
951 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
952 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
953 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
954 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
955 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
956 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
957 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
958 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
959 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
960 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
961 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
962 { "GB2312", RTL_TEXTENCODING_GB_2312 },
963 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
964 { "BIG5", RTL_TEXTENCODING_BIG5 },
965 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
966 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
967 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
968 RTL_TEXTENCODING_EUC_JP },
969 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
970 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
971 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
972 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
973 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
974 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
975 { "UTF-7", RTL_TEXTENCODING_UTF7 },
976 { "UTF-8", RTL_TEXTENCODING_UTF8 },
977 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
978 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
979 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
980 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
981 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
982 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
983 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
984 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
985 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
986 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
988 rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
989 sal_Char const * pEnd)
991 for (const EncodingEntry& i : aEncodingMap)
992 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
993 return i.m_eEncoding;
994 return RTL_TEXTENCODING_DONTKNOW;
999 // INetMIME
1001 // static
1002 bool INetMIME::isAtomChar(sal_uInt32 nChar)
1004 static const bool aMap[128]
1005 = { false, false, false, false, false, false, false, false,
1006 false, false, false, false, false, false, false, false,
1007 false, false, false, false, false, false, false, false,
1008 false, false, false, false, false, false, false, false,
1009 false, true, false, true, true, true, true, true, // !"#$%&'
1010 false, false, true, true, false, true, false, true, //()*+,-./
1011 true, true, true, true, true, true, true, true, //01234567
1012 true, true, false, false, false, true, false, true, //89:;<=>?
1013 false, true, true, true, true, true, true, true, //@ABCDEFG
1014 true, true, true, true, true, true, true, true, //HIJKLMNO
1015 true, true, true, true, true, true, true, true, //PQRSTUVW
1016 true, true, true, false, false, false, true, true, //XYZ[\]^_
1017 true, true, true, true, true, true, true, true, //`abcdefg
1018 true, true, true, true, true, true, true, true, //hijklmno
1019 true, true, true, true, true, true, true, true, //pqrstuvw
1020 true, true, true, true, true, true, true, false //xyz{|}~
1022 return rtl::isAscii(nChar) && aMap[nChar];
1025 // static
1026 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
1028 static const bool aMap[128]
1029 = { false, false, false, false, false, false, false, false,
1030 false, false, false, false, false, false, false, false,
1031 false, false, false, false, false, false, false, false,
1032 false, false, false, false, false, false, false, false,
1033 false, true, false, true, true, false, true, true, // !"#$%&'
1034 false, false, false, true, true, true, true, true, //()*+,-./
1035 true, true, true, true, true, true, true, true, //01234567
1036 true, true, true, true, true, true, true, true, //89:;<=>?
1037 true, true, true, true, true, true, true, true, //@ABCDEFG
1038 true, true, true, true, true, true, true, true, //HIJKLMNO
1039 true, true, true, true, true, true, true, true, //PQRSTUVW
1040 true, true, true, true, false, true, true, true, //XYZ[\]^_
1041 true, true, true, true, true, true, true, true, //`abcdefg
1042 true, true, true, true, true, true, true, true, //hijklmno
1043 true, true, true, true, true, true, true, true, //pqrstuvw
1044 true, true, true, false, true, true, true, false //xyz{|}~
1046 return rtl::isAscii(nChar) && aMap[nChar];
1049 // static
1050 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
1051 const sal_Unicode * pEnd1,
1052 const sal_Char * pString2)
1054 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
1055 "INetMIME::equalIgnoreCase(): Bad sequences");
1057 while (*pString2 != 0)
1058 if (pBegin1 == pEnd1
1059 || (rtl::toAsciiUpperCase(*pBegin1++)
1060 != rtl::toAsciiUpperCase(
1061 static_cast<unsigned char>(*pString2++))))
1062 return false;
1063 return pBegin1 == pEnd1;
1066 // static
1067 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1068 const sal_Unicode * pEnd, bool bLeadingZeroes,
1069 sal_uInt32 & rValue)
1071 sal_uInt64 nTheValue = 0;
1072 const sal_Unicode * p = rBegin;
1073 for ( ; p != pEnd; ++p)
1075 int nWeight = getWeight(*p);
1076 if (nWeight < 0)
1077 break;
1078 nTheValue = 10 * nTheValue + nWeight;
1079 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1080 return false;
1082 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1083 return false;
1084 rBegin = p;
1085 rValue = sal_uInt32(nTheValue);
1086 return true;
1089 // static
1090 sal_Unicode const * INetMIME::scanContentType(
1091 OUString const & rStr, OUString * pType,
1092 OUString * pSubType, INetContentTypeParameterList * pParameters)
1094 sal_Unicode const * pBegin = rStr.getStr();
1095 sal_Unicode const * pEnd = pBegin + rStr.getLength();
1096 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1097 sal_Unicode const * pTypeBegin = p;
1098 while (p != pEnd && isTokenChar(*p))
1100 ++p;
1102 if (p == pTypeBegin)
1103 return nullptr;
1104 sal_Unicode const * pTypeEnd = p;
1106 p = skipLinearWhiteSpaceComment(p, pEnd);
1107 if (p == pEnd || *p++ != '/')
1108 return nullptr;
1110 p = skipLinearWhiteSpaceComment(p, pEnd);
1111 sal_Unicode const * pSubTypeBegin = p;
1112 while (p != pEnd && isTokenChar(*p))
1114 ++p;
1116 if (p == pSubTypeBegin)
1117 return nullptr;
1118 sal_Unicode const * pSubTypeEnd = p;
1120 if (pType != nullptr)
1122 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1124 if (pSubType != nullptr)
1126 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1127 .toAsciiLowerCase();
1130 return scanParameters(p, pEnd, pParameters);
1133 // static
1134 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1136 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1137 // versions of StarOffice send mails with header fields where encoded
1138 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1139 // '=', ',', '.', '"', ')', without any required white space in between.
1140 // And there appear to exist some broken mailers that only encode single
1141 // letters within words, like "Appel
1142 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1143 // detect encoded words even when not properly surrounded by white space.
1145 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1147 // encoded-word = "=?"
1148 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1149 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1150 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1151 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1152 // "?="
1154 // base64 = ALPHA / DIGIT / "+" / "/"
1156 const sal_Char * pBegin = rBody.getStr();
1157 const sal_Char * pEnd = pBegin + rBody.getLength();
1159 OUString sDecoded;
1160 const sal_Char * pCopyBegin = pBegin;
1162 /* bool bStartEncodedWord = true; */
1163 const sal_Char * pWSPBegin = pBegin;
1165 for (const sal_Char * p = pBegin; p != pEnd;)
1167 OUString sEncodedText;
1168 if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
1170 const sal_Char * q = p + 1;
1171 bool bEncodedWord = q != pEnd && *q++ == '?';
1173 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1174 if (bEncodedWord)
1176 const sal_Char * pCharsetBegin = q;
1177 const sal_Char * pLanguageBegin = nullptr;
1178 int nAlphaCount = 0;
1179 for (bool bDone = false; !bDone;)
1180 if (q == pEnd)
1182 bEncodedWord = false;
1183 bDone = true;
1185 else
1187 sal_Char cChar = *q++;
1188 switch (cChar)
1190 case '*':
1191 pLanguageBegin = q - 1;
1192 nAlphaCount = 0;
1193 break;
1195 case '-':
1196 if (pLanguageBegin != nullptr)
1198 if (nAlphaCount == 0)
1199 pLanguageBegin = nullptr;
1200 else
1201 nAlphaCount = 0;
1203 break;
1205 case '?':
1206 if (pCharsetBegin == q - 1)
1207 bEncodedWord = false;
1208 else
1210 eCharsetEncoding
1211 = getCharsetEncoding(
1212 pCharsetBegin,
1213 pLanguageBegin == nullptr
1214 || nAlphaCount == 0 ?
1215 q - 1 : pLanguageBegin);
1216 bEncodedWord = isMIMECharsetEncoding(
1217 eCharsetEncoding);
1218 eCharsetEncoding
1219 = translateFromMIME(eCharsetEncoding);
1221 bDone = true;
1222 break;
1224 default:
1225 if (pLanguageBegin != nullptr
1226 && (!rtl::isAsciiAlpha(
1227 static_cast<unsigned char>(cChar))
1228 || ++nAlphaCount > 8))
1229 pLanguageBegin = nullptr;
1230 break;
1235 bool bEncodingB = false;
1236 if (bEncodedWord)
1238 if (q == pEnd)
1239 bEncodedWord = false;
1240 else
1242 switch (*q++)
1244 case 'B':
1245 case 'b':
1246 bEncodingB = true;
1247 break;
1249 case 'Q':
1250 case 'q':
1251 bEncodingB = false;
1252 break;
1254 default:
1255 bEncodedWord = false;
1256 break;
1261 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1263 OStringBuffer sText;
1264 if (bEncodedWord)
1266 if (bEncodingB)
1268 for (bool bDone = false; !bDone;)
1270 if (pEnd - q < 4)
1272 bEncodedWord = false;
1273 bDone = true;
1275 else
1277 bool bFinal = false;
1278 int nCount = 3;
1279 sal_uInt32 nValue = 0;
1280 for (int nShift = 18; nShift >= 0; nShift -= 6)
1282 int nWeight = getBase64Weight(*q++);
1283 if (nWeight == -2)
1285 bEncodedWord = false;
1286 bDone = true;
1287 break;
1289 if (nWeight == -1)
1291 if (!bFinal)
1293 if (nShift >= 12)
1295 bEncodedWord = false;
1296 bDone = true;
1297 break;
1299 bFinal = true;
1300 nCount = nShift == 6 ? 1 : 2;
1303 else
1304 nValue |= nWeight << nShift;
1306 if (bEncodedWord)
1308 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1309 sText.append(sal_Char(nValue >> nShift & 0xFF));
1310 if (*q == '?')
1312 ++q;
1313 bDone = true;
1315 if (bFinal && !bDone)
1317 bEncodedWord = false;
1318 bDone = true;
1324 else
1326 const sal_Char * pEncodedTextBegin = q;
1327 const sal_Char * pEncodedTextCopyBegin = q;
1328 for (bool bDone = false; !bDone;)
1329 if (q == pEnd)
1331 bEncodedWord = false;
1332 bDone = true;
1334 else
1336 sal_uInt32 nChar = *q++;
1337 switch (nChar)
1339 case '=':
1341 if (pEnd - q < 2)
1343 bEncodedWord = false;
1344 bDone = true;
1345 break;
1347 int nDigit1 = getHexWeight(q[0]);
1348 int nDigit2 = getHexWeight(q[1]);
1349 if (nDigit1 < 0 || nDigit2 < 0)
1351 bEncodedWord = false;
1352 bDone = true;
1353 break;
1355 sText.append(rBody.copy(
1356 (pEncodedTextCopyBegin - pBegin),
1357 (q - 1 - pEncodedTextCopyBegin)));
1358 sText.append(sal_Char(nDigit1 << 4 | nDigit2));
1359 q += 2;
1360 pEncodedTextCopyBegin = q;
1361 break;
1364 case '?':
1365 if (q - pEncodedTextBegin > 1)
1366 sText.append(rBody.copy(
1367 (pEncodedTextCopyBegin - pBegin),
1368 (q - 1 - pEncodedTextCopyBegin)));
1369 else
1370 bEncodedWord = false;
1371 bDone = true;
1372 break;
1374 case '_':
1375 sText.append(rBody.copy(
1376 (pEncodedTextCopyBegin - pBegin),
1377 (q - 1 - pEncodedTextCopyBegin)));
1378 sText.append(' ');
1379 pEncodedTextCopyBegin = q;
1380 break;
1382 default:
1383 if (!isVisible(nChar))
1385 bEncodedWord = false;
1386 bDone = true;
1388 break;
1394 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1396 sal_Unicode * pUnicodeBuffer = nullptr;
1397 sal_Size nUnicodeSize = 0;
1398 if (bEncodedWord)
1400 pUnicodeBuffer
1401 = convertToUnicode(sText.getStr(),
1402 sText.getStr() + sText.getLength(),
1403 eCharsetEncoding, nUnicodeSize);
1404 if (pUnicodeBuffer == nullptr)
1405 bEncodedWord = false;
1408 if (bEncodedWord)
1410 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1411 sDecoded += OUString(
1412 pUnicodeBuffer,
1413 static_cast< sal_Int32 >(nUnicodeSize));
1414 delete[] pUnicodeBuffer;
1415 p = q;
1416 pCopyBegin = p;
1418 pWSPBegin = p;
1419 while (p != pEnd && isWhiteSpace(*p))
1420 ++p;
1421 /* bStartEncodedWord = p != pWSPBegin; */
1422 continue;
1426 if (!sEncodedText.isEmpty())
1427 sDecoded += sEncodedText;
1429 if (p == pEnd)
1430 break;
1432 switch (*p++)
1434 case '"':
1435 /* bStartEncodedWord = true; */
1436 break;
1438 case '(':
1439 /* bStartEncodedWord = true; */
1440 break;
1442 case ')':
1443 /* bStartEncodedWord = false; */
1444 break;
1446 default:
1448 const sal_Char * pUTF8Begin = p - 1;
1449 const sal_Char * pUTF8End = pUTF8Begin;
1450 sal_uInt32 nCharacter = 0;
1451 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
1452 nCharacter))
1454 appendISO88591(sDecoded, pCopyBegin, p - 1);
1455 sal_Unicode aUTF16Buf[2];
1456 sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
1457 sDecoded += OUString(aUTF16Buf, nUTF16Len);
1458 p = pUTF8End;
1459 pCopyBegin = p;
1461 /* bStartEncodedWord = false; */
1462 break;
1465 pWSPBegin = p;
1468 appendISO88591(sDecoded, pCopyBegin, pEnd);
1469 return sDecoded;
1472 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */