LanguageTool: don't crash if REST protocol isn't set
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blobf7265523b6e2e8be93351814eeca371d3c376d23
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
33 namespace {
35 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
36 const char * pEnd);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
43 0x09 or 0x20).
45 bool isWhiteSpace(sal_uInt32 nChar)
47 return nChar == '\t' || nChar == ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 int getBase64Weight(sal_uInt32 nChar)
61 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64 nChar == '+' ? 62 :
65 nChar == '/' ? 63 :
66 nChar == '=' ? -1 : -2;
69 bool startsWithLineFolding(const sal_Unicode * pBegin,
70 const sal_Unicode * pEnd)
72 DBG_ASSERT(pBegin && pBegin <= pEnd,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
82 #if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86 return eEncoding;
87 #endif
90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
96 const char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
111 pBuffer.reset(new sal_Unicode[nBufferSize]);
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 pBuffer.reset();
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
129 pBuffer.reset();
131 return pBuffer;
134 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
136 @param pBuffer Points to a buffer, must not be null.
138 @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
140 @return A pointer past the UTF-16 characters put into the buffer
141 (i.e., pBuffer + 1 or pBuffer + 2).
143 sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
144 sal_uInt32 nUTF32)
146 DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
147 if (nUTF32 < 0x10000)
148 *pBuffer++ = sal_Unicode(nUTF32);
149 else
151 nUTF32 -= 0x10000;
152 *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
153 *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
155 return pBuffer;
158 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
160 // See RFC 2279 for a discussion of UTF-8.
161 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
163 if (nChar < 0x80)
164 rSink.append(char(nChar));
165 else if (nChar < 0x800)
166 rSink.append(char(nChar >> 6 | 0xC0))
167 .append(char((nChar & 0x3F) | 0x80));
168 else if (nChar < 0x10000)
169 rSink.append(char(nChar >> 12 | 0xE0))
170 .append(char((nChar >> 6 & 0x3F) | 0x80))
171 .append(char((nChar & 0x3F) | 0x80));
172 else if (nChar < 0x200000)
173 rSink.append(char(nChar >> 18 | 0xF0))
174 .append(char((nChar >> 12 & 0x3F) | 0x80))
175 .append(char((nChar >> 6 & 0x3F) | 0x80))
176 .append(char((nChar & 0x3F) | 0x80));
177 else if (nChar < 0x4000000)
178 rSink.append(char(nChar >> 24 | 0xF8))
179 .append(char((nChar >> 18 & 0x3F) | 0x80))
180 .append(char((nChar >> 12 & 0x3F) | 0x80))
181 .append(char((nChar >> 6 & 0x3F) | 0x80))
182 .append(char((nChar & 0x3F) | 0x80));
183 else
184 rSink.append(char(nChar >> 30 | 0xFC))
185 .append(char((nChar >> 24 & 0x3F) | 0x80))
186 .append(char((nChar >> 18 & 0x3F) | 0x80))
187 .append(char((nChar >> 12 & 0x3F) | 0x80))
188 .append(char((nChar >> 6 & 0x3F) | 0x80))
189 .append(char((nChar & 0x3F) | 0x80));
192 bool translateUTF8Char(const char *& rBegin,
193 const char * pEnd,
194 sal_uInt32 & rCharacter)
196 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
197 || static_cast< unsigned char >(*rBegin) >= 0xFE)
198 return false;
200 int nCount;
201 sal_uInt32 nMin;
202 sal_uInt32 nUCS4;
203 const char * p = rBegin;
204 if (static_cast< unsigned char >(*p) < 0xE0)
206 nCount = 1;
207 nMin = 0x80;
208 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
210 else if (static_cast< unsigned char >(*p) < 0xF0)
212 nCount = 2;
213 nMin = 0x800;
214 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
216 else if (static_cast< unsigned char >(*p) < 0xF8)
218 nCount = 3;
219 nMin = 0x10000;
220 nUCS4 = static_cast< unsigned char >(*p) & 7;
222 else if (static_cast< unsigned char >(*p) < 0xFC)
224 nCount = 4;
225 nMin = 0x200000;
226 nUCS4 = static_cast< unsigned char >(*p) & 3;
228 else
230 nCount = 5;
231 nMin = 0x4000000;
232 nUCS4 = static_cast< unsigned char >(*p) & 1;
234 ++p;
236 for (; nCount-- > 0; ++p)
237 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
238 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
239 else
240 return false;
242 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
243 return false;
245 rCharacter = nUCS4;
246 rBegin = p;
247 return true;
250 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
251 char const * pEnd);
253 struct Parameter
255 OString m_aAttribute;
256 OString m_aCharset;
257 OString m_aLanguage;
258 OString m_aValue;
259 sal_uInt32 m_nSection;
260 bool m_bExtended;
262 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
264 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
265 return nComp < 0 ||
266 (nComp == 0 && m_nSection < rhs.m_nSection);
268 struct IsSameSection // is used to check container for duplicates with std::any_of
270 const OString& rAttribute;
271 const sal_uInt32 nSection;
272 bool operator()(const Parameter& r) const
273 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
277 typedef std::forward_list<Parameter> ParameterList;
279 bool parseParameters(ParameterList const & rInput,
280 INetContentTypeParameterList * pOutput);
282 // appendISO88591
284 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
285 char const * pEnd)
287 sal_Int32 nLength = pEnd - pBegin;
288 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
289 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
290 *p++ = static_cast<unsigned char>(*pBegin++);
291 rText.append(pBuffer.get(), nLength);
294 // parseParameters
// Checks the (already sorted) list of scanned parameter sections and, if
// pOutput is non-null, decodes them into *pOutput.
//
// rInput   Parameter sections; the checks below rely on sections of the
//          same attribute being adjacent and in ascending section order.
// pOutput  May be null; if non-null it is cleared first and then receives
//          one entry per attribute.
//
// Returns false if a section with a number greater than zero is not
// directly preceded by the section with the next lower number of the same
// attribute; returns true otherwise.
296 bool parseParameters(ParameterList const & rInput,
297 INetContentTypeParameterList * pOutput)
299 if (pOutput)
300 pOutput->clear();
// Pass 1: verify that numbered sections are contiguous per attribute.
302 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
304 if (it->m_nSection > 0
305 && (itPrev == rInput.end()
306 || itPrev->m_nSection != it->m_nSection - 1
307 || itPrev->m_aAttribute != it->m_aAttribute))
308 return false;
// Pass 2 (only when output is requested): `it` is the first section of a
// group, `itNext` walks over the group's sections (all following entries
// whose section number is not zero) and ends up at the next group.
311 if (pOutput)
312 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
314 bool bCharset = !it->m_aCharset.isEmpty();
315 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
316 if (bCharset)
317 eEncoding
318 = getCharsetEncoding(it->m_aCharset.getStr(),
319 it->m_aCharset.getStr()
320 + it->m_aCharset.getLength());
321 OUStringBuffer aValue(64);
322 bool bBadEncoding = false;
323 itNext = it;
// Convert every section of the group.  The charset and extended flag of
// the group's first section (`it`) select the encoding: the named charset
// if both are set, otherwise UTF-8 with a fallback to ISO-8859-1.
326 sal_Size nSize;
327 std::unique_ptr<sal_Unicode[]> pUnicode
328 = convertToUnicode(itNext->m_aValue.getStr(),
329 itNext->m_aValue.getStr()
330 + itNext->m_aValue.getLength(),
331 bCharset && it->m_bExtended ?
332 eEncoding :
333 RTL_TEXTENCODING_UTF8,
334 nSize);
335 if (!pUnicode && !(bCharset && it->m_bExtended))
336 pUnicode = convertToUnicode(
337 itNext->m_aValue.getStr(),
338 itNext->m_aValue.getStr()
339 + itNext->m_aValue.getLength(),
340 RTL_TEXTENCODING_ISO_8859_1, nSize);
341 if (!pUnicode)
343 bBadEncoding = true;
344 break;
346 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
347 ++itNext;
349 while (itNext != rInput.end() && itNext->m_nSection != 0);
// Conversion failed for some section: discard what was converted so far
// and rebuild the value for the whole group byte by byte instead.
351 if (bBadEncoding)
353 aValue.setLength(0);
354 itNext = it;
357 if (itNext->m_bExtended)
359 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
360 aValue.append(
361 static_cast<sal_Unicode>(
362 static_cast<unsigned char>(itNext->m_aValue[i])
363 | 0xF800)); // map to unicode corporate use sub area
365 else
367 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
368 aValue.append( itNext->m_aValue[i] );
370 ++itNext;
372 while (itNext != rInput.end() && itNext->m_nSection != 0);
// Store the group under its attribute; the last member of the inserted
// value is true only if the conversion succeeded.  An attribute that is
// already present is not overwritten, only logged.
374 auto const ret = pOutput->insert(
375 {it->m_aAttribute,
376 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
377 SAL_INFO_IF(!ret.second, "tools",
378 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
380 return true;
383 /** Check whether some character is valid within an RFC 2045 <token>.
385 @param nChar Some UCS-4 character.
387 @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
388 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
389 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
391 bool isTokenChar(sal_uInt32 nChar)
// One flag per US-ASCII code 0x00..0x7F; the trailing comment on each row
// lists the eight characters that row covers.
393 static const bool aMap[128]
394 = { false, false, false, false, false, false, false, false,
395 false, false, false, false, false, false, false, false,
396 false, false, false, false, false, false, false, false,
397 false, false, false, false, false, false, false, false,
398 false, true, false, true, true, true, true, true, // !"#$%&'
399 false, false, true, true, false, true, true, false, //()*+,-./
400 true, true, true, true, true, true, true, true, //01234567
401 true, true, false, false, false, false, false, false, //89:;<=>?
402 false, true, true, true, true, true, true, true, //@ABCDEFG
403 true, true, true, true, true, true, true, true, //HIJKLMNO
404 true, true, true, true, true, true, true, true, //PQRSTUVW
405 true, true, true, false, false, false, true, true, //XYZ[\]^_
406 true, true, true, true, true, true, true, true, //`abcdefg
407 true, true, true, true, true, true, true, true, //hijklmno
408 true, true, true, true, true, true, true, true, //pqrstuvw
409 true, true, true, true, true, true, true, false //xyz{|}~
// The isAscii() test comes first so that the 128-entry table is only
// indexed with US-ASCII values.
411 return rtl::isAscii(nChar) && aMap[nChar];
414 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
415 const sal_Unicode * pEnd)
417 DBG_ASSERT(pBegin && pBegin <= pEnd,
418 "skipComment(): Bad sequence");
420 if (pBegin != pEnd && *pBegin == '(')
422 sal_uInt32 nLevel = 0;
423 for (const sal_Unicode * p = pBegin; p != pEnd;)
424 switch (*p++)
426 case '(':
427 ++nLevel;
428 break;
430 case ')':
431 if (--nLevel == 0)
432 return p;
433 break;
435 case '\\':
436 if (p != pEnd)
437 ++p;
438 break;
441 return pBegin;
444 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
445 pBegin,
446 const sal_Unicode *
447 pEnd)
449 DBG_ASSERT(pBegin && pBegin <= pEnd,
450 "skipLinearWhiteSpaceComment(): Bad sequence");
452 while (pBegin != pEnd)
453 switch (*pBegin)
455 case '\t':
456 case ' ':
457 ++pBegin;
458 break;
460 case 0x0D: // CR
461 if (startsWithLineFolding(pBegin, pEnd))
462 pBegin += 3;
463 else
464 return pBegin;
465 break;
467 case '(':
469 const sal_Unicode * p = skipComment(pBegin, pEnd);
470 if (p == pBegin)
471 return pBegin;
472 pBegin = p;
473 break;
476 default:
477 return pBegin;
479 return pBegin;
482 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
483 const sal_Unicode * pEnd)
485 DBG_ASSERT(pBegin && pBegin <= pEnd,
486 "skipQuotedString(): Bad sequence");
488 if (pBegin != pEnd && *pBegin == '"')
489 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
490 switch (*p++)
492 case 0x0D: // CR
493 if (pEnd - p < 2 || *p++ != 0x0A // LF
494 || !isWhiteSpace(*p++))
495 return pBegin;
496 break;
498 case '"':
499 return p;
501 case '\\':
502 if (p != pEnd)
503 ++p;
504 break;
506 return pBegin;
// Scans a sequence of ";"-introduced parameters in [pBegin, pEnd).
//
// Each parameter consists of an attribute token, an optional "*" with an
// optional section number, an optional second "*" marking an extended
// value, "=", and a value.  An extended value in section 0 carries a
// charset and a language, each followed by "'", in front of the value
// proper, in which "%" plus two hex digits denotes one byte.  Other values
// are either quoted strings or tokens.
//
// pParameters may be null; then values are only skipped, not collected.
//
// Scanning stops at the first parameter that cannot be parsed (or at a
// repeated attribute/section pair).  The parameters scanned up to there
// are sorted and passed to parseParameters(); on success the position where
// scanning stopped is returned, otherwise pBegin.
509 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
510 sal_Unicode const * pEnd,
511 INetContentTypeParameterList *
512 pParameters)
514 ParameterList aList;
515 sal_Unicode const * pParameterBegin = pBegin;
516 for (sal_Unicode const * p = pParameterBegin;;)
// pParameterBegin only advances at the top of an iteration, so every
// `break` below leaves it at the start of the parameter that failed.
518 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
519 if (pParameterBegin == pEnd || *pParameterBegin != ';')
520 break;
521 p = pParameterBegin + 1;
// Attribute name: token characters up to, but excluding, '*'.
523 sal_Unicode const * pAttributeBegin
524 = skipLinearWhiteSpaceComment(p, pEnd);
525 p = pAttributeBegin;
526 bool bDowncaseAttribute = false;
527 while (p != pEnd && isTokenChar(*p) && *p != '*')
529 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
530 ++p;
532 if (p == pAttributeBegin)
533 break;
534 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
535 if (bDowncaseAttribute)
536 aAttribute = aAttribute.toAsciiLowerCase();
// Optional "*<section number>"; without digits the section stays 0.
538 sal_uInt32 nSection = 0;
539 if (p != pEnd && *p == '*')
541 ++p;
542 if (p != pEnd && rtl::isAsciiDigit(*p)
543 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
544 break;
// Stop at the first repeated attribute/section pair.
547 bool bPresent = std::any_of(aList.begin(), aList.end(),
548 Parameter::IsSameSection{aAttribute, nSection});
549 if (bPresent)
550 break;
// A further "*" marks an extended value.
552 bool bExtended = false;
553 if (p != pEnd && *p == '*')
555 ++p;
556 bExtended = true;
559 p = skipLinearWhiteSpaceComment(p, pEnd);
561 if (p == pEnd || *p != '=')
562 break;
564 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
566 OString aCharset;
567 OString aLanguage;
568 OString aValue;
569 if (bExtended)
// Only section 0 of an extended value carries charset'language'.
571 if (nSection == 0)
573 sal_Unicode const * pCharsetBegin = p;
574 bool bDowncaseCharset = false;
575 while (p != pEnd && isTokenChar(*p) && *p != '\'')
577 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
578 ++p;
580 if (p == pCharsetBegin)
581 break;
582 if (pParameters)
584 aCharset = OString(
585 pCharsetBegin,
586 p - pCharsetBegin,
587 RTL_TEXTENCODING_ASCII_US);
588 if (bDowncaseCharset)
589 aCharset = aCharset.toAsciiLowerCase();
592 if (p == pEnd || *p != '\'')
593 break;
594 ++p;
// Language: groups of 1 to 8 ASCII letters separated by '-'.
// NOTE(review): an empty language (as in "charset''value", which RFC 2231
// appears to permit) ends with nLetters == 0 and is therefore rejected
// below -- confirm whether that is intended.
596 sal_Unicode const * pLanguageBegin = p;
597 bool bDowncaseLanguage = false;
598 int nLetters = 0;
599 for (; p != pEnd; ++p)
600 if (rtl::isAsciiAlpha(*p))
602 if (++nLetters > 8)
603 break;
604 bDowncaseLanguage = bDowncaseLanguage
605 || rtl::isAsciiUpperCase(*p);
607 else if (*p == '-')
609 if (nLetters == 0)
610 break;
611 nLetters = 0;
613 else
614 break;
615 if (nLetters == 0 || nLetters > 8)
616 break;
617 if (pParameters)
619 aLanguage = OString(
620 pLanguageBegin,
621 p - pLanguageBegin,
622 RTL_TEXTENCODING_ASCII_US);
623 if (bDowncaseLanguage)
624 aLanguage = aLanguage.toAsciiLowerCase();
627 if (p == pEnd || *p != '\'')
628 break;
629 ++p;
// Extended value: collect bytes up to the first US-ASCII non-token
// character; "%" followed by two hex digits yields one raw byte, anything
// else is written as UTF-8.
631 if (pParameters)
633 OStringBuffer aSink;
634 while (p != pEnd)
636 auto q = p;
637 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
638 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
639 break;
640 p = q;
641 if (nChar == '%' && p + 1 < pEnd)
643 int nWeight1 = INetMIME::getHexWeight(p[0]);
644 int nWeight2 = INetMIME::getHexWeight(p[1]);
645 if (nWeight1 >= 0 && nWeight2 >= 0)
647 aSink.append(char(nWeight1 << 4 | nWeight2));
648 p += 2;
649 continue;
652 writeUTF8(aSink, nChar);
654 aValue = aSink.makeStringAndClear();
656 else
657 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
658 ++p;
// Quoted-string value: unquote into UTF-8 when the value is wanted,
// otherwise just skip it.
660 else if (p != pEnd && *p == '"')
661 if (pParameters)
663 OStringBuffer aSink(256);
664 bool bInvalid = false;
665 for (++p;;)
667 if (p == pEnd)
669 bInvalid = true;
670 break;
672 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
673 if (nChar == '"')
674 break;
675 else if (nChar == 0x0D) // CR
677 if (pEnd - p < 2 || *p++ != 0x0A // LF
678 || !isWhiteSpace(*p))
680 bInvalid = true;
681 break;
683 nChar = static_cast<unsigned char>(*p++);
685 else if (nChar == '\\')
687 if (p == pEnd)
689 bInvalid = true;
690 break;
692 nChar = INetMIME::getUTF32Character(p, pEnd);
694 writeUTF8(aSink, nChar);
696 if (bInvalid)
697 break;
698 aValue = aSink.makeStringAndClear();
700 else
702 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
703 if (p == pStringEnd)
704 break;
705 p = pStringEnd;
// Plain token value; non-ASCII characters are accepted as well.
707 else
709 sal_Unicode const * pTokenBegin = p;
710 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
711 ++p;
712 if (p == pTokenBegin)
713 break;
714 if (pParameters)
715 aValue = OString(
716 pTokenBegin, p - pTokenBegin,
717 RTL_TEXTENCODING_UTF8);
719 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
// Sorting orders the sections by attribute and section number, which is
// the order parseParameters() checks.
721 aList.sort();
722 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
725 bool equalIgnoreCase(const char * pBegin1,
726 const char * pEnd1,
727 const char * pString2)
729 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
730 "equalIgnoreCase(): Bad sequences");
732 while (*pString2 != 0)
733 if (pBegin1 == pEnd1
734 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
735 != rtl::toAsciiUpperCase(
736 static_cast<unsigned char>(*pString2++))))
737 return false;
738 return pBegin1 == pEnd1;
741 struct EncodingEntry
743 char const * m_aName;
744 rtl_TextEncoding m_eEncoding;
// Table of charset names and the text encodings they stand for; searched
// front to back by getCharsetEncoding(), which compares names ignoring
// US-ASCII case.
// NOTE(review): there is no "WINDOWS-1252" entry although
// RTL_TEXTENCODING_MS_1252 is used elsewhere in this file -- confirm that
// the omission is intended.
747 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
748 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
749 // noted:
750 EncodingEntry const aEncodingMap[]
751 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
752 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
753 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
754 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
755 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
756 { "ASCII", RTL_TEXTENCODING_ASCII_US },
757 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
758 { "US", RTL_TEXTENCODING_ASCII_US },
759 { "IBM367", RTL_TEXTENCODING_ASCII_US },
760 { "CP367", RTL_TEXTENCODING_ASCII_US },
761 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
762 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
763 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
764 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
765 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
766 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
767 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
768 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
769 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
770 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
771 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
772 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
773 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
774 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
775 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
776 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
777 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
778 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
779 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
780 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
781 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
782 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
783 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
784 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
785 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
786 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
787 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
788 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
789 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
790 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
791 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
792 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
793 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
794 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
795 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
796 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
797 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
798 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
799 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
800 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
801 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
802 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
803 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
804 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
805 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
806 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
807 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
808 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
809 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
810 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
811 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
812 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
813 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
814 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
815 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
816 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
817 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
818 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
819 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
820 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
821 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
822 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
823 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
824 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
825 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
826 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
827 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
828 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
829 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
830 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
831 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
832 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
833 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
834 { "IBM437", RTL_TEXTENCODING_IBM_437 },
835 { "CP437", RTL_TEXTENCODING_IBM_437 },
836 { "437", RTL_TEXTENCODING_IBM_437 },
837 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
838 { "IBM850", RTL_TEXTENCODING_IBM_850 },
839 { "CP850", RTL_TEXTENCODING_IBM_850 },
840 { "850", RTL_TEXTENCODING_IBM_850 },
841 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
842 { "IBM860", RTL_TEXTENCODING_IBM_860 },
843 { "CP860", RTL_TEXTENCODING_IBM_860 },
844 { "860", RTL_TEXTENCODING_IBM_860 },
845 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
846 { "IBM861", RTL_TEXTENCODING_IBM_861 },
847 { "CP861", RTL_TEXTENCODING_IBM_861 },
848 { "861", RTL_TEXTENCODING_IBM_861 },
849 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
850 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
851 { "IBM863", RTL_TEXTENCODING_IBM_863 },
852 { "CP863", RTL_TEXTENCODING_IBM_863 },
853 { "863", RTL_TEXTENCODING_IBM_863 },
854 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
855 { "IBM865", RTL_TEXTENCODING_IBM_865 },
856 { "CP865", RTL_TEXTENCODING_IBM_865 },
857 { "865", RTL_TEXTENCODING_IBM_865 },
858 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
859 { "IBM775", RTL_TEXTENCODING_IBM_775 },
860 { "CP775", RTL_TEXTENCODING_IBM_775 },
861 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
862 { "IBM852", RTL_TEXTENCODING_IBM_852 },
863 { "CP852", RTL_TEXTENCODING_IBM_852 },
864 { "852", RTL_TEXTENCODING_IBM_852 },
865 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
866 { "IBM855", RTL_TEXTENCODING_IBM_855 },
867 { "CP855", RTL_TEXTENCODING_IBM_855 },
868 { "855", RTL_TEXTENCODING_IBM_855 },
869 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
870 { "IBM857", RTL_TEXTENCODING_IBM_857 },
871 { "CP857", RTL_TEXTENCODING_IBM_857 },
872 { "857", RTL_TEXTENCODING_IBM_857 },
873 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
874 { "IBM862", RTL_TEXTENCODING_IBM_862 },
875 { "CP862", RTL_TEXTENCODING_IBM_862 },
876 { "862", RTL_TEXTENCODING_IBM_862 },
877 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
878 { "IBM864", RTL_TEXTENCODING_IBM_864 },
879 { "CP864", RTL_TEXTENCODING_IBM_864 },
880 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
881 { "IBM866", RTL_TEXTENCODING_IBM_866 },
882 { "CP866", RTL_TEXTENCODING_IBM_866 },
883 { "866", RTL_TEXTENCODING_IBM_866 },
884 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
885 { "IBM869", RTL_TEXTENCODING_IBM_869 },
886 { "CP869", RTL_TEXTENCODING_IBM_869 },
887 { "869", RTL_TEXTENCODING_IBM_869 },
888 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
889 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
890 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
891 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
892 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
893 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
894 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
895 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
896 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
897 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
898 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
899 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
900 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
901 { "GB2312", RTL_TEXTENCODING_GB_2312 },
902 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
903 { "BIG5", RTL_TEXTENCODING_BIG5 },
904 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
905 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
906 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
907 RTL_TEXTENCODING_EUC_JP },
908 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
909 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
910 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
911 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
912 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
913 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
914 { "UTF-7", RTL_TEXTENCODING_UTF7 },
915 { "UTF-8", RTL_TEXTENCODING_UTF8 },
916 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
917 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
918 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
919 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
920 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
921 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
922 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
923 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
924 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
925 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
927 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
928 char const * pEnd)
930 for (const EncodingEntry& i : aEncodingMap)
931 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
932 return i.m_eEncoding;
933 return RTL_TEXTENCODING_DONTKNOW;
938 // INetMIME
940 // static
// Returns true if nChar is a US-ASCII character flagged in the table
// below.  The table rejects the control characters, space, DEL and the
// characters ( ) < > @ , ; : \ " . [ ] and accepts every other US-ASCII
// character.
941 bool INetMIME::isAtomChar(sal_uInt32 nChar)
// One flag per US-ASCII code 0x00..0x7F; the trailing comment on each row
// lists the eight characters that row covers.
943 static const bool aMap[128]
944 = { false, false, false, false, false, false, false, false,
945 false, false, false, false, false, false, false, false,
946 false, false, false, false, false, false, false, false,
947 false, false, false, false, false, false, false, false,
948 false, true, false, true, true, true, true, true, // !"#$%&'
949 false, false, true, true, false, true, false, true, //()*+,-./
950 true, true, true, true, true, true, true, true, //01234567
951 true, true, false, false, false, true, false, true, //89:;<=>?
952 false, true, true, true, true, true, true, true, //@ABCDEFG
953 true, true, true, true, true, true, true, true, //HIJKLMNO
954 true, true, true, true, true, true, true, true, //PQRSTUVW
955 true, true, true, false, false, false, true, true, //XYZ[\]^_
956 true, true, true, true, true, true, true, true, //`abcdefg
957 true, true, true, true, true, true, true, true, //hijklmno
958 true, true, true, true, true, true, true, true, //pqrstuvw
959 true, true, true, true, true, true, true, false //xyz{|}~
// The isAscii() test comes first so that the 128-entry table is only
// indexed with US-ASCII values.
961 return rtl::isAscii(nChar) && aMap[nChar];
964 // static
// Returns true if nChar is a US-ASCII character flagged in the table
// below.  The table rejects the control characters, space, DEL and the
// characters " % ( ) * \ { and accepts every other US-ASCII character.
965 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
// One flag per US-ASCII code 0x00..0x7F; the trailing comment on each row
// lists the eight characters that row covers.
967 static const bool aMap[128]
968 = { false, false, false, false, false, false, false, false,
969 false, false, false, false, false, false, false, false,
970 false, false, false, false, false, false, false, false,
971 false, false, false, false, false, false, false, false,
972 false, true, false, true, true, false, true, true, // !"#$%&'
973 false, false, false, true, true, true, true, true, //()*+,-./
974 true, true, true, true, true, true, true, true, //01234567
975 true, true, true, true, true, true, true, true, //89:;<=>?
976 true, true, true, true, true, true, true, true, //@ABCDEFG
977 true, true, true, true, true, true, true, true, //HIJKLMNO
978 true, true, true, true, true, true, true, true, //PQRSTUVW
979 true, true, true, true, false, true, true, true, //XYZ[\]^_
980 true, true, true, true, true, true, true, true, //`abcdefg
981 true, true, true, true, true, true, true, true, //hijklmno
982 true, true, true, true, true, true, true, true, //pqrstuvw
983 true, true, true, false, true, true, true, false //xyz{|}~
// The isAscii() test comes first so that the 128-entry table is only
// indexed with US-ASCII values.
985 return rtl::isAscii(nChar) && aMap[nChar];
988 // static
989 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
990 const sal_Unicode * pEnd1,
991 const char * pString2)
993 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
994 "INetMIME::equalIgnoreCase(): Bad sequences");
996 while (*pString2 != 0)
997 if (pBegin1 == pEnd1
998 || (rtl::toAsciiUpperCase(*pBegin1++)
999 != rtl::toAsciiUpperCase(
1000 static_cast<unsigned char>(*pString2++))))
1001 return false;
1002 return pBegin1 == pEnd1;
1005 // static
1006 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1007 const sal_Unicode * pEnd, bool bLeadingZeroes,
1008 sal_uInt32 & rValue)
1010 sal_uInt64 nTheValue = 0;
1011 const sal_Unicode * p = rBegin;
1012 for ( ; p != pEnd; ++p)
1014 int nWeight = getWeight(*p);
1015 if (nWeight < 0)
1016 break;
1017 nTheValue = 10 * nTheValue + nWeight;
1018 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1019 return false;
1021 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1022 return false;
1023 rBegin = p;
1024 rValue = sal_uInt32(nTheValue);
1025 return true;
1028 // static
1029 sal_Unicode const * INetMIME::scanContentType(
1030 OUString const & rStr, OUString * pType,
1031 OUString * pSubType, INetContentTypeParameterList * pParameters)
1033 sal_Unicode const * pBegin = rStr.getStr();
1034 sal_Unicode const * pEnd = pBegin + rStr.getLength();
1035 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1036 sal_Unicode const * pTypeBegin = p;
1037 while (p != pEnd && isTokenChar(*p))
1039 ++p;
1041 if (p == pTypeBegin)
1042 return nullptr;
1043 sal_Unicode const * pTypeEnd = p;
1045 p = skipLinearWhiteSpaceComment(p, pEnd);
1046 if (p == pEnd || *p++ != '/')
1047 return nullptr;
1049 p = skipLinearWhiteSpaceComment(p, pEnd);
1050 sal_Unicode const * pSubTypeBegin = p;
1051 while (p != pEnd && isTokenChar(*p))
1053 ++p;
1055 if (p == pSubTypeBegin)
1056 return nullptr;
1057 sal_Unicode const * pSubTypeEnd = p;
1059 if (pType != nullptr)
1061 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1063 if (pSubType != nullptr)
1065 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1066 .toAsciiLowerCase();
1069 return scanParameters(p, pEnd, pParameters);
1072 // static
/** Decode the MIME encoded words ("=?charset[*language]?B-or-Q?text?=")
    contained in a header field body.

    The body is scanned from left to right.  At every '=' an encoded word
    is tentatively parsed; if any part of it is malformed, or its charset
    is not accepted, the '=' is treated as ordinary text instead.
    Ordinary text is copied through: a byte sequence accepted by
    translateUTF8Char() is appended as the character it yields, everything
    else is appended via appendISO88591().

    White space that directly follows a decoded encoded word is dropped
    when another encoded word follows it, and kept otherwise.

    @param rBody  The raw header field body.

    @return  The decoded text.
 */
1073 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1075 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1076 // versions of StarOffice send mails with header fields where encoded
1077 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1078 // '=', ',', '.', '"', ')', without any required white space in between.
1079 // And there appear to exist some broken mailers that only encode single
1080 // letters within words, like "Appel
1081 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1082 // detect encoded words even when not properly surrounded by white space.
1084 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
// (Unless translateUTF8Char() accepts them, see the default case of the
// switch near the end of this function.)
1086 // encoded-word = "=?"
1087 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
1088 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1089 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1090 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1091 // "?="
1093 // base64 = ALPHA / DIGIT / "+" / "/"
1095 const char * pBegin = rBody.getStr();
1096 const char * pEnd = pBegin + rBody.getLength();
1098 OUStringBuffer sDecoded;
// Start of the input that has not yet been appended to sDecoded.
1099 const char * pCopyBegin = pBegin;
1101 /* bool bStartEncodedWord = true; */
// End of the pending literal text that is flushed in front of an encoded
// word; white space skipped right after an encoded word lies behind it.
1102 const char * pWSPBegin = pBegin;
1104 for (const char * p = pBegin; p != pEnd;)
1106 if (*p == '=' /* && bStartEncodedWord */)
// q is the look-ahead cursor; p only moves once the whole encoded word
// has been validated and converted.
1108 const char * q = p + 1;
1109 bool bEncodedWord = q != pEnd && *q++ == '?';
1111 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1112 if (bEncodedWord)
// Scan the charset name and the optional "*language" suffix up to the
// next '?'.
1114 const char * pCharsetBegin = q;
// Points at the '*' while a syntactically valid language suffix is being
// scanned, null otherwise.
1115 const char * pLanguageBegin = nullptr;
// Number of letters in the current run of the language suffix.
1116 int nAlphaCount = 0;
1117 for (bool bDone = false; !bDone;)
1118 if (q == pEnd)
1120 bEncodedWord = false;
1121 bDone = true;
1123 else
1125 char cChar = *q++;
1126 switch (cChar)
// '*' (re)starts a language suffix.
1128 case '*':
1129 pLanguageBegin = q - 1;
1130 nAlphaCount = 0;
1131 break;
// Within a language suffix '-' separates runs of letters; an empty run
// in front of it invalidates the suffix.
1133 case '-':
1134 if (pLanguageBegin != nullptr)
1136 if (nAlphaCount == 0)
1137 pLanguageBegin = nullptr;
1138 else
1139 nAlphaCount = 0;
1141 break;
// '?' ends the charset part.  An empty charset is rejected.  The charset
// name passed to getCharsetEncoding() ends at the '*' if a valid
// language suffix with a non-empty last run was seen, else at the '?'.
1143 case '?':
1144 if (pCharsetBegin == q - 1)
1145 bEncodedWord = false;
1146 else
1148 eCharsetEncoding
1149 = getCharsetEncoding(
1150 pCharsetBegin,
1151 pLanguageBegin == nullptr
1152 || nAlphaCount == 0 ?
1153 q - 1 : pLanguageBegin);
1154 bEncodedWord = isMIMECharsetEncoding(
1155 eCharsetEncoding);
1156 eCharsetEncoding
1157 = translateFromMIME(eCharsetEncoding);
1159 bDone = true;
1160 break;
// Inside a language suffix only ASCII letters are allowed, at most eight
// per run; anything else discards the suffix.
1162 default:
1163 if (pLanguageBegin != nullptr
1164 && (!rtl::isAsciiAlpha(
1165 static_cast<unsigned char>(cChar))
1166 || ++nAlphaCount > 8))
1167 pLanguageBegin = nullptr;
1168 break;
// The encoding letter: 'B'/'b' or 'Q'/'q'.
1173 bool bEncodingB = false;
1174 if (bEncodedWord)
1176 if (q == pEnd)
1177 bEncodedWord = false;
1178 else
1180 switch (*q++)
1182 case 'B':
1183 case 'b':
1184 bEncodingB = true;
1185 break;
1187 case 'Q':
1188 case 'q':
1189 bEncodingB = false;
1190 break;
1192 default:
1193 bEncodedWord = false;
1194 break;
// The '?' that separates the encoding letter from the encoded text.
1199 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
// Receives the decoded bytes, still in the charset of the encoded word.
1201 OStringBuffer sText;
1202 if (bEncodedWord)
1204 if (bEncodingB)
// "B" encoding: consume groups of four characters until the terminating
// '?' is found.
1206 for (bool bDone = false; !bDone;)
1208 if (pEnd - q < 4)
1210 bEncodedWord = false;
1211 bDone = true;
1213 else
// bFinal: padding was seen in this group.  nCount: number of bytes the
// group yields.
1215 bool bFinal = false;
1216 int nCount = 3;
1217 sal_uInt32 nValue = 0;
1218 for (int nShift = 18; nShift >= 0; nShift -= 6)
// NOTE(review): -2 is presumably "not a Base 64 character" and -1 the
// padding character '=' -- confirm against getBase64Weight().
1220 int nWeight = getBase64Weight(*q++);
1221 if (nWeight == -2)
1223 bEncodedWord = false;
1224 bDone = true;
1225 break;
1227 if (nWeight == -1)
1229 if (!bFinal)
// A -1 weight in the first or second position of a group is rejected.
1231 if (nShift >= 12)
1233 bEncodedWord = false;
1234 bDone = true;
1235 break;
// First -1 weight in third position: one byte; in fourth: two bytes.
1237 bFinal = true;
1238 nCount = nShift == 6 ? 1 : 2;
1241 else
1242 nValue |= nWeight << nShift;
1244 if (bEncodedWord)
// Emit the nCount most significant of the three bytes in nValue.
1246 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1247 sText.append(char(nValue >> nShift & 0xFF));
// NOTE(review): q may equal pEnd here; this read relies on the buffer
// returned by OString::getStr() being NUL-terminated -- confirm.
1248 if (*q == '?')
1250 ++q;
1251 bDone = true;
// A group with a -1 weight must be directly followed by the closing '?'.
1253 if (bFinal && !bDone)
1255 bEncodedWord = false;
1256 bDone = true;
1262 else
// "Q" encoding.  Raw characters are copied in runs;
// pEncodedTextCopyBegin marks the start of the run not yet appended to
// sText.
1264 const char * pEncodedTextBegin = q;
1265 const char * pEncodedTextCopyBegin = q;
1266 for (bool bDone = false; !bDone;)
1267 if (q == pEnd)
1269 bEncodedWord = false;
1270 bDone = true;
1272 else
1274 sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1275 switch (nChar)
// "=XY": two hexadecimal digits encode one byte.
1277 case '=':
1279 if (pEnd - q < 2)
1281 bEncodedWord = false;
1282 bDone = true;
1283 break;
1285 int nDigit1 = getHexWeight(q[0]);
1286 int nDigit2 = getHexWeight(q[1]);
1287 if (nDigit1 < 0 || nDigit2 < 0)
1289 bEncodedWord = false;
1290 bDone = true;
1291 break;
// Flush the raw run in front of the '=', then append the decoded byte.
1293 sText.append(rBody.subView(
1294 (pEncodedTextCopyBegin - pBegin),
1295 (q - 1 - pEncodedTextCopyBegin)));
1296 sText.append(char(nDigit1 << 4 | nDigit2));
1297 q += 2;
1298 pEncodedTextCopyBegin = q;
1299 break;
// '?' ends the encoded text, which must not be empty (q is already one
// past the '?', hence the comparison with 1).
1302 case '?':
1303 if (q - pEncodedTextBegin > 1)
1304 sText.append(rBody.subView(
1305 (pEncodedTextCopyBegin - pBegin),
1306 (q - 1 - pEncodedTextCopyBegin)));
1307 else
1308 bEncodedWord = false;
1309 bDone = true;
1310 break;
// '_' stands for a space.
1312 case '_':
1313 sText.append(rBody.subView(
1314 (pEncodedTextCopyBegin - pBegin),
1315 (q - 1 - pEncodedTextCopyBegin)));
1316 sText.append(' ');
1317 pEncodedTextCopyBegin = q;
1318 break;
// Any other character stays in the raw run, provided isVisible()
// accepts it.
1320 default:
1321 if (!isVisible(nChar))
1323 bEncodedWord = false;
1324 bDone = true;
1326 break;
// The '=' of the closing "?=".
1332 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
// Convert the decoded bytes from the charset of the encoded word to
// UTF-16; a null result rejects the encoded word.
1334 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1335 sal_Size nUnicodeSize = 0;
1336 if (bEncodedWord)
1338 pUnicodeBuffer
1339 = convertToUnicode(sText.getStr(),
1340 sText.getStr() + sText.getLength(),
1341 eCharsetEncoding, nUnicodeSize);
1342 if (!pUnicodeBuffer)
1343 bEncodedWord = false;
// A complete, convertible encoded word: flush the pending literal text
// up to pWSPBegin, append the converted characters and resume scanning
// behind the encoded word.
1346 if (bEncodedWord)
1348 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1349 sDecoded.append(
1350 pUnicodeBuffer.get(),
1351 static_cast< sal_Int32 >(nUnicodeSize));
1352 pUnicodeBuffer.reset();
1353 p = q;
1354 pCopyBegin = p;
// Skip white space behind the encoded word but leave pWSPBegin in front
// of it, so that it is not flushed if another encoded word follows.
1356 pWSPBegin = p;
1357 while (p != pEnd && isWhiteSpace(*p))
1358 ++p;
1359 /* bStartEncodedWord = p != pWSPBegin; */
1360 continue;
// Ordinary text, including an '=' that did not start a valid encoded
// word.
1364 if (p == pEnd)
1365 break;
1367 switch (*p++)
1369 case '"':
1370 /* bStartEncodedWord = true; */
1371 break;
1373 case '(':
1374 /* bStartEncodedWord = true; */
1375 break;
1377 case ')':
1378 /* bStartEncodedWord = false; */
1379 break;
1381 default:
// NOTE(review): translateUTF8Char() presumably decodes one UTF-8
// sequence starting at pUTF8Begin and advances pUTF8End behind it --
// confirm.  On success the pending literal text is flushed, the
// character is appended as UTF-16 and scanning resumes at pUTF8End.
1383 const char * pUTF8Begin = p - 1;
1384 const char * pUTF8End = pUTF8Begin;
1385 sal_uInt32 nCharacter = 0;
1386 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1388 appendISO88591(sDecoded, pCopyBegin, p - 1);
1389 sal_Unicode aUTF16Buf[2];
1390 sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
1391 sDecoded.append(aUTF16Buf, nUTF16Len);
1392 p = pUTF8End;
1393 pCopyBegin = p;
1395 /* bStartEncodedWord = false; */
1396 break;
// Everything consumed so far counts as literal text to be flushed in
// front of a following encoded word.
1399 pWSPBegin = p;
// Flush whatever literal text is still pending.
1402 appendISO88591(sDecoded, pCopyBegin, pEnd);
1403 return sDecoded.makeStringAndClear();
1406 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */