tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <cstddef>
  22 #include <limits>
  23 #include <forward_list>
  24 #include <memory>
  25
  26 #include <osl/diagnose.h>
  27 #include <rtl/ustring.hxx>
  28 #include <rtl/strbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/inetmime.hxx>
  31 #include <rtl/character.hxx>
  32
  33 namespace {
  34
  35 rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
  36                                            const sal_Char * pEnd);
  37
  38 /** Check for US-ASCII white space character.
  39
  40     @param nChar  Some UCS-4 character.
  41
  42     @return  True if nChar is a US-ASCII white space character (US-ASCII
  43     0x09 or 0x20).
  44  */
  45 inline bool isWhiteSpace(sal_uInt32 nChar)
  46 {
  47     return nChar == '\t' || nChar == ' ';
  48 }
  49
  50 /** Get the Base 64 digit weight of a US-ASCII character.
  51
  52     @param nChar  Some UCS-4 character.
  53
  54     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  55     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  56     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  57     character (US-ASCII '='), return -1; otherwise, return -2.
  58  */
  59 inline int getBase64Weight(sal_uInt32 nChar)
  60 {
  61     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  62            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  63            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  64            nChar == '+' ? 62 :
  65            nChar == '/' ? 63 :
  66            nChar == '=' ? -1 : -2;
  67 }
  68
  69 inline bool startsWithLineFolding(const sal_Unicode * pBegin,
  70                                             const sal_Unicode * pEnd)
  71 {
  72     DBG_ASSERT(pBegin && pBegin <= pEnd,
  73                "startsWithLineFolding(): Bad sequence");
  74
  75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  76            && isWhiteSpace(pBegin[2]); // CR, LF
  77 }
  78
  79 inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  80                                                         eEncoding)
  81 {
  82 #if defined(_WIN32)
  83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  84                RTL_TEXTENCODING_MS_1252 : eEncoding;
  85 #else
  86     return eEncoding;
  87 #endif
  88 }
  89
  90 inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  91 {
  92     return rtl_isOctetTextEncoding(eEncoding);
  93 }
  94
  95 sal_Unicode * convertToUnicode(const sal_Char * pBegin,
  96                                          const sal_Char * pEnd,
  97                                          rtl_TextEncoding eEncoding,
  98                                          sal_Size & rSize)
  99 {
 100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 101         return nullptr;
 102     rtl_TextToUnicodeConverter hConverter
 103         = rtl_createTextToUnicodeConverter(eEncoding);
 104     rtl_TextToUnicodeContext hContext
 105         = rtl_createTextToUnicodeContext(hConverter);
 106     sal_Unicode * pBuffer;
 107     sal_uInt32 nInfo;
 108     for (sal_Size nBufferSize = pEnd - pBegin;;
 109          nBufferSize += nBufferSize / 3 + 1)
 110     {
 111         pBuffer = new sal_Unicode[nBufferSize];
 112         sal_Size nSrcCvtBytes;
 113         rSize = rtl_convertTextToUnicode(
 114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
 115                     nBufferSize,
 116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 119                     &nInfo, &nSrcCvtBytes);
 120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 121             break;
 122         delete[] pBuffer;
 123         rtl_resetTextToUnicodeContext(hConverter, hContext);
 124     }
 125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 126     rtl_destroyTextToUnicodeConverter(hConverter);
 127     if (nInfo != 0)
 128     {
 129         delete[] pBuffer;
 130         pBuffer = nullptr;
 131     }
 132     return pBuffer;
 133 }
 134
 135 sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
 136                                         const sal_Unicode * pEnd,
 137                                         rtl_TextEncoding eEncoding,
 138                                         sal_Size & rSize)
 139 {
 140     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 141         return nullptr;
 142     rtl_UnicodeToTextConverter hConverter
 143         = rtl_createUnicodeToTextConverter(eEncoding);
 144     rtl_UnicodeToTextContext hContext
 145         = rtl_createUnicodeToTextContext(hConverter);
 146     sal_Char * pBuffer;
 147     sal_uInt32 nInfo;
 148     for (sal_Size nBufferSize = pEnd - pBegin;;
 149          nBufferSize += nBufferSize / 3 + 1)
 150     {
 151         pBuffer = new sal_Char[nBufferSize];
 152         sal_Size nSrcCvtBytes;
 153         rSize = rtl_convertUnicodeToText(
 154                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
 155                     nBufferSize,
 156                     RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
 157                         | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
 158                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
 159                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
 160                     &nInfo, &nSrcCvtBytes);
 161         if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
 162             break;
 163         delete[] pBuffer;
 164         rtl_resetUnicodeToTextContext(hConverter, hContext);
 165     }
 166     rtl_destroyUnicodeToTextContext(hConverter, hContext);
 167     rtl_destroyUnicodeToTextConverter(hConverter);
 168     if (nInfo != 0)
 169     {
 170         delete[] pBuffer;
 171         pBuffer = nullptr;
 172     }
 173     return pBuffer;
 174 }
 175
 176 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
 177
 178     @param pBuffer  Points to a buffer, must not be null.
 179
 180     @param nUTF32  An UTF-32 character, must be in the range 0..0x10FFFF.
 181
 182     @return  A pointer past the UTF-16 characters put into the buffer
 183     (i.e., pBuffer + 1 or pBuffer + 2).
 184  */
 185 inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
 186                                                  sal_uInt32 nUTF32)
 187 {
 188     DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
 189     if (nUTF32 < 0x10000)
 190         *pBuffer++ = sal_Unicode(nUTF32);
 191     else
 192     {
 193         nUTF32 -= 0x10000;
 194         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
 195         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
 196     }
 197     return pBuffer;
 198 }
 199
 200 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 201 {
 202     // See RFC 2279 for a discussion of UTF-8.
 203     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 204
 205     if (nChar < 0x80)
 206         rSink.append(sal_Char(nChar));
 207     else if (nChar < 0x800)
 208         rSink.append(sal_Char(nChar >> 6 | 0xC0))
 209              .append(sal_Char((nChar & 0x3F) | 0x80));
 210     else if (nChar < 0x10000)
 211         rSink.append(sal_Char(nChar >> 12 | 0xE0))
 212              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 213              .append(sal_Char((nChar & 0x3F) | 0x80));
 214     else if (nChar < 0x200000)
 215         rSink.append(sal_Char(nChar >> 18 | 0xF0))
 216              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 217              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 218              .append(sal_Char((nChar & 0x3F) | 0x80));
 219     else if (nChar < 0x4000000)
 220         rSink.append(sal_Char(nChar >> 24 | 0xF8))
 221              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
 222              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 223              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 224              .append(sal_Char((nChar & 0x3F) | 0x80));
 225     else
 226         rSink.append(sal_Char(nChar >> 30 | 0xFC))
 227              .append(sal_Char((nChar >> 24 & 0x3F) | 0x80))
 228              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
 229              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 230              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 231              .append(sal_Char((nChar & 0x3F) | 0x80));
 232 }
 233
 234 bool translateUTF8Char(const sal_Char *& rBegin,
 235                                  const sal_Char * pEnd,
 236                                  rtl_TextEncoding eEncoding,
 237                                  sal_uInt32 & rCharacter)
 238 {
 239     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 240         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 241         return false;
 242
 243     int nCount;
 244     sal_uInt32 nMin;
 245     sal_uInt32 nUCS4;
 246     const sal_Char * p = rBegin;
 247     if (static_cast< unsigned char >(*p) < 0xE0)
 248     {
 249         nCount = 1;
 250         nMin = 0x80;
 251         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 252     }
 253     else if (static_cast< unsigned char >(*p) < 0xF0)
 254     {
 255         nCount = 2;
 256         nMin = 0x800;
 257         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 258     }
 259     else if (static_cast< unsigned char >(*p) < 0xF8)
 260     {
 261         nCount = 3;
 262         nMin = 0x10000;
 263         nUCS4 = static_cast< unsigned char >(*p) & 7;
 264     }
 265     else if (static_cast< unsigned char >(*p) < 0xFC)
 266     {
 267         nCount = 4;
 268         nMin = 0x200000;
 269         nUCS4 = static_cast< unsigned char >(*p) & 3;
 270     }
 271     else
 272     {
 273         nCount = 5;
 274         nMin = 0x4000000;
 275         nUCS4 = static_cast< unsigned char >(*p) & 1;
 276     }
 277     ++p;
 278
 279     for (; nCount-- > 0; ++p)
 280         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 281             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 282         else
 283             return false;
 284
 285     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 286         return false;
 287
 288     if (eEncoding >= RTL_TEXTENCODING_UCS4)
 289         rCharacter = nUCS4;
 290     else
 291     {
 292         sal_Unicode aUTF16[2];
 293         const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
 294         sal_Size nSize;
 295         sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
 296                                                 nSize);
 297         if (!pBuffer)
 298             return false;
 299         DBG_ASSERT(nSize == 1,
 300                    "translateUTF8Char(): Bad conversion");
 301         rCharacter = *pBuffer;
 302         delete[] pBuffer;
 303     }
 304     rBegin = p;
 305     return true;
 306 }
 307
 308 void appendISO88591(OUString & rText, sal_Char const * pBegin,
 309                     sal_Char const * pEnd);
 310
 311 struct Parameter
 312 {
 313     OString m_aAttribute;
 314     OString m_aCharset;
 315     OString m_aLanguage;
 316     OString m_aValue;
 317     sal_uInt32 m_nSection;
 318     bool m_bExtended;
 319
 320     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 321     {
 322         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 323         return nComp < 0 ||
 324                 (nComp == 0 && m_nSection < rhs.m_nSection);
 325     }
 326     struct IsSameSection // is used to check container for duplicates with std::any_of
 327     {
 328         const OString& rAttribute;
 329         const sal_uInt32 nSection;
 330         bool operator()(const Parameter& r) const
 331         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 332     };
 333 };
 334
 335 typedef std::forward_list<Parameter> ParameterList;
 336
 337 bool parseParameters(ParameterList const & rInput,
 338                      INetContentTypeParameterList * pOutput);
 339
 340 //  appendISO88591
 341
 342 void appendISO88591(OUString & rText, sal_Char const * pBegin,
 343                     sal_Char const * pEnd)
 344 {
 345     sal_Int32 nLength = pEnd - pBegin;
 346     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 347     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 348         *p++ = static_cast<unsigned char>(*pBegin++);
 349     rText += OUString(pBuffer.get(), nLength);
 350 }
 351
 352 //  parseParameters
 353
 354 bool parseParameters(ParameterList const & rInput,
 355                      INetContentTypeParameterList * pOutput)
 356 {
 357     if (pOutput)
 358         pOutput->clear();
 359
 360     for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
 361     {
 362         if (it->m_nSection > 0
 363             && (itPrev == rInput.end()
 364                 || itPrev->m_nSection != it->m_nSection - 1
 365                 || itPrev->m_aAttribute != it->m_aAttribute))
 366             return false;
 367     }
 368
 369     if (pOutput)
 370         for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
 371         {
 372             bool bCharset = !it->m_aCharset.isEmpty();
 373             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 374             if (bCharset)
 375                 eEncoding
 376                     = getCharsetEncoding(it->m_aCharset.getStr(),
 377                                                    it->m_aCharset.getStr()
 378                                                        + it->m_aCharset.getLength());
 379             OUString aValue;
 380             bool bBadEncoding = false;
 381             itNext = it;
 382             do
 383             {
 384                 sal_Size nSize;
 385                 sal_Unicode * pUnicode
 386                     = convertToUnicode(itNext->m_aValue.getStr(),
 387                                                  itNext->m_aValue.getStr()
 388                                                      + itNext->m_aValue.getLength(),
 389                                                  bCharset && it->m_bExtended ?
 390                                                      eEncoding :
 391                                                      RTL_TEXTENCODING_UTF8,
 392                                                  nSize);
 393                 if (!pUnicode && !(bCharset && it->m_bExtended))
 394                     pUnicode = convertToUnicode(
 395                                    itNext->m_aValue.getStr(),
 396                                    itNext->m_aValue.getStr()
 397                                        + itNext->m_aValue.getLength(),
 398                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 399                 if (!pUnicode)
 400                 {
 401                     bBadEncoding = true;
 402                     break;
 403                 }
 404                 aValue += OUString(pUnicode, static_cast<sal_Int32>(nSize));
 405                 delete[] pUnicode;
 406                 ++itNext;
 407             }
 408             while (itNext != rInput.end() && itNext->m_nSection != 0);
 409
 410             if (bBadEncoding)
 411             {
 412                 aValue.clear();
 413                 itNext = it;
 414                 do
 415                 {
 416                     if (itNext->m_bExtended)
 417                     {
 418                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 419                             aValue += OUStringLiteral1(
 420                                 sal_Unicode(
 421                                     static_cast<unsigned char>(itNext->m_aValue[i]))
 422                                 | 0xF800); // map to unicode corporate use sub area
 423                     }
 424                     else
 425                     {
 426                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 427                             aValue += OUStringLiteral1( static_cast<unsigned char>(itNext->m_aValue[i]) );
 428                     }
 429                     ++itNext;
 430                 }
 431                 while (itNext != rInput.end() && itNext->m_nSection != 0);
 432             }
 433             auto const ret = pOutput->insert(
 434                 {it->m_aAttribute,
 435                  {it->m_aCharset, it->m_aLanguage, aValue, !bBadEncoding}});
 436             SAL_INFO_IF(!ret.second, "tools",
 437                 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
 438         }
 439     return true;
 440 }
 441
 442 /** Check whether some character is valid within an RFC 2045 <token>.
 443
 444     @param nChar  Some UCS-4 character.
 445
 446     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 447     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 448     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 449  */
 450 bool isTokenChar(sal_uInt32 nChar)
 451 {
 452     static const bool aMap[128]
 453         = { false, false, false, false, false, false, false, false,
 454             false, false, false, false, false, false, false, false,
 455             false, false, false, false, false, false, false, false,
 456             false, false, false, false, false, false, false, false,
 457             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 458             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 459              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 460              true,  true, false, false, false, false, false, false, //89:;<=>?
 461             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 462              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 463              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 464              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 465              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 466              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 467              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 468              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 469           };
 470     return rtl::isAscii(nChar) && aMap[nChar];
 471 }
 472
 473 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 474                                           const sal_Unicode * pEnd)
 475 {
 476     DBG_ASSERT(pBegin && pBegin <= pEnd,
 477                "skipComment(): Bad sequence");
 478
 479     if (pBegin != pEnd && *pBegin == '(')
 480     {
 481         sal_uInt32 nLevel = 0;
 482         for (const sal_Unicode * p = pBegin; p != pEnd;)
 483             switch (*p++)
 484             {
 485                 case '(':
 486                     ++nLevel;
 487                     break;
 488
 489                 case ')':
 490                     if (--nLevel == 0)
 491                         return p;
 492                     break;
 493
 494                 case '\\':
 495                     if (p != pEnd)
 496                         ++p;
 497                     break;
 498             }
 499     }
 500     return pBegin;
 501 }
 502
 503 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 504                                                               pBegin,
 505                                                           const sal_Unicode *
 506                                                               pEnd)
 507 {
 508     DBG_ASSERT(pBegin && pBegin <= pEnd,
 509                "skipLinearWhiteSpaceComment(): Bad sequence");
 510
 511     while (pBegin != pEnd)
 512         switch (*pBegin)
 513         {
 514             case '\t':
 515             case ' ':
 516                 ++pBegin;
 517                 break;
 518
 519             case 0x0D: // CR
 520                 if (startsWithLineFolding(pBegin, pEnd))
 521                     pBegin += 3;
 522                 else
 523                     return pBegin;
 524                 break;
 525
 526             case '(':
 527             {
 528                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 529                 if (p == pBegin)
 530                     return pBegin;
 531                 pBegin = p;
 532                 break;
 533             }
 534
 535             default:
 536                 return pBegin;
 537         }
 538     return pBegin;
 539 }
 540
 541 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 542                                                const sal_Unicode * pEnd)
 543 {
 544     DBG_ASSERT(pBegin && pBegin <= pEnd,
 545                "skipQuotedString(): Bad sequence");
 546
 547     if (pBegin != pEnd && *pBegin == '"')
 548         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 549             switch (*p++)
 550             {
 551                 case 0x0D: // CR
 552                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 553                         || !isWhiteSpace(*p++))
 554                         return pBegin;
 555                     break;
 556
 557                 case '"':
 558                     return p;
 559
 560                 case '\\':
 561                     if (p != pEnd)
 562                         ++p;
 563                     break;
 564             }
 565     return pBegin;
 566 }
 567
 568 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
 569                                              sal_Unicode const * pEnd,
 570                                              INetContentTypeParameterList *
 571                                                  pParameters)
 572 {
 573     ParameterList aList;
 574     sal_Unicode const * pParameterBegin = pBegin;
 575     for (sal_Unicode const * p = pParameterBegin;;)
 576     {
 577         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
 578         if (pParameterBegin == pEnd || *pParameterBegin != ';')
 579             break;
 580         p = pParameterBegin + 1;
 581
 582         sal_Unicode const * pAttributeBegin
 583             = skipLinearWhiteSpaceComment(p, pEnd);
 584         p = pAttributeBegin;
 585         bool bDowncaseAttribute = false;
 586         while (p != pEnd && isTokenChar(*p) && *p != '*')
 587         {
 588             bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
 589             ++p;
 590         }
 591         if (p == pAttributeBegin)
 592             break;
 593         OString aAttribute = OString(
 594             pAttributeBegin, p - pAttributeBegin,
 595             RTL_TEXTENCODING_ASCII_US);
 596         if (bDowncaseAttribute)
 597             aAttribute = aAttribute.toAsciiLowerCase();
 598
 599         sal_uInt32 nSection = 0;
 600         if (p != pEnd && *p == '*')
 601         {
 602             ++p;
 603             if (p != pEnd && rtl::isAsciiDigit(*p)
 604                 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
 605                 break;
 606         }
 607
 608         bool bPresent = std::any_of(aList.begin(), aList.end(),
 609                                     Parameter::IsSameSection{aAttribute, nSection});
 610         if (bPresent)
 611             break;
 612
 613         bool bExtended = false;
 614         if (p != pEnd && *p == '*')
 615         {
 616             ++p;
 617             bExtended = true;
 618         }
 619
 620         p = skipLinearWhiteSpaceComment(p, pEnd);
 621
 622         if (p == pEnd || *p != '=')
 623             break;
 624
 625         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
 626
 627         OString aCharset;
 628         OString aLanguage;
 629         OString aValue;
 630         if (bExtended)
 631         {
 632             if (nSection == 0)
 633             {
 634                 sal_Unicode const * pCharsetBegin = p;
 635                 bool bDowncaseCharset = false;
 636                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
 637                 {
 638                     bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
 639                     ++p;
 640                 }
 641                 if (p == pCharsetBegin)
 642                     break;
 643                 if (pParameters)
 644                 {
 645                     aCharset = OString(
 646                         pCharsetBegin,
 647                         p - pCharsetBegin,
 648                         RTL_TEXTENCODING_ASCII_US);
 649                     if (bDowncaseCharset)
 650                         aCharset = aCharset.toAsciiLowerCase();
 651                 }
 652
 653                 if (p == pEnd || *p != '\'')
 654                     break;
 655                 ++p;
 656
 657                 sal_Unicode const * pLanguageBegin = p;
 658                 bool bDowncaseLanguage = false;
 659                 int nLetters = 0;
 660                 for (; p != pEnd; ++p)
 661                     if (rtl::isAsciiAlpha(*p))
 662                     {
 663                         if (++nLetters > 8)
 664                             break;
 665                         bDowncaseLanguage = bDowncaseLanguage
 666                                             || rtl::isAsciiUpperCase(*p);
 667                     }
 668                     else if (*p == '-')
 669                     {
 670                         if (nLetters == 0)
 671                             break;
 672                         nLetters = 0;
 673                     }
 674                     else
 675                         break;
 676                 if (nLetters == 0 || nLetters > 8)
 677                     break;
 678                 if (pParameters)
 679                 {
 680                     aLanguage = OString(
 681                         pLanguageBegin,
 682                         p - pLanguageBegin,
 683                         RTL_TEXTENCODING_ASCII_US);
 684                     if (bDowncaseLanguage)
 685                         aLanguage = aLanguage.toAsciiLowerCase();
 686                 }
 687
 688                 if (p == pEnd || *p != '\'')
 689                     break;
 690                 ++p;
 691             }
 692             if (pParameters)
 693             {
 694                 OStringBuffer aSink;
 695                 while (p != pEnd)
 696                 {
 697                     auto q = p;
 698                     sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
 699                     if (rtl::isAscii(nChar) && !isTokenChar(nChar))
 700                         break;
 701                     p = q;
 702                     if (nChar == '%' && p + 1 < pEnd)
 703                     {
 704                         int nWeight1 = INetMIME::getHexWeight(p[0]);
 705                         int nWeight2 = INetMIME::getHexWeight(p[1]);
 706                         if (nWeight1 >= 0 && nWeight2 >= 0)
 707                         {
 708                             aSink.append(sal_Char(nWeight1 << 4 | nWeight2));
 709                             p += 2;
 710                             continue;
 711                         }
 712                     }
 713                     writeUTF8(aSink, nChar);
 714                 }
 715                 aValue = aSink.makeStringAndClear();
 716             }
 717             else
 718                 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 719                     ++p;
 720         }
 721         else if (p != pEnd && *p == '"')
 722             if (pParameters)
 723             {
 724                 OStringBuffer aSink;
 725                 bool bInvalid = false;
 726                 for (++p;;)
 727                 {
 728                     if (p == pEnd)
 729                     {
 730                         bInvalid = true;
 731                         break;
 732                     }
 733                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
 734                     if (nChar == '"')
 735                         break;
 736                     else if (nChar == 0x0D) // CR
 737                     {
 738                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 739                             || !isWhiteSpace(*p))
 740                         {
 741                             bInvalid = true;
 742                             break;
 743                         }
 744                         nChar = static_cast<unsigned char>(*p++);
 745                     }
 746                     else if (nChar == '\\')
 747                     {
 748                         if (p == pEnd)
 749                         {
 750                             bInvalid = true;
 751                             break;
 752                         }
 753                         nChar = INetMIME::getUTF32Character(p, pEnd);
 754                     }
 755                     writeUTF8(aSink, nChar);
 756                 }
 757                 if (bInvalid)
 758                     break;
 759                 aValue = aSink.makeStringAndClear();
 760             }
 761             else
 762             {
 763                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
 764                 if (p == pStringEnd)
 765                     break;
 766                 p = pStringEnd;
 767             }
 768         else
 769         {
 770             sal_Unicode const * pTokenBegin = p;
 771             while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 772                 ++p;
 773             if (p == pTokenBegin)
 774                 break;
 775             if (pParameters)
 776                 aValue = OString(
 777                     pTokenBegin, p - pTokenBegin,
 778                     RTL_TEXTENCODING_UTF8);
 779         }
 780         aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
 781     }
 782     aList.sort();
 783     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
 784 }
 785
 786 bool equalIgnoreCase(const sal_Char * pBegin1,
 787                                const sal_Char * pEnd1,
 788                                const sal_Char * pString2)
 789 {
 790     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 791                "equalIgnoreCase(): Bad sequences");
 792
 793     while (*pString2 != 0)
 794         if (pBegin1 == pEnd1
 795             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 796                 != rtl::toAsciiUpperCase(
 797                     static_cast<unsigned char>(*pString2++))))
 798             return false;
 799     return pBegin1 == pEnd1;
 800 }
 801
 802 struct EncodingEntry
 803 {
 804     sal_Char const * m_aName;
 805     rtl_TextEncoding m_eEncoding;
 806 };
 807
 808 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 809 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 810 // noted:
 811 static EncodingEntry const aEncodingMap[]
 812     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 813         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 814         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 815         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 816         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 817         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 818         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 819         { "US", RTL_TEXTENCODING_ASCII_US },
 820         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 821         { "CP367", RTL_TEXTENCODING_ASCII_US },
 822         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 823         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 824         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 825         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 826         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 827         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 828         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 829         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 830         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 831         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 832         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 833         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 834         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 835         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 836         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 837         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 838         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 839         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 840         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 841         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 842         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 843         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 844         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 845         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 846         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 847         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 848         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 849         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 850         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 851         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 852         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 853         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 854         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 855         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 856         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 857         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 858         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 859         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 860         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 861         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 862         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 863         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 864         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 865         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 866         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 867         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 868         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 869         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 870         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 871         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 872         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 873         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 874         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 875         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 876         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 877         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 878         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 879         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 880         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 881         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 882         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 883         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 884         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 885         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 886         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 887         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 888         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 889         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 890         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 891         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 892         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 893         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 894         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 895         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 896         { "CP437", RTL_TEXTENCODING_IBM_437 },
 897         { "437", RTL_TEXTENCODING_IBM_437 },
 898         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 899         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 900         { "CP850", RTL_TEXTENCODING_IBM_850 },
 901         { "850", RTL_TEXTENCODING_IBM_850 },
 902         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 903         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 904         { "CP860", RTL_TEXTENCODING_IBM_860 },
 905         { "860", RTL_TEXTENCODING_IBM_860 },
 906         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 907         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 908         { "CP861", RTL_TEXTENCODING_IBM_861 },
 909         { "861", RTL_TEXTENCODING_IBM_861 },
 910         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 911         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 912         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 913         { "CP863", RTL_TEXTENCODING_IBM_863 },
 914         { "863", RTL_TEXTENCODING_IBM_863 },
 915         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 916         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 917         { "CP865", RTL_TEXTENCODING_IBM_865 },
 918         { "865", RTL_TEXTENCODING_IBM_865 },
 919         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 920         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 921         { "CP775", RTL_TEXTENCODING_IBM_775 },
 922         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 923         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 924         { "CP852", RTL_TEXTENCODING_IBM_852 },
 925         { "852", RTL_TEXTENCODING_IBM_852 },
 926         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 927         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 928         { "CP855", RTL_TEXTENCODING_IBM_855 },
 929         { "855", RTL_TEXTENCODING_IBM_855 },
 930         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 931         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 932         { "CP857", RTL_TEXTENCODING_IBM_857 },
 933         { "857", RTL_TEXTENCODING_IBM_857 },
 934         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 935         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 936         { "CP862", RTL_TEXTENCODING_IBM_862 },
 937         { "862", RTL_TEXTENCODING_IBM_862 },
 938         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 939         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 940         { "CP864", RTL_TEXTENCODING_IBM_864 },
 941         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 942         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 943         { "CP866", RTL_TEXTENCODING_IBM_866 },
 944         { "866", RTL_TEXTENCODING_IBM_866 },
 945         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 946         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 947         { "CP869", RTL_TEXTENCODING_IBM_869 },
 948         { "869", RTL_TEXTENCODING_IBM_869 },
 949         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 950         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 951         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 952         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 953         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 954         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 955         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 956         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 957         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 958         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 959         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 960         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 961         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 962         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 963         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 964         { "BIG5", RTL_TEXTENCODING_BIG5 },
 965         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 966         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 967         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 968           RTL_TEXTENCODING_EUC_JP },
 969         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 970         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 971         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 972         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 973         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 974         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 975         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 976         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 977         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 978         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 979         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 980         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 981         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 982         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 983         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 984         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 985         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 986         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 987
 988 rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
 989                                               sal_Char const * pEnd)
 990 {
 991     for (const EncodingEntry& i : aEncodingMap)
 992         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 993             return i.m_eEncoding;
 994     return RTL_TEXTENCODING_DONTKNOW;
 995 }
 996
 997 }
 998
 999 //  INetMIME
1000
1001 // static
1002 bool INetMIME::isAtomChar(sal_uInt32 nChar)
1003 {
1004     static const bool aMap[128]
1005         = { false, false, false, false, false, false, false, false,
1006             false, false, false, false, false, false, false, false,
1007             false, false, false, false, false, false, false, false,
1008             false, false, false, false, false, false, false, false,
1009             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
1010             false, false,  true,  true, false,  true, false,  true, //()*+,-./
1011              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1012              true,  true, false, false, false,  true, false,  true, //89:;<=>?
1013             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1014              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1015              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1016              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
1017              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1018              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1019              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1020              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
1021           };
1022     return rtl::isAscii(nChar) && aMap[nChar];
1023 }
1024
1025 // static
1026 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
1027 {
1028     static const bool aMap[128]
1029         = { false, false, false, false, false, false, false, false,
1030             false, false, false, false, false, false, false, false,
1031             false, false, false, false, false, false, false, false,
1032             false, false, false, false, false, false, false, false,
1033             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
1034             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
1035              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1036              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
1037              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1038              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1039              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1040              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
1041              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1042              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1043              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1044              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
1045           };
1046     return rtl::isAscii(nChar) && aMap[nChar];
1047 }
1048
1049 // static
1050 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
1051                                const sal_Unicode * pEnd1,
1052                                const sal_Char * pString2)
1053 {
1054     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
1055                "INetMIME::equalIgnoreCase(): Bad sequences");
1056
1057     while (*pString2 != 0)
1058         if (pBegin1 == pEnd1
1059             || (rtl::toAsciiUpperCase(*pBegin1++)
1060                 != rtl::toAsciiUpperCase(
1061                     static_cast<unsigned char>(*pString2++))))
1062             return false;
1063     return pBegin1 == pEnd1;
1064 }
1065
1066 // static
1067 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1068                             const sal_Unicode * pEnd, bool bLeadingZeroes,
1069                             sal_uInt32 & rValue)
1070 {
1071     sal_uInt64 nTheValue = 0;
1072     const sal_Unicode * p = rBegin;
1073     for ( ; p != pEnd; ++p)
1074     {
1075         int nWeight = getWeight(*p);
1076         if (nWeight < 0)
1077             break;
1078         nTheValue = 10 * nTheValue + nWeight;
1079         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1080             return false;
1081     }
1082     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1083         return false;
1084     rBegin = p;
1085     rValue = sal_uInt32(nTheValue);
1086     return true;
1087 }
1088
1089 // static
1090 sal_Unicode const * INetMIME::scanContentType(
1091     OUString const & rStr, OUString * pType,
1092     OUString * pSubType, INetContentTypeParameterList * pParameters)
1093 {
1094     sal_Unicode const * pBegin = rStr.getStr();
1095     sal_Unicode const * pEnd = pBegin + rStr.getLength();
1096     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1097     sal_Unicode const * pTypeBegin = p;
1098     while (p != pEnd && isTokenChar(*p))
1099     {
1100         ++p;
1101     }
1102     if (p == pTypeBegin)
1103         return nullptr;
1104     sal_Unicode const * pTypeEnd = p;
1105
1106     p = skipLinearWhiteSpaceComment(p, pEnd);
1107     if (p == pEnd || *p++ != '/')
1108         return nullptr;
1109
1110     p = skipLinearWhiteSpaceComment(p, pEnd);
1111     sal_Unicode const * pSubTypeBegin = p;
1112     while (p != pEnd && isTokenChar(*p))
1113     {
1114         ++p;
1115     }
1116     if (p == pSubTypeBegin)
1117         return nullptr;
1118     sal_Unicode const * pSubTypeEnd = p;
1119
1120     if (pType != nullptr)
1121     {
1122         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1123     }
1124     if (pSubType != nullptr)
1125     {
1126         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1127             .toAsciiLowerCase();
1128     }
1129
1130     return scanParameters(p, pEnd, pParameters);
1131 }
1132
1133 // static
1134 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1135 {
1136     // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1137     // versions of StarOffice send mails with header fields where encoded
1138     // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1139     // '=', ',', '.', '"', ')', without any required white space in between.
1140     // And there appear to exist some broken mailers that only encode single
1141     // letters within words, like "Appel
1142     // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1143     // detect encoded words even when not properly surrounded by white space.
1144
1145     // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1146
1147     // encoded-word = "=?"
1148     //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1149     //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1150     //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1151     //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1152     //     "?="
1153
1154     // base64 = ALPHA / DIGIT / "+" / "/"
1155
1156     const sal_Char * pBegin = rBody.getStr();
1157     const sal_Char * pEnd = pBegin + rBody.getLength();
1158
1159     OUString sDecoded;
1160     const sal_Char * pCopyBegin = pBegin;
1161
1162     /* bool bStartEncodedWord = true; */
1163     const sal_Char * pWSPBegin = pBegin;
1164
1165     for (const sal_Char * p = pBegin; p != pEnd;)
1166     {
1167         OUString sEncodedText;
1168         if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
1169         {
1170             const sal_Char * q = p + 1;
1171             bool bEncodedWord = q != pEnd && *q++ == '?';
1172
1173             rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1174             if (bEncodedWord)
1175             {
1176                 const sal_Char * pCharsetBegin = q;
1177                 const sal_Char * pLanguageBegin = nullptr;
1178                 int nAlphaCount = 0;
1179                 for (bool bDone = false; !bDone;)
1180                     if (q == pEnd)
1181                     {
1182                         bEncodedWord = false;
1183                         bDone = true;
1184                     }
1185                     else
1186                     {
1187                         sal_Char cChar = *q++;
1188                         switch (cChar)
1189                         {
1190                             case '*':
1191                                 pLanguageBegin = q - 1;
1192                                 nAlphaCount = 0;
1193                                 break;
1194
1195                             case '-':
1196                                 if (pLanguageBegin != nullptr)
1197                                 {
1198                                     if (nAlphaCount == 0)
1199                                         pLanguageBegin = nullptr;
1200                                     else
1201                                         nAlphaCount = 0;
1202                                 }
1203                                 break;
1204
1205                             case '?':
1206                                 if (pCharsetBegin == q - 1)
1207                                     bEncodedWord = false;
1208                                 else
1209                                 {
1210                                     eCharsetEncoding
1211                                         = getCharsetEncoding(
1212                                               pCharsetBegin,
1213                                               pLanguageBegin == nullptr
1214                                               || nAlphaCount == 0 ?
1215                                                   q - 1 : pLanguageBegin);
1216                                     bEncodedWord = isMIMECharsetEncoding(
1217                                                        eCharsetEncoding);
1218                                     eCharsetEncoding
1219                                         = translateFromMIME(eCharsetEncoding);
1220                                 }
1221                                 bDone = true;
1222                                 break;
1223
1224                             default:
1225                                 if (pLanguageBegin != nullptr
1226                                     && (!rtl::isAsciiAlpha(
1227                                             static_cast<unsigned char>(cChar))
1228                                         || ++nAlphaCount > 8))
1229                                     pLanguageBegin = nullptr;
1230                                 break;
1231                         }
1232                     }
1233             }
1234
1235             bool bEncodingB = false;
1236             if (bEncodedWord)
1237             {
1238                 if (q == pEnd)
1239                     bEncodedWord = false;
1240                 else
1241                 {
1242                     switch (*q++)
1243                     {
1244                         case 'B':
1245                         case 'b':
1246                             bEncodingB = true;
1247                             break;
1248
1249                         case 'Q':
1250                         case 'q':
1251                             bEncodingB = false;
1252                             break;
1253
1254                         default:
1255                             bEncodedWord = false;
1256                             break;
1257                     }
1258                 }
1259             }
1260
1261             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1262
1263             OStringBuffer sText;
1264             if (bEncodedWord)
1265             {
1266                 if (bEncodingB)
1267                 {
1268                     for (bool bDone = false; !bDone;)
1269                     {
1270                         if (pEnd - q < 4)
1271                         {
1272                             bEncodedWord = false;
1273                             bDone = true;
1274                         }
1275                         else
1276                         {
1277                             bool bFinal = false;
1278                             int nCount = 3;
1279                             sal_uInt32 nValue = 0;
1280                             for (int nShift = 18; nShift >= 0; nShift -= 6)
1281                             {
1282                                 int nWeight = getBase64Weight(*q++);
1283                                 if (nWeight == -2)
1284                                 {
1285                                     bEncodedWord = false;
1286                                     bDone = true;
1287                                     break;
1288                                 }
1289                                 if (nWeight == -1)
1290                                 {
1291                                     if (!bFinal)
1292                                     {
1293                                         if (nShift >= 12)
1294                                         {
1295                                             bEncodedWord = false;
1296                                             bDone = true;
1297                                             break;
1298                                         }
1299                                         bFinal = true;
1300                                         nCount = nShift == 6 ? 1 : 2;
1301                                     }
1302                                 }
1303                                 else
1304                                     nValue |= nWeight << nShift;
1305                             }
1306                             if (bEncodedWord)
1307                             {
1308                                 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1309                                     sText.append(sal_Char(nValue >> nShift & 0xFF));
1310                                 if (*q == '?')
1311                                 {
1312                                     ++q;
1313                                     bDone = true;
1314                                 }
1315                                 if (bFinal && !bDone)
1316                                 {
1317                                     bEncodedWord = false;
1318                                     bDone = true;
1319                                 }
1320                             }
1321                         }
1322                     }
1323                 }
1324                 else
1325                 {
1326                     const sal_Char * pEncodedTextBegin = q;
1327                     const sal_Char * pEncodedTextCopyBegin = q;
1328                     for (bool bDone = false; !bDone;)
1329                         if (q == pEnd)
1330                         {
1331                             bEncodedWord = false;
1332                             bDone = true;
1333                         }
1334                         else
1335                         {
1336                             sal_uInt32 nChar = *q++;
1337                             switch (nChar)
1338                             {
1339                                 case '=':
1340                                 {
1341                                     if (pEnd - q < 2)
1342                                     {
1343                                         bEncodedWord = false;
1344                                         bDone = true;
1345                                         break;
1346                                     }
1347                                     int nDigit1 = getHexWeight(q[0]);
1348                                     int nDigit2 = getHexWeight(q[1]);
1349                                     if (nDigit1 < 0 || nDigit2 < 0)
1350                                     {
1351                                         bEncodedWord = false;
1352                                         bDone = true;
1353                                         break;
1354                                     }
1355                                     sText.append(rBody.copy(
1356                                         (pEncodedTextCopyBegin - pBegin),
1357                                         (q - 1 - pEncodedTextCopyBegin)));
1358                                     sText.append(sal_Char(nDigit1 << 4 | nDigit2));
1359                                     q += 2;
1360                                     pEncodedTextCopyBegin = q;
1361                                     break;
1362                                 }
1363
1364                                 case '?':
1365                                     if (q - pEncodedTextBegin > 1)
1366                                         sText.append(rBody.copy(
1367                                             (pEncodedTextCopyBegin - pBegin),
1368                                             (q - 1 - pEncodedTextCopyBegin)));
1369                                     else
1370                                         bEncodedWord = false;
1371                                     bDone = true;
1372                                     break;
1373
1374                                 case '_':
1375                                     sText.append(rBody.copy(
1376                                         (pEncodedTextCopyBegin - pBegin),
1377                                         (q - 1 - pEncodedTextCopyBegin)));
1378                                     sText.append(' ');
1379                                     pEncodedTextCopyBegin = q;
1380                                     break;
1381
1382                                 default:
1383                                     if (!isVisible(nChar))
1384                                     {
1385                                         bEncodedWord = false;
1386                                         bDone = true;
1387                                     }
1388                                     break;
1389                             }
1390                         }
1391                 }
1392             }
1393
1394             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1395
1396             sal_Unicode * pUnicodeBuffer = nullptr;
1397             sal_Size nUnicodeSize = 0;
1398             if (bEncodedWord)
1399             {
1400                 pUnicodeBuffer
1401                     = convertToUnicode(sText.getStr(),
1402                                        sText.getStr() + sText.getLength(),
1403                                        eCharsetEncoding, nUnicodeSize);
1404                 if (pUnicodeBuffer == nullptr)
1405                     bEncodedWord = false;
1406             }
1407
1408             if (bEncodedWord)
1409             {
1410                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1411                 sDecoded += OUString(
1412                     pUnicodeBuffer,
1413                     static_cast< sal_Int32 >(nUnicodeSize));
1414                 delete[] pUnicodeBuffer;
1415                 p = q;
1416                 pCopyBegin = p;
1417
1418                 pWSPBegin = p;
1419                 while (p != pEnd && isWhiteSpace(*p))
1420                     ++p;
1421                 /* bStartEncodedWord = p != pWSPBegin; */
1422                 continue;
1423             }
1424         }
1425
1426         if (!sEncodedText.isEmpty())
1427             sDecoded += sEncodedText;
1428
1429         if (p == pEnd)
1430             break;
1431
1432         switch (*p++)
1433         {
1434             case '"':
1435                 /* bStartEncodedWord = true; */
1436                 break;
1437
1438             case '(':
1439                 /* bStartEncodedWord = true; */
1440                 break;
1441
1442             case ')':
1443                 /* bStartEncodedWord = false; */
1444                 break;
1445
1446             default:
1447             {
1448                 const sal_Char * pUTF8Begin = p - 1;
1449                 const sal_Char * pUTF8End = pUTF8Begin;
1450                 sal_uInt32 nCharacter = 0;
1451                 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
1452                                       nCharacter))
1453                 {
1454                     appendISO88591(sDecoded, pCopyBegin, p - 1);
1455                     sal_Unicode aUTF16Buf[2];
1456                     sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
1457                     sDecoded += OUString(aUTF16Buf, nUTF16Len);
1458                     p = pUTF8End;
1459                     pCopyBegin = p;
1460                 }
1461                 /* bStartEncodedWord = false; */
1462                 break;
1463             }
1464         }
1465         pWSPBegin = p;
1466     }
1467
1468     appendISO88591(sDecoded, pCopyBegin, pEnd);
1469     return sDecoded;
1470 }
1471
1472 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */