tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <limits>
  22 #include <forward_list>
  23 #include <memory>
  24
  25 #include <sal/log.hxx>
  26 #include <rtl/ustring.hxx>
  27 #include <rtl/strbuf.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/inetmime.hxx>
  31 #include <rtl/character.hxx>
  32
  33 namespace {
  34
  35 rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
  36                                            const sal_Char * pEnd);
  37
  38 /** Check for US-ASCII white space character.
  39
  40     @param nChar  Some UCS-4 character.
  41
  42     @return  True if nChar is a US-ASCII white space character (US-ASCII
  43     0x09 or 0x20).
  44  */
  45 bool isWhiteSpace(sal_uInt32 nChar)
  46 {
  47     return nChar == '\t' || nChar == ' ';
  48 }
  49
  50 /** Get the Base 64 digit weight of a US-ASCII character.
  51
  52     @param nChar  Some UCS-4 character.
  53
  54     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  55     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  56     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  57     character (US-ASCII '='), return -1; otherwise, return -2.
  58  */
  59 int getBase64Weight(sal_uInt32 nChar)
  60 {
  61     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  62            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  63            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  64            nChar == '+' ? 62 :
  65            nChar == '/' ? 63 :
  66            nChar == '=' ? -1 : -2;
  67 }
  68
  69 bool startsWithLineFolding(const sal_Unicode * pBegin,
  70                                             const sal_Unicode * pEnd)
  71 {
  72     DBG_ASSERT(pBegin && pBegin <= pEnd,
  73                "startsWithLineFolding(): Bad sequence");
  74
  75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  76            && isWhiteSpace(pBegin[2]); // CR, LF
  77 }
  78
  79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  80                                                         eEncoding)
  81 {
  82 #if defined(_WIN32)
  83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  84                RTL_TEXTENCODING_MS_1252 : eEncoding;
  85 #else
  86     return eEncoding;
  87 #endif
  88 }
  89
  90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  91 {
  92     return rtl_isOctetTextEncoding(eEncoding);
  93 }
  94
  95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const sal_Char * pBegin,
  96                                          const sal_Char * pEnd,
  97                                          rtl_TextEncoding eEncoding,
  98                                          sal_Size & rSize)
  99 {
 100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 101         return nullptr;
 102     rtl_TextToUnicodeConverter hConverter
 103         = rtl_createTextToUnicodeConverter(eEncoding);
 104     rtl_TextToUnicodeContext hContext
 105         = rtl_createTextToUnicodeContext(hConverter);
 106     std::unique_ptr<sal_Unicode[]> pBuffer;
 107     sal_uInt32 nInfo;
 108     for (sal_Size nBufferSize = pEnd - pBegin;;
 109          nBufferSize += nBufferSize / 3 + 1)
 110     {
 111         pBuffer.reset(new sal_Unicode[nBufferSize]);
 112         sal_Size nSrcCvtBytes;
 113         rSize = rtl_convertTextToUnicode(
 114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 115                     nBufferSize,
 116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 119                     &nInfo, &nSrcCvtBytes);
 120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 121             break;
 122         pBuffer.reset();
 123         rtl_resetTextToUnicodeContext(hConverter, hContext);
 124     }
 125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 126     rtl_destroyTextToUnicodeConverter(hConverter);
 127     if (nInfo != 0)
 128     {
 129         pBuffer.reset();
 130     }
 131     return pBuffer;
 132 }
 133
 134 std::unique_ptr<sal_Char[]> convertFromUnicode(const sal_Unicode * pBegin,
 135                                         const sal_Unicode * pEnd,
 136                                         rtl_TextEncoding eEncoding,
 137                                         sal_Size & rSize)
 138 {
 139     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 140         return nullptr;
 141     rtl_UnicodeToTextConverter hConverter
 142         = rtl_createUnicodeToTextConverter(eEncoding);
 143     rtl_UnicodeToTextContext hContext
 144         = rtl_createUnicodeToTextContext(hConverter);
 145     std::unique_ptr<sal_Char[]> pBuffer;
 146     sal_uInt32 nInfo;
 147     for (sal_Size nBufferSize = pEnd - pBegin;;
 148          nBufferSize += nBufferSize / 3 + 1)
 149     {
 150         pBuffer.reset(new sal_Char[nBufferSize]);
 151         sal_Size nSrcCvtBytes;
 152         rSize = rtl_convertUnicodeToText(
 153                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 154                     nBufferSize,
 155                     RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
 156                         | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
 157                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
 158                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
 159                     &nInfo, &nSrcCvtBytes);
 160         if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
 161             break;
 162         pBuffer.reset();
 163         rtl_resetUnicodeToTextContext(hConverter, hContext);
 164     }
 165     rtl_destroyUnicodeToTextContext(hConverter, hContext);
 166     rtl_destroyUnicodeToTextConverter(hConverter);
 167     if (nInfo != 0)
 168     {
 169         pBuffer.reset();
 170     }
 171     return pBuffer;
 172 }
 173
 174 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
 175
 176     @param pBuffer  Points to a buffer, must not be null.
 177
 178     @param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
 179
 180     @return  A pointer past the UTF-16 characters put into the buffer
 181     (i.e., pBuffer + 1 or pBuffer + 2).
 182  */
 183 sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
 184                                                  sal_uInt32 nUTF32)
 185 {
 186     DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
 187     if (nUTF32 < 0x10000)
 188         *pBuffer++ = sal_Unicode(nUTF32);
 189     else
 190     {
 191         nUTF32 -= 0x10000;
 192         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
 193         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
 194     }
 195     return pBuffer;
 196 }
 197
 198 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 199 {
 200     // See RFC 2279 for a discussion of UTF-8.
 201     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 202
 203     if (nChar < 0x80)
 204         rSink.append(sal_Char(nChar));
 205     else if (nChar < 0x800)
 206         rSink.append(sal_Char(nChar >> 6 | 0xC0))
 207              .append(sal_Char((nChar & 0x3F) | 0x80));
 208     else if (nChar < 0x10000)
 209         rSink.append(sal_Char(nChar >> 12 | 0xE0))
 210              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 211              .append(sal_Char((nChar & 0x3F) | 0x80));
 212     else if (nChar < 0x200000)
 213         rSink.append(sal_Char(nChar >> 18 | 0xF0))
 214              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 215              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 216              .append(sal_Char((nChar & 0x3F) | 0x80));
 217     else if (nChar < 0x4000000)
 218         rSink.append(sal_Char(nChar >> 24 | 0xF8))
 219              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
 220              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 221              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 222              .append(sal_Char((nChar & 0x3F) | 0x80));
 223     else
 224         rSink.append(sal_Char(nChar >> 30 | 0xFC))
 225              .append(sal_Char((nChar >> 24 & 0x3F) | 0x80))
 226              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
 227              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
 228              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
 229              .append(sal_Char((nChar & 0x3F) | 0x80));
 230 }
 231
 232 bool translateUTF8Char(const sal_Char *& rBegin,
 233                                  const sal_Char * pEnd,
 234                                  rtl_TextEncoding eEncoding,
 235                                  sal_uInt32 & rCharacter)
 236 {
 237     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 238         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 239         return false;
 240
 241     int nCount;
 242     sal_uInt32 nMin;
 243     sal_uInt32 nUCS4;
 244     const sal_Char * p = rBegin;
 245     if (static_cast< unsigned char >(*p) < 0xE0)
 246     {
 247         nCount = 1;
 248         nMin = 0x80;
 249         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 250     }
 251     else if (static_cast< unsigned char >(*p) < 0xF0)
 252     {
 253         nCount = 2;
 254         nMin = 0x800;
 255         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 256     }
 257     else if (static_cast< unsigned char >(*p) < 0xF8)
 258     {
 259         nCount = 3;
 260         nMin = 0x10000;
 261         nUCS4 = static_cast< unsigned char >(*p) & 7;
 262     }
 263     else if (static_cast< unsigned char >(*p) < 0xFC)
 264     {
 265         nCount = 4;
 266         nMin = 0x200000;
 267         nUCS4 = static_cast< unsigned char >(*p) & 3;
 268     }
 269     else
 270     {
 271         nCount = 5;
 272         nMin = 0x4000000;
 273         nUCS4 = static_cast< unsigned char >(*p) & 1;
 274     }
 275     ++p;
 276
 277     for (; nCount-- > 0; ++p)
 278         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 279             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 280         else
 281             return false;
 282
 283     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 284         return false;
 285
 286     if (eEncoding >= RTL_TEXTENCODING_UCS4)
 287         rCharacter = nUCS4;
 288     else
 289     {
 290         sal_Unicode aUTF16[2];
 291         const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
 292         sal_Size nSize;
 293         std::unique_ptr<sal_Char[]> pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
 294                                                 nSize);
 295         if (!pBuffer)
 296             return false;
 297         DBG_ASSERT(nSize == 1,
 298                    "translateUTF8Char(): Bad conversion");
 299         rCharacter = pBuffer[0];
 300     }
 301     rBegin = p;
 302     return true;
 303 }
 304
 305 void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
 306                     sal_Char const * pEnd);
 307
 308 struct Parameter
 309 {
 310     OString const m_aAttribute;
 311     OString const m_aCharset;
 312     OString const m_aLanguage;
 313     OString const m_aValue;
 314     sal_uInt32 const m_nSection;
 315     bool const m_bExtended;
 316
 317     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 318     {
 319         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 320         return nComp < 0 ||
 321                 (nComp == 0 && m_nSection < rhs.m_nSection);
 322     }
 323     struct IsSameSection // is used to check container for duplicates with std::any_of
 324     {
 325         const OString& rAttribute;
 326         const sal_uInt32 nSection;
 327         bool operator()(const Parameter& r) const
 328         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 329     };
 330 };
 331
 332 typedef std::forward_list<Parameter> ParameterList;
 333
 334 bool parseParameters(ParameterList const & rInput,
 335                      INetContentTypeParameterList * pOutput);
 336
 337 //  appendISO88591
 338
 339 void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
 340                     sal_Char const * pEnd)
 341 {
 342     sal_Int32 nLength = pEnd - pBegin;
 343     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 344     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 345         *p++ = static_cast<unsigned char>(*pBegin++);
 346     rText.append(pBuffer.get(), nLength);
 347 }
 348
 349 //  parseParameters
 350
 351 bool parseParameters(ParameterList const & rInput,
 352                      INetContentTypeParameterList * pOutput)
 353 {
 354     if (pOutput)
 355         pOutput->clear();
 356
 357     for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
 358     {
 359         if (it->m_nSection > 0
 360             && (itPrev == rInput.end()
 361                 || itPrev->m_nSection != it->m_nSection - 1
 362                 || itPrev->m_aAttribute != it->m_aAttribute))
 363             return false;
 364     }
 365
 366     if (pOutput)
 367         for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
 368         {
 369             bool bCharset = !it->m_aCharset.isEmpty();
 370             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 371             if (bCharset)
 372                 eEncoding
 373                     = getCharsetEncoding(it->m_aCharset.getStr(),
 374                                                    it->m_aCharset.getStr()
 375                                                        + it->m_aCharset.getLength());
 376             OUStringBuffer aValue(64);
 377             bool bBadEncoding = false;
 378             itNext = it;
 379             do
 380             {
 381                 sal_Size nSize;
 382                 std::unique_ptr<sal_Unicode[]> pUnicode
 383                     = convertToUnicode(itNext->m_aValue.getStr(),
 384                                                  itNext->m_aValue.getStr()
 385                                                      + itNext->m_aValue.getLength(),
 386                                                  bCharset && it->m_bExtended ?
 387                                                      eEncoding :
 388                                                      RTL_TEXTENCODING_UTF8,
 389                                                  nSize);
 390                 if (!pUnicode && !(bCharset && it->m_bExtended))
 391                     pUnicode = convertToUnicode(
 392                                    itNext->m_aValue.getStr(),
 393                                    itNext->m_aValue.getStr()
 394                                        + itNext->m_aValue.getLength(),
 395                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 396                 if (!pUnicode)
 397                 {
 398                     bBadEncoding = true;
 399                     break;
 400                 }
 401                 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
 402                 ++itNext;
 403             }
 404             while (itNext != rInput.end() && itNext->m_nSection != 0);
 405
 406             if (bBadEncoding)
 407             {
 408                 aValue.setLength(0);
 409                 itNext = it;
 410                 do
 411                 {
 412                     if (itNext->m_bExtended)
 413                     {
 414                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 415                             aValue.append(
 416                                 static_cast<sal_Unicode>(
 417                                     static_cast<unsigned char>(itNext->m_aValue[i])
 418                                     | 0xF800)); // map to unicode corporate use sub area
 419                     }
 420                     else
 421                     {
 422                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 423                             aValue.append( static_cast<char>(itNext->m_aValue[i]) );
 424                     }
 425                     ++itNext;
 426                 }
 427                 while (itNext != rInput.end() && itNext->m_nSection != 0);
 428             }
 429             auto const ret = pOutput->insert(
 430                 {it->m_aAttribute,
 431                  {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
 432             SAL_INFO_IF(!ret.second, "tools",
 433                 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
 434         }
 435     return true;
 436 }
 437
 438 /** Check whether some character is valid within an RFC 2045 <token>.
 439
 440     @param nChar  Some UCS-4 character.
 441
 442     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 443     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 444     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 445  */
 446 bool isTokenChar(sal_uInt32 nChar)
 447 {
 448     static const bool aMap[128]
 449         = { false, false, false, false, false, false, false, false,
 450             false, false, false, false, false, false, false, false,
 451             false, false, false, false, false, false, false, false,
 452             false, false, false, false, false, false, false, false,
 453             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 454             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 455              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 456              true,  true, false, false, false, false, false, false, //89:;<=>?
 457             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 458              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 459              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 460              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 461              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 462              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 463              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 464              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 465           };
 466     return rtl::isAscii(nChar) && aMap[nChar];
 467 }
 468
 469 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 470                                           const sal_Unicode * pEnd)
 471 {
 472     DBG_ASSERT(pBegin && pBegin <= pEnd,
 473                "skipComment(): Bad sequence");
 474
 475     if (pBegin != pEnd && *pBegin == '(')
 476     {
 477         sal_uInt32 nLevel = 0;
 478         for (const sal_Unicode * p = pBegin; p != pEnd;)
 479             switch (*p++)
 480             {
 481                 case '(':
 482                     ++nLevel;
 483                     break;
 484
 485                 case ')':
 486                     if (--nLevel == 0)
 487                         return p;
 488                     break;
 489
 490                 case '\\':
 491                     if (p != pEnd)
 492                         ++p;
 493                     break;
 494             }
 495     }
 496     return pBegin;
 497 }
 498
 499 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 500                                                               pBegin,
 501                                                           const sal_Unicode *
 502                                                               pEnd)
 503 {
 504     DBG_ASSERT(pBegin && pBegin <= pEnd,
 505                "skipLinearWhiteSpaceComment(): Bad sequence");
 506
 507     while (pBegin != pEnd)
 508         switch (*pBegin)
 509         {
 510             case '\t':
 511             case ' ':
 512                 ++pBegin;
 513                 break;
 514
 515             case 0x0D: // CR
 516                 if (startsWithLineFolding(pBegin, pEnd))
 517                     pBegin += 3;
 518                 else
 519                     return pBegin;
 520                 break;
 521
 522             case '(':
 523             {
 524                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 525                 if (p == pBegin)
 526                     return pBegin;
 527                 pBegin = p;
 528                 break;
 529             }
 530
 531             default:
 532                 return pBegin;
 533         }
 534     return pBegin;
 535 }
 536
 537 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 538                                                const sal_Unicode * pEnd)
 539 {
 540     DBG_ASSERT(pBegin && pBegin <= pEnd,
 541                "skipQuotedString(): Bad sequence");
 542
 543     if (pBegin != pEnd && *pBegin == '"')
 544         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 545             switch (*p++)
 546             {
 547                 case 0x0D: // CR
 548                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 549                         || !isWhiteSpace(*p++))
 550                         return pBegin;
 551                     break;
 552
 553                 case '"':
 554                     return p;
 555
 556                 case '\\':
 557                     if (p != pEnd)
 558                         ++p;
 559                     break;
 560             }
 561     return pBegin;
 562 }
 563
 564 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
 565                                              sal_Unicode const * pEnd,
 566                                              INetContentTypeParameterList *
 567                                                  pParameters)
 568 {
 569     ParameterList aList;
 570     sal_Unicode const * pParameterBegin = pBegin;
 571     for (sal_Unicode const * p = pParameterBegin;;)
 572     {
 573         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
 574         if (pParameterBegin == pEnd || *pParameterBegin != ';')
 575             break;
 576         p = pParameterBegin + 1;
 577
 578         sal_Unicode const * pAttributeBegin
 579             = skipLinearWhiteSpaceComment(p, pEnd);
 580         p = pAttributeBegin;
 581         bool bDowncaseAttribute = false;
 582         while (p != pEnd && isTokenChar(*p) && *p != '*')
 583         {
 584             bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
 585             ++p;
 586         }
 587         if (p == pAttributeBegin)
 588             break;
 589         OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
 590         if (bDowncaseAttribute)
 591             aAttribute = aAttribute.toAsciiLowerCase();
 592
 593         sal_uInt32 nSection = 0;
 594         if (p != pEnd && *p == '*')
 595         {
 596             ++p;
 597             if (p != pEnd && rtl::isAsciiDigit(*p)
 598                 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
 599                 break;
 600         }
 601
 602         bool bPresent = std::any_of(aList.begin(), aList.end(),
 603                                     Parameter::IsSameSection{aAttribute, nSection});
 604         if (bPresent)
 605             break;
 606
 607         bool bExtended = false;
 608         if (p != pEnd && *p == '*')
 609         {
 610             ++p;
 611             bExtended = true;
 612         }
 613
 614         p = skipLinearWhiteSpaceComment(p, pEnd);
 615
 616         if (p == pEnd || *p != '=')
 617             break;
 618
 619         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
 620
 621         OString aCharset;
 622         OString aLanguage;
 623         OString aValue;
 624         if (bExtended)
 625         {
 626             if (nSection == 0)
 627             {
 628                 sal_Unicode const * pCharsetBegin = p;
 629                 bool bDowncaseCharset = false;
 630                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
 631                 {
 632                     bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
 633                     ++p;
 634                 }
 635                 if (p == pCharsetBegin)
 636                     break;
 637                 if (pParameters)
 638                 {
 639                     aCharset = OString(
 640                         pCharsetBegin,
 641                         p - pCharsetBegin,
 642                         RTL_TEXTENCODING_ASCII_US);
 643                     if (bDowncaseCharset)
 644                         aCharset = aCharset.toAsciiLowerCase();
 645                 }
 646
 647                 if (p == pEnd || *p != '\'')
 648                     break;
 649                 ++p;
 650
 651                 sal_Unicode const * pLanguageBegin = p;
 652                 bool bDowncaseLanguage = false;
 653                 int nLetters = 0;
 654                 for (; p != pEnd; ++p)
 655                     if (rtl::isAsciiAlpha(*p))
 656                     {
 657                         if (++nLetters > 8)
 658                             break;
 659                         bDowncaseLanguage = bDowncaseLanguage
 660                                             || rtl::isAsciiUpperCase(*p);
 661                     }
 662                     else if (*p == '-')
 663                     {
 664                         if (nLetters == 0)
 665                             break;
 666                         nLetters = 0;
 667                     }
 668                     else
 669                         break;
 670                 if (nLetters == 0 || nLetters > 8)
 671                     break;
 672                 if (pParameters)
 673                 {
 674                     aLanguage = OString(
 675                         pLanguageBegin,
 676                         p - pLanguageBegin,
 677                         RTL_TEXTENCODING_ASCII_US);
 678                     if (bDowncaseLanguage)
 679                         aLanguage = aLanguage.toAsciiLowerCase();
 680                 }
 681
 682                 if (p == pEnd || *p != '\'')
 683                     break;
 684                 ++p;
 685             }
 686             if (pParameters)
 687             {
 688                 OStringBuffer aSink;
 689                 while (p != pEnd)
 690                 {
 691                     auto q = p;
 692                     sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
 693                     if (rtl::isAscii(nChar) && !isTokenChar(nChar))
 694                         break;
 695                     p = q;
 696                     if (nChar == '%' && p + 1 < pEnd)
 697                     {
 698                         int nWeight1 = INetMIME::getHexWeight(p[0]);
 699                         int nWeight2 = INetMIME::getHexWeight(p[1]);
 700                         if (nWeight1 >= 0 && nWeight2 >= 0)
 701                         {
 702                             aSink.append(sal_Char(nWeight1 << 4 | nWeight2));
 703                             p += 2;
 704                             continue;
 705                         }
 706                     }
 707                     writeUTF8(aSink, nChar);
 708                 }
 709                 aValue = aSink.makeStringAndClear();
 710             }
 711             else
 712                 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 713                     ++p;
 714         }
 715         else if (p != pEnd && *p == '"')
 716             if (pParameters)
 717             {
 718                 OStringBuffer aSink(256);
 719                 bool bInvalid = false;
 720                 for (++p;;)
 721                 {
 722                     if (p == pEnd)
 723                     {
 724                         bInvalid = true;
 725                         break;
 726                     }
 727                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
 728                     if (nChar == '"')
 729                         break;
 730                     else if (nChar == 0x0D) // CR
 731                     {
 732                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 733                             || !isWhiteSpace(*p))
 734                         {
 735                             bInvalid = true;
 736                             break;
 737                         }
 738                         nChar = static_cast<unsigned char>(*p++);
 739                     }
 740                     else if (nChar == '\\')
 741                     {
 742                         if (p == pEnd)
 743                         {
 744                             bInvalid = true;
 745                             break;
 746                         }
 747                         nChar = INetMIME::getUTF32Character(p, pEnd);
 748                     }
 749                     writeUTF8(aSink, nChar);
 750                 }
 751                 if (bInvalid)
 752                     break;
 753                 aValue = aSink.makeStringAndClear();
 754             }
 755             else
 756             {
 757                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
 758                 if (p == pStringEnd)
 759                     break;
 760                 p = pStringEnd;
 761             }
 762         else
 763         {
 764             sal_Unicode const * pTokenBegin = p;
 765             while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 766                 ++p;
 767             if (p == pTokenBegin)
 768                 break;
 769             if (pParameters)
 770                 aValue = OString(
 771                     pTokenBegin, p - pTokenBegin,
 772                     RTL_TEXTENCODING_UTF8);
 773         }
 774         aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
 775     }
 776     aList.sort();
 777     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
 778 }
 779
 780 bool equalIgnoreCase(const sal_Char * pBegin1,
 781                                const sal_Char * pEnd1,
 782                                const sal_Char * pString2)
 783 {
 784     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 785                "equalIgnoreCase(): Bad sequences");
 786
 787     while (*pString2 != 0)
 788         if (pBegin1 == pEnd1
 789             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 790                 != rtl::toAsciiUpperCase(
 791                     static_cast<unsigned char>(*pString2++))))
 792             return false;
 793     return pBegin1 == pEnd1;
 794 }
 795
 796 struct EncodingEntry
 797 {
 798     sal_Char const * m_aName;
 799     rtl_TextEncoding const m_eEncoding;
 800 };
 801
 802 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 803 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 804 // noted:
 805 static EncodingEntry const aEncodingMap[]
 806     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 807         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 808         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 809         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 810         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 811         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 812         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 813         { "US", RTL_TEXTENCODING_ASCII_US },
 814         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 815         { "CP367", RTL_TEXTENCODING_ASCII_US },
 816         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 817         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 818         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 819         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 820         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 821         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 822         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 823         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 824         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 825         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 826         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 827         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 828         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 829         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 830         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 831         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 832         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 833         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 834         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 835         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 836         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 837         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 838         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 839         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 840         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 841         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 842         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 843         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 844         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 845         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 846         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 847         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 848         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 849         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 850         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 851         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 852         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 853         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 854         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 855         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 856         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 857         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 858         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 859         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 860         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 861         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 862         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 863         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 864         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 865         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 866         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 867         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 868         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 869         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 870         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 871         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 872         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 873         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 874         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 875         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 876         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 877         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 878         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 879         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 880         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 881         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 882         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 883         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 884         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 885         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 886         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 887         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 888         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 889         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 890         { "CP437", RTL_TEXTENCODING_IBM_437 },
 891         { "437", RTL_TEXTENCODING_IBM_437 },
 892         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 893         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 894         { "CP850", RTL_TEXTENCODING_IBM_850 },
 895         { "850", RTL_TEXTENCODING_IBM_850 },
 896         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 897         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 898         { "CP860", RTL_TEXTENCODING_IBM_860 },
 899         { "860", RTL_TEXTENCODING_IBM_860 },
 900         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 901         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 902         { "CP861", RTL_TEXTENCODING_IBM_861 },
 903         { "861", RTL_TEXTENCODING_IBM_861 },
 904         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 905         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 906         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 907         { "CP863", RTL_TEXTENCODING_IBM_863 },
 908         { "863", RTL_TEXTENCODING_IBM_863 },
 909         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 910         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 911         { "CP865", RTL_TEXTENCODING_IBM_865 },
 912         { "865", RTL_TEXTENCODING_IBM_865 },
 913         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 914         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 915         { "CP775", RTL_TEXTENCODING_IBM_775 },
 916         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 917         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 918         { "CP852", RTL_TEXTENCODING_IBM_852 },
 919         { "852", RTL_TEXTENCODING_IBM_852 },
 920         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 921         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 922         { "CP855", RTL_TEXTENCODING_IBM_855 },
 923         { "855", RTL_TEXTENCODING_IBM_855 },
 924         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 925         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 926         { "CP857", RTL_TEXTENCODING_IBM_857 },
 927         { "857", RTL_TEXTENCODING_IBM_857 },
 928         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 929         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 930         { "CP862", RTL_TEXTENCODING_IBM_862 },
 931         { "862", RTL_TEXTENCODING_IBM_862 },
 932         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 933         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 934         { "CP864", RTL_TEXTENCODING_IBM_864 },
 935         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 936         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 937         { "CP866", RTL_TEXTENCODING_IBM_866 },
 938         { "866", RTL_TEXTENCODING_IBM_866 },
 939         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 940         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 941         { "CP869", RTL_TEXTENCODING_IBM_869 },
 942         { "869", RTL_TEXTENCODING_IBM_869 },
 943         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 944         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 945         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 946         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 947         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 948         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 949         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 950         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 951         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 952         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 953         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 954         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 955         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 956         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 957         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 958         { "BIG5", RTL_TEXTENCODING_BIG5 },
 959         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 960         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 961         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 962           RTL_TEXTENCODING_EUC_JP },
 963         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 964         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 965         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 966         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 967         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 968         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 969         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 970         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 971         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 972         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 973         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 974         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 975         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 976         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 977         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 978         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 979         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 980         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 981
 982 rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
 983                                               sal_Char const * pEnd)
 984 {
 985     for (const EncodingEntry& i : aEncodingMap)
 986         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 987             return i.m_eEncoding;
 988     return RTL_TEXTENCODING_DONTKNOW;
 989 }
 990
 991 }
 992
 993 //  INetMIME
 994
 995 // static
 996 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 997 {
 998     static const bool aMap[128]
 999         = { false, false, false, false, false, false, false, false,
1000             false, false, false, false, false, false, false, false,
1001             false, false, false, false, false, false, false, false,
1002             false, false, false, false, false, false, false, false,
1003             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
1004             false, false,  true,  true, false,  true, false,  true, //()*+,-./
1005              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1006              true,  true, false, false, false,  true, false,  true, //89:;<=>?
1007             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1008              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1009              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1010              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
1011              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1012              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1013              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1014              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
1015           };
1016     return rtl::isAscii(nChar) && aMap[nChar];
1017 }
1018
1019 // static
1020 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
1021 {
1022     static const bool aMap[128]
1023         = { false, false, false, false, false, false, false, false,
1024             false, false, false, false, false, false, false, false,
1025             false, false, false, false, false, false, false, false,
1026             false, false, false, false, false, false, false, false,
1027             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
1028             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
1029              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1030              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
1031              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1032              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1033              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1034              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
1035              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1036              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1037              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1038              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
1039           };
1040     return rtl::isAscii(nChar) && aMap[nChar];
1041 }
1042
1043 // static
1044 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
1045                                const sal_Unicode * pEnd1,
1046                                const sal_Char * pString2)
1047 {
1048     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
1049                "INetMIME::equalIgnoreCase(): Bad sequences");
1050
1051     while (*pString2 != 0)
1052         if (pBegin1 == pEnd1
1053             || (rtl::toAsciiUpperCase(*pBegin1++)
1054                 != rtl::toAsciiUpperCase(
1055                     static_cast<unsigned char>(*pString2++))))
1056             return false;
1057     return pBegin1 == pEnd1;
1058 }
1059
1060 // static
1061 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1062                             const sal_Unicode * pEnd, bool bLeadingZeroes,
1063                             sal_uInt32 & rValue)
1064 {
1065     sal_uInt64 nTheValue = 0;
1066     const sal_Unicode * p = rBegin;
1067     for ( ; p != pEnd; ++p)
1068     {
1069         int nWeight = getWeight(*p);
1070         if (nWeight < 0)
1071             break;
1072         nTheValue = 10 * nTheValue + nWeight;
1073         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1074             return false;
1075     }
1076     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1077         return false;
1078     rBegin = p;
1079     rValue = sal_uInt32(nTheValue);
1080     return true;
1081 }
1082
1083 // static
1084 sal_Unicode const * INetMIME::scanContentType(
1085     OUString const & rStr, OUString * pType,
1086     OUString * pSubType, INetContentTypeParameterList * pParameters)
1087 {
1088     sal_Unicode const * pBegin = rStr.getStr();
1089     sal_Unicode const * pEnd = pBegin + rStr.getLength();
1090     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1091     sal_Unicode const * pTypeBegin = p;
1092     while (p != pEnd && isTokenChar(*p))
1093     {
1094         ++p;
1095     }
1096     if (p == pTypeBegin)
1097         return nullptr;
1098     sal_Unicode const * pTypeEnd = p;
1099
1100     p = skipLinearWhiteSpaceComment(p, pEnd);
1101     if (p == pEnd || *p++ != '/')
1102         return nullptr;
1103
1104     p = skipLinearWhiteSpaceComment(p, pEnd);
1105     sal_Unicode const * pSubTypeBegin = p;
1106     while (p != pEnd && isTokenChar(*p))
1107     {
1108         ++p;
1109     }
1110     if (p == pSubTypeBegin)
1111         return nullptr;
1112     sal_Unicode const * pSubTypeEnd = p;
1113
1114     if (pType != nullptr)
1115     {
1116         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1117     }
1118     if (pSubType != nullptr)
1119     {
1120         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1121             .toAsciiLowerCase();
1122     }
1123
1124     return scanParameters(p, pEnd, pParameters);
1125 }
1126
1127 // static
1128 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1129 {
1130     // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1131     // versions of StarOffice send mails with header fields where encoded
1132     // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1133     // '=', ',', '.', '"', ')', without any required white space in between.
1134     // And there appear to exist some broken mailers that only encode single
1135     // letters within words, like "Appel
1136     // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1137     // detect encoded words even when not properly surrounded by white space.
1138
1139     // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1140
1141     // encoded-word = "=?"
1142     //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1143     //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1144     //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1145     //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1146     //     "?="
1147
1148     // base64 = ALPHA / DIGIT / "+" / "/"
1149
1150     const sal_Char * pBegin = rBody.getStr();
1151     const sal_Char * pEnd = pBegin + rBody.getLength();
1152
1153     OUStringBuffer sDecoded;
1154     const sal_Char * pCopyBegin = pBegin;
1155
1156     /* bool bStartEncodedWord = true; */
1157     const sal_Char * pWSPBegin = pBegin;
1158
1159     for (const sal_Char * p = pBegin; p != pEnd;)
1160     {
1161         OUString sEncodedText;
1162         if (*p == '=' /* && bStartEncodedWord */)
1163         {
1164             const sal_Char * q = p + 1;
1165             bool bEncodedWord = q != pEnd && *q++ == '?';
1166
1167             rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1168             if (bEncodedWord)
1169             {
1170                 const sal_Char * pCharsetBegin = q;
1171                 const sal_Char * pLanguageBegin = nullptr;
1172                 int nAlphaCount = 0;
1173                 for (bool bDone = false; !bDone;)
1174                     if (q == pEnd)
1175                     {
1176                         bEncodedWord = false;
1177                         bDone = true;
1178                     }
1179                     else
1180                     {
1181                         sal_Char cChar = *q++;
1182                         switch (cChar)
1183                         {
1184                             case '*':
1185                                 pLanguageBegin = q - 1;
1186                                 nAlphaCount = 0;
1187                                 break;
1188
1189                             case '-':
1190                                 if (pLanguageBegin != nullptr)
1191                                 {
1192                                     if (nAlphaCount == 0)
1193                                         pLanguageBegin = nullptr;
1194                                     else
1195                                         nAlphaCount = 0;
1196                                 }
1197                                 break;
1198
1199                             case '?':
1200                                 if (pCharsetBegin == q - 1)
1201                                     bEncodedWord = false;
1202                                 else
1203                                 {
1204                                     eCharsetEncoding
1205                                         = getCharsetEncoding(
1206                                               pCharsetBegin,
1207                                               pLanguageBegin == nullptr
1208                                               || nAlphaCount == 0 ?
1209                                                   q - 1 : pLanguageBegin);
1210                                     bEncodedWord = isMIMECharsetEncoding(
1211                                                        eCharsetEncoding);
1212                                     eCharsetEncoding
1213                                         = translateFromMIME(eCharsetEncoding);
1214                                 }
1215                                 bDone = true;
1216                                 break;
1217
1218                             default:
1219                                 if (pLanguageBegin != nullptr
1220                                     && (!rtl::isAsciiAlpha(
1221                                             static_cast<unsigned char>(cChar))
1222                                         || ++nAlphaCount > 8))
1223                                     pLanguageBegin = nullptr;
1224                                 break;
1225                         }
1226                     }
1227             }
1228
1229             bool bEncodingB = false;
1230             if (bEncodedWord)
1231             {
1232                 if (q == pEnd)
1233                     bEncodedWord = false;
1234                 else
1235                 {
1236                     switch (*q++)
1237                     {
1238                         case 'B':
1239                         case 'b':
1240                             bEncodingB = true;
1241                             break;
1242
1243                         case 'Q':
1244                         case 'q':
1245                             bEncodingB = false;
1246                             break;
1247
1248                         default:
1249                             bEncodedWord = false;
1250                             break;
1251                     }
1252                 }
1253             }
1254
1255             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1256
1257             OStringBuffer sText;
1258             if (bEncodedWord)
1259             {
1260                 if (bEncodingB)
1261                 {
1262                     for (bool bDone = false; !bDone;)
1263                     {
1264                         if (pEnd - q < 4)
1265                         {
1266                             bEncodedWord = false;
1267                             bDone = true;
1268                         }
1269                         else
1270                         {
1271                             bool bFinal = false;
1272                             int nCount = 3;
1273                             sal_uInt32 nValue = 0;
1274                             for (int nShift = 18; nShift >= 0; nShift -= 6)
1275                             {
1276                                 int nWeight = getBase64Weight(*q++);
1277                                 if (nWeight == -2)
1278                                 {
1279                                     bEncodedWord = false;
1280                                     bDone = true;
1281                                     break;
1282                                 }
1283                                 if (nWeight == -1)
1284                                 {
1285                                     if (!bFinal)
1286                                     {
1287                                         if (nShift >= 12)
1288                                         {
1289                                             bEncodedWord = false;
1290                                             bDone = true;
1291                                             break;
1292                                         }
1293                                         bFinal = true;
1294                                         nCount = nShift == 6 ? 1 : 2;
1295                                     }
1296                                 }
1297                                 else
1298                                     nValue |= nWeight << nShift;
1299                             }
1300                             if (bEncodedWord)
1301                             {
1302                                 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1303                                     sText.append(sal_Char(nValue >> nShift & 0xFF));
1304                                 if (*q == '?')
1305                                 {
1306                                     ++q;
1307                                     bDone = true;
1308                                 }
1309                                 if (bFinal && !bDone)
1310                                 {
1311                                     bEncodedWord = false;
1312                                     bDone = true;
1313                                 }
1314                             }
1315                         }
1316                     }
1317                 }
1318                 else
1319                 {
1320                     const sal_Char * pEncodedTextBegin = q;
1321                     const sal_Char * pEncodedTextCopyBegin = q;
1322                     for (bool bDone = false; !bDone;)
1323                         if (q == pEnd)
1324                         {
1325                             bEncodedWord = false;
1326                             bDone = true;
1327                         }
1328                         else
1329                         {
1330                             sal_uInt32 nChar = *q++;
1331                             switch (nChar)
1332                             {
1333                                 case '=':
1334                                 {
1335                                     if (pEnd - q < 2)
1336                                     {
1337                                         bEncodedWord = false;
1338                                         bDone = true;
1339                                         break;
1340                                     }
1341                                     int nDigit1 = getHexWeight(q[0]);
1342                                     int nDigit2 = getHexWeight(q[1]);
1343                                     if (nDigit1 < 0 || nDigit2 < 0)
1344                                     {
1345                                         bEncodedWord = false;
1346                                         bDone = true;
1347                                         break;
1348                                     }
1349                                     sText.append(rBody.copy(
1350                                         (pEncodedTextCopyBegin - pBegin),
1351                                         (q - 1 - pEncodedTextCopyBegin)));
1352                                     sText.append(sal_Char(nDigit1 << 4 | nDigit2));
1353                                     q += 2;
1354                                     pEncodedTextCopyBegin = q;
1355                                     break;
1356                                 }
1357
1358                                 case '?':
1359                                     if (q - pEncodedTextBegin > 1)
1360                                         sText.append(rBody.copy(
1361                                             (pEncodedTextCopyBegin - pBegin),
1362                                             (q - 1 - pEncodedTextCopyBegin)));
1363                                     else
1364                                         bEncodedWord = false;
1365                                     bDone = true;
1366                                     break;
1367
1368                                 case '_':
1369                                     sText.append(rBody.copy(
1370                                         (pEncodedTextCopyBegin - pBegin),
1371                                         (q - 1 - pEncodedTextCopyBegin)));
1372                                     sText.append(' ');
1373                                     pEncodedTextCopyBegin = q;
1374                                     break;
1375
1376                                 default:
1377                                     if (!isVisible(nChar))
1378                                     {
1379                                         bEncodedWord = false;
1380                                         bDone = true;
1381                                     }
1382                                     break;
1383                             }
1384                         }
1385                 }
1386             }
1387
1388             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1389
1390             std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1391             sal_Size nUnicodeSize = 0;
1392             if (bEncodedWord)
1393             {
1394                 pUnicodeBuffer
1395                     = convertToUnicode(sText.getStr(),
1396                                        sText.getStr() + sText.getLength(),
1397                                        eCharsetEncoding, nUnicodeSize);
1398                 if (!pUnicodeBuffer)
1399                     bEncodedWord = false;
1400             }
1401
1402             if (bEncodedWord)
1403             {
1404                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1405                 sDecoded.append(
1406                     pUnicodeBuffer.get(),
1407                     static_cast< sal_Int32 >(nUnicodeSize));
1408                 pUnicodeBuffer.reset();
1409                 p = q;
1410                 pCopyBegin = p;
1411
1412                 pWSPBegin = p;
1413                 while (p != pEnd && isWhiteSpace(*p))
1414                     ++p;
1415                 /* bStartEncodedWord = p != pWSPBegin; */
1416                 continue;
1417             }
1418         }
1419
1420         if (!sEncodedText.isEmpty())
1421             sDecoded.append(sEncodedText);
1422
1423         if (p == pEnd)
1424             break;
1425
1426         switch (*p++)
1427         {
1428             case '"':
1429                 /* bStartEncodedWord = true; */
1430                 break;
1431
1432             case '(':
1433                 /* bStartEncodedWord = true; */
1434                 break;
1435
1436             case ')':
1437                 /* bStartEncodedWord = false; */
1438                 break;
1439
1440             default:
1441             {
1442                 const sal_Char * pUTF8Begin = p - 1;
1443                 const sal_Char * pUTF8End = pUTF8Begin;
1444                 sal_uInt32 nCharacter = 0;
1445                 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
1446                                       nCharacter))
1447                 {
1448                     appendISO88591(sDecoded, pCopyBegin, p - 1);
1449                     sal_Unicode aUTF16Buf[2];
1450                     sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
1451                     sDecoded.append(aUTF16Buf, nUTF16Len);
1452                     p = pUTF8End;
1453                     pCopyBegin = p;
1454                 }
1455                 /* bStartEncodedWord = false; */
1456                 break;
1457             }
1458         }
1459         pWSPBegin = p;
1460     }
1461
1462     appendISO88591(sDecoded, pCopyBegin, pEnd);
1463     return sDecoded.makeStringAndClear();
1464 }
1465
1466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */