tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <limits>
  22 #include <forward_list>
  23 #include <memory>
  24
  25 #include <sal/log.hxx>
  26 #include <rtl/ustring.hxx>
  27 #include <rtl/strbuf.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/debug.hxx>
  31 #include <tools/inetmime.hxx>
  32 #include <rtl/character.hxx>
  33
  34 namespace {
  35
  36 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
  37                                            const char * pEnd);
  38
  39 /** Check for US-ASCII white space character.
  40
  41     @param nChar  Some UCS-4 character.
  42
  43     @return  True if nChar is a US-ASCII white space character (US-ASCII
  44     0x09 or 0x20).
  45  */
  46 bool isWhiteSpace(sal_uInt32 nChar)
  47 {
  48     return nChar == '\t' || nChar == ' ';
  49 }
  50
  51 /** Get the Base 64 digit weight of a US-ASCII character.
  52
  53     @param nChar  Some UCS-4 character.
  54
  55     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  56     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  57     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  58     character (US-ASCII '='), return -1; otherwise, return -2.
  59  */
  60 int getBase64Weight(sal_uInt32 nChar)
  61 {
  62     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  63            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  64            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  65            nChar == '+' ? 62 :
  66            nChar == '/' ? 63 :
  67            nChar == '=' ? -1 : -2;
  68 }
  69
  70 bool startsWithLineFolding(const sal_Unicode * pBegin,
  71                                             const sal_Unicode * pEnd)
  72 {
  73     DBG_ASSERT(pBegin && pBegin <= pEnd,
  74                "startsWithLineFolding(): Bad sequence");
  75
  76     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  77            && isWhiteSpace(pBegin[2]); // CR, LF
  78 }
  79
  80 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  81                                                         eEncoding)
  82 {
  83 #if defined(_WIN32)
  84     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  85                RTL_TEXTENCODING_MS_1252 : eEncoding;
  86 #else
  87     return eEncoding;
  88 #endif
  89 }
  90
  91 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  92 {
  93     return rtl_isOctetTextEncoding(eEncoding);
  94 }
  95
  96 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
  97                                          const char * pEnd,
  98                                          rtl_TextEncoding eEncoding,
  99                                          sal_Size & rSize)
 100 {
 101     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 102         return nullptr;
 103     rtl_TextToUnicodeConverter hConverter
 104         = rtl_createTextToUnicodeConverter(eEncoding);
 105     rtl_TextToUnicodeContext hContext
 106         = rtl_createTextToUnicodeContext(hConverter);
 107     std::unique_ptr<sal_Unicode[]> pBuffer;
 108     sal_uInt32 nInfo;
 109     for (sal_Size nBufferSize = pEnd - pBegin;;
 110          nBufferSize += nBufferSize / 3 + 1)
 111     {
 112         pBuffer.reset(new sal_Unicode[nBufferSize]);
 113         sal_Size nSrcCvtBytes;
 114         rSize = rtl_convertTextToUnicode(
 115                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 116                     nBufferSize,
 117                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 119                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 120                     &nInfo, &nSrcCvtBytes);
 121         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 122             break;
 123         pBuffer.reset();
 124         rtl_resetTextToUnicodeContext(hConverter, hContext);
 125     }
 126     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 127     rtl_destroyTextToUnicodeConverter(hConverter);
 128     if (nInfo != 0)
 129     {
 130         pBuffer.reset();
 131     }
 132     return pBuffer;
 133 }
 134
 135 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 136 {
 137     // See RFC 2279 for a discussion of UTF-8.
 138     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 139
 140     if (nChar < 0x80)
 141         rSink.append(char(nChar));
 142     else if (nChar < 0x800)
 143         rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
 144                 + OStringChar(char((nChar & 0x3F) | 0x80)));
 145     else if (nChar < 0x10000)
 146         rSink.append(
 147             OStringChar(char(nChar >> 12 | 0xE0))
 148              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 149              + OStringChar(char((nChar & 0x3F) | 0x80)));
 150     else if (nChar < 0x200000)
 151         rSink.append(
 152             OStringChar(char(nChar >> 18 | 0xF0))
 153              + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 154              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 155              + OStringChar(char((nChar & 0x3F) | 0x80)));
 156     else if (nChar < 0x4000000)
 157         rSink.append(
 158             OStringChar(char(nChar >> 24 | 0xF8))
 159             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 160             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 161             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 162             + OStringChar(char((nChar & 0x3F) | 0x80)));
 163     else
 164         rSink.append(
 165             OStringChar(char(nChar >> 30 | 0xFC))
 166             + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
 167             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 168             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 169             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 170             + OStringChar(char((nChar & 0x3F) | 0x80)));
 171 }
 172
 173 bool translateUTF8Char(const char *& rBegin,
 174                                  const char * pEnd,
 175                                  sal_uInt32 & rCharacter)
 176 {
 177     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 178         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 179         return false;
 180
 181     int nCount;
 182     sal_uInt32 nMin;
 183     sal_uInt32 nUCS4;
 184     const char * p = rBegin;
 185     if (static_cast< unsigned char >(*p) < 0xE0)
 186     {
 187         nCount = 1;
 188         nMin = 0x80;
 189         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 190     }
 191     else if (static_cast< unsigned char >(*p) < 0xF0)
 192     {
 193         nCount = 2;
 194         nMin = 0x800;
 195         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 196     }
 197     else if (static_cast< unsigned char >(*p) < 0xF8)
 198     {
 199         nCount = 3;
 200         nMin = 0x10000;
 201         nUCS4 = static_cast< unsigned char >(*p) & 7;
 202     }
 203     else if (static_cast< unsigned char >(*p) < 0xFC)
 204     {
 205         nCount = 4;
 206         nMin = 0x200000;
 207         nUCS4 = static_cast< unsigned char >(*p) & 3;
 208     }
 209     else
 210     {
 211         nCount = 5;
 212         nMin = 0x4000000;
 213         nUCS4 = static_cast< unsigned char >(*p) & 1;
 214     }
 215     ++p;
 216
 217     for (; nCount-- > 0; ++p)
 218         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 219             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 220         else
 221             return false;
 222
 223     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 224         return false;
 225
 226     rCharacter = nUCS4;
 227     rBegin = p;
 228     return true;
 229 }
 230
 231 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 232                     char const * pEnd);
 233
 234 struct Parameter
 235 {
 236     OString m_aAttribute;
 237     OString m_aCharset;
 238     OString m_aLanguage;
 239     OString m_aValue;
 240     sal_uInt32 m_nSection;
 241     bool m_bExtended;
 242
 243     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 244     {
 245         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 246         return nComp < 0 ||
 247                 (nComp == 0 && m_nSection < rhs.m_nSection);
 248     }
 249     struct IsSameSection // is used to check container for duplicates with std::any_of
 250     {
 251         const OString& rAttribute;
 252         const sal_uInt32 nSection;
 253         bool operator()(const Parameter& r) const
 254         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 255     };
 256 };
 257
 258 typedef std::forward_list<Parameter> ParameterList;
 259
 260 bool parseParameters(ParameterList const & rInput,
 261                      INetContentTypeParameterList * pOutput);
 262
 263 //  appendISO88591
 264
 265 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 266                     char const * pEnd)
 267 {
 268     sal_Int32 nLength = pEnd - pBegin;
 269     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 270     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 271         *p++ = static_cast<unsigned char>(*pBegin++);
 272     rText.append(pBuffer.get(), nLength);
 273 }
 274
 275 //  parseParameters
 276
 277 bool parseParameters(ParameterList const & rInput,
 278                      INetContentTypeParameterList * pOutput)
 279 {
 280     if (pOutput)
 281         pOutput->clear();
 282
 283     for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
 284     {
 285         if (it->m_nSection > 0
 286             && (itPrev == rInput.end()
 287                 || itPrev->m_nSection != it->m_nSection - 1
 288                 || itPrev->m_aAttribute != it->m_aAttribute))
 289             return false;
 290     }
 291
 292     if (pOutput)
 293         for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
 294         {
 295             bool bCharset = !it->m_aCharset.isEmpty();
 296             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 297             if (bCharset)
 298                 eEncoding
 299                     = getCharsetEncoding(it->m_aCharset.getStr(),
 300                                                    it->m_aCharset.getStr()
 301                                                        + it->m_aCharset.getLength());
 302             OUStringBuffer aValue(64);
 303             bool bBadEncoding = false;
 304             itNext = it;
 305             do
 306             {
 307                 sal_Size nSize;
 308                 std::unique_ptr<sal_Unicode[]> pUnicode
 309                     = convertToUnicode(itNext->m_aValue.getStr(),
 310                                                  itNext->m_aValue.getStr()
 311                                                      + itNext->m_aValue.getLength(),
 312                                                  bCharset && it->m_bExtended ?
 313                                                      eEncoding :
 314                                                      RTL_TEXTENCODING_UTF8,
 315                                                  nSize);
 316                 if (!pUnicode && !(bCharset && it->m_bExtended))
 317                     pUnicode = convertToUnicode(
 318                                    itNext->m_aValue.getStr(),
 319                                    itNext->m_aValue.getStr()
 320                                        + itNext->m_aValue.getLength(),
 321                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 322                 if (!pUnicode)
 323                 {
 324                     bBadEncoding = true;
 325                     break;
 326                 }
 327                 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
 328                 ++itNext;
 329             }
 330             while (itNext != rInput.end() && itNext->m_nSection != 0);
 331
 332             if (bBadEncoding)
 333             {
 334                 aValue.setLength(0);
 335                 itNext = it;
 336                 do
 337                 {
 338                     if (itNext->m_bExtended)
 339                     {
 340                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 341                             aValue.append(
 342                                 static_cast<sal_Unicode>(
 343                                     static_cast<unsigned char>(itNext->m_aValue[i])
 344                                     | 0xF800)); // map to unicode corporate use sub area
 345                     }
 346                     else
 347                     {
 348                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 349                             aValue.append( itNext->m_aValue[i] );
 350                     }
 351                     ++itNext;
 352                 }
 353                 while (itNext != rInput.end() && itNext->m_nSection != 0);
 354             }
 355             auto const ret = pOutput->insert(
 356                 {it->m_aAttribute,
 357                  {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
 358             SAL_INFO_IF(!ret.second, "tools",
 359                 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
 360         }
 361     return true;
 362 }
 363
 364 /** Check whether some character is valid within an RFC 2045 <token>.
 365
 366     @param nChar  Some UCS-4 character.
 367
 368     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 369     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 370     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 371  */
 372 bool isTokenChar(sal_uInt32 nChar)
 373 {
 374     static const bool aMap[128]
 375         = { false, false, false, false, false, false, false, false,
 376             false, false, false, false, false, false, false, false,
 377             false, false, false, false, false, false, false, false,
 378             false, false, false, false, false, false, false, false,
 379             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 380             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 381              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 382              true,  true, false, false, false, false, false, false, //89:;<=>?
 383             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 384              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 385              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 386              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 387              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 388              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 389              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 390              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 391           };
 392     return rtl::isAscii(nChar) && aMap[nChar];
 393 }
 394
 395 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 396                                           const sal_Unicode * pEnd)
 397 {
 398     DBG_ASSERT(pBegin && pBegin <= pEnd,
 399                "skipComment(): Bad sequence");
 400
 401     if (pBegin != pEnd && *pBegin == '(')
 402     {
 403         sal_uInt32 nLevel = 0;
 404         for (const sal_Unicode * p = pBegin; p != pEnd;)
 405             switch (*p++)
 406             {
 407                 case '(':
 408                     ++nLevel;
 409                     break;
 410
 411                 case ')':
 412                     if (--nLevel == 0)
 413                         return p;
 414                     break;
 415
 416                 case '\\':
 417                     if (p != pEnd)
 418                         ++p;
 419                     break;
 420             }
 421     }
 422     return pBegin;
 423 }
 424
 425 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 426                                                               pBegin,
 427                                                           const sal_Unicode *
 428                                                               pEnd)
 429 {
 430     DBG_ASSERT(pBegin && pBegin <= pEnd,
 431                "skipLinearWhiteSpaceComment(): Bad sequence");
 432
 433     while (pBegin != pEnd)
 434         switch (*pBegin)
 435         {
 436             case '\t':
 437             case ' ':
 438                 ++pBegin;
 439                 break;
 440
 441             case 0x0D: // CR
 442                 if (startsWithLineFolding(pBegin, pEnd))
 443                     pBegin += 3;
 444                 else
 445                     return pBegin;
 446                 break;
 447
 448             case '(':
 449             {
 450                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 451                 if (p == pBegin)
 452                     return pBegin;
 453                 pBegin = p;
 454                 break;
 455             }
 456
 457             default:
 458                 return pBegin;
 459         }
 460     return pBegin;
 461 }
 462
 463 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 464                                                const sal_Unicode * pEnd)
 465 {
 466     DBG_ASSERT(pBegin && pBegin <= pEnd,
 467                "skipQuotedString(): Bad sequence");
 468
 469     if (pBegin != pEnd && *pBegin == '"')
 470         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 471             switch (*p++)
 472             {
 473                 case 0x0D: // CR
 474                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 475                         || !isWhiteSpace(*p++))
 476                         return pBegin;
 477                     break;
 478
 479                 case '"':
 480                     return p;
 481
 482                 case '\\':
 483                     if (p != pEnd)
 484                         ++p;
 485                     break;
 486             }
 487     return pBegin;
 488 }
 489
 490 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
 491                                              sal_Unicode const * pEnd,
 492                                              INetContentTypeParameterList *
 493                                                  pParameters)
 494 {
 495     ParameterList aList;
 496     sal_Unicode const * pParameterBegin = pBegin;
 497     for (sal_Unicode const * p = pParameterBegin;;)
 498     {
 499         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
 500         if (pParameterBegin == pEnd || *pParameterBegin != ';')
 501             break;
 502         p = pParameterBegin + 1;
 503
 504         sal_Unicode const * pAttributeBegin
 505             = skipLinearWhiteSpaceComment(p, pEnd);
 506         p = pAttributeBegin;
 507         bool bDowncaseAttribute = false;
 508         while (p != pEnd && isTokenChar(*p) && *p != '*')
 509         {
 510             bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
 511             ++p;
 512         }
 513         if (p == pAttributeBegin)
 514             break;
 515         OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
 516         if (bDowncaseAttribute)
 517             aAttribute = aAttribute.toAsciiLowerCase();
 518
 519         sal_uInt32 nSection = 0;
 520         if (p != pEnd && *p == '*')
 521         {
 522             ++p;
 523             if (p != pEnd && rtl::isAsciiDigit(*p)
 524                 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
 525                 break;
 526         }
 527
 528         bool bPresent = std::any_of(aList.begin(), aList.end(),
 529                                     Parameter::IsSameSection{aAttribute, nSection});
 530         if (bPresent)
 531             break;
 532
 533         bool bExtended = false;
 534         if (p != pEnd && *p == '*')
 535         {
 536             ++p;
 537             bExtended = true;
 538         }
 539
 540         p = skipLinearWhiteSpaceComment(p, pEnd);
 541
 542         if (p == pEnd || *p != '=')
 543             break;
 544
 545         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
 546
 547         OString aCharset;
 548         OString aLanguage;
 549         OString aValue;
 550         if (bExtended)
 551         {
 552             if (nSection == 0)
 553             {
 554                 sal_Unicode const * pCharsetBegin = p;
 555                 bool bDowncaseCharset = false;
 556                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
 557                 {
 558                     bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
 559                     ++p;
 560                 }
 561                 if (p == pCharsetBegin)
 562                     break;
 563                 if (pParameters)
 564                 {
 565                     aCharset = OString(
 566                         pCharsetBegin,
 567                         p - pCharsetBegin,
 568                         RTL_TEXTENCODING_ASCII_US);
 569                     if (bDowncaseCharset)
 570                         aCharset = aCharset.toAsciiLowerCase();
 571                 }
 572
 573                 if (p == pEnd || *p != '\'')
 574                     break;
 575                 ++p;
 576
 577                 sal_Unicode const * pLanguageBegin = p;
 578                 bool bDowncaseLanguage = false;
 579                 int nLetters = 0;
 580                 for (; p != pEnd; ++p)
 581                     if (rtl::isAsciiAlpha(*p))
 582                     {
 583                         if (++nLetters > 8)
 584                             break;
 585                         bDowncaseLanguage = bDowncaseLanguage
 586                                             || rtl::isAsciiUpperCase(*p);
 587                     }
 588                     else if (*p == '-')
 589                     {
 590                         if (nLetters == 0)
 591                             break;
 592                         nLetters = 0;
 593                     }
 594                     else
 595                         break;
 596                 if (nLetters == 0 || nLetters > 8)
 597                     break;
 598                 if (pParameters)
 599                 {
 600                     aLanguage = OString(
 601                         pLanguageBegin,
 602                         p - pLanguageBegin,
 603                         RTL_TEXTENCODING_ASCII_US);
 604                     if (bDowncaseLanguage)
 605                         aLanguage = aLanguage.toAsciiLowerCase();
 606                 }
 607
 608                 if (p == pEnd || *p != '\'')
 609                     break;
 610                 ++p;
 611             }
 612             if (pParameters)
 613             {
 614                 OStringBuffer aSink;
 615                 while (p != pEnd)
 616                 {
 617                     auto q = p;
 618                     sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
 619                     if (rtl::isAscii(nChar) && !isTokenChar(nChar))
 620                         break;
 621                     p = q;
 622                     if (nChar == '%' && p + 1 < pEnd)
 623                     {
 624                         int nWeight1 = INetMIME::getHexWeight(p[0]);
 625                         int nWeight2 = INetMIME::getHexWeight(p[1]);
 626                         if (nWeight1 >= 0 && nWeight2 >= 0)
 627                         {
 628                             aSink.append(char(nWeight1 << 4 | nWeight2));
 629                             p += 2;
 630                             continue;
 631                         }
 632                     }
 633                     writeUTF8(aSink, nChar);
 634                 }
 635                 aValue = aSink.makeStringAndClear();
 636             }
 637             else
 638                 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 639                     ++p;
 640         }
 641         else if (p != pEnd && *p == '"')
 642             if (pParameters)
 643             {
 644                 OStringBuffer aSink(256);
 645                 bool bInvalid = false;
 646                 for (++p;;)
 647                 {
 648                     if (p == pEnd)
 649                     {
 650                         bInvalid = true;
 651                         break;
 652                     }
 653                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
 654                     if (nChar == '"')
 655                         break;
 656                     else if (nChar == 0x0D) // CR
 657                     {
 658                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 659                             || !isWhiteSpace(*p))
 660                         {
 661                             bInvalid = true;
 662                             break;
 663                         }
 664                         nChar = static_cast<unsigned char>(*p++);
 665                     }
 666                     else if (nChar == '\\')
 667                     {
 668                         if (p == pEnd)
 669                         {
 670                             bInvalid = true;
 671                             break;
 672                         }
 673                         nChar = INetMIME::getUTF32Character(p, pEnd);
 674                     }
 675                     writeUTF8(aSink, nChar);
 676                 }
 677                 if (bInvalid)
 678                     break;
 679                 aValue = aSink.makeStringAndClear();
 680             }
 681             else
 682             {
 683                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
 684                 if (p == pStringEnd)
 685                     break;
 686                 p = pStringEnd;
 687             }
 688         else
 689         {
 690             sal_Unicode const * pTokenBegin = p;
 691             while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 692                 ++p;
 693             if (p == pTokenBegin)
 694                 break;
 695             if (pParameters)
 696                 aValue = OString(
 697                     pTokenBegin, p - pTokenBegin,
 698                     RTL_TEXTENCODING_UTF8);
 699         }
 700         aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
 701     }
 702     aList.sort();
 703     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
 704 }
 705
 706 bool equalIgnoreCase(const char * pBegin1,
 707                                const char * pEnd1,
 708                                const char * pString2)
 709 {
 710     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 711                "equalIgnoreCase(): Bad sequences");
 712
 713     while (*pString2 != 0)
 714         if (pBegin1 == pEnd1
 715             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 716                 != rtl::toAsciiUpperCase(
 717                     static_cast<unsigned char>(*pString2++))))
 718             return false;
 719     return pBegin1 == pEnd1;
 720 }
 721
 722 struct EncodingEntry
 723 {
 724     char const * m_aName;
 725     rtl_TextEncoding m_eEncoding;
 726 };
 727
 728 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 729 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 730 // noted:
 731 EncodingEntry const aEncodingMap[]
 732     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 733         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 734         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 735         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 736         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 737         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 738         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 739         { "US", RTL_TEXTENCODING_ASCII_US },
 740         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 741         { "CP367", RTL_TEXTENCODING_ASCII_US },
 742         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 743         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 744         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 745         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 746         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 747         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 748         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 749         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 750         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 751         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 752         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 753         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 754         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 755         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 756         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 757         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 758         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 759         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 760         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 761         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 762         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 763         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 764         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 765         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 766         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 767         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 768         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 769         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 770         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 771         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 772         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 773         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 774         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 775         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 776         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 777         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 778         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 779         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 780         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 781         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 782         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 783         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 784         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 785         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 786         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 787         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 788         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 789         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 790         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 791         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 792         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 793         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 794         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 795         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 796         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 797         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 798         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 799         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 800         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 801         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 802         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 803         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 804         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 805         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 806         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 807         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 808         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 809         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 810         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 811         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 812         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 813         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 814         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 815         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 816         { "CP437", RTL_TEXTENCODING_IBM_437 },
 817         { "437", RTL_TEXTENCODING_IBM_437 },
 818         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 819         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 820         { "CP850", RTL_TEXTENCODING_IBM_850 },
 821         { "850", RTL_TEXTENCODING_IBM_850 },
 822         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 823         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 824         { "CP860", RTL_TEXTENCODING_IBM_860 },
 825         { "860", RTL_TEXTENCODING_IBM_860 },
 826         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 827         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 828         { "CP861", RTL_TEXTENCODING_IBM_861 },
 829         { "861", RTL_TEXTENCODING_IBM_861 },
 830         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 831         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 832         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 833         { "CP863", RTL_TEXTENCODING_IBM_863 },
 834         { "863", RTL_TEXTENCODING_IBM_863 },
 835         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 836         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 837         { "CP865", RTL_TEXTENCODING_IBM_865 },
 838         { "865", RTL_TEXTENCODING_IBM_865 },
 839         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 840         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 841         { "CP775", RTL_TEXTENCODING_IBM_775 },
 842         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 843         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 844         { "CP852", RTL_TEXTENCODING_IBM_852 },
 845         { "852", RTL_TEXTENCODING_IBM_852 },
 846         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 847         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 848         { "CP855", RTL_TEXTENCODING_IBM_855 },
 849         { "855", RTL_TEXTENCODING_IBM_855 },
 850         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 851         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 852         { "CP857", RTL_TEXTENCODING_IBM_857 },
 853         { "857", RTL_TEXTENCODING_IBM_857 },
 854         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 855         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 856         { "CP862", RTL_TEXTENCODING_IBM_862 },
 857         { "862", RTL_TEXTENCODING_IBM_862 },
 858         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 859         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 860         { "CP864", RTL_TEXTENCODING_IBM_864 },
 861         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 862         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 863         { "CP866", RTL_TEXTENCODING_IBM_866 },
 864         { "866", RTL_TEXTENCODING_IBM_866 },
 865         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 866         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 867         { "CP869", RTL_TEXTENCODING_IBM_869 },
 868         { "869", RTL_TEXTENCODING_IBM_869 },
 869         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 870         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 871         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 872         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 873         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 874         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 875         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 876         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 877         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 878         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 879         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 880         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 881         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 882         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 883         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 884         { "BIG5", RTL_TEXTENCODING_BIG5 },
 885         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 886         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 887         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 888           RTL_TEXTENCODING_EUC_JP },
 889         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 890         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 891         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 892         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 893         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 894         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 895         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 896         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 897         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 898         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 899         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 900         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 901         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 902         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 903         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 904         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 905         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 906         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 907
 908 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
 909                                               char const * pEnd)
 910 {
 911     for (const EncodingEntry& i : aEncodingMap)
 912         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 913             return i.m_eEncoding;
 914     return RTL_TEXTENCODING_DONTKNOW;
 915 }
 916
 917 }
 918
 919 //  INetMIME
 920
 921 // static
 922 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 923 {
 924     static const bool aMap[128]
 925         = { false, false, false, false, false, false, false, false,
 926             false, false, false, false, false, false, false, false,
 927             false, false, false, false, false, false, false, false,
 928             false, false, false, false, false, false, false, false,
 929             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 930             false, false,  true,  true, false,  true, false,  true, //()*+,-./
 931              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 932              true,  true, false, false, false,  true, false,  true, //89:;<=>?
 933             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 934              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 935              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 936              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 937              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 938              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 939              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 940              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 941           };
 942     return rtl::isAscii(nChar) && aMap[nChar];
 943 }
 944
 945 // static
 946 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 947 {
 948     static const bool aMap[128]
 949         = { false, false, false, false, false, false, false, false,
 950             false, false, false, false, false, false, false, false,
 951             false, false, false, false, false, false, false, false,
 952             false, false, false, false, false, false, false, false,
 953             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 954             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 955              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 956              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 957              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 958              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 959              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 960              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 961              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 962              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 963              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 964              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 965           };
 966     return rtl::isAscii(nChar) && aMap[nChar];
 967 }
 968
 969 // static
 970 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 971                                const sal_Unicode * pEnd1,
 972                                const char * pString2)
 973 {
 974     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 975                "INetMIME::equalIgnoreCase(): Bad sequences");
 976
 977     while (*pString2 != 0)
 978         if (pBegin1 == pEnd1
 979             || (rtl::toAsciiUpperCase(*pBegin1++)
 980                 != rtl::toAsciiUpperCase(
 981                     static_cast<unsigned char>(*pString2++))))
 982             return false;
 983     return pBegin1 == pEnd1;
 984 }
 985
 986 // static
 987 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
 988                             const sal_Unicode * pEnd, bool bLeadingZeroes,
 989                             sal_uInt32 & rValue)
 990 {
 991     sal_uInt64 nTheValue = 0;
 992     const sal_Unicode * p = rBegin;
 993     for ( ; p != pEnd; ++p)
 994     {
 995         int nWeight = getWeight(*p);
 996         if (nWeight < 0)
 997             break;
 998         nTheValue = 10 * nTheValue + nWeight;
 999         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1000             return false;
1001     }
1002     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1003         return false;
1004     rBegin = p;
1005     rValue = sal_uInt32(nTheValue);
1006     return true;
1007 }
1008
1009 // static
1010 sal_Unicode const * INetMIME::scanContentType(
1011     std::u16string_view rStr, OUString * pType,
1012     OUString * pSubType, INetContentTypeParameterList * pParameters)
1013 {
1014     sal_Unicode const * pBegin = rStr.data();
1015     sal_Unicode const * pEnd = pBegin + rStr.size();
1016     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1017     sal_Unicode const * pTypeBegin = p;
1018     while (p != pEnd && isTokenChar(*p))
1019     {
1020         ++p;
1021     }
1022     if (p == pTypeBegin)
1023         return nullptr;
1024     sal_Unicode const * pTypeEnd = p;
1025
1026     p = skipLinearWhiteSpaceComment(p, pEnd);
1027     if (p == pEnd || *p++ != '/')
1028         return nullptr;
1029
1030     p = skipLinearWhiteSpaceComment(p, pEnd);
1031     sal_Unicode const * pSubTypeBegin = p;
1032     while (p != pEnd && isTokenChar(*p))
1033     {
1034         ++p;
1035     }
1036     if (p == pSubTypeBegin)
1037         return nullptr;
1038     sal_Unicode const * pSubTypeEnd = p;
1039
1040     if (pType != nullptr)
1041     {
1042         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1043     }
1044     if (pSubType != nullptr)
1045     {
1046         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1047             .toAsciiLowerCase();
1048     }
1049
1050     return scanParameters(p, pEnd, pParameters);
1051 }
1052
1053 // static
1054 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1055 {
1056     // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1057     // versions of StarOffice send mails with header fields where encoded
1058     // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1059     // '=', ',', '.', '"', ')', without any required white space in between.
1060     // And there appear to exist some broken mailers that only encode single
1061     // letters within words, like "Appel
1062     // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1063     // detect encoded words even when not properly surrounded by white space.
1064
1065     // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1066
1067     // encoded-word = "=?"
1068     //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1069     //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1070     //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1071     //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1072     //     "?="
1073
1074     // base64 = ALPHA / DIGIT / "+" / "/"
1075
1076     const char * pBegin = rBody.getStr();
1077     const char * pEnd = pBegin + rBody.getLength();
1078
1079     OUStringBuffer sDecoded;
1080     const char * pCopyBegin = pBegin;
1081
1082     /* bool bStartEncodedWord = true; */
1083     const char * pWSPBegin = pBegin;
1084
1085     for (const char * p = pBegin; p != pEnd;)
1086     {
1087         if (*p == '=' /* && bStartEncodedWord */)
1088         {
1089             const char * q = p + 1;
1090             bool bEncodedWord = q != pEnd && *q++ == '?';
1091
1092             rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1093             if (bEncodedWord)
1094             {
1095                 const char * pCharsetBegin = q;
1096                 const char * pLanguageBegin = nullptr;
1097                 int nAlphaCount = 0;
1098                 for (bool bDone = false; !bDone;)
1099                     if (q == pEnd)
1100                     {
1101                         bEncodedWord = false;
1102                         bDone = true;
1103                     }
1104                     else
1105                     {
1106                         char cChar = *q++;
1107                         switch (cChar)
1108                         {
1109                             case '*':
1110                                 pLanguageBegin = q - 1;
1111                                 nAlphaCount = 0;
1112                                 break;
1113
1114                             case '-':
1115                                 if (pLanguageBegin != nullptr)
1116                                 {
1117                                     if (nAlphaCount == 0)
1118                                         pLanguageBegin = nullptr;
1119                                     else
1120                                         nAlphaCount = 0;
1121                                 }
1122                                 break;
1123
1124                             case '?':
1125                                 if (pCharsetBegin == q - 1)
1126                                     bEncodedWord = false;
1127                                 else
1128                                 {
1129                                     eCharsetEncoding
1130                                         = getCharsetEncoding(
1131                                               pCharsetBegin,
1132                                               pLanguageBegin == nullptr
1133                                               || nAlphaCount == 0 ?
1134                                                   q - 1 : pLanguageBegin);
1135                                     bEncodedWord = isMIMECharsetEncoding(
1136                                                        eCharsetEncoding);
1137                                     eCharsetEncoding
1138                                         = translateFromMIME(eCharsetEncoding);
1139                                 }
1140                                 bDone = true;
1141                                 break;
1142
1143                             default:
1144                                 if (pLanguageBegin != nullptr
1145                                     && (!rtl::isAsciiAlpha(
1146                                             static_cast<unsigned char>(cChar))
1147                                         || ++nAlphaCount > 8))
1148                                     pLanguageBegin = nullptr;
1149                                 break;
1150                         }
1151                     }
1152             }
1153
1154             bool bEncodingB = false;
1155             if (bEncodedWord)
1156             {
1157                 if (q == pEnd)
1158                     bEncodedWord = false;
1159                 else
1160                 {
1161                     switch (*q++)
1162                     {
1163                         case 'B':
1164                         case 'b':
1165                             bEncodingB = true;
1166                             break;
1167
1168                         case 'Q':
1169                         case 'q':
1170                             bEncodingB = false;
1171                             break;
1172
1173                         default:
1174                             bEncodedWord = false;
1175                             break;
1176                     }
1177                 }
1178             }
1179
1180             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1181
1182             OStringBuffer sText;
1183             if (bEncodedWord)
1184             {
1185                 if (bEncodingB)
1186                 {
1187                     for (bool bDone = false; !bDone;)
1188                     {
1189                         if (pEnd - q < 4)
1190                         {
1191                             bEncodedWord = false;
1192                             bDone = true;
1193                         }
1194                         else
1195                         {
1196                             bool bFinal = false;
1197                             int nCount = 3;
1198                             sal_uInt32 nValue = 0;
1199                             for (int nShift = 18; nShift >= 0; nShift -= 6)
1200                             {
1201                                 int nWeight = getBase64Weight(*q++);
1202                                 if (nWeight == -2)
1203                                 {
1204                                     bEncodedWord = false;
1205                                     bDone = true;
1206                                     break;
1207                                 }
1208                                 if (nWeight == -1)
1209                                 {
1210                                     if (!bFinal)
1211                                     {
1212                                         if (nShift >= 12)
1213                                         {
1214                                             bEncodedWord = false;
1215                                             bDone = true;
1216                                             break;
1217                                         }
1218                                         bFinal = true;
1219                                         nCount = nShift == 6 ? 1 : 2;
1220                                     }
1221                                 }
1222                                 else
1223                                     nValue |= nWeight << nShift;
1224                             }
1225                             if (bEncodedWord)
1226                             {
1227                                 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1228                                     sText.append(char(nValue >> nShift & 0xFF));
1229                                 if (*q == '?')
1230                                 {
1231                                     ++q;
1232                                     bDone = true;
1233                                 }
1234                                 if (bFinal && !bDone)
1235                                 {
1236                                     bEncodedWord = false;
1237                                     bDone = true;
1238                                 }
1239                             }
1240                         }
1241                     }
1242                 }
1243                 else
1244                 {
1245                     const char * pEncodedTextBegin = q;
1246                     const char * pEncodedTextCopyBegin = q;
1247                     for (bool bDone = false; !bDone;)
1248                         if (q == pEnd)
1249                         {
1250                             bEncodedWord = false;
1251                             bDone = true;
1252                         }
1253                         else
1254                         {
1255                             sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1256                             switch (nChar)
1257                             {
1258                                 case '=':
1259                                 {
1260                                     if (pEnd - q < 2)
1261                                     {
1262                                         bEncodedWord = false;
1263                                         bDone = true;
1264                                         break;
1265                                     }
1266                                     int nDigit1 = getHexWeight(q[0]);
1267                                     int nDigit2 = getHexWeight(q[1]);
1268                                     if (nDigit1 < 0 || nDigit2 < 0)
1269                                     {
1270                                         bEncodedWord = false;
1271                                         bDone = true;
1272                                         break;
1273                                     }
1274                                     sText.append(
1275                                         rBody.subView(
1276                                             (pEncodedTextCopyBegin - pBegin),
1277                                             (q - 1 - pEncodedTextCopyBegin))
1278                                         + OStringChar(char(nDigit1 << 4 | nDigit2)));
1279                                     q += 2;
1280                                     pEncodedTextCopyBegin = q;
1281                                     break;
1282                                 }
1283
1284                                 case '?':
1285                                     if (q - pEncodedTextBegin > 1)
1286                                         sText.append(rBody.subView(
1287                                             (pEncodedTextCopyBegin - pBegin),
1288                                             (q - 1 - pEncodedTextCopyBegin)));
1289                                     else
1290                                         bEncodedWord = false;
1291                                     bDone = true;
1292                                     break;
1293
1294                                 case '_':
1295                                     sText.append(
1296                                         rBody.subView(
1297                                             (pEncodedTextCopyBegin - pBegin),
1298                                             (q - 1 - pEncodedTextCopyBegin))
1299                                         + OString::Concat(" "));
1300                                     pEncodedTextCopyBegin = q;
1301                                     break;
1302
1303                                 default:
1304                                     if (!isVisible(nChar))
1305                                     {
1306                                         bEncodedWord = false;
1307                                         bDone = true;
1308                                     }
1309                                     break;
1310                             }
1311                         }
1312                 }
1313             }
1314
1315             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1316
1317             std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1318             sal_Size nUnicodeSize = 0;
1319             if (bEncodedWord)
1320             {
1321                 pUnicodeBuffer
1322                     = convertToUnicode(sText.getStr(),
1323                                        sText.getStr() + sText.getLength(),
1324                                        eCharsetEncoding, nUnicodeSize);
1325                 if (!pUnicodeBuffer)
1326                     bEncodedWord = false;
1327             }
1328
1329             if (bEncodedWord)
1330             {
1331                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1332                 sDecoded.append(
1333                     pUnicodeBuffer.get(),
1334                     static_cast< sal_Int32 >(nUnicodeSize));
1335                 pUnicodeBuffer.reset();
1336                 p = q;
1337                 pCopyBegin = p;
1338
1339                 pWSPBegin = p;
1340                 while (p != pEnd && isWhiteSpace(*p))
1341                     ++p;
1342                 /* bStartEncodedWord = p != pWSPBegin; */
1343                 continue;
1344             }
1345         }
1346
1347         if (p == pEnd)
1348             break;
1349
1350         switch (*p++)
1351         {
1352             case '"':
1353                 /* bStartEncodedWord = true; */
1354                 break;
1355
1356             case '(':
1357                 /* bStartEncodedWord = true; */
1358                 break;
1359
1360             case ')':
1361                 /* bStartEncodedWord = false; */
1362                 break;
1363
1364             default:
1365             {
1366                 const char * pUTF8Begin = p - 1;
1367                 const char * pUTF8End = pUTF8Begin;
1368                 sal_uInt32 nCharacter = 0;
1369                 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1370                 {
1371                     appendISO88591(sDecoded, pCopyBegin, p - 1);
1372                     sDecoded.appendUtf32(nCharacter);
1373                     p = pUTF8End;
1374                     pCopyBegin = p;
1375                 }
1376                 /* bStartEncodedWord = false; */
1377                 break;
1378             }
1379         }
1380         pWSPBegin = p;
1381     }
1382
1383     appendISO88591(sDecoded, pCopyBegin, pEnd);
1384     return sDecoded.makeStringAndClear();
1385 }
1386
1387 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */