tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <limits>
  22 #include <forward_list>
  23 #include <memory>
  24
  25 #include <sal/log.hxx>
  26 #include <rtl/ustring.hxx>
  27 #include <rtl/strbuf.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/inetmime.hxx>
  31 #include <rtl/character.hxx>
  32
  33 namespace {
  34
// Forward declaration only; the definition is not part of this section of
// the file.  parseParameters() below calls it with the [pBegin, pEnd) range
// of a parameter's charset name and uses the returned rtl_TextEncoding to
// decode the parameter value.
rtl_TextEncoding getCharsetEncoding(const char * pBegin,
                                           const char * pEnd);
  37
  38 /** Check for US-ASCII white space character.
  39
  40     @param nChar  Some UCS-4 character.
  41
  42     @return  True if nChar is a US-ASCII white space character (US-ASCII
  43     0x09 or 0x20).
  44  */
  45 bool isWhiteSpace(sal_uInt32 nChar)
  46 {
  47     return nChar == '\t' || nChar == ' ';
  48 }
  49
  50 /** Get the Base 64 digit weight of a US-ASCII character.
  51
  52     @param nChar  Some UCS-4 character.
  53
  54     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  55     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  56     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  57     character (US-ASCII '='), return -1; otherwise, return -2.
  58  */
  59 int getBase64Weight(sal_uInt32 nChar)
  60 {
  61     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  62            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  63            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  64            nChar == '+' ? 62 :
  65            nChar == '/' ? 63 :
  66            nChar == '=' ? -1 : -2;
  67 }
  68
  69 bool startsWithLineFolding(const sal_Unicode * pBegin,
  70                                             const sal_Unicode * pEnd)
  71 {
  72     DBG_ASSERT(pBegin && pBegin <= pEnd,
  73                "startsWithLineFolding(): Bad sequence");
  74
  75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  76            && isWhiteSpace(pBegin[2]); // CR, LF
  77 }
  78
  79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  80                                                         eEncoding)
  81 {
  82 #if defined(_WIN32)
  83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  84                RTL_TEXTENCODING_MS_1252 : eEncoding;
  85 #else
  86     return eEncoding;
  87 #endif
  88 }
  89
  90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  91 {
  92     return rtl_isOctetTextEncoding(eEncoding);
  93 }
  94
  95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
  96                                          const char * pEnd,
  97                                          rtl_TextEncoding eEncoding,
  98                                          sal_Size & rSize)
  99 {
 100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 101         return nullptr;
 102     rtl_TextToUnicodeConverter hConverter
 103         = rtl_createTextToUnicodeConverter(eEncoding);
 104     rtl_TextToUnicodeContext hContext
 105         = rtl_createTextToUnicodeContext(hConverter);
 106     std::unique_ptr<sal_Unicode[]> pBuffer;
 107     sal_uInt32 nInfo;
 108     for (sal_Size nBufferSize = pEnd - pBegin;;
 109          nBufferSize += nBufferSize / 3 + 1)
 110     {
 111         pBuffer.reset(new sal_Unicode[nBufferSize]);
 112         sal_Size nSrcCvtBytes;
 113         rSize = rtl_convertTextToUnicode(
 114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 115                     nBufferSize,
 116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 119                     &nInfo, &nSrcCvtBytes);
 120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 121             break;
 122         pBuffer.reset();
 123         rtl_resetTextToUnicodeContext(hConverter, hContext);
 124     }
 125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 126     rtl_destroyTextToUnicodeConverter(hConverter);
 127     if (nInfo != 0)
 128     {
 129         pBuffer.reset();
 130     }
 131     return pBuffer;
 132 }
 133
 134 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 135 {
 136     // See RFC 2279 for a discussion of UTF-8.
 137     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 138
 139     if (nChar < 0x80)
 140         rSink.append(char(nChar));
 141     else if (nChar < 0x800)
 142         rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
 143                 + OStringChar(char((nChar & 0x3F) | 0x80)));
 144     else if (nChar < 0x10000)
 145         rSink.append(
 146             OStringChar(char(nChar >> 12 | 0xE0))
 147              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 148              + OStringChar(char((nChar & 0x3F) | 0x80)));
 149     else if (nChar < 0x200000)
 150         rSink.append(
 151             OStringChar(char(nChar >> 18 | 0xF0))
 152              + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 153              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 154              + OStringChar(char((nChar & 0x3F) | 0x80)));
 155     else if (nChar < 0x4000000)
 156         rSink.append(
 157             OStringChar(char(nChar >> 24 | 0xF8))
 158             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 159             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 160             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 161             + OStringChar(char((nChar & 0x3F) | 0x80)));
 162     else
 163         rSink.append(
 164             OStringChar(char(nChar >> 30 | 0xFC))
 165             + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
 166             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 167             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 168             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 169             + OStringChar(char((nChar & 0x3F) | 0x80)));
 170 }
 171
 172 bool translateUTF8Char(const char *& rBegin,
 173                                  const char * pEnd,
 174                                  sal_uInt32 & rCharacter)
 175 {
 176     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 177         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 178         return false;
 179
 180     int nCount;
 181     sal_uInt32 nMin;
 182     sal_uInt32 nUCS4;
 183     const char * p = rBegin;
 184     if (static_cast< unsigned char >(*p) < 0xE0)
 185     {
 186         nCount = 1;
 187         nMin = 0x80;
 188         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 189     }
 190     else if (static_cast< unsigned char >(*p) < 0xF0)
 191     {
 192         nCount = 2;
 193         nMin = 0x800;
 194         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 195     }
 196     else if (static_cast< unsigned char >(*p) < 0xF8)
 197     {
 198         nCount = 3;
 199         nMin = 0x10000;
 200         nUCS4 = static_cast< unsigned char >(*p) & 7;
 201     }
 202     else if (static_cast< unsigned char >(*p) < 0xFC)
 203     {
 204         nCount = 4;
 205         nMin = 0x200000;
 206         nUCS4 = static_cast< unsigned char >(*p) & 3;
 207     }
 208     else
 209     {
 210         nCount = 5;
 211         nMin = 0x4000000;
 212         nUCS4 = static_cast< unsigned char >(*p) & 1;
 213     }
 214     ++p;
 215
 216     for (; nCount-- > 0; ++p)
 217         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 218             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 219         else
 220             return false;
 221
 222     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 223         return false;
 224
 225     rCharacter = nUCS4;
 226     rBegin = p;
 227     return true;
 228 }
 229
// Forward declaration; the definition follows further down in this file.
// Appends the bytes in [pBegin, pEnd) to rText, each byte widened to one
// sal_Unicode with the same numeric value.
void appendISO88591(OUStringBuffer & rText, char const * pBegin,
                    char const * pEnd);
 232
 233 struct Parameter
 234 {
 235     OString m_aAttribute;
 236     OString m_aCharset;
 237     OString m_aLanguage;
 238     OString m_aValue;
 239     sal_uInt32 m_nSection;
 240     bool m_bExtended;
 241
 242     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 243     {
 244         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 245         return nComp < 0 ||
 246                 (nComp == 0 && m_nSection < rhs.m_nSection);
 247     }
 248     struct IsSameSection // is used to check container for duplicates with std::any_of
 249     {
 250         const OString& rAttribute;
 251         const sal_uInt32 nSection;
 252         bool operator()(const Parameter& r) const
 253         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 254     };
 255 };
 256
 257 typedef std::forward_list<Parameter> ParameterList;
 258
// Forward declaration; the definition follows further down in this file.
// Validates the section numbering of rInput and, if pOutput is non-null,
// fills it with the decoded parameters.
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput);
 261
 262 //  appendISO88591
 263
 264 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 265                     char const * pEnd)
 266 {
 267     sal_Int32 nLength = pEnd - pBegin;
 268     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 269     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 270         *p++ = static_cast<unsigned char>(*pBegin++);
 271     rText.append(pBuffer.get(), nLength);
 272 }
 273
 274 //  parseParameters
 275
 276 bool parseParameters(ParameterList const & rInput,
 277                      INetContentTypeParameterList * pOutput)
 278 {
 279     if (pOutput)
 280         pOutput->clear();
 281
 282     for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
 283     {
 284         if (it->m_nSection > 0
 285             && (itPrev == rInput.end()
 286                 || itPrev->m_nSection != it->m_nSection - 1
 287                 || itPrev->m_aAttribute != it->m_aAttribute))
 288             return false;
 289     }
 290
 291     if (pOutput)
 292         for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
 293         {
 294             bool bCharset = !it->m_aCharset.isEmpty();
 295             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 296             if (bCharset)
 297                 eEncoding
 298                     = getCharsetEncoding(it->m_aCharset.getStr(),
 299                                                    it->m_aCharset.getStr()
 300                                                        + it->m_aCharset.getLength());
 301             OUStringBuffer aValue(64);
 302             bool bBadEncoding = false;
 303             itNext = it;
 304             do
 305             {
 306                 sal_Size nSize;
 307                 std::unique_ptr<sal_Unicode[]> pUnicode
 308                     = convertToUnicode(itNext->m_aValue.getStr(),
 309                                                  itNext->m_aValue.getStr()
 310                                                      + itNext->m_aValue.getLength(),
 311                                                  bCharset && it->m_bExtended ?
 312                                                      eEncoding :
 313                                                      RTL_TEXTENCODING_UTF8,
 314                                                  nSize);
 315                 if (!pUnicode && !(bCharset && it->m_bExtended))
 316                     pUnicode = convertToUnicode(
 317                                    itNext->m_aValue.getStr(),
 318                                    itNext->m_aValue.getStr()
 319                                        + itNext->m_aValue.getLength(),
 320                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 321                 if (!pUnicode)
 322                 {
 323                     bBadEncoding = true;
 324                     break;
 325                 }
 326                 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
 327                 ++itNext;
 328             }
 329             while (itNext != rInput.end() && itNext->m_nSection != 0);
 330
 331             if (bBadEncoding)
 332             {
 333                 aValue.setLength(0);
 334                 itNext = it;
 335                 do
 336                 {
 337                     if (itNext->m_bExtended)
 338                     {
 339                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 340                             aValue.append(
 341                                 static_cast<sal_Unicode>(
 342                                     static_cast<unsigned char>(itNext->m_aValue[i])
 343                                     | 0xF800)); // map to unicode corporate use sub area
 344                     }
 345                     else
 346                     {
 347                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 348                             aValue.append( itNext->m_aValue[i] );
 349                     }
 350                     ++itNext;
 351                 }
 352                 while (itNext != rInput.end() && itNext->m_nSection != 0);
 353             }
 354             auto const ret = pOutput->insert(
 355                 {it->m_aAttribute,
 356                  {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
 357             SAL_INFO_IF(!ret.second, "tools",
 358                 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
 359         }
 360     return true;
 361 }
 362
 363 /** Check whether some character is valid within an RFC 2045 <token>.
 364
 365     @param nChar  Some UCS-4 character.
 366
 367     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 368     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 369     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 370  */
 371 bool isTokenChar(sal_uInt32 nChar)
 372 {
 373     static const bool aMap[128]
 374         = { false, false, false, false, false, false, false, false,
 375             false, false, false, false, false, false, false, false,
 376             false, false, false, false, false, false, false, false,
 377             false, false, false, false, false, false, false, false,
 378             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 379             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 380              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 381              true,  true, false, false, false, false, false, false, //89:;<=>?
 382             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 383              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 384              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 385              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 386              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 387              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 388              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 389              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 390           };
 391     return rtl::isAscii(nChar) && aMap[nChar];
 392 }
 393
 394 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 395                                           const sal_Unicode * pEnd)
 396 {
 397     DBG_ASSERT(pBegin && pBegin <= pEnd,
 398                "skipComment(): Bad sequence");
 399
 400     if (pBegin != pEnd && *pBegin == '(')
 401     {
 402         sal_uInt32 nLevel = 0;
 403         for (const sal_Unicode * p = pBegin; p != pEnd;)
 404             switch (*p++)
 405             {
 406                 case '(':
 407                     ++nLevel;
 408                     break;
 409
 410                 case ')':
 411                     if (--nLevel == 0)
 412                         return p;
 413                     break;
 414
 415                 case '\\':
 416                     if (p != pEnd)
 417                         ++p;
 418                     break;
 419             }
 420     }
 421     return pBegin;
 422 }
 423
 424 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 425                                                               pBegin,
 426                                                           const sal_Unicode *
 427                                                               pEnd)
 428 {
 429     DBG_ASSERT(pBegin && pBegin <= pEnd,
 430                "skipLinearWhiteSpaceComment(): Bad sequence");
 431
 432     while (pBegin != pEnd)
 433         switch (*pBegin)
 434         {
 435             case '\t':
 436             case ' ':
 437                 ++pBegin;
 438                 break;
 439
 440             case 0x0D: // CR
 441                 if (startsWithLineFolding(pBegin, pEnd))
 442                     pBegin += 3;
 443                 else
 444                     return pBegin;
 445                 break;
 446
 447             case '(':
 448             {
 449                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 450                 if (p == pBegin)
 451                     return pBegin;
 452                 pBegin = p;
 453                 break;
 454             }
 455
 456             default:
 457                 return pBegin;
 458         }
 459     return pBegin;
 460 }
 461
 462 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 463                                                const sal_Unicode * pEnd)
 464 {
 465     DBG_ASSERT(pBegin && pBegin <= pEnd,
 466                "skipQuotedString(): Bad sequence");
 467
 468     if (pBegin != pEnd && *pBegin == '"')
 469         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 470             switch (*p++)
 471             {
 472                 case 0x0D: // CR
 473                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 474                         || !isWhiteSpace(*p++))
 475                         return pBegin;
 476                     break;
 477
 478                 case '"':
 479                     return p;
 480
 481                 case '\\':
 482                     if (p != pEnd)
 483                         ++p;
 484                     break;
 485             }
 486     return pBegin;
 487 }
 488
/** Scan a sequence of ";"-introduced parameters.

    Each parameter has the form
        ";" attribute ["*" section] ["*"] "=" value
    with optional white space/comments around the pieces.  Scanning stops at
    the first position that does not start a well-formed parameter; the
    parameters collected up to that point are then sorted and handed to
    parseParameters().

    @param pBegin  Start of the text to scan.
    @param pEnd  End of the text to scan.
    @param pParameters  If non-null, receives the decoded parameters (via
    parseParameters()).  If null, values are only skipped over, not
    collected.

    @return  If parseParameters() accepts the collected list: the position
    where scanning stopped, i.e. either the ';' introducing the first
    parameter that could not be scanned, or the first non-';' position after
    skipped white space/comments (possibly pEnd).  Otherwise pBegin.
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                             sal_Unicode const * pEnd,
                                             INetContentTypeParameterList *
                                                 pParameters)
{
    ParameterList aList;
    sal_Unicode const * pParameterBegin = pBegin;
    for (sal_Unicode const * p = pParameterBegin;;)
    {
        // Every parameter is introduced by ';'.  pParameterBegin is only
        // updated here, so any later "break" leaves it pointing at the ';'
        // of the parameter that failed to scan.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
            break;
        p = pParameterBegin + 1;

        // Attribute name: a non-empty run of token characters other than
        // '*', stored in lower case.
        sal_Unicode const * pAttributeBegin
            = skipLinearWhiteSpaceComment(p, pEnd);
        p = pAttributeBegin;
        bool bDowncaseAttribute = false;
        while (p != pEnd && isTokenChar(*p) && *p != '*')
        {
            bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
            ++p;
        }
        if (p == pAttributeBegin)
            break;
        OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
        if (bDowncaseAttribute)
            aAttribute = aAttribute.toAsciiLowerCase();

        // Optional "*" followed by a decimal section number.
        // NOTE(review): a lone '*' directly before '=' (as in "name*=...")
        // is consumed by this branch as well, so the extended-marker check
        // further down does not see it and bExtended stays false -- confirm
        // that this is intended.
        sal_uInt32 nSection = 0;
        if (p != pEnd && *p == '*')
        {
            ++p;
            if (p != pEnd && rtl::isAsciiDigit(*p)
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
                break;
        }

        // The same attribute/section pair must not occur twice.
        bool bPresent = std::any_of(aList.begin(), aList.end(),
                                    Parameter::IsSameSection{aAttribute, nSection});
        if (bPresent)
            break;

        // A further "*" marks the value as being in extended form.
        bool bExtended = false;
        if (p != pEnd && *p == '*')
        {
            ++p;
            bExtended = true;
        }

        p = skipLinearWhiteSpaceComment(p, pEnd);

        if (p == pEnd || *p != '=')
            break;

        p = skipLinearWhiteSpaceComment(p + 1, pEnd);

        OString aCharset;
        OString aLanguage;
        OString aValue;
        if (bExtended)
        {
            // Only section 0 carries the charset'language' prefix.
            if (nSection == 0)
            {
                // Charset: a non-empty run of token characters up to the
                // first "'", stored in lower case.
                sal_Unicode const * pCharsetBegin = p;
                bool bDowncaseCharset = false;
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
                {
                    bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
                    ++p;
                }
                if (p == pCharsetBegin)
                    break;
                if (pParameters)
                {
                    aCharset = OString(
                        pCharsetBegin,
                        p - pCharsetBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseCharset)
                        aCharset = aCharset.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;

                // Language: groups of one to eight ASCII letters separated
                // by '-'.  nLetters counts the letters of the current
                // group; an empty or over-long final group is rejected
                // below, so the language may not be empty.
                sal_Unicode const * pLanguageBegin = p;
                bool bDowncaseLanguage = false;
                int nLetters = 0;
                for (; p != pEnd; ++p)
                    if (rtl::isAsciiAlpha(*p))
                    {
                        if (++nLetters > 8)
                            break;
                        bDowncaseLanguage = bDowncaseLanguage
                                            || rtl::isAsciiUpperCase(*p);
                    }
                    else if (*p == '-')
                    {
                        if (nLetters == 0)
                            break;
                        nLetters = 0;
                    }
                    else
                        break;
                if (nLetters == 0 || nLetters > 8)
                    break;
                if (pParameters)
                {
                    aLanguage = OString(
                        pLanguageBegin,
                        p - pLanguageBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseLanguage)
                        aLanguage = aLanguage.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;
            }
            if (pParameters)
            {
                // Collect the value bytes: "%XX" with two hex digits yields
                // the byte XX, any other accepted character is written as
                // UTF-8.  The value ends at the first ASCII character that
                // is not a token character.
                OStringBuffer aSink;
                while (p != pEnd)
                {
                    // Read ahead with a copy so that p is only advanced
                    // once the character is known to belong to the value.
                    auto q = p;
                    sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
                        break;
                    p = q;
                    if (nChar == '%' && p + 1 < pEnd)
                    {
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
                        if (nWeight1 >= 0 && nWeight2 >= 0)
                        {
                            aSink.append(char(nWeight1 << 4 | nWeight2));
                            p += 2;
                            continue;
                        }
                    }
                    writeUTF8(aSink, nChar);
                }
                aValue = aSink.makeStringAndClear();
            }
            else
                // Nobody wants the value: just skip over it.
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                    ++p;
        }
        else if (p != pEnd && *p == '"')
            // Quoted-string value.
            if (pParameters)
            {
                OStringBuffer aSink(256);
                bool bInvalid = false;
                for (++p;;)
                {
                    // Running into the end before the closing quote makes
                    // the parameter invalid.
                    if (p == pEnd)
                    {
                        bInvalid = true;
                        break;
                    }
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (nChar == '"')
                        break;
                    else if (nChar == 0x0D) // CR
                    {
                        // CR is only accepted as part of a line folding
                        // (CR LF white space); the white space character
                        // is what gets written to the value.
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
                            || !isWhiteSpace(*p))
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = static_cast<unsigned char>(*p++);
                    }
                    else if (nChar == '\\')
                    {
                        // A backslash makes the following character
                        // literal; it must not be the last character.
                        if (p == pEnd)
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = INetMIME::getUTF32Character(p, pEnd);
                    }
                    writeUTF8(aSink, nChar);
                }
                if (bInvalid)
                    break;
                aValue = aSink.makeStringAndClear();
            }
            else
            {
                // Nobody wants the value: only check that the string is
                // well-formed and skip it.
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
                if (p == pStringEnd)
                    break;
                p = pStringEnd;
            }
        else
        {
            // Plain token value: a non-empty run of token characters and
            // non-ASCII characters.
            sal_Unicode const * pTokenBegin = p;
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                ++p;
            if (p == pTokenBegin)
                break;
            if (pParameters)
                aValue = OString(
                    pTokenBegin, p - pTokenBegin,
                    RTL_TEXTENCODING_UTF8);
        }
        aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
    }
    // Bring the sections of each attribute together, in section order, as
    // required by parseParameters().
    aList.sort();
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
}
 704
 705 bool equalIgnoreCase(const char * pBegin1,
 706                                const char * pEnd1,
 707                                const char * pString2)
 708 {
 709     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 710                "equalIgnoreCase(): Bad sequences");
 711
 712     while (*pString2 != 0)
 713         if (pBegin1 == pEnd1
 714             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 715                 != rtl::toAsciiUpperCase(
 716                     static_cast<unsigned char>(*pString2++))))
 717             return false;
 718     return pBegin1 == pEnd1;
 719 }
 720
 721 struct EncodingEntry
 722 {
 723     char const * m_aName;
 724     rtl_TextEncoding m_eEncoding;
 725 };
 726
 727 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 728 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 729 // noted:
 730 EncodingEntry const aEncodingMap[]
 731     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 732         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 733         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 734         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 735         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 736         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 737         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 738         { "US", RTL_TEXTENCODING_ASCII_US },
 739         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 740         { "CP367", RTL_TEXTENCODING_ASCII_US },
 741         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 742         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 743         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 744         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 745         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 746         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 747         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 748         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 749         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 750         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 751         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 752         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 753         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 754         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 755         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 756         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 757         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 758         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 759         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 760         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 761         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 762         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 763         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 764         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 765         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 766         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 767         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 768         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 769         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 770         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 771         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 772         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 773         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 774         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 775         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 776         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 777         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 778         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 779         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 780         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 781         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 782         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 783         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 784         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 785         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 786         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 787         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 788         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 789         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 790         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 791         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 792         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 793         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 794         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 795         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 796         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 797         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 798         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 799         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 800         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 801         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 802         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 803         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 804         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 805         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 806         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 807         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 808         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 809         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 810         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 811         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 812         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 813         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 814         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 815         { "CP437", RTL_TEXTENCODING_IBM_437 },
 816         { "437", RTL_TEXTENCODING_IBM_437 },
 817         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 818         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 819         { "CP850", RTL_TEXTENCODING_IBM_850 },
 820         { "850", RTL_TEXTENCODING_IBM_850 },
 821         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 822         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 823         { "CP860", RTL_TEXTENCODING_IBM_860 },
 824         { "860", RTL_TEXTENCODING_IBM_860 },
 825         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 826         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 827         { "CP861", RTL_TEXTENCODING_IBM_861 },
 828         { "861", RTL_TEXTENCODING_IBM_861 },
 829         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 830         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 831         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 832         { "CP863", RTL_TEXTENCODING_IBM_863 },
 833         { "863", RTL_TEXTENCODING_IBM_863 },
 834         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 835         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 836         { "CP865", RTL_TEXTENCODING_IBM_865 },
 837         { "865", RTL_TEXTENCODING_IBM_865 },
 838         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 839         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 840         { "CP775", RTL_TEXTENCODING_IBM_775 },
 841         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 842         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 843         { "CP852", RTL_TEXTENCODING_IBM_852 },
 844         { "852", RTL_TEXTENCODING_IBM_852 },
 845         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 846         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 847         { "CP855", RTL_TEXTENCODING_IBM_855 },
 848         { "855", RTL_TEXTENCODING_IBM_855 },
 849         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 850         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 851         { "CP857", RTL_TEXTENCODING_IBM_857 },
 852         { "857", RTL_TEXTENCODING_IBM_857 },
 853         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 854         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 855         { "CP862", RTL_TEXTENCODING_IBM_862 },
 856         { "862", RTL_TEXTENCODING_IBM_862 },
 857         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 858         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 859         { "CP864", RTL_TEXTENCODING_IBM_864 },
 860         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 861         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 862         { "CP866", RTL_TEXTENCODING_IBM_866 },
 863         { "866", RTL_TEXTENCODING_IBM_866 },
 864         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 865         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 866         { "CP869", RTL_TEXTENCODING_IBM_869 },
 867         { "869", RTL_TEXTENCODING_IBM_869 },
 868         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 869         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 870         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 871         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 872         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 873         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 874         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 875         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 876         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 877         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 878         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 879         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 880         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 881         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 882         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 883         { "BIG5", RTL_TEXTENCODING_BIG5 },
 884         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 885         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 886         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 887           RTL_TEXTENCODING_EUC_JP },
 888         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 889         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 890         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 891         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 892         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 893         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 894         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 895         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 896         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 897         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 898         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 899         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 900         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 901         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 902         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 903         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 904         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 905         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 906
 907 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
 908                                               char const * pEnd)
 909 {
 910     for (const EncodingEntry& i : aEncodingMap)
 911         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 912             return i.m_eEncoding;
 913     return RTL_TEXTENCODING_DONTKNOW;
 914 }
 915
 916 }
 917
 918 //  INetMIME
 919
 920 // static
 921 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 922 {
 923     static const bool aMap[128]
 924         = { false, false, false, false, false, false, false, false,
 925             false, false, false, false, false, false, false, false,
 926             false, false, false, false, false, false, false, false,
 927             false, false, false, false, false, false, false, false,
 928             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 929             false, false,  true,  true, false,  true, false,  true, //()*+,-./
 930              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 931              true,  true, false, false, false,  true, false,  true, //89:;<=>?
 932             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 933              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 934              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 935              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 936              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 937              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 938              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 939              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 940           };
 941     return rtl::isAscii(nChar) && aMap[nChar];
 942 }
 943
 944 // static
 945 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 946 {
 947     static const bool aMap[128]
 948         = { false, false, false, false, false, false, false, false,
 949             false, false, false, false, false, false, false, false,
 950             false, false, false, false, false, false, false, false,
 951             false, false, false, false, false, false, false, false,
 952             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 953             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 954              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 955              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 956              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 957              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 958              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 959              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 960              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 961              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 962              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 963              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 964           };
 965     return rtl::isAscii(nChar) && aMap[nChar];
 966 }
 967
 968 // static
 969 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 970                                const sal_Unicode * pEnd1,
 971                                const char * pString2)
 972 {
 973     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 974                "INetMIME::equalIgnoreCase(): Bad sequences");
 975
 976     while (*pString2 != 0)
 977         if (pBegin1 == pEnd1
 978             || (rtl::toAsciiUpperCase(*pBegin1++)
 979                 != rtl::toAsciiUpperCase(
 980                     static_cast<unsigned char>(*pString2++))))
 981             return false;
 982     return pBegin1 == pEnd1;
 983 }
 984
 985 // static
 986 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
 987                             const sal_Unicode * pEnd, bool bLeadingZeroes,
 988                             sal_uInt32 & rValue)
 989 {
 990     sal_uInt64 nTheValue = 0;
 991     const sal_Unicode * p = rBegin;
 992     for ( ; p != pEnd; ++p)
 993     {
 994         int nWeight = getWeight(*p);
 995         if (nWeight < 0)
 996             break;
 997         nTheValue = 10 * nTheValue + nWeight;
 998         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 999             return false;
1000     }
1001     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1002         return false;
1003     rBegin = p;
1004     rValue = sal_uInt32(nTheValue);
1005     return true;
1006 }
1007
1008 // static
1009 sal_Unicode const * INetMIME::scanContentType(
1010     std::u16string_view rStr, OUString * pType,
1011     OUString * pSubType, INetContentTypeParameterList * pParameters)
1012 {
1013     sal_Unicode const * pBegin = rStr.data();
1014     sal_Unicode const * pEnd = pBegin + rStr.size();
1015     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1016     sal_Unicode const * pTypeBegin = p;
1017     while (p != pEnd && isTokenChar(*p))
1018     {
1019         ++p;
1020     }
1021     if (p == pTypeBegin)
1022         return nullptr;
1023     sal_Unicode const * pTypeEnd = p;
1024
1025     p = skipLinearWhiteSpaceComment(p, pEnd);
1026     if (p == pEnd || *p++ != '/')
1027         return nullptr;
1028
1029     p = skipLinearWhiteSpaceComment(p, pEnd);
1030     sal_Unicode const * pSubTypeBegin = p;
1031     while (p != pEnd && isTokenChar(*p))
1032     {
1033         ++p;
1034     }
1035     if (p == pSubTypeBegin)
1036         return nullptr;
1037     sal_Unicode const * pSubTypeEnd = p;
1038
1039     if (pType != nullptr)
1040     {
1041         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1042     }
1043     if (pSubType != nullptr)
1044     {
1045         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1046             .toAsciiLowerCase();
1047     }
1048
1049     return scanParameters(p, pEnd, pParameters);
1050 }
1051
/** Decode the encoded words contained in a header field body.

    The body is scanned for encoded words of the form
    <code>=?charset[*language]?(B|Q)?text?=</code> (see the grammar below).
    Each encoded word that is well formed, names a known charset, and whose
    text can be converted, is replaced by its Unicode text.  White space
    between two directly adjacent accepted encoded words is dropped.  All
    other input is copied through: byte sequences accepted by
    translateUTF8Char() are appended as the resulting UCS-4 character, the
    remaining bytes are appended via appendISO88591().

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
// static
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1.

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const char * pBegin = rBody.getStr();
    const char * pEnd = pBegin + rBody.getLength();

    OUStringBuffer sDecoded;
    // Start of the input that has been scanned but not yet appended to
    // sDecoded.
    const char * pCopyBegin = pBegin;

    /* bool bStartEncodedWord = true; */
    // End of the pending input that is to be copied if an encoded word
    // starts at p; differs from p only while p stands behind white space that
    // follows an accepted encoded word.
    const char * pWSPBegin = pBegin;

    for (const char * p = pBegin; p != pEnd;)
    {
        if (*p == '=' /* && bStartEncodedWord */)
        {
            // Tentatively parse an encoded word with q; p is only moved once
            // the whole encoded word has been accepted.
            const char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                // Scan the charset name up to the next '?', keeping track of
                // an optional "*language" suffix so that it can be excluded
                // from the charset lookup.
                const char * pCharsetBegin = q;
                const char * pLanguageBegin = nullptr;
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                // Possible start of a language suffix.
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                // Within a language suffix, '-' ends a
                                // subtag, which must not be empty.
                                if (pLanguageBegin != nullptr)
                                {
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                // End of charset[*language]; an empty charset
                                // is not acceptable.
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // Without a valid language suffix the
                                    // whole text up to the '?' is taken as the
                                    // charset name.
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                              pCharsetBegin,
                                              pLanguageBegin == nullptr
                                              || nAlphaCount == 0 ?
                                                  q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                                       eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                // A language subtag consists of at most eight
                                // US-ASCII letters; anything else means that
                                // there is no valid language suffix.
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(
                                            static_cast<unsigned char>(cChar))
                                        || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
            }

            // The encoding indicator: 'B' or 'Q', in either case.
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            // The '?' between the encoding indicator and the encoded text.
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Collects the decoded bytes of the encoded text (still in the
            // charset named by the encoded word).
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    // Decode the text in groups of four characters, each
                    // group yielding up to three bytes.
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            // No complete group is left.
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            bool bFinal = false;
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                // Judging from its use here, getBase64Weight()
                                // yields -1 for the padding character and -2
                                // for characters that are not allowed at
                                // all -- TODO confirm against its definition.
                                int nWeight = getBase64Weight(*q++);
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is not allowed in the first
                                        // two positions of a group.
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        // Padding from the third position on
                                        // leaves one byte, padding in the
                                        // fourth position only leaves two.
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(char(nValue >> nShift & 0xFF));
                                // NOTE(review): q may equal pEnd here, so this
                                // read appears to rely on the buffer of rBody
                                // being NUL terminated -- confirm.
                                if (*q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded group must be the last one.
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    const char * pEncodedTextBegin = q;
                    // Start of the run of literal characters that has not yet
                    // been copied to sText.
                    const char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            sal_uInt32 nChar = static_cast<unsigned char>(*q++);
                            switch (nChar)
                            {
                                case '=':
                                {
                                    // '=' must be followed by two hexadecimal
                                    // digits that encode a single byte.
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(q[0]);
                                    int nDigit2 = getHexWeight(q[1]);
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    // Flush the pending literal run, followed
                                    // by the decoded byte.
                                    sText.append(
                                        rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin))
                                        + OStringChar(char(nDigit1 << 4 | nDigit2)));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // End of the encoded text, which must not
                                    // be empty; flush the pending literal run.
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    // '_' stands for a space character.
                                    sText.append(
                                        rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin))
                                        + OString::Concat(" "));
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    // Literal characters stay part of the
                                    // pending run, provided isVisible() holds
                                    // for them.
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                }
            }

            // The '?' that ends the encoded text has already been consumed;
            // it must be followed by '='.
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer
                    = convertToUnicode(sText.getStr(),
                                       sText.getStr() + sText.getLength(),
                                       eCharsetEncoding, nUnicodeSize);
                // A null buffer is treated as a failed conversion, in which
                // case the text is kept undecoded.
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Copy the pending input up to pWSPBegin (which leaves out
                // white space between two adjacent encoded words), followed by
                // the decoded text.
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded.append(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                pUnicodeBuffer.reset();
                p = q;
                pCopyBegin = p;

                // Skip white space after the encoded word without giving up
                // on it: as pCopyBegin stays in front of it, it is still
                // copied unless another encoded word follows directly.
                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        if (p == pEnd)
            break;

        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                // Try to read a UTF-8 encoded character starting at the
                // current position; if that succeeds, flush the pending input
                // and append the character.  Otherwise the byte simply stays
                // part of the pending input.
                const char * pUTF8Begin = p - 1;
                const char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sDecoded.appendUtf32(nCharacter);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        pWSPBegin = p;
    }

    // Copy whatever input is still pending.
    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded.makeStringAndClear();
}
1385
1386 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */