tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <limits>
  22 #include <forward_list>
  23 #include <memory>
  24
  25 #include <sal/log.hxx>
  26 #include <rtl/ustring.hxx>
  27 #include <rtl/strbuf.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/debug.hxx>
  31 #include <tools/inetmime.hxx>
  32 #include <rtl/character.hxx>
  33
  34 namespace {
  35
// Forward declaration: parseParameters() below calls this with the
// [pBegin, pEnd) range of a charset name before any definition is visible.
// NOTE(review): presumably maps a MIME charset name to an rtl_TextEncoding;
// confirm against the definition, which is not part of this excerpt.
rtl_TextEncoding getCharsetEncoding(const char * pBegin,
                                           const char * pEnd);
  38
  39 /** Check for US-ASCII white space character.
  40
  41     @param nChar  Some UCS-4 character.
  42
  43     @return  True if nChar is a US-ASCII white space character (US-ASCII
  44     0x09 or 0x20).
  45  */
  46 bool isWhiteSpace(sal_uInt32 nChar)
  47 {
  48     return nChar == '\t' || nChar == ' ';
  49 }
  50
  51 /** Get the Base 64 digit weight of a US-ASCII character.
  52
  53     @param nChar  Some UCS-4 character.
  54
  55     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  56     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  57     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  58     character (US-ASCII '='), return -1; otherwise, return -2.
  59  */
  60 int getBase64Weight(sal_uInt32 nChar)
  61 {
  62     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  63            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  64            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  65            nChar == '+' ? 62 :
  66            nChar == '/' ? 63 :
  67            nChar == '=' ? -1 : -2;
  68 }
  69
  70 bool startsWithLineFolding(const sal_Unicode * pBegin,
  71                                             const sal_Unicode * pEnd)
  72 {
  73     assert(pBegin && pBegin <= pEnd && "startsWithLineFolding(): Bad sequence");
  74
  75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  76            && isWhiteSpace(pBegin[2]); // CR, LF
  77 }
  78
  79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  80                                                         eEncoding)
  81 {
  82 #if defined(_WIN32)
  83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  84                RTL_TEXTENCODING_MS_1252 : eEncoding;
  85 #else
  86     return eEncoding;
  87 #endif
  88 }
  89
  90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  91 {
  92     return rtl_isOctetTextEncoding(eEncoding);
  93 }
  94
  95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
  96                                          const char * pEnd,
  97                                          rtl_TextEncoding eEncoding,
  98                                          sal_Size & rSize)
  99 {
 100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 101         return nullptr;
 102     rtl_TextToUnicodeConverter hConverter
 103         = rtl_createTextToUnicodeConverter(eEncoding);
 104     rtl_TextToUnicodeContext hContext
 105         = rtl_createTextToUnicodeContext(hConverter);
 106     std::unique_ptr<sal_Unicode[]> pBuffer;
 107     sal_uInt32 nInfo;
 108     for (sal_Size nBufferSize = pEnd - pBegin;;
 109          nBufferSize += nBufferSize / 3 + 1)
 110     {
 111         pBuffer.reset(new sal_Unicode[nBufferSize]);
 112         sal_Size nSrcCvtBytes;
 113         rSize = rtl_convertTextToUnicode(
 114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 115                     nBufferSize,
 116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 119                     &nInfo, &nSrcCvtBytes);
 120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 121             break;
 122         pBuffer.reset();
 123         rtl_resetTextToUnicodeContext(hConverter, hContext);
 124     }
 125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 126     rtl_destroyTextToUnicodeConverter(hConverter);
 127     if (nInfo != 0)
 128     {
 129         pBuffer.reset();
 130     }
 131     return pBuffer;
 132 }
 133
 134 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 135 {
 136     // See RFC 2279 for a discussion of UTF-8.
 137     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 138
 139     if (nChar < 0x80)
 140         rSink.append(char(nChar));
 141     else if (nChar < 0x800)
 142         rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
 143                 + OStringChar(char((nChar & 0x3F) | 0x80)));
 144     else if (nChar < 0x10000)
 145         rSink.append(
 146             OStringChar(char(nChar >> 12 | 0xE0))
 147              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 148              + OStringChar(char((nChar & 0x3F) | 0x80)));
 149     else if (nChar < 0x200000)
 150         rSink.append(
 151             OStringChar(char(nChar >> 18 | 0xF0))
 152              + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 153              + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 154              + OStringChar(char((nChar & 0x3F) | 0x80)));
 155     else if (nChar < 0x4000000)
 156         rSink.append(
 157             OStringChar(char(nChar >> 24 | 0xF8))
 158             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 159             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 160             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 161             + OStringChar(char((nChar & 0x3F) | 0x80)));
 162     else
 163         rSink.append(
 164             OStringChar(char(nChar >> 30 | 0xFC))
 165             + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
 166             + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
 167             + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
 168             + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
 169             + OStringChar(char((nChar & 0x3F) | 0x80)));
 170 }
 171
 172 bool translateUTF8Char(const char *& rBegin,
 173                                  const char * pEnd,
 174                                  sal_uInt32 & rCharacter)
 175 {
 176     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 177         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 178         return false;
 179
 180     int nCount;
 181     sal_uInt32 nMin;
 182     sal_uInt32 nUCS4;
 183     const char * p = rBegin;
 184     if (static_cast< unsigned char >(*p) < 0xE0)
 185     {
 186         nCount = 1;
 187         nMin = 0x80;
 188         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 189     }
 190     else if (static_cast< unsigned char >(*p) < 0xF0)
 191     {
 192         nCount = 2;
 193         nMin = 0x800;
 194         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 195     }
 196     else if (static_cast< unsigned char >(*p) < 0xF8)
 197     {
 198         nCount = 3;
 199         nMin = 0x10000;
 200         nUCS4 = static_cast< unsigned char >(*p) & 7;
 201     }
 202     else if (static_cast< unsigned char >(*p) < 0xFC)
 203     {
 204         nCount = 4;
 205         nMin = 0x200000;
 206         nUCS4 = static_cast< unsigned char >(*p) & 3;
 207     }
 208     else
 209     {
 210         nCount = 5;
 211         nMin = 0x4000000;
 212         nUCS4 = static_cast< unsigned char >(*p) & 1;
 213     }
 214     ++p;
 215
 216     for (; nCount-- > 0; ++p)
 217         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 218             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 219         else
 220             return false;
 221
 222     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 223         return false;
 224
 225     rCharacter = nUCS4;
 226     rBegin = p;
 227     return true;
 228 }
 229
// Forward declaration; appendISO88591() is defined further down in this file.
void appendISO88591(OUStringBuffer & rText, char const * pBegin,
                    char const * pEnd);
 232
 233 struct Parameter
 234 {
 235     OString m_aAttribute;
 236     OString m_aCharset;
 237     OString m_aLanguage;
 238     OString m_aValue;
 239     sal_uInt32 m_nSection;
 240     bool m_bExtended;
 241
 242     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 243     {
 244         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 245         return nComp < 0 ||
 246                 (nComp == 0 && m_nSection < rhs.m_nSection);
 247     }
 248     struct IsSameSection // is used to check container for duplicates with std::any_of
 249     {
 250         const OString& rAttribute;
 251         const sal_uInt32 nSection;
 252         bool operator()(const Parameter& r) const
 253         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 254     };
 255 };
 256
 257 typedef std::forward_list<Parameter> ParameterList;
 258
 259 bool parseParameters(ParameterList const & rInput,
 260                      INetContentTypeParameterList * pOutput);
 261
 262 //  appendISO88591
 263
 264 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 265                     char const * pEnd)
 266 {
 267     sal_Int32 nLength = pEnd - pBegin;
 268     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 269     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 270         *p++ = static_cast<unsigned char>(*pBegin++);
 271     rText.append(pBuffer.get(), nLength);
 272 }
 273
/** Validate a list of raw parameter sections and, optionally, assemble the
    final parameter values from them.

    The validation relies on sections of one attribute being adjacent and in
    ascending section order; scanParameters() sorts the list before calling.

    @param rInput  The raw parameter sections.

    @param pOutput  If not null, it is cleared first and then receives one
    entry per attribute, holding the charset and language of the first
    section, the concatenated value, and a flag telling whether the value
    could be converted.  If an attribute is already present, the insertion
    result is only logged (SAL_INFO_IF).

    @return  False if some section with a number above 0 is not directly
    preceded by the section of the same attribute with the next lower number;
    true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput)
{
    if (pOutput)
        pOutput->clear();

    // Pass 1: every continuation section (number > 0) must directly follow
    // section number - 1 of the same attribute.
    for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
    {
        if (it->m_nSection > 0
            && (itPrev == rInput.end()
                || itPrev->m_nSection != it->m_nSection - 1
                || itPrev->m_aAttribute != it->m_aAttribute))
            return false;
    }

    // Pass 2 (only when output is requested): 'it' marks the first section of
    // a parameter, 'itNext' walks over its sections and ends up at the first
    // section of the next parameter.
    if (pOutput)
        for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
        {
            // The charset is taken from the first section only.
            bool bCharset = !it->m_aCharset.isEmpty();
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bCharset)
                eEncoding
                    = getCharsetEncoding(it->m_aCharset.getStr(),
                                                   it->m_aCharset.getStr()
                                                       + it->m_aCharset.getLength());
            OUStringBuffer aValue(64);
            bool bBadEncoding = false;
            itNext = it;
            do
            {
                // Use the declared charset if the first section is extended
                // and names one; otherwise try UTF-8 ...
                sal_Size nSize;
                std::unique_ptr<sal_Unicode[]> pUnicode
                    = convertToUnicode(itNext->m_aValue.getStr(),
                                                 itNext->m_aValue.getStr()
                                                     + itNext->m_aValue.getLength(),
                                                 bCharset && it->m_bExtended ?
                                                     eEncoding :
                                                     RTL_TEXTENCODING_UTF8,
                                                 nSize);
                // ... and fall back to ISO 8859-1 if the UTF-8 attempt failed.
                if (!pUnicode && !(bCharset && it->m_bExtended))
                    pUnicode = convertToUnicode(
                                   itNext->m_aValue.getStr(),
                                   itNext->m_aValue.getStr()
                                       + itNext->m_aValue.getLength(),
                                   RTL_TEXTENCODING_ISO_8859_1, nSize);
                if (!pUnicode)
                {
                    bBadEncoding = true;
                    break;
                }
                aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
                ++itNext;
            }
            while (itNext != rInput.end() && itNext->m_nSection != 0);

            // Some section could not be converted: discard what was
            // collected and rebuild the value from the raw bytes of all
            // sections of this parameter.
            if (bBadEncoding)
            {
                aValue.setLength(0);
                itNext = it;
                do
                {
                    if (itNext->m_bExtended)
                    {
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
                            aValue.append(
                                static_cast<sal_Unicode>(
                                    static_cast<unsigned char>(itNext->m_aValue[i])
                                    | 0xF800)); // map to unicode corporate use sub area
                    }
                    else
                    {
                        // Non-extended sections are appended character by
                        // character without any mapping.
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
                            aValue.append( itNext->m_aValue[i] );
                    }
                    ++itNext;
                }
                while (itNext != rInput.end() && itNext->m_nSection != 0);
            }
            // The last member of the inserted value is true when the value
            // was converted successfully.
            auto const ret = pOutput->insert(
                {it->m_aAttribute,
                 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
            SAL_INFO_IF(!ret.second, "tools",
                "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
        }
    return true;
}
 362
 363 /** Check whether some character is valid within an RFC 2045 <token>.
 364
 365     @param nChar  Some UCS-4 character.
 366
 367     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 368     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 369     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 370  */
 371 bool isTokenChar(sal_uInt32 nChar)
 372 {
 373     static const bool aMap[128]
 374         = { false, false, false, false, false, false, false, false,
 375             false, false, false, false, false, false, false, false,
 376             false, false, false, false, false, false, false, false,
 377             false, false, false, false, false, false, false, false,
 378             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 379             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 380              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 381              true,  true, false, false, false, false, false, false, //89:;<=>?
 382             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 383              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 384              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 385              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 386              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 387              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 388              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 389              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 390           };
 391     return rtl::isAscii(nChar) && aMap[nChar];
 392 }
 393
 394 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 395                                           const sal_Unicode * pEnd)
 396 {
 397     assert(pBegin && pBegin <= pEnd && "skipComment(): Bad sequence");
 398
 399     if (pBegin != pEnd && *pBegin == '(')
 400     {
 401         sal_uInt32 nLevel = 0;
 402         for (const sal_Unicode * p = pBegin; p != pEnd;)
 403             switch (*p++)
 404             {
 405                 case '(':
 406                     ++nLevel;
 407                     break;
 408
 409                 case ')':
 410                     if (--nLevel == 0)
 411                         return p;
 412                     break;
 413
 414                 case '\\':
 415                     if (p != pEnd)
 416                         ++p;
 417                     break;
 418             }
 419     }
 420     return pBegin;
 421 }
 422
 423 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 424                                                               pBegin,
 425                                                           const sal_Unicode *
 426                                                               pEnd)
 427 {
 428     assert(pBegin && pBegin <= pEnd && "skipLinearWhiteSpaceComment(): Bad sequence");
 429
 430     while (pBegin != pEnd)
 431         switch (*pBegin)
 432         {
 433             case '\t':
 434             case ' ':
 435                 ++pBegin;
 436                 break;
 437
 438             case 0x0D: // CR
 439                 if (startsWithLineFolding(pBegin, pEnd))
 440                     pBegin += 3;
 441                 else
 442                     return pBegin;
 443                 break;
 444
 445             case '(':
 446             {
 447                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 448                 if (p == pBegin)
 449                     return pBegin;
 450                 pBegin = p;
 451                 break;
 452             }
 453
 454             default:
 455                 return pBegin;
 456         }
 457     return pBegin;
 458 }
 459
 460 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 461                                                const sal_Unicode * pEnd)
 462 {
 463     assert(pBegin && pBegin <= pEnd && "skipQuotedString(): Bad sequence");
 464
 465     if (pBegin != pEnd && *pBegin == '"')
 466         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 467             switch (*p++)
 468             {
 469                 case 0x0D: // CR
 470                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 471                         || !isWhiteSpace(*p++))
 472                         return pBegin;
 473                     break;
 474
 475                 case '"':
 476                     return p;
 477
 478                 case '\\':
 479                     if (p != pEnd)
 480                         ++p;
 481                     break;
 482             }
 483     return pBegin;
 484 }
 485
/** Scan a sequence of ";attribute=value" parameters.

    Each parameter may carry a section number ("attribute*N") and/or an
    extended marker (a trailing '*').  Scanning stops quietly at the first
    position that does not start a well-formed parameter; the parameters
    collected up to that point are then sorted and handed to
    parseParameters().

    @param pBegin  Start of the sequence.

    @param pEnd  End of the sequence.

    @param pParameters  If not null, receives the parsed parameters (via
    parseParameters()).  If null, values are only skipped, not collected.

    @return  If parseParameters() succeeds, a pointer to where scanning
    stopped (after skipping white space and comments that follow the last
    complete parameter); otherwise pBegin.
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                             sal_Unicode const * pEnd,
                                             INetContentTypeParameterList *
                                                 pParameters)
{
    ParameterList aList;
    sal_Unicode const * pParameterBegin = pBegin;
    for (sal_Unicode const * p = pParameterBegin;;)
    {
        // Every parameter is introduced by ';'.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
            break;
        p = pParameterBegin + 1;

        // Attribute name: token characters up to (not including) a '*';
        // stored in lower case.
        sal_Unicode const * pAttributeBegin
            = skipLinearWhiteSpaceComment(p, pEnd);
        p = pAttributeBegin;
        bool bDowncaseAttribute = false;
        while (p != pEnd && isTokenChar(*p) && *p != '*')
        {
            bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
            ++p;
        }
        if (p == pAttributeBegin)
            break;
        OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
        if (bDowncaseAttribute)
            aAttribute = aAttribute.toAsciiLowerCase();

        // Optional "*N" section number.
        // NOTE(review): INetMIME::scanUnsigned() presumably parses the digits
        // into nSection and advances p; confirm against its definition.
        sal_uInt32 nSection = 0;
        if (p != pEnd && *p == '*')
        {
            ++p;
            if (p != pEnd && rtl::isAsciiDigit(*p)
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
                break;
        }

        // The same attribute/section pair must not occur twice.
        bool bPresent = std::any_of(aList.begin(), aList.end(),
                                    Parameter::IsSameSection{aAttribute, nSection});
        if (bPresent)
            break;

        // Optional '*' marking an extended value.
        // NOTE(review): for "attribute*=value" the single '*' has already
        // been consumed by the section check above, so bExtended stays false
        // here; confirm whether that is intended.
        bool bExtended = false;
        if (p != pEnd && *p == '*')
        {
            ++p;
            bExtended = true;
        }

        p = skipLinearWhiteSpaceComment(p, pEnd);

        if (p == pEnd || *p != '=')
            break;

        p = skipLinearWhiteSpaceComment(p + 1, pEnd);

        OString aCharset;
        OString aLanguage;
        OString aValue;
        if (bExtended)
        {
            // Section 0 of an extended value starts with
            // charset'language' before the value itself.
            if (nSection == 0)
            {
                sal_Unicode const * pCharsetBegin = p;
                bool bDowncaseCharset = false;
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
                {
                    bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
                    ++p;
                }
                if (p == pCharsetBegin)
                    break;
                if (pParameters)
                {
                    aCharset = OString(
                        pCharsetBegin,
                        p - pCharsetBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseCharset)
                        aCharset = aCharset.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;

                // Language: '-' separated groups of one to eight ASCII
                // letters.  The breaks inside this for loop only leave the
                // loop; the check after it decides whether scanning goes on.
                // Note that an empty language leaves nLetters at 0 and so
                // ends the scan.
                sal_Unicode const * pLanguageBegin = p;
                bool bDowncaseLanguage = false;
                int nLetters = 0;
                for (; p != pEnd; ++p)
                    if (rtl::isAsciiAlpha(*p))
                    {
                        if (++nLetters > 8)
                            break;
                        bDowncaseLanguage = bDowncaseLanguage
                                            || rtl::isAsciiUpperCase(*p);
                    }
                    else if (*p == '-')
                    {
                        if (nLetters == 0)
                            break;
                        nLetters = 0;
                    }
                    else
                        break;
                if (nLetters == 0 || nLetters > 8)
                    break;
                if (pParameters)
                {
                    aLanguage = OString(
                        pLanguageBegin,
                        p - pLanguageBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseLanguage)
                        aLanguage = aLanguage.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;
            }
            if (pParameters)
            {
                // Collect the value: "%XY" with two hex digits becomes the
                // byte 0xXY, every other character is written as UTF-8.  The
                // value ends at the first ASCII character that is not a
                // token character.
                // NOTE(review): INetMIME::getUTF32Character() presumably
                // decodes one character and advances its first argument;
                // confirm against its definition.
                OStringBuffer aSink;
                while (p != pEnd)
                {
                    auto q = p;
                    sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
                        break;
                    p = q;
                    if (nChar == '%' && p + 1 < pEnd)
                    {
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
                        if (nWeight1 >= 0 && nWeight2 >= 0)
                        {
                            aSink.append(char(nWeight1 << 4 | nWeight2));
                            p += 2;
                            continue;
                        }
                    }
                    writeUTF8(aSink, nChar);
                }
                aValue = aSink.makeStringAndClear();
            }
            else
                // No output requested: just skip over the value.
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                    ++p;
        }
        else if (p != pEnd && *p == '"')
            // Quoted-string value.
            if (pParameters)
            {
                OStringBuffer aSink(256);
                bool bInvalid = false;
                for (++p;;)
                {
                    // Reaching the end before the closing quote is an error.
                    if (p == pEnd)
                    {
                        bInvalid = true;
                        break;
                    }
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (nChar == '"')
                        break;
                    else if (nChar == 0x0D) // CR
                    {
                        // CR must start a folded line break; the white space
                        // character after CR LF is what gets stored.
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
                            || !isWhiteSpace(*p))
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = static_cast<unsigned char>(*p++);
                    }
                    else if (nChar == '\\')
                    {
                        // A backslash quotes the following character.
                        if (p == pEnd)
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = INetMIME::getUTF32Character(p, pEnd);
                    }
                    writeUTF8(aSink, nChar);
                }
                if (bInvalid)
                    break;
                aValue = aSink.makeStringAndClear();
            }
            else
            {
                // No output requested: just skip over the quoted string.
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
                if (p == pStringEnd)
                    break;
                p = pStringEnd;
            }
        else
        {
            // Plain value: token characters and any non-ASCII characters.
            sal_Unicode const * pTokenBegin = p;
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                ++p;
            if (p == pTokenBegin)
                break;
            if (pParameters)
                aValue = OString(
                    pTokenBegin, p - pTokenBegin,
                    RTL_TEXTENCODING_UTF8);
        }
        aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
    }
    // Bring the sections of each attribute together in ascending order, as
    // parseParameters() expects.
    aList.sort();
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
}
 701
 702 bool equalIgnoreCase(const char * pBegin1,
 703                                const char * pEnd1,
 704                                const char * pString2)
 705 {
 706     assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
 707                "equalIgnoreCase(): Bad sequences");
 708
 709     while (*pString2 != 0)
 710         if (pBegin1 == pEnd1
 711             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 712                 != rtl::toAsciiUpperCase(
 713                     static_cast<unsigned char>(*pString2++))))
 714             return false;
 715     return pBegin1 == pEnd1;
 716 }
 717
 718 struct EncodingEntry
 719 {
 720     char const * m_aName;
 721     rtl_TextEncoding m_eEncoding;
 722 };
 723
 724 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 725 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 726 // noted:
 727 EncodingEntry const aEncodingMap[]
 728     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 729         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 730         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 731         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 732         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 733         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 734         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 735         { "US", RTL_TEXTENCODING_ASCII_US },
 736         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 737         { "CP367", RTL_TEXTENCODING_ASCII_US },
 738         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 739         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 740         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 741         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 742         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 743         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 744         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 745         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 746         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 747         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 748         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 749         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 750         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 751         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 752         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 753         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 754         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 755         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 756         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 757         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 758         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 759         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 760         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 761         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 762         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 763         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 764         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 765         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 766         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 767         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 768         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 769         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 770         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 771         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 772         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 773         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 774         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 775         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 776         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 777         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 778         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 779         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 780         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 781         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 782         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 783         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 784         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 785         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 786         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 787         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 788         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 789         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 790         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 791         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 792         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 793         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 794         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 795         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 796         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 797         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 798         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 799         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 800         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 801         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 802         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 803         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 804         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 805         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 806         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 807         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 808         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 809         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 810         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 811         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 812         { "CP437", RTL_TEXTENCODING_IBM_437 },
 813         { "437", RTL_TEXTENCODING_IBM_437 },
 814         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 815         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 816         { "CP850", RTL_TEXTENCODING_IBM_850 },
 817         { "850", RTL_TEXTENCODING_IBM_850 },
 818         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 819         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 820         { "CP860", RTL_TEXTENCODING_IBM_860 },
 821         { "860", RTL_TEXTENCODING_IBM_860 },
 822         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 823         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 824         { "CP861", RTL_TEXTENCODING_IBM_861 },
 825         { "861", RTL_TEXTENCODING_IBM_861 },
 826         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 827         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 828         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 829         { "CP863", RTL_TEXTENCODING_IBM_863 },
 830         { "863", RTL_TEXTENCODING_IBM_863 },
 831         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 832         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 833         { "CP865", RTL_TEXTENCODING_IBM_865 },
 834         { "865", RTL_TEXTENCODING_IBM_865 },
 835         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 836         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 837         { "CP775", RTL_TEXTENCODING_IBM_775 },
 838         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 839         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 840         { "CP852", RTL_TEXTENCODING_IBM_852 },
 841         { "852", RTL_TEXTENCODING_IBM_852 },
 842         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 843         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 844         { "CP855", RTL_TEXTENCODING_IBM_855 },
 845         { "855", RTL_TEXTENCODING_IBM_855 },
 846         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 847         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 848         { "CP857", RTL_TEXTENCODING_IBM_857 },
 849         { "857", RTL_TEXTENCODING_IBM_857 },
 850         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 851         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 852         { "CP862", RTL_TEXTENCODING_IBM_862 },
 853         { "862", RTL_TEXTENCODING_IBM_862 },
 854         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 855         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 856         { "CP864", RTL_TEXTENCODING_IBM_864 },
 857         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 858         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 859         { "CP866", RTL_TEXTENCODING_IBM_866 },
 860         { "866", RTL_TEXTENCODING_IBM_866 },
 861         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 862         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 863         { "CP869", RTL_TEXTENCODING_IBM_869 },
 864         { "869", RTL_TEXTENCODING_IBM_869 },
 865         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 866         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 867         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 868         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 869         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 870         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 871         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 872         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 873         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 874         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 875         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 876         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 877         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 878         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 879         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 880         { "BIG5", RTL_TEXTENCODING_BIG5 },
 881         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 882         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 883         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 884           RTL_TEXTENCODING_EUC_JP },
 885         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 886         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 887         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 888         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 889         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 890         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 891         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 892         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 893         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 894         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 895         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 896         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 897         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 898         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 899         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 900         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 901         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 902         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 903
 904 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
 905                                               char const * pEnd)
 906 {
 907     for (const EncodingEntry& i : aEncodingMap)
 908         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 909             return i.m_eEncoding;
 910     return RTL_TEXTENCODING_DONTKNOW;
 911 }
 912
 913 }
 914
 915 //  INetMIME
 916
 917 // static
 918 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 919 {
 920     static const bool aMap[128]
 921         = { false, false, false, false, false, false, false, false,
 922             false, false, false, false, false, false, false, false,
 923             false, false, false, false, false, false, false, false,
 924             false, false, false, false, false, false, false, false,
 925             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 926             false, false,  true,  true, false,  true, false,  true, //()*+,-./
 927              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 928              true,  true, false, false, false,  true, false,  true, //89:;<=>?
 929             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 930              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 931              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 932              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 933              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 934              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 935              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 936              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 937           };
 938     return rtl::isAscii(nChar) && aMap[nChar];
 939 }
 940
 941 // static
 942 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 943 {
 944     static const bool aMap[128]
 945         = { false, false, false, false, false, false, false, false,
 946             false, false, false, false, false, false, false, false,
 947             false, false, false, false, false, false, false, false,
 948             false, false, false, false, false, false, false, false,
 949             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 950             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 951              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 952              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 953              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 954              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 955              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 956              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 957              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 958              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 959              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 960              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 961           };
 962     return rtl::isAscii(nChar) && aMap[nChar];
 963 }
 964
 965 // static
 966 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 967                                const sal_Unicode * pEnd1,
 968                                const char * pString2)
 969 {
 970     assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
 971                "INetMIME::equalIgnoreCase(): Bad sequences");
 972
 973     while (*pString2 != 0)
 974         if (pBegin1 == pEnd1
 975             || (rtl::toAsciiUpperCase(*pBegin1++)
 976                 != rtl::toAsciiUpperCase(
 977                     static_cast<unsigned char>(*pString2++))))
 978             return false;
 979     return pBegin1 == pEnd1;
 980 }
 981
 982 // static
 983 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
 984                             const sal_Unicode * pEnd, bool bLeadingZeroes,
 985                             sal_uInt32 & rValue)
 986 {
 987     sal_uInt64 nTheValue = 0;
 988     const sal_Unicode * p = rBegin;
 989     for ( ; p != pEnd; ++p)
 990     {
 991         int nWeight = getWeight(*p);
 992         if (nWeight < 0)
 993             break;
 994         nTheValue = 10 * nTheValue + nWeight;
 995         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 996             return false;
 997     }
 998     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 999         return false;
1000     rBegin = p;
1001     rValue = sal_uInt32(nTheValue);
1002     return true;
1003 }
1004
1005 // static
1006 sal_Unicode const * INetMIME::scanContentType(
1007     std::u16string_view rStr, OUString * pType,
1008     OUString * pSubType, INetContentTypeParameterList * pParameters)
1009 {
1010     sal_Unicode const * pBegin = rStr.data();
1011     sal_Unicode const * pEnd = pBegin + rStr.size();
1012     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1013     sal_Unicode const * pTypeBegin = p;
1014     while (p != pEnd && isTokenChar(*p))
1015     {
1016         ++p;
1017     }
1018     if (p == pTypeBegin)
1019         return nullptr;
1020     sal_Unicode const * pTypeEnd = p;
1021
1022     p = skipLinearWhiteSpaceComment(p, pEnd);
1023     if (p == pEnd || *p++ != '/')
1024         return nullptr;
1025
1026     p = skipLinearWhiteSpaceComment(p, pEnd);
1027     sal_Unicode const * pSubTypeBegin = p;
1028     while (p != pEnd && isTokenChar(*p))
1029     {
1030         ++p;
1031     }
1032     if (p == pSubTypeBegin)
1033         return nullptr;
1034     sal_Unicode const * pSubTypeEnd = p;
1035
1036     if (pType != nullptr)
1037     {
1038         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1039     }
1040     if (pSubType != nullptr)
1041     {
1042         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1043             .toAsciiLowerCase();
1044     }
1045
1046     return scanParameters(p, pEnd, pParameters);
1047 }
1048
// static
// Decodes a raw header field body into a Unicode string.
//
// The body is scanned from left to right.  At every '=' an attempt is made to
// parse an encoded word ("=?charset[*language]?B|Q?text?="); if it is well
// formed, its charset is known and accepted, and its payload can be converted
// by convertToUnicode(), the encoded word is replaced by the converted text.
// White space directly between two successfully decoded encoded words is
// dropped.  Everything else is copied through appendISO88591(), except for
// byte sequences accepted by translateUTF8Char(), which are appended as the
// character it yields.
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1.

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const char * pBegin = rBody.getStr();
    const char * pEnd = pBegin + rBody.getLength();

    OUStringBuffer sDecoded;
    // Start of the input that has not been appended to sDecoded yet.
    const char * pCopyBegin = pBegin;

    /* bool bStartEncodedWord = true; */
    // Start of the white space run following the most recently decoded
    // encoded word (equal to p whenever there is no such pending run).
    const char * pWSPBegin = pBegin;

    for (const char * p = pBegin; p != pEnd;)
    {
        if (*p == '=' /* && bStartEncodedWord */)
        {
            // q is the look-ahead cursor; p only moves once the whole encoded
            // word has been validated and converted.
            const char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                // Scan the charset and the optional "*language" suffix up to
                // the terminating '?'.
                const char * pCharsetBegin = q;
                // Points at the '*' while what follows it still looks like a
                // valid language tag, nullptr otherwise.
                const char * pLanguageBegin = nullptr;
                // Number of letters in the current language subtag.
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                if (pLanguageBegin != nullptr)
                                {
                                    // An empty subtag invalidates the
                                    // language tag; otherwise start counting
                                    // the next subtag.
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                // An empty charset is not allowed.
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // The charset name ends at the '*' only
                                    // if a valid language tag with a
                                    // non-empty last subtag follows;
                                    // otherwise everything up to the '?' is
                                    // taken as the charset name.
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                              pCharsetBegin,
                                              pLanguageBegin == nullptr
                                              || nAlphaCount == 0 ?
                                                  q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                                       eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                // Within a language tag only letters are
                                // allowed, at most eight per subtag.
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(
                                            static_cast<unsigned char>(cChar))
                                        || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
            }

            // Encoding selector: 'B' (Base 64) or 'Q', in either case.
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            // The encoding selector must be followed by '?'.
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Receives the decoded payload bytes (still in the charset named
            // by the encoded word).
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    // Decode groups of four Base 64 characters until the
                    // closing '?' is found.
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            // Set once a padding character has been seen in
                            // this group.
                            bool bFinal = false;
                            // Number of payload bytes this group yields.
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                // NOTE(review): -2 is treated as "invalid
                                // character" and -1 as "padding" here;
                                // confirm against getBase64Weight().
                                int nWeight = getBase64Weight(*q++);
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is not allowed in the
                                        // first two positions of a group.
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(char(nValue >> nShift & 0xFF));
                                // NOTE(review): q may equal pEnd here, so
                                // this read relies on the buffer returned by
                                // rBody.getStr() being NUL-terminated.
                                if (*q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded group must be the last one.
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    // 'Q' encoding: literal runs are copied in bulk from
                    // rBody; "=XX" and '_' are translated individually.
                    const char * pEncodedTextBegin = q;
                    // Start of the literal run not yet appended to sText.
                    const char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            sal_uInt32 nChar = static_cast<unsigned char>(*q++);
                            switch (nChar)
                            {
                                case '=':
                                {
                                    // '=' must be followed by two hex digits.
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(q[0]);
                                    int nDigit2 = getHexWeight(q[1]);
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    // Flush the pending literal run, then the
                                    // decoded byte.
                                    sText.append(
                                        rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin))
                                        + OStringChar(char(nDigit1 << 4 | nDigit2)));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // End of the encoded text, which must not
                                    // be empty.
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    // '_' stands for a space.
                                    sText.append(
                                        rBody.subView(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin))
                                        + OString::Concat(" "));
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                }
            }

            // The '?' ending the encoded text must be followed by '='.
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer
                    = convertToUnicode(sText.getStr(),
                                       sText.getStr() + sText.getLength(),
                                       eCharsetEncoding, nUnicodeSize);
                // A failed conversion demotes the encoded word to plain text.
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Flush the pending plain text only up to pWSPBegin, which
                // drops white space between two adjacent encoded words.
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded.append(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                pUnicodeBuffer.reset();
                p = q;
                pCopyBegin = p;

                // Skip white space after the encoded word, but remember where
                // it started: it is only dropped if another encoded word
                // follows.
                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        if (p == pEnd)
            break;

        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                // Try to interpret the bytes starting at the current
                // character as a UTF-8 sequence; if that succeeds, append the
                // resulting character instead of the raw bytes.
                const char * pUTF8Begin = p - 1;
                const char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sDecoded.appendUtf32(nCharacter);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        // Plain text was consumed, so there is no pending white space run.
        pWSPBegin = p;
    }

    // Flush whatever plain text is left.
    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded.makeStringAndClear();
}
1382
1383 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */