tools/source/inet/inetmime.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <algorithm>
  21 #include <limits>
  22 #include <forward_list>
  23 #include <memory>
  24
  25 #include <sal/log.hxx>
  26 #include <rtl/ustring.hxx>
  27 #include <rtl/strbuf.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/inetmime.hxx>
  31 #include <rtl/character.hxx>
  32
  33 namespace {
  34
  35 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
  36                                            const char * pEnd);
  37
  38 /** Check for US-ASCII white space character.
  39
  40     @param nChar  Some UCS-4 character.
  41
  42     @return  True if nChar is a US-ASCII white space character (US-ASCII
  43     0x09 or 0x20).
  44  */
  45 bool isWhiteSpace(sal_uInt32 nChar)
  46 {
  47     return nChar == '\t' || nChar == ' ';
  48 }
  49
  50 /** Get the Base 64 digit weight of a US-ASCII character.
  51
  52     @param nChar  Some UCS-4 character.
  53
  54     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
  55     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
  56     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
  57     character (US-ASCII '='), return -1; otherwise, return -2.
  58  */
  59 int getBase64Weight(sal_uInt32 nChar)
  60 {
  61     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
  62            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
  63            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
  64            nChar == '+' ? 62 :
  65            nChar == '/' ? 63 :
  66            nChar == '=' ? -1 : -2;
  67 }
  68
  69 bool startsWithLineFolding(const sal_Unicode * pBegin,
  70                                             const sal_Unicode * pEnd)
  71 {
  72     DBG_ASSERT(pBegin && pBegin <= pEnd,
  73                "startsWithLineFolding(): Bad sequence");
  74
  75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
  76            && isWhiteSpace(pBegin[2]); // CR, LF
  77 }
  78
  79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
  80                                                         eEncoding)
  81 {
  82 #if defined(_WIN32)
  83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
  84                RTL_TEXTENCODING_MS_1252 : eEncoding;
  85 #else
  86     return eEncoding;
  87 #endif
  88 }
  89
  90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
  91 {
  92     return rtl_isOctetTextEncoding(eEncoding);
  93 }
  94
  95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
  96                                          const char * pEnd,
  97                                          rtl_TextEncoding eEncoding,
  98                                          sal_Size & rSize)
  99 {
 100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
 101         return nullptr;
 102     rtl_TextToUnicodeConverter hConverter
 103         = rtl_createTextToUnicodeConverter(eEncoding);
 104     rtl_TextToUnicodeContext hContext
 105         = rtl_createTextToUnicodeContext(hConverter);
 106     std::unique_ptr<sal_Unicode[]> pBuffer;
 107     sal_uInt32 nInfo;
 108     for (sal_Size nBufferSize = pEnd - pBegin;;
 109          nBufferSize += nBufferSize / 3 + 1)
 110     {
 111         pBuffer.reset(new sal_Unicode[nBufferSize]);
 112         sal_Size nSrcCvtBytes;
 113         rSize = rtl_convertTextToUnicode(
 114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
 115                     nBufferSize,
 116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
 117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
 118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
 119                     &nInfo, &nSrcCvtBytes);
 120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
 121             break;
 122         pBuffer.reset();
 123         rtl_resetTextToUnicodeContext(hConverter, hContext);
 124     }
 125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
 126     rtl_destroyTextToUnicodeConverter(hConverter);
 127     if (nInfo != 0)
 128     {
 129         pBuffer.reset();
 130     }
 131     return pBuffer;
 132 }
 133
 134 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
 135
 136     @param pBuffer  Points to a buffer, must not be null.
 137
 138     @param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
 139
 140     @return  A pointer past the UTF-16 characters put into the buffer
 141     (i.e., pBuffer + 1 or pBuffer + 2).
 142  */
 143 sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
 144                                                  sal_uInt32 nUTF32)
 145 {
 146     DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
 147     if (nUTF32 < 0x10000)
 148         *pBuffer++ = sal_Unicode(nUTF32);
 149     else
 150     {
 151         nUTF32 -= 0x10000;
 152         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
 153         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
 154     }
 155     return pBuffer;
 156 }
 157
 158 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 159 {
 160     // See RFC 2279 for a discussion of UTF-8.
 161     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
 162
 163     if (nChar < 0x80)
 164         rSink.append(char(nChar));
 165     else if (nChar < 0x800)
 166         rSink.append(char(nChar >> 6 | 0xC0))
 167              .append(char((nChar & 0x3F) | 0x80));
 168     else if (nChar < 0x10000)
 169         rSink.append(char(nChar >> 12 | 0xE0))
 170              .append(char((nChar >> 6 & 0x3F) | 0x80))
 171              .append(char((nChar & 0x3F) | 0x80));
 172     else if (nChar < 0x200000)
 173         rSink.append(char(nChar >> 18 | 0xF0))
 174              .append(char((nChar >> 12 & 0x3F) | 0x80))
 175              .append(char((nChar >> 6 & 0x3F) | 0x80))
 176              .append(char((nChar & 0x3F) | 0x80));
 177     else if (nChar < 0x4000000)
 178         rSink.append(char(nChar >> 24 | 0xF8))
 179              .append(char((nChar >> 18 & 0x3F) | 0x80))
 180              .append(char((nChar >> 12 & 0x3F) | 0x80))
 181              .append(char((nChar >> 6 & 0x3F) | 0x80))
 182              .append(char((nChar & 0x3F) | 0x80));
 183     else
 184         rSink.append(char(nChar >> 30 | 0xFC))
 185              .append(char((nChar >> 24 & 0x3F) | 0x80))
 186              .append(char((nChar >> 18 & 0x3F) | 0x80))
 187              .append(char((nChar >> 12 & 0x3F) | 0x80))
 188              .append(char((nChar >> 6 & 0x3F) | 0x80))
 189              .append(char((nChar & 0x3F) | 0x80));
 190 }
 191
 192 bool translateUTF8Char(const char *& rBegin,
 193                                  const char * pEnd,
 194                                  sal_uInt32 & rCharacter)
 195 {
 196     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
 197         || static_cast< unsigned char >(*rBegin) >= 0xFE)
 198         return false;
 199
 200     int nCount;
 201     sal_uInt32 nMin;
 202     sal_uInt32 nUCS4;
 203     const char * p = rBegin;
 204     if (static_cast< unsigned char >(*p) < 0xE0)
 205     {
 206         nCount = 1;
 207         nMin = 0x80;
 208         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
 209     }
 210     else if (static_cast< unsigned char >(*p) < 0xF0)
 211     {
 212         nCount = 2;
 213         nMin = 0x800;
 214         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
 215     }
 216     else if (static_cast< unsigned char >(*p) < 0xF8)
 217     {
 218         nCount = 3;
 219         nMin = 0x10000;
 220         nUCS4 = static_cast< unsigned char >(*p) & 7;
 221     }
 222     else if (static_cast< unsigned char >(*p) < 0xFC)
 223     {
 224         nCount = 4;
 225         nMin = 0x200000;
 226         nUCS4 = static_cast< unsigned char >(*p) & 3;
 227     }
 228     else
 229     {
 230         nCount = 5;
 231         nMin = 0x4000000;
 232         nUCS4 = static_cast< unsigned char >(*p) & 1;
 233     }
 234     ++p;
 235
 236     for (; nCount-- > 0; ++p)
 237         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
 238             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
 239         else
 240             return false;
 241
 242     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
 243         return false;
 244
 245     rCharacter = nUCS4;
 246     rBegin = p;
 247     return true;
 248 }
 249
 250 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 251                     char const * pEnd);
 252
 253 struct Parameter
 254 {
 255     OString m_aAttribute;
 256     OString m_aCharset;
 257     OString m_aLanguage;
 258     OString m_aValue;
 259     sal_uInt32 m_nSection;
 260     bool m_bExtended;
 261
 262     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
 263     {
 264         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
 265         return nComp < 0 ||
 266                 (nComp == 0 && m_nSection < rhs.m_nSection);
 267     }
 268     struct IsSameSection // is used to check container for duplicates with std::any_of
 269     {
 270         const OString& rAttribute;
 271         const sal_uInt32 nSection;
 272         bool operator()(const Parameter& r) const
 273         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
 274     };
 275 };
 276
 277 typedef std::forward_list<Parameter> ParameterList;
 278
 279 bool parseParameters(ParameterList const & rInput,
 280                      INetContentTypeParameterList * pOutput);
 281
 282 //  appendISO88591
 283
 284 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
 285                     char const * pEnd)
 286 {
 287     sal_Int32 nLength = pEnd - pBegin;
 288     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
 289     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
 290         *p++ = static_cast<unsigned char>(*pBegin++);
 291     rText.append(pBuffer.get(), nLength);
 292 }
 293
 294 //  parseParameters
 295
 296 bool parseParameters(ParameterList const & rInput,
 297                      INetContentTypeParameterList * pOutput)
 298 {
 299     if (pOutput)
 300         pOutput->clear();
 301
 302     for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
 303     {
 304         if (it->m_nSection > 0
 305             && (itPrev == rInput.end()
 306                 || itPrev->m_nSection != it->m_nSection - 1
 307                 || itPrev->m_aAttribute != it->m_aAttribute))
 308             return false;
 309     }
 310
 311     if (pOutput)
 312         for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
 313         {
 314             bool bCharset = !it->m_aCharset.isEmpty();
 315             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 316             if (bCharset)
 317                 eEncoding
 318                     = getCharsetEncoding(it->m_aCharset.getStr(),
 319                                                    it->m_aCharset.getStr()
 320                                                        + it->m_aCharset.getLength());
 321             OUStringBuffer aValue(64);
 322             bool bBadEncoding = false;
 323             itNext = it;
 324             do
 325             {
 326                 sal_Size nSize;
 327                 std::unique_ptr<sal_Unicode[]> pUnicode
 328                     = convertToUnicode(itNext->m_aValue.getStr(),
 329                                                  itNext->m_aValue.getStr()
 330                                                      + itNext->m_aValue.getLength(),
 331                                                  bCharset && it->m_bExtended ?
 332                                                      eEncoding :
 333                                                      RTL_TEXTENCODING_UTF8,
 334                                                  nSize);
 335                 if (!pUnicode && !(bCharset && it->m_bExtended))
 336                     pUnicode = convertToUnicode(
 337                                    itNext->m_aValue.getStr(),
 338                                    itNext->m_aValue.getStr()
 339                                        + itNext->m_aValue.getLength(),
 340                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 341                 if (!pUnicode)
 342                 {
 343                     bBadEncoding = true;
 344                     break;
 345                 }
 346                 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
 347                 ++itNext;
 348             }
 349             while (itNext != rInput.end() && itNext->m_nSection != 0);
 350
 351             if (bBadEncoding)
 352             {
 353                 aValue.setLength(0);
 354                 itNext = it;
 355                 do
 356                 {
 357                     if (itNext->m_bExtended)
 358                     {
 359                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 360                             aValue.append(
 361                                 static_cast<sal_Unicode>(
 362                                     static_cast<unsigned char>(itNext->m_aValue[i])
 363                                     | 0xF800)); // map to unicode corporate use sub area
 364                     }
 365                     else
 366                     {
 367                         for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
 368                             aValue.append( itNext->m_aValue[i] );
 369                     }
 370                     ++itNext;
 371                 }
 372                 while (itNext != rInput.end() && itNext->m_nSection != 0);
 373             }
 374             auto const ret = pOutput->insert(
 375                 {it->m_aAttribute,
 376                  {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
 377             SAL_INFO_IF(!ret.second, "tools",
 378                 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
 379         }
 380     return true;
 381 }
 382
 383 /** Check whether some character is valid within an RFC 2045 <token>.
 384
 385     @param nChar  Some UCS-4 character.
 386
 387     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 388     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 389     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 390  */
 391 bool isTokenChar(sal_uInt32 nChar)
 392 {
 393     static const bool aMap[128]
 394         = { false, false, false, false, false, false, false, false,
 395             false, false, false, false, false, false, false, false,
 396             false, false, false, false, false, false, false, false,
 397             false, false, false, false, false, false, false, false,
 398             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 399             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 400              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 401              true,  true, false, false, false, false, false, false, //89:;<=>?
 402             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 403              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 404              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 405              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 406              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 407              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 408              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 409              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 410           };
 411     return rtl::isAscii(nChar) && aMap[nChar];
 412 }
 413
 414 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 415                                           const sal_Unicode * pEnd)
 416 {
 417     DBG_ASSERT(pBegin && pBegin <= pEnd,
 418                "skipComment(): Bad sequence");
 419
 420     if (pBegin != pEnd && *pBegin == '(')
 421     {
 422         sal_uInt32 nLevel = 0;
 423         for (const sal_Unicode * p = pBegin; p != pEnd;)
 424             switch (*p++)
 425             {
 426                 case '(':
 427                     ++nLevel;
 428                     break;
 429
 430                 case ')':
 431                     if (--nLevel == 0)
 432                         return p;
 433                     break;
 434
 435                 case '\\':
 436                     if (p != pEnd)
 437                         ++p;
 438                     break;
 439             }
 440     }
 441     return pBegin;
 442 }
 443
 444 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 445                                                               pBegin,
 446                                                           const sal_Unicode *
 447                                                               pEnd)
 448 {
 449     DBG_ASSERT(pBegin && pBegin <= pEnd,
 450                "skipLinearWhiteSpaceComment(): Bad sequence");
 451
 452     while (pBegin != pEnd)
 453         switch (*pBegin)
 454         {
 455             case '\t':
 456             case ' ':
 457                 ++pBegin;
 458                 break;
 459
 460             case 0x0D: // CR
 461                 if (startsWithLineFolding(pBegin, pEnd))
 462                     pBegin += 3;
 463                 else
 464                     return pBegin;
 465                 break;
 466
 467             case '(':
 468             {
 469                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 470                 if (p == pBegin)
 471                     return pBegin;
 472                 pBegin = p;
 473                 break;
 474             }
 475
 476             default:
 477                 return pBegin;
 478         }
 479     return pBegin;
 480 }
 481
 482 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 483                                                const sal_Unicode * pEnd)
 484 {
 485     DBG_ASSERT(pBegin && pBegin <= pEnd,
 486                "skipQuotedString(): Bad sequence");
 487
 488     if (pBegin != pEnd && *pBegin == '"')
 489         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 490             switch (*p++)
 491             {
 492                 case 0x0D: // CR
 493                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 494                         || !isWhiteSpace(*p++))
 495                         return pBegin;
 496                     break;
 497
 498                 case '"':
 499                     return p;
 500
 501                 case '\\':
 502                     if (p != pEnd)
 503                         ++p;
 504                     break;
 505             }
 506     return pBegin;
 507 }
 508
 509 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
 510                                              sal_Unicode const * pEnd,
 511                                              INetContentTypeParameterList *
 512                                                  pParameters)
 513 {
 514     ParameterList aList;
 515     sal_Unicode const * pParameterBegin = pBegin;
 516     for (sal_Unicode const * p = pParameterBegin;;)
 517     {
 518         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
 519         if (pParameterBegin == pEnd || *pParameterBegin != ';')
 520             break;
 521         p = pParameterBegin + 1;
 522
 523         sal_Unicode const * pAttributeBegin
 524             = skipLinearWhiteSpaceComment(p, pEnd);
 525         p = pAttributeBegin;
 526         bool bDowncaseAttribute = false;
 527         while (p != pEnd && isTokenChar(*p) && *p != '*')
 528         {
 529             bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
 530             ++p;
 531         }
 532         if (p == pAttributeBegin)
 533             break;
 534         OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
 535         if (bDowncaseAttribute)
 536             aAttribute = aAttribute.toAsciiLowerCase();
 537
 538         sal_uInt32 nSection = 0;
 539         if (p != pEnd && *p == '*')
 540         {
 541             ++p;
 542             if (p != pEnd && rtl::isAsciiDigit(*p)
 543                 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
 544                 break;
 545         }
 546
 547         bool bPresent = std::any_of(aList.begin(), aList.end(),
 548                                     Parameter::IsSameSection{aAttribute, nSection});
 549         if (bPresent)
 550             break;
 551
 552         bool bExtended = false;
 553         if (p != pEnd && *p == '*')
 554         {
 555             ++p;
 556             bExtended = true;
 557         }
 558
 559         p = skipLinearWhiteSpaceComment(p, pEnd);
 560
 561         if (p == pEnd || *p != '=')
 562             break;
 563
 564         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
 565
 566         OString aCharset;
 567         OString aLanguage;
 568         OString aValue;
 569         if (bExtended)
 570         {
 571             if (nSection == 0)
 572             {
 573                 sal_Unicode const * pCharsetBegin = p;
 574                 bool bDowncaseCharset = false;
 575                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
 576                 {
 577                     bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
 578                     ++p;
 579                 }
 580                 if (p == pCharsetBegin)
 581                     break;
 582                 if (pParameters)
 583                 {
 584                     aCharset = OString(
 585                         pCharsetBegin,
 586                         p - pCharsetBegin,
 587                         RTL_TEXTENCODING_ASCII_US);
 588                     if (bDowncaseCharset)
 589                         aCharset = aCharset.toAsciiLowerCase();
 590                 }
 591
 592                 if (p == pEnd || *p != '\'')
 593                     break;
 594                 ++p;
 595
 596                 sal_Unicode const * pLanguageBegin = p;
 597                 bool bDowncaseLanguage = false;
 598                 int nLetters = 0;
 599                 for (; p != pEnd; ++p)
 600                     if (rtl::isAsciiAlpha(*p))
 601                     {
 602                         if (++nLetters > 8)
 603                             break;
 604                         bDowncaseLanguage = bDowncaseLanguage
 605                                             || rtl::isAsciiUpperCase(*p);
 606                     }
 607                     else if (*p == '-')
 608                     {
 609                         if (nLetters == 0)
 610                             break;
 611                         nLetters = 0;
 612                     }
 613                     else
 614                         break;
 615                 if (nLetters == 0 || nLetters > 8)
 616                     break;
 617                 if (pParameters)
 618                 {
 619                     aLanguage = OString(
 620                         pLanguageBegin,
 621                         p - pLanguageBegin,
 622                         RTL_TEXTENCODING_ASCII_US);
 623                     if (bDowncaseLanguage)
 624                         aLanguage = aLanguage.toAsciiLowerCase();
 625                 }
 626
 627                 if (p == pEnd || *p != '\'')
 628                     break;
 629                 ++p;
 630             }
 631             if (pParameters)
 632             {
 633                 OStringBuffer aSink;
 634                 while (p != pEnd)
 635                 {
 636                     auto q = p;
 637                     sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
 638                     if (rtl::isAscii(nChar) && !isTokenChar(nChar))
 639                         break;
 640                     p = q;
 641                     if (nChar == '%' && p + 1 < pEnd)
 642                     {
 643                         int nWeight1 = INetMIME::getHexWeight(p[0]);
 644                         int nWeight2 = INetMIME::getHexWeight(p[1]);
 645                         if (nWeight1 >= 0 && nWeight2 >= 0)
 646                         {
 647                             aSink.append(char(nWeight1 << 4 | nWeight2));
 648                             p += 2;
 649                             continue;
 650                         }
 651                     }
 652                     writeUTF8(aSink, nChar);
 653                 }
 654                 aValue = aSink.makeStringAndClear();
 655             }
 656             else
 657                 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 658                     ++p;
 659         }
 660         else if (p != pEnd && *p == '"')
 661             if (pParameters)
 662             {
 663                 OStringBuffer aSink(256);
 664                 bool bInvalid = false;
 665                 for (++p;;)
 666                 {
 667                     if (p == pEnd)
 668                     {
 669                         bInvalid = true;
 670                         break;
 671                     }
 672                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
 673                     if (nChar == '"')
 674                         break;
 675                     else if (nChar == 0x0D) // CR
 676                     {
 677                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 678                             || !isWhiteSpace(*p))
 679                         {
 680                             bInvalid = true;
 681                             break;
 682                         }
 683                         nChar = static_cast<unsigned char>(*p++);
 684                     }
 685                     else if (nChar == '\\')
 686                     {
 687                         if (p == pEnd)
 688                         {
 689                             bInvalid = true;
 690                             break;
 691                         }
 692                         nChar = INetMIME::getUTF32Character(p, pEnd);
 693                     }
 694                     writeUTF8(aSink, nChar);
 695                 }
 696                 if (bInvalid)
 697                     break;
 698                 aValue = aSink.makeStringAndClear();
 699             }
 700             else
 701             {
 702                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
 703                 if (p == pStringEnd)
 704                     break;
 705                 p = pStringEnd;
 706             }
 707         else
 708         {
 709             sal_Unicode const * pTokenBegin = p;
 710             while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
 711                 ++p;
 712             if (p == pTokenBegin)
 713                 break;
 714             if (pParameters)
 715                 aValue = OString(
 716                     pTokenBegin, p - pTokenBegin,
 717                     RTL_TEXTENCODING_UTF8);
 718         }
 719         aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
 720     }
 721     aList.sort();
 722     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
 723 }
 724
 725 bool equalIgnoreCase(const char * pBegin1,
 726                                const char * pEnd1,
 727                                const char * pString2)
 728 {
 729     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 730                "equalIgnoreCase(): Bad sequences");
 731
 732     while (*pString2 != 0)
 733         if (pBegin1 == pEnd1
 734             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
 735                 != rtl::toAsciiUpperCase(
 736                     static_cast<unsigned char>(*pString2++))))
 737             return false;
 738     return pBegin1 == pEnd1;
 739 }
 740
 741 struct EncodingEntry
 742 {
 743     char const * m_aName;
 744     rtl_TextEncoding m_eEncoding;
 745 };
 746
 747 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
 748 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
 749 // noted:
 750 EncodingEntry const aEncodingMap[]
 751     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
 752         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
 753         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
 754         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
 755         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
 756         { "ASCII", RTL_TEXTENCODING_ASCII_US },
 757         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
 758         { "US", RTL_TEXTENCODING_ASCII_US },
 759         { "IBM367", RTL_TEXTENCODING_ASCII_US },
 760         { "CP367", RTL_TEXTENCODING_ASCII_US },
 761         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
 762         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 763         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
 764         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
 765         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
 766         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 767         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
 768         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
 769         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
 770         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
 771         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 772         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
 773         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
 774         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
 775         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 776         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
 777         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
 778         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 779         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
 780         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
 781         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
 782         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 783         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
 784         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
 785         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 786         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
 787         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
 788         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
 789         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 790         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
 791         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
 792         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 793         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
 794         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
 795         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
 796         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 797         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
 798         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 799         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
 800         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
 801         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
 802         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
 803         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
 804         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 805         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
 806         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 807         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
 808         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
 809         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
 810         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
 811         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
 812         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
 813         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
 814         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
 815         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 816         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
 817         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
 818         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
 819         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 820         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
 821         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 822         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
 823         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
 824         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
 825         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 826         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
 827         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
 828         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
 829         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
 830         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
 831         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 832         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
 833         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
 834         { "IBM437", RTL_TEXTENCODING_IBM_437 },
 835         { "CP437", RTL_TEXTENCODING_IBM_437 },
 836         { "437", RTL_TEXTENCODING_IBM_437 },
 837         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
 838         { "IBM850", RTL_TEXTENCODING_IBM_850 },
 839         { "CP850", RTL_TEXTENCODING_IBM_850 },
 840         { "850", RTL_TEXTENCODING_IBM_850 },
 841         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
 842         { "IBM860", RTL_TEXTENCODING_IBM_860 },
 843         { "CP860", RTL_TEXTENCODING_IBM_860 },
 844         { "860", RTL_TEXTENCODING_IBM_860 },
 845         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
 846         { "IBM861", RTL_TEXTENCODING_IBM_861 },
 847         { "CP861", RTL_TEXTENCODING_IBM_861 },
 848         { "861", RTL_TEXTENCODING_IBM_861 },
 849         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
 850         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
 851         { "IBM863", RTL_TEXTENCODING_IBM_863 },
 852         { "CP863", RTL_TEXTENCODING_IBM_863 },
 853         { "863", RTL_TEXTENCODING_IBM_863 },
 854         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
 855         { "IBM865", RTL_TEXTENCODING_IBM_865 },
 856         { "CP865", RTL_TEXTENCODING_IBM_865 },
 857         { "865", RTL_TEXTENCODING_IBM_865 },
 858         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
 859         { "IBM775", RTL_TEXTENCODING_IBM_775 },
 860         { "CP775", RTL_TEXTENCODING_IBM_775 },
 861         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
 862         { "IBM852", RTL_TEXTENCODING_IBM_852 },
 863         { "CP852", RTL_TEXTENCODING_IBM_852 },
 864         { "852", RTL_TEXTENCODING_IBM_852 },
 865         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
 866         { "IBM855", RTL_TEXTENCODING_IBM_855 },
 867         { "CP855", RTL_TEXTENCODING_IBM_855 },
 868         { "855", RTL_TEXTENCODING_IBM_855 },
 869         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
 870         { "IBM857", RTL_TEXTENCODING_IBM_857 },
 871         { "CP857", RTL_TEXTENCODING_IBM_857 },
 872         { "857", RTL_TEXTENCODING_IBM_857 },
 873         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
 874         { "IBM862", RTL_TEXTENCODING_IBM_862 },
 875         { "CP862", RTL_TEXTENCODING_IBM_862 },
 876         { "862", RTL_TEXTENCODING_IBM_862 },
 877         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
 878         { "IBM864", RTL_TEXTENCODING_IBM_864 },
 879         { "CP864", RTL_TEXTENCODING_IBM_864 },
 880         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
 881         { "IBM866", RTL_TEXTENCODING_IBM_866 },
 882         { "CP866", RTL_TEXTENCODING_IBM_866 },
 883         { "866", RTL_TEXTENCODING_IBM_866 },
 884         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
 885         { "IBM869", RTL_TEXTENCODING_IBM_869 },
 886         { "CP869", RTL_TEXTENCODING_IBM_869 },
 887         { "869", RTL_TEXTENCODING_IBM_869 },
 888         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
 889         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
 890         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
 891         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
 892         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
 893         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
 894         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
 895         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
 896         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
 897         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
 898         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
 899         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
 900         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
 901         { "GB2312", RTL_TEXTENCODING_GB_2312 },
 902         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
 903         { "BIG5", RTL_TEXTENCODING_BIG5 },
 904         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
 905         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
 906         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
 907           RTL_TEXTENCODING_EUC_JP },
 908         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
 909         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
 910         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
 911         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
 912         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
 913         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
 914         { "UTF-7", RTL_TEXTENCODING_UTF7 },
 915         { "UTF-8", RTL_TEXTENCODING_UTF8 },
 916         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
 917         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
 918         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
 919         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
 920         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
 921         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
 922         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
 923         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
 924         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
 925         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
 926
 927 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
 928                                               char const * pEnd)
 929 {
 930     for (const EncodingEntry& i : aEncodingMap)
 931         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
 932             return i.m_eEncoding;
 933     return RTL_TEXTENCODING_DONTKNOW;
 934 }
 935
 936 }
 937
 938 //  INetMIME
 939
 940 // static
 941 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 942 {
 943     static const bool aMap[128]
 944         = { false, false, false, false, false, false, false, false,
 945             false, false, false, false, false, false, false, false,
 946             false, false, false, false, false, false, false, false,
 947             false, false, false, false, false, false, false, false,
 948             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 949             false, false,  true,  true, false,  true, false,  true, //()*+,-./
 950              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 951              true,  true, false, false, false,  true, false,  true, //89:;<=>?
 952             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 953              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 954              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 955              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 956              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 957              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 958              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 959              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 960           };
 961     return rtl::isAscii(nChar) && aMap[nChar];
 962 }
 963
 964 // static
 965 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 966 {
 967     static const bool aMap[128]
 968         = { false, false, false, false, false, false, false, false,
 969             false, false, false, false, false, false, false, false,
 970             false, false, false, false, false, false, false, false,
 971             false, false, false, false, false, false, false, false,
 972             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 973             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 974              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 975              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 976              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 977              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 978              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 979              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 980              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 981              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 982              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 983              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 984           };
 985     return rtl::isAscii(nChar) && aMap[nChar];
 986 }
 987
 988 // static
 989 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 990                                const sal_Unicode * pEnd1,
 991                                const char * pString2)
 992 {
 993     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 994                "INetMIME::equalIgnoreCase(): Bad sequences");
 995
 996     while (*pString2 != 0)
 997         if (pBegin1 == pEnd1
 998             || (rtl::toAsciiUpperCase(*pBegin1++)
 999                 != rtl::toAsciiUpperCase(
1000                     static_cast<unsigned char>(*pString2++))))
1001             return false;
1002     return pBegin1 == pEnd1;
1003 }
1004
1005 // static
1006 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1007                             const sal_Unicode * pEnd, bool bLeadingZeroes,
1008                             sal_uInt32 & rValue)
1009 {
1010     sal_uInt64 nTheValue = 0;
1011     const sal_Unicode * p = rBegin;
1012     for ( ; p != pEnd; ++p)
1013     {
1014         int nWeight = getWeight(*p);
1015         if (nWeight < 0)
1016             break;
1017         nTheValue = 10 * nTheValue + nWeight;
1018         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1019             return false;
1020     }
1021     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1022         return false;
1023     rBegin = p;
1024     rValue = sal_uInt32(nTheValue);
1025     return true;
1026 }
1027
1028 // static
1029 sal_Unicode const * INetMIME::scanContentType(
1030     OUString const & rStr, OUString * pType,
1031     OUString * pSubType, INetContentTypeParameterList * pParameters)
1032 {
1033     sal_Unicode const * pBegin = rStr.getStr();
1034     sal_Unicode const * pEnd = pBegin + rStr.getLength();
1035     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1036     sal_Unicode const * pTypeBegin = p;
1037     while (p != pEnd && isTokenChar(*p))
1038     {
1039         ++p;
1040     }
1041     if (p == pTypeBegin)
1042         return nullptr;
1043     sal_Unicode const * pTypeEnd = p;
1044
1045     p = skipLinearWhiteSpaceComment(p, pEnd);
1046     if (p == pEnd || *p++ != '/')
1047         return nullptr;
1048
1049     p = skipLinearWhiteSpaceComment(p, pEnd);
1050     sal_Unicode const * pSubTypeBegin = p;
1051     while (p != pEnd && isTokenChar(*p))
1052     {
1053         ++p;
1054     }
1055     if (p == pSubTypeBegin)
1056         return nullptr;
1057     sal_Unicode const * pSubTypeEnd = p;
1058
1059     if (pType != nullptr)
1060     {
1061         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1062     }
1063     if (pSubType != nullptr)
1064     {
1065         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1066             .toAsciiLowerCase();
1067     }
1068
1069     return scanParameters(p, pEnd, pParameters);
1070 }
1071
1072 // static
1073 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1074 {
1075     // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1076     // versions of StarOffice send mails with header fields where encoded
1077     // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1078     // '=', ',', '.', '"', ')', without any required white space in between.
1079     // And there appear to exist some broken mailers that only encode single
1080     // letters within words, like "Appel
1081     // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1082     // detect encoded words even when not properly surrounded by white space.
1083
1084     // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1085
1086     // encoded-word = "=?"
1087     //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1088     //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1089     //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1090     //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1091     //     "?="
1092
1093     // base64 = ALPHA / DIGIT / "+" / "/"
1094
1095     const char * pBegin = rBody.getStr();
1096     const char * pEnd = pBegin + rBody.getLength();
1097
1098     OUStringBuffer sDecoded;
1099     const char * pCopyBegin = pBegin;
1100
1101     /* bool bStartEncodedWord = true; */
1102     const char * pWSPBegin = pBegin;
1103
1104     for (const char * p = pBegin; p != pEnd;)
1105     {
1106         if (*p == '=' /* && bStartEncodedWord */)
1107         {
1108             const char * q = p + 1;
1109             bool bEncodedWord = q != pEnd && *q++ == '?';
1110
1111             rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1112             if (bEncodedWord)
1113             {
1114                 const char * pCharsetBegin = q;
1115                 const char * pLanguageBegin = nullptr;
1116                 int nAlphaCount = 0;
1117                 for (bool bDone = false; !bDone;)
1118                     if (q == pEnd)
1119                     {
1120                         bEncodedWord = false;
1121                         bDone = true;
1122                     }
1123                     else
1124                     {
1125                         char cChar = *q++;
1126                         switch (cChar)
1127                         {
1128                             case '*':
1129                                 pLanguageBegin = q - 1;
1130                                 nAlphaCount = 0;
1131                                 break;
1132
1133                             case '-':
1134                                 if (pLanguageBegin != nullptr)
1135                                 {
1136                                     if (nAlphaCount == 0)
1137                                         pLanguageBegin = nullptr;
1138                                     else
1139                                         nAlphaCount = 0;
1140                                 }
1141                                 break;
1142
1143                             case '?':
1144                                 if (pCharsetBegin == q - 1)
1145                                     bEncodedWord = false;
1146                                 else
1147                                 {
1148                                     eCharsetEncoding
1149                                         = getCharsetEncoding(
1150                                               pCharsetBegin,
1151                                               pLanguageBegin == nullptr
1152                                               || nAlphaCount == 0 ?
1153                                                   q - 1 : pLanguageBegin);
1154                                     bEncodedWord = isMIMECharsetEncoding(
1155                                                        eCharsetEncoding);
1156                                     eCharsetEncoding
1157                                         = translateFromMIME(eCharsetEncoding);
1158                                 }
1159                                 bDone = true;
1160                                 break;
1161
1162                             default:
1163                                 if (pLanguageBegin != nullptr
1164                                     && (!rtl::isAsciiAlpha(
1165                                             static_cast<unsigned char>(cChar))
1166                                         || ++nAlphaCount > 8))
1167                                     pLanguageBegin = nullptr;
1168                                 break;
1169                         }
1170                     }
1171             }
1172
1173             bool bEncodingB = false;
1174             if (bEncodedWord)
1175             {
1176                 if (q == pEnd)
1177                     bEncodedWord = false;
1178                 else
1179                 {
1180                     switch (*q++)
1181                     {
1182                         case 'B':
1183                         case 'b':
1184                             bEncodingB = true;
1185                             break;
1186
1187                         case 'Q':
1188                         case 'q':
1189                             bEncodingB = false;
1190                             break;
1191
1192                         default:
1193                             bEncodedWord = false;
1194                             break;
1195                     }
1196                 }
1197             }
1198
1199             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1200
1201             OStringBuffer sText;
1202             if (bEncodedWord)
1203             {
1204                 if (bEncodingB)
1205                 {
1206                     for (bool bDone = false; !bDone;)
1207                     {
1208                         if (pEnd - q < 4)
1209                         {
1210                             bEncodedWord = false;
1211                             bDone = true;
1212                         }
1213                         else
1214                         {
1215                             bool bFinal = false;
1216                             int nCount = 3;
1217                             sal_uInt32 nValue = 0;
1218                             for (int nShift = 18; nShift >= 0; nShift -= 6)
1219                             {
1220                                 int nWeight = getBase64Weight(*q++);
1221                                 if (nWeight == -2)
1222                                 {
1223                                     bEncodedWord = false;
1224                                     bDone = true;
1225                                     break;
1226                                 }
1227                                 if (nWeight == -1)
1228                                 {
1229                                     if (!bFinal)
1230                                     {
1231                                         if (nShift >= 12)
1232                                         {
1233                                             bEncodedWord = false;
1234                                             bDone = true;
1235                                             break;
1236                                         }
1237                                         bFinal = true;
1238                                         nCount = nShift == 6 ? 1 : 2;
1239                                     }
1240                                 }
1241                                 else
1242                                     nValue |= nWeight << nShift;
1243                             }
1244                             if (bEncodedWord)
1245                             {
1246                                 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1247                                     sText.append(char(nValue >> nShift & 0xFF));
1248                                 if (*q == '?')
1249                                 {
1250                                     ++q;
1251                                     bDone = true;
1252                                 }
1253                                 if (bFinal && !bDone)
1254                                 {
1255                                     bEncodedWord = false;
1256                                     bDone = true;
1257                                 }
1258                             }
1259                         }
1260                     }
1261                 }
1262                 else
1263                 {
1264                     const char * pEncodedTextBegin = q;
1265                     const char * pEncodedTextCopyBegin = q;
1266                     for (bool bDone = false; !bDone;)
1267                         if (q == pEnd)
1268                         {
1269                             bEncodedWord = false;
1270                             bDone = true;
1271                         }
1272                         else
1273                         {
1274                             sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1275                             switch (nChar)
1276                             {
1277                                 case '=':
1278                                 {
1279                                     if (pEnd - q < 2)
1280                                     {
1281                                         bEncodedWord = false;
1282                                         bDone = true;
1283                                         break;
1284                                     }
1285                                     int nDigit1 = getHexWeight(q[0]);
1286                                     int nDigit2 = getHexWeight(q[1]);
1287                                     if (nDigit1 < 0 || nDigit2 < 0)
1288                                     {
1289                                         bEncodedWord = false;
1290                                         bDone = true;
1291                                         break;
1292                                     }
1293                                     sText.append(rBody.copy(
1294                                         (pEncodedTextCopyBegin - pBegin),
1295                                         (q - 1 - pEncodedTextCopyBegin)));
1296                                     sText.append(char(nDigit1 << 4 | nDigit2));
1297                                     q += 2;
1298                                     pEncodedTextCopyBegin = q;
1299                                     break;
1300                                 }
1301
1302                                 case '?':
1303                                     if (q - pEncodedTextBegin > 1)
1304                                         sText.append(rBody.copy(
1305                                             (pEncodedTextCopyBegin - pBegin),
1306                                             (q - 1 - pEncodedTextCopyBegin)));
1307                                     else
1308                                         bEncodedWord = false;
1309                                     bDone = true;
1310                                     break;
1311
1312                                 case '_':
1313                                     sText.append(rBody.copy(
1314                                         (pEncodedTextCopyBegin - pBegin),
1315                                         (q - 1 - pEncodedTextCopyBegin)));
1316                                     sText.append(' ');
1317                                     pEncodedTextCopyBegin = q;
1318                                     break;
1319
1320                                 default:
1321                                     if (!isVisible(nChar))
1322                                     {
1323                                         bEncodedWord = false;
1324                                         bDone = true;
1325                                     }
1326                                     break;
1327                             }
1328                         }
1329                 }
1330             }
1331
1332             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1333
1334             std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1335             sal_Size nUnicodeSize = 0;
1336             if (bEncodedWord)
1337             {
1338                 pUnicodeBuffer
1339                     = convertToUnicode(sText.getStr(),
1340                                        sText.getStr() + sText.getLength(),
1341                                        eCharsetEncoding, nUnicodeSize);
1342                 if (!pUnicodeBuffer)
1343                     bEncodedWord = false;
1344             }
1345
1346             if (bEncodedWord)
1347             {
1348                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1349                 sDecoded.append(
1350                     pUnicodeBuffer.get(),
1351                     static_cast< sal_Int32 >(nUnicodeSize));
1352                 pUnicodeBuffer.reset();
1353                 p = q;
1354                 pCopyBegin = p;
1355
1356                 pWSPBegin = p;
1357                 while (p != pEnd && isWhiteSpace(*p))
1358                     ++p;
1359                 /* bStartEncodedWord = p != pWSPBegin; */
1360                 continue;
1361             }
1362         }
1363
1364         if (p == pEnd)
1365             break;
1366
1367         switch (*p++)
1368         {
1369             case '"':
1370                 /* bStartEncodedWord = true; */
1371                 break;
1372
1373             case '(':
1374                 /* bStartEncodedWord = true; */
1375                 break;
1376
1377             case ')':
1378                 /* bStartEncodedWord = false; */
1379                 break;
1380
1381             default:
1382             {
1383                 const char * pUTF8Begin = p - 1;
1384                 const char * pUTF8End = pUTF8Begin;
1385                 sal_uInt32 nCharacter = 0;
1386                 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1387                 {
1388                     appendISO88591(sDecoded, pCopyBegin, p - 1);
1389                     sal_Unicode aUTF16Buf[2];
1390                     sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
1391                     sDecoded.append(aUTF16Buf, nUTF16Len);
1392                     p = pUTF8End;
1393                     pCopyBegin = p;
1394                 }
1395                 /* bStartEncodedWord = false; */
1396                 break;
1397             }
1398         }
1399         pWSPBegin = p;
1400     }
1401
1402     appendISO88591(sDecoded, pCopyBegin, pEnd);
1403     return sDecoded.makeStringAndClear();
1404 }
1405
1406 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */