tools/source/inet/inetmime.cxx

   1 /*************************************************************************
   2  *
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * Copyright 2008 by Sun Microsystems, Inc.
   6  *
   7  * OpenOffice.org - a multi-platform office productivity suite
   8  *
   9  * $RCSfile: inetmime.cxx,v $
  10  * $Revision: 1.14 $
  11  *
  12  * This file is part of OpenOffice.org.
  13  *
  14  * OpenOffice.org is free software: you can redistribute it and/or modify
  15  * it under the terms of the GNU Lesser General Public License version 3
  16  * only, as published by the Free Software Foundation.
  17  *
  18  * OpenOffice.org is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU Lesser General Public License version 3 for more details
  22  * (a copy is included in the LICENSE file that accompanied this code).
  23  *
  24  * You should have received a copy of the GNU Lesser General Public License
  25  * version 3 along with OpenOffice.org.  If not, see
  26  * <http://www.openoffice.org/license.html>
  27  * for a copy of the LGPLv3 License.
  28  *
  29  ************************************************************************/
  30
  31 // MARKER(update_precomp.py): autogen include statement, do not remove
  32 #include "precompiled_tools.hxx"
  33
  34 #include <cstddef>
  35 #include <limits>
  36
  37 #include "rtl/tencinfo.h"
  38 #include <tools/datetime.hxx>
  39 #include <tools/inetmime.hxx>
  40
  41 namespace unnamed_tools_inetmime {} using namespace unnamed_tools_inetmime;
  42     // unnamed namespaces don't work well yet
  43
  44 //============================================================================
  45 namespace unnamed_tools_inetmime {
  46
  47 class Charset
  48 {
  49     rtl_TextEncoding m_eEncoding;
  50     const sal_uInt32 * m_pRanges;
  51
  52 public:
  53     inline Charset(rtl_TextEncoding eTheEncoding,
  54                    const sal_uInt32 * pTheRanges);
  55
  56     rtl_TextEncoding getEncoding() const { return m_eEncoding; }
  57
  58     bool contains(sal_uInt32 nChar) const;
  59 };
  60
  61 inline Charset::Charset(rtl_TextEncoding eTheEncoding,
  62                         const sal_uInt32 * pTheRanges):
  63     m_eEncoding(eTheEncoding),
  64     m_pRanges(pTheRanges)
  65 {
  66     DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges");
  67 }
  68
  69 //============================================================================
  70 void appendISO88591(UniString & rText, sal_Char const * pBegin,
  71                     sal_Char const * pEnd);
  72
  73 }
  74
  75 //============================================================================
  76 class INetMIMECharsetList_Impl
  77 {
  78     struct Node
  79     {
  80         Charset m_aCharset;
  81         bool m_bDisabled;
  82         Node * m_pNext;
  83
  84         inline Node(const Charset & rTheCharset, bool bTheDisabled,
  85                     Node * pTheNext);
  86     };
  87
  88     Node * m_pFirst;
  89
  90 public:
  91     INetMIMECharsetList_Impl(): m_pFirst(0) {}
  92
  93     ~INetMIMECharsetList_Impl();
  94
  95     void prepend(const Charset & rCharset)
  96     { m_pFirst = new Node(rCharset, false, m_pFirst); }
  97
  98     void includes(sal_uInt32 nChar);
  99
 100     rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault
 101                                               = RTL_TEXTENCODING_DONTKNOW)
 102         const;
 103
 104     void reset();
 105 };
 106
 107 inline INetMIMECharsetList_Impl::Node::Node(const Charset & rTheCharset,
 108                                             bool bTheDisabled,
 109                                             Node * pTheNext):
 110     m_aCharset(rTheCharset),
 111     m_bDisabled(bTheDisabled),
 112     m_pNext(pTheNext)
 113 {}
 114
 115 //============================================================================
 116 namespace unnamed_tools_inetmime {
 117
 118 struct Parameter
 119 {
 120     Parameter * m_pNext;
 121     ByteString m_aAttribute;
 122     ByteString m_aCharset;
 123     ByteString m_aLanguage;
 124     ByteString m_aValue;
 125     sal_uInt32 m_nSection;
 126     bool m_bExtended;
 127
 128     inline Parameter(Parameter * pTheNext, ByteString const & rTheAttribute,
 129                      ByteString const & rTheCharset,
 130                      ByteString const & rTheLanguage,
 131                      ByteString const & rTheValue, sal_uInt32 nTheSection,
 132                      bool bTheExtended);
 133 };
 134
 135 inline Parameter::Parameter(Parameter * pTheNext,
 136                             ByteString const & rTheAttribute,
 137                             ByteString const & rTheCharset,
 138                             ByteString const & rTheLanguage,
 139                             ByteString const & rTheValue,
 140                             sal_uInt32 nTheSection, bool bTheExtended):
 141     m_pNext(pTheNext),
 142     m_aAttribute(rTheAttribute),
 143     m_aCharset(rTheCharset),
 144     m_aLanguage(rTheLanguage),
 145     m_aValue(rTheValue),
 146     m_nSection(nTheSection),
 147     m_bExtended(bTheExtended)
 148 {}
 149
 150 //============================================================================
 151 struct ParameterList
 152 {
 153     Parameter * m_pList;
 154
 155     ParameterList(): m_pList(0) {}
 156
 157     inline ~ParameterList();
 158
 159     Parameter ** find(ByteString const & rAttribute, sal_uInt32 nSection,
 160                       bool & rPresent);
 161 };
 162
 163 inline ParameterList::~ParameterList()
 164 {
 165     while (m_pList)
 166     {
 167         Parameter * pNext = m_pList->m_pNext;
 168         delete m_pList;
 169         m_pList = pNext;
 170     }
 171 }
 172
 173 //============================================================================
 174 bool parseParameters(ParameterList const & rInput,
 175                      INetContentTypeParameterList * pOutput);
 176
 177 }
 178
 179 //============================================================================
 180 //
 181 //  Charset
 182 //
 183 //============================================================================
 184
 185 bool Charset::contains(sal_uInt32 nChar) const
 186 {
 187     for (const sal_uInt32 * p = m_pRanges;;)
 188     {
 189         if (nChar < *p++)
 190             return false;
 191         if (nChar <= *p++)
 192             return true;
 193     }
 194 }
 195
 196 //============================================================================
 197 //
 198 //  appendISO88591
 199 //
 200 //============================================================================
 201
 202 namespace unnamed_tools_inetmime {
 203
 204 void appendISO88591(UniString & rText, sal_Char const * pBegin,
 205                     sal_Char const * pEnd)
 206 {
 207     xub_StrLen nLength = static_cast< xub_StrLen >(pEnd - pBegin);
 208     sal_Unicode * pBuffer = new sal_Unicode[nLength];
 209     for (sal_Unicode * p = pBuffer; pBegin != pEnd;)
 210         *p++ = sal_uChar(*pBegin++);
 211     rText.Append(pBuffer, nLength);
 212     delete[] pBuffer;
 213 }
 214
 215 }
 216
 217 //============================================================================
 218 //
 219 //  INetMIMECharsetList_Impl
 220 //
 221 //============================================================================
 222
 223 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl()
 224 {
 225     while (m_pFirst)
 226     {
 227         Node * pRemove = m_pFirst;
 228         m_pFirst = m_pFirst->m_pNext;
 229         delete pRemove;
 230     }
 231 }
 232
 233 //============================================================================
 234 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar)
 235 {
 236     for (Node * p = m_pFirst; p; p = p->m_pNext)
 237         if (!(p->m_bDisabled || p->m_aCharset.contains(nChar)))
 238             p->m_bDisabled = true;
 239 }
 240
 241 //============================================================================
 242 rtl_TextEncoding
 243 INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault)
 244     const
 245 {
 246     for (Node * p = m_pFirst; p; p = p->m_pNext)
 247         if (!p->m_bDisabled)
 248             return p->m_aCharset.getEncoding();
 249     return eDefault;
 250 }
 251
 252 //============================================================================
 253 void INetMIMECharsetList_Impl::reset()
 254 {
 255     for (Node * p = m_pFirst; p; p = p->m_pNext)
 256         p->m_bDisabled = false;
 257 }
 258
 259 //============================================================================
 260 //
 261 //  ParameterList
 262 //
 263 //============================================================================
 264
 265 Parameter ** ParameterList::find(ByteString const & rAttribute,
 266                                  sal_uInt32 nSection, bool & rPresent)
 267 {
 268     Parameter ** p = &m_pList;
 269     for (; *p; p = &(*p)->m_pNext)
 270     {
 271         StringCompare eCompare = rAttribute.CompareTo((*p)->m_aAttribute);
 272         if (eCompare == COMPARE_GREATER)
 273             break;
 274         else if (eCompare == COMPARE_EQUAL)
 275         {
 276             if (nSection > (*p)->m_nSection)
 277                 break;
 278             else if (nSection == (*p)->m_nSection)
 279             {
 280                 rPresent = true;
 281                 return p;
 282             }
 283         }
 284     }
 285     rPresent = false;
 286     return p;
 287 }
 288
 289 //============================================================================
 290 //
 291 //  parseParameters
 292 //
 293 //============================================================================
 294
 295 namespace unnamed_tools_inetmime {
 296
 297 bool parseParameters(ParameterList const & rInput,
 298                      INetContentTypeParameterList * pOutput)
 299 {
 300     if (pOutput)
 301         pOutput->Clear();
 302
 303     Parameter * pPrev = 0;
 304     for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
 305     {
 306         if (p->m_nSection > 0
 307             && (!pPrev
 308                 || pPrev->m_nSection != p->m_nSection - 1
 309                 || pPrev->m_aAttribute != p->m_aAttribute))
 310             return false;
 311         pPrev = p;
 312     }
 313
 314     if (pOutput)
 315         for (Parameter * p = rInput.m_pList; p;)
 316         {
 317             bool bCharset = p->m_aCharset.Len() != 0;
 318             rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 319             if (bCharset)
 320                 eEncoding
 321                     = INetMIME::getCharsetEncoding(p->m_aCharset.GetBuffer(),
 322                                                    p->m_aCharset.GetBuffer()
 323                                                        + rInput.m_pList->
 324                                                              m_aCharset.
 325                                                                  Len());
 326             UniString aValue;
 327             bool bBadEncoding = false;
 328             Parameter * pNext = p;
 329             do
 330             {
 331                 sal_Size nSize;
 332                 sal_Unicode * pUnicode
 333                     = INetMIME::convertToUnicode(pNext->m_aValue.GetBuffer(),
 334                                                  pNext->m_aValue.GetBuffer()
 335                                                      + pNext->m_aValue.Len(),
 336                                                  bCharset && p->m_bExtended ?
 337                                                      eEncoding :
 338                                                      RTL_TEXTENCODING_UTF8,
 339                                                  nSize);
 340                 if (!pUnicode && !(bCharset && p->m_bExtended))
 341                     pUnicode = INetMIME::convertToUnicode(
 342                                    pNext->m_aValue.GetBuffer(),
 343                                    pNext->m_aValue.GetBuffer()
 344                                        + pNext->m_aValue.Len(),
 345                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 346                 if (!pUnicode)
 347                 {
 348                     bBadEncoding = true;
 349                     break;
 350                 }
 351                 aValue += UniString(pUnicode, static_cast< xub_StrLen >(nSize));
 352                 delete[] pUnicode;
 353                 pNext = pNext->m_pNext;
 354             }
 355             while (pNext && pNext->m_nSection > 0);
 356             if (bBadEncoding)
 357             {
 358                 aValue.Erase();
 359                 for (pNext = p;;)
 360                 {
 361                     if (pNext->m_bExtended)
 362                         for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
 363                             aValue += sal_Unicode(
 364                                 sal_Unicode(
 365                                     sal_uChar(pNext->m_aValue.GetChar(i)))
 366                                 | 0xF800);
 367                     else
 368                         for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
 369                             aValue
 370                                 += sal_Unicode(sal_uChar
 371                                                    (pNext->
 372                                                         m_aValue.GetChar(i)));
 373                     pNext = pNext->m_pNext;
 374                     if (!pNext || pNext->m_nSection == 0)
 375                         break;
 376                 };
 377             }
 378             pOutput->Insert(new INetContentTypeParameter(p->m_aAttribute,
 379                                                              p->m_aCharset,
 380                                                              p->m_aLanguage,
 381                                                              aValue,
 382                                                              !bBadEncoding),
 383                                 LIST_APPEND);
 384             p = pNext;
 385         }
 386     return true;
 387 }
 388
 389 }
 390
 391 //============================================================================
 392 //
 393 //  INetMIME
 394 //
 395 //============================================================================
 396
 397 // static
 398 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 399 {
 400     static const bool aMap[128]
 401         = { false, false, false, false, false, false, false, false,
 402             false, false, false, false, false, false, false, false,
 403             false, false, false, false, false, false, false, false,
 404             false, false, false, false, false, false, false, false,
 405             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 406             false, false,  true,  true, false,  true, false,  true, //()*+,-./
 407              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 408              true,  true, false, false, false,  true, false,  true, //89:;<=>?
 409             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 410              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 411              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 412              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 413              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 414              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 415              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 416              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 417           };
 418     return isUSASCII(nChar) && aMap[nChar];
 419 }
 420
 421 //============================================================================
 422 // static
 423 bool INetMIME::isTokenChar(sal_uInt32 nChar)
 424 {
 425     static const sal_Char aMap[128]
 426         = { false, false, false, false, false, false, false, false,
 427             false, false, false, false, false, false, false, false,
 428             false, false, false, false, false, false, false, false,
 429             false, false, false, false, false, false, false, false,
 430             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 431             false, false,  true,  true, false,  true,  true, false, //()*+,-./
 432              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 433              true,  true, false, false, false, false, false, false, //89:;<=>?
 434             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 435              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 436              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 437              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 438              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 439              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 440              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 441              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 442           };
 443     return isUSASCII(nChar) && aMap[nChar];
 444 }
 445
 446 //============================================================================
 447 // static
 448 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar)
 449 {
 450     static const sal_Char aMap[128]
 451         = { false, false, false, false, false, false, false, false,
 452             false, false, false, false, false, false, false, false,
 453             false, false, false, false, false, false, false, false,
 454             false, false, false, false, false, false, false, false,
 455             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 456             false, false,  true,  true, false,  true, false, false, //()*+,-./
 457              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 458              true,  true, false, false, false, false, false, false, //89:;<=>?
 459             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 460              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 461              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 462              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 463              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 464              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 465              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 466              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 467           };
 468     return isUSASCII(nChar) && aMap[nChar];
 469 }
 470
 471 //============================================================================
 472 // static
 473 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 474 {
 475     static const sal_Char aMap[128]
 476         = { false, false, false, false, false, false, false, false,
 477             false, false, false, false, false, false, false, false,
 478             false, false, false, false, false, false, false, false,
 479             false, false, false, false, false, false, false, false,
 480             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 481             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 482              true,  true,  true,  true,  true,  true,  true,  true, //01234567
 483              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 484              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 485              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 486              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 487              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 488              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 489              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 490              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 491              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 492           };
 493     return isUSASCII(nChar) && aMap[nChar];
 494 }
 495
 496 //============================================================================
 497 // static
 498 sal_uInt32 INetMIME::getDigit(int nWeight)
 499 {
 500     DBG_ASSERT(nWeight >= 0 && nWeight < 10,
 501                "INetMIME::getDigit(): Bad weight");
 502
 503     static const sal_Char aDigits[16]
 504         = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
 505     return aDigits[nWeight];
 506 }
 507
 508 //============================================================================
 509 // static
 510 sal_uInt32 INetMIME::getHexDigit(int nWeight)
 511 {
 512     DBG_ASSERT(nWeight >= 0 && nWeight < 16,
 513                "INetMIME::getHexDigit(): Bad weight");
 514
 515     static const sal_Char aDigits[16]
 516         = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
 517             'D', 'E', 'F' };
 518     return aDigits[nWeight];
 519 }
 520
 521 //============================================================================
 522 // static
 523 sal_uInt32 INetMIME::getBase64Digit(int nWeight)
 524 {
 525     DBG_ASSERT(nWeight >= 0 && nWeight < 64,
 526                "INetMIME::getBase64Digit(): Bad weight");
 527
 528     static const sal_Char aDigits[64]
 529         = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 530             'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 531             'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 532             'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
 533             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
 534     return aDigits[nWeight];
 535 }
 536
 537 //============================================================================
 538 // static
 539 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
 540                                const sal_Char * pEnd1,
 541                                const sal_Char * pBegin2,
 542                                const sal_Char * pEnd2)
 543 {
 544     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pBegin2 && pBegin2 <= pEnd2,
 545                "INetMIME::equalIgnoreCase(): Bad sequences");
 546
 547     if (pEnd1 - pBegin1 != pEnd2 - pBegin2)
 548         return false;
 549     while (pBegin1 != pEnd1)
 550         if (toUpperCase(*pBegin1++) != toUpperCase(*pBegin2++))
 551             return false;
 552     return true;
 553 }
 554
 555 //============================================================================
 556 // static
 557 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
 558                                const sal_Char * pEnd1,
 559                                const sal_Char * pString2)
 560 {
 561     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 562                "INetMIME::equalIgnoreCase(): Bad sequences");
 563
 564     while (*pString2 != 0)
 565         if (pBegin1 == pEnd1
 566             || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
 567             return false;
 568     return pBegin1 == pEnd1;
 569 }
 570
 571 //============================================================================
 572 // static
 573 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 574                                const sal_Unicode * pEnd1,
 575                                const sal_Char * pString2)
 576 {
 577     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 578                "INetMIME::equalIgnoreCase(): Bad sequences");
 579
 580     while (*pString2 != 0)
 581         if (pBegin1 == pEnd1
 582             || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
 583             return false;
 584     return pBegin1 == pEnd1;
 585 }
 586
 587 //============================================================================
 588 // static
 589 const sal_Char * INetMIME::skipLinearWhiteSpace(const sal_Char * pBegin,
 590                                                 const sal_Char * pEnd)
 591 {
 592     DBG_ASSERT(pBegin && pBegin <= pEnd,
 593                "INetMIME::skipLinearWhiteSpace(): Bad sequence");
 594
 595     while (pBegin != pEnd)
 596         switch (*pBegin)
 597         {
 598             case '\t':
 599             case ' ':
 600                 ++pBegin;
 601                 break;
 602
 603             case 0x0D: // CR
 604                 if (startsWithLineFolding(pBegin, pEnd))
 605                     pBegin += 3;
 606                 else
 607                     return pBegin;
 608                 break;
 609
 610             default:
 611                 return pBegin;
 612         }
 613     return pBegin;
 614 }
 615
 616 //============================================================================
 617 // static
 618 const sal_Unicode * INetMIME::skipLinearWhiteSpace(const sal_Unicode * pBegin,
 619                                                    const sal_Unicode * pEnd)
 620 {
 621     DBG_ASSERT(pBegin && pBegin <= pEnd,
 622                "INetMIME::skipLinearWhiteSpace(): Bad sequence");
 623
 624     while (pBegin != pEnd)
 625         switch (*pBegin)
 626         {
 627             case '\t':
 628             case ' ':
 629                 ++pBegin;
 630                 break;
 631
 632             case 0x0D: // CR
 633                 if (startsWithLineFolding(pBegin, pEnd))
 634                     pBegin += 3;
 635                 else
 636                     return pBegin;
 637                 break;
 638
 639             default:
 640                 return pBegin;
 641         }
 642     return pBegin;
 643 }
 644
 645 //============================================================================
 646 // static
 647 const sal_Char * INetMIME::skipComment(const sal_Char * pBegin,
 648                                        const sal_Char * pEnd)
 649 {
 650     DBG_ASSERT(pBegin && pBegin <= pEnd,
 651                "INetMIME::skipComment(): Bad sequence");
 652
 653     if (pBegin != pEnd && *pBegin == '(')
 654     {
 655         sal_uInt32 nLevel = 0;
 656         for (const sal_Char * p = pBegin; p != pEnd;)
 657             switch (*p++)
 658             {
 659                 case '(':
 660                     ++nLevel;
 661                     break;
 662
 663                 case ')':
 664                     if (--nLevel == 0)
 665                         return p;
 666                     break;
 667
 668                 case '\\':
 669                     if (p != pEnd)
 670                         ++p;
 671                     break;
 672             }
 673     }
 674     return pBegin;
 675 }
 676
 677 //============================================================================
 678 // static
 679 const sal_Unicode * INetMIME::skipComment(const sal_Unicode * pBegin,
 680                                           const sal_Unicode * pEnd)
 681 {
 682     DBG_ASSERT(pBegin && pBegin <= pEnd,
 683                "INetMIME::skipComment(): Bad sequence");
 684
 685     if (pBegin != pEnd && *pBegin == '(')
 686     {
 687         sal_uInt32 nLevel = 0;
 688         for (const sal_Unicode * p = pBegin; p != pEnd;)
 689             switch (*p++)
 690             {
 691                 case '(':
 692                     ++nLevel;
 693                     break;
 694
 695                 case ')':
 696                     if (--nLevel == 0)
 697                         return p;
 698                     break;
 699
 700                 case '\\':
 701                     if (p != pEnd)
 702                         ++p;
 703                     break;
 704             }
 705     }
 706     return pBegin;
 707 }
 708
 709 //============================================================================
 710 // static
 711 const sal_Char * INetMIME::skipLinearWhiteSpaceComment(const sal_Char *
 712                                                            pBegin,
 713                                                        const sal_Char * pEnd)
 714 {
 715     DBG_ASSERT(pBegin && pBegin <= pEnd,
 716                "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
 717
 718     while (pBegin != pEnd)
 719         switch (*pBegin)
 720         {
 721             case '\t':
 722             case ' ':
 723                 ++pBegin;
 724                 break;
 725
 726             case 0x0D: // CR
 727                 if (startsWithLineFolding(pBegin, pEnd))
 728                     pBegin += 3;
 729                 else
 730                     return pBegin;
 731                 break;
 732
 733             case '(':
 734             {
 735                 const sal_Char * p = skipComment(pBegin, pEnd);
 736                 if (p == pBegin)
 737                     return pBegin;
 738                 pBegin = p;
 739                 break;
 740             }
 741
 742             default:
 743                 return pBegin;
 744         }
 745     return pBegin;
 746 }
 747
 748 //============================================================================
 749 // static
 750 const sal_Unicode * INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode *
 751                                                               pBegin,
 752                                                           const sal_Unicode *
 753                                                               pEnd)
 754 {
 755     DBG_ASSERT(pBegin && pBegin <= pEnd,
 756                "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
 757
 758     while (pBegin != pEnd)
 759         switch (*pBegin)
 760         {
 761             case '\t':
 762             case ' ':
 763                 ++pBegin;
 764                 break;
 765
 766             case 0x0D: // CR
 767                 if (startsWithLineFolding(pBegin, pEnd))
 768                     pBegin += 3;
 769                 else
 770                     return pBegin;
 771                 break;
 772
 773             case '(':
 774             {
 775                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 776                 if (p == pBegin)
 777                     return pBegin;
 778                 pBegin = p;
 779                 break;
 780             }
 781
 782             default:
 783                 return pBegin;
 784         }
 785     return pBegin;
 786 }
 787
 788 //============================================================================
 789 // static
 790 const sal_Char * INetMIME::skipQuotedString(const sal_Char * pBegin,
 791                                             const sal_Char * pEnd)
 792 {
 793     DBG_ASSERT(pBegin && pBegin <= pEnd,
 794                "INetMIME::skipQuotedString(): Bad sequence");
 795
 796     if (pBegin != pEnd && *pBegin == '"')
 797         for (const sal_Char * p = pBegin + 1; p != pEnd;)
 798             switch (*p++)
 799             {
 800                 case 0x0D: // CR
 801                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 802                         || !isWhiteSpace(*p++))
 803                         return pBegin;
 804                     break;
 805
 806                 case '"':
 807                     return p;
 808
 809                 case '\\':
 810                     if (p != pEnd)
 811                         ++p;
 812                     break;
 813             }
 814     return pBegin;
 815 }
 816
 817 //============================================================================
 818 // static
 819 const sal_Unicode * INetMIME::skipQuotedString(const sal_Unicode * pBegin,
 820                                                const sal_Unicode * pEnd)
 821 {
 822     DBG_ASSERT(pBegin && pBegin <= pEnd,
 823                "INetMIME::skipQuotedString(): Bad sequence");
 824
 825     if (pBegin != pEnd && *pBegin == '"')
 826         for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 827             switch (*p++)
 828             {
 829                 case 0x0D: // CR
 830                     if (pEnd - p < 2 || *p++ != 0x0A // LF
 831                         || !isWhiteSpace(*p++))
 832                         return pBegin;
 833                     break;
 834
 835                 case '"':
 836                     return p;
 837
 838                 case '\\':
 839                     if (p != pEnd)
 840                         ++p;
 841                     break;
 842             }
 843     return pBegin;
 844 }
 845
 846 //============================================================================
 847 // static
 848 const sal_Char * INetMIME::scanAtom(const sal_Char * pBegin,
 849                                     const sal_Char * pEnd)
 850 {
 851     while (pBegin != pEnd && isAtomChar(*pBegin))
 852         ++pBegin;
 853     return pBegin;
 854 }
 855
 856 //============================================================================
 857 // static
 858 const sal_Unicode * INetMIME::scanAtom(const sal_Unicode * pBegin,
 859                                        const sal_Unicode * pEnd)
 860 {
 861     while (pBegin != pEnd && isAtomChar(*pBegin))
 862         ++pBegin;
 863     return pBegin;
 864 }
 865
 866 //============================================================================
 867 // static
 868 bool INetMIME::scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
 869                             bool bLeadingZeroes, sal_uInt32 & rValue)
 870 {
 871     sal_uInt64 nTheValue = 0;
 872     const sal_Char * p = rBegin;
 873     for ( ; p != pEnd; ++p)
 874     {
 875         int nWeight = getWeight(*p);
 876         if (nWeight < 0)
 877             break;
 878         nTheValue = 10 * nTheValue + nWeight;
 879         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 880             return false;
 881     }
 882     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 883         return false;
 884     rBegin = p;
 885     rValue = sal_uInt32(nTheValue);
 886     return true;
 887 }
 888
 889 //============================================================================
 890 // static
 891 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
 892                             const sal_Unicode * pEnd, bool bLeadingZeroes,
 893                             sal_uInt32 & rValue)
 894 {
 895     sal_uInt64 nTheValue = 0;
 896     const sal_Unicode * p = rBegin;
 897     for ( ; p != pEnd; ++p)
 898     {
 899         int nWeight = getWeight(*p);
 900         if (nWeight < 0)
 901             break;
 902         nTheValue = 10 * nTheValue + nWeight;
 903         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 904             return false;
 905     }
 906     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 907         return false;
 908     rBegin = p;
 909     rValue = sal_uInt32(nTheValue);
 910     return true;
 911 }
 912
 913 //============================================================================
 914 // static
 915 bool INetMIME::scanUnsignedHex(const sal_Char *& rBegin,
 916                                const sal_Char * pEnd, bool bLeadingZeroes,
 917                                sal_uInt32 & rValue)
 918 {
 919     sal_uInt64 nTheValue = 0;
 920     const sal_Char * p = rBegin;
 921     for ( p = rBegin; p != pEnd; ++p)
 922     {
 923         int nWeight = getHexWeight(*p);
 924         if (nWeight < 0)
 925             break;
 926         nTheValue = nTheValue << 4 | nWeight;
 927         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 928             return false;
 929     }
 930     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 931         return false;
 932     rBegin = p;
 933     rValue = sal_uInt32(nTheValue);
 934     return true;
 935 }
 936
 937 //============================================================================
 938 // static
 939 bool INetMIME::scanUnsignedHex(const sal_Unicode *& rBegin,
 940                                const sal_Unicode * pEnd, bool bLeadingZeroes,
 941                                sal_uInt32 & rValue)
 942 {
 943     sal_uInt64 nTheValue = 0;
 944     const sal_Unicode * p = rBegin;
 945     for ( ; p != pEnd; ++p)
 946     {
 947         int nWeight = getHexWeight(*p);
 948         if (nWeight < 0)
 949             break;
 950         nTheValue = nTheValue << 4 | nWeight;
 951         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 952             return false;
 953     }
 954     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 955         return false;
 956     rBegin = p;
 957     rValue = sal_uInt32(nTheValue);
 958     return true;
 959 }
 960
 961 //============================================================================
 962 // static
 963 const sal_Char * INetMIME::scanQuotedBlock(const sal_Char * pBegin,
 964                                            const sal_Char * pEnd,
 965                                            sal_uInt32 nOpening,
 966                                            sal_uInt32 nClosing,
 967                                            sal_Size & rLength,
 968                                            bool & rModify)
 969 {
 970     DBG_ASSERT(pBegin && pBegin <= pEnd,
 971                "INetMIME::scanQuotedBlock(): Bad sequence");
 972
 973     if (pBegin != pEnd && static_cast< unsigned char >(*pBegin) == nOpening)
 974     {
 975         ++rLength;
 976         ++pBegin;
 977         while (pBegin != pEnd)
 978             if (static_cast< unsigned char >(*pBegin) == nClosing)
 979             {
 980                 ++rLength;
 981                 return ++pBegin;
 982             }
 983             else
 984             {
 985                 sal_uInt32 c = *pBegin++;
 986                 switch (c)
 987                 {
 988                     case 0x0D: // CR
 989                         if (pBegin != pEnd && *pBegin == 0x0A) // LF
 990                             if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
 991                             {
 992                                 ++rLength;
 993                                 rModify = true;
 994                                 pBegin += 2;
 995                             }
 996                             else
 997                             {
 998                                 rLength += 3;
 999                                 rModify = true;
1000                                 ++pBegin;
1001                             }
1002                         else
1003                             ++rLength;
1004                         break;
1005
1006                     case '\\':
1007                         ++rLength;
1008                         if (pBegin != pEnd)
1009                         {
1010                             if (startsWithLineBreak(pBegin, pEnd)
1011                                 && (pEnd - pBegin < 3
1012                                     || !isWhiteSpace(pBegin[2])))
1013                             {
1014                                 rLength += 3;
1015                                 rModify = true;
1016                                 pBegin += 2;
1017                             }
1018                             else
1019                                 ++pBegin;
1020                         }
1021                         break;
1022
1023                     default:
1024                         ++rLength;
1025                         if (!isUSASCII(c))
1026                             rModify = true;
1027                         break;
1028                 }
1029             }
1030     }
1031     return pBegin;
1032 }
1033
1034 //============================================================================
1035 // static
1036 const sal_Unicode * INetMIME::scanQuotedBlock(const sal_Unicode * pBegin,
1037                                               const sal_Unicode * pEnd,
1038                                               sal_uInt32 nOpening,
1039                                               sal_uInt32 nClosing,
1040                                               sal_Size & rLength,
1041                                               bool & rModify)
1042 {
1043     DBG_ASSERT(pBegin && pBegin <= pEnd,
1044                "INetMIME::scanQuotedBlock(): Bad sequence");
1045
1046     if (pBegin != pEnd && *pBegin == nOpening)
1047     {
1048         ++rLength;
1049         ++pBegin;
1050         while (pBegin != pEnd)
1051             if (*pBegin == nClosing)
1052             {
1053                 ++rLength;
1054                 return ++pBegin;
1055             }
1056             else
1057             {
1058                 sal_uInt32 c = *pBegin++;
1059                 switch (c)
1060                 {
1061                     case 0x0D: // CR
1062                         if (pBegin != pEnd && *pBegin == 0x0A) // LF
1063                             if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
1064                             {
1065                                 ++rLength;
1066                                 rModify = true;
1067                                 pBegin += 2;
1068                             }
1069                             else
1070                             {
1071                                 rLength += 3;
1072                                 rModify = true;
1073                                 ++pBegin;
1074                             }
1075                         else
1076                             ++rLength;
1077                         break;
1078
1079                     case '\\':
1080                         ++rLength;
1081                         if (pBegin != pEnd)
1082                         {
1083                             if (startsWithLineBreak(pBegin, pEnd)
1084                                 && (pEnd - pBegin < 3
1085                                     || !isWhiteSpace(pBegin[2])))
1086                             {
1087                                 rLength += 3;
1088                                 rModify = true;
1089                                 pBegin += 2;
1090                             }
1091                             else
1092                                 ++pBegin;
1093                         }
1094                         break;
1095
1096                     default:
1097                         ++rLength;
1098                         if (!isUSASCII(c))
1099                             rModify = true;
1100                         break;
1101                 }
1102             }
1103     }
1104     return pBegin;
1105 }
1106
1107 //============================================================================
1108 // static
1109 sal_Char const * INetMIME::scanParameters(sal_Char const * pBegin,
1110                                           sal_Char const * pEnd,
1111                                           INetContentTypeParameterList *
1112                                               pParameters)
1113 {
1114     ParameterList aList;
1115     sal_Char const * pParameterBegin = pBegin;
1116     for (sal_Char const * p = pParameterBegin;; pParameterBegin = p)
1117     {
1118         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1119         if (pParameterBegin == pEnd || *pParameterBegin != ';')
1120             break;
1121         p = pParameterBegin + 1;
1122
1123         sal_Char const * pAttributeBegin = skipLinearWhiteSpaceComment(p,
1124                                                                        pEnd);
1125         p = pAttributeBegin;
1126         bool bDowncaseAttribute = false;
1127         while (p != pEnd && isTokenChar(*p) && *p != '*')
1128         {
1129             bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1130             ++p;
1131         }
1132         if (p == pAttributeBegin)
1133             break;
1134         ByteString aAttribute(
1135             pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin));
1136         if (bDowncaseAttribute)
1137             aAttribute.ToLowerAscii();
1138
1139         sal_uInt32 nSection = 0;
1140         if (p != pEnd && *p == '*')
1141         {
1142             ++p;
1143             if (p != pEnd && isDigit(*p)
1144                 && !scanUnsigned(p, pEnd, false, nSection))
1145                 break;
1146         }
1147
1148         bool bPresent;
1149         Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1150         if (bPresent)
1151             break;
1152
1153         bool bExtended = false;
1154         if (p != pEnd && *p == '*')
1155         {
1156             ++p;
1157             bExtended = true;
1158         }
1159
1160         p = skipLinearWhiteSpaceComment(p, pEnd);
1161
1162         if (p == pEnd || *p != '=')
1163             break;
1164
1165         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1166
1167         ByteString aCharset;
1168         ByteString aLanguage;
1169          ByteString aValue;
1170         if (bExtended)
1171         {
1172             if (nSection == 0)
1173             {
1174                 sal_Char const * pCharsetBegin = p;
1175                 bool bDowncaseCharset = false;
1176                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1177                 {
1178                     bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1179                     ++p;
1180                 }
1181                 if (p == pCharsetBegin)
1182                     break;
1183                 if (pParameters)
1184                 {
1185                     aCharset = ByteString(
1186                         pCharsetBegin,
1187                         static_cast< xub_StrLen >(p - pCharsetBegin));
1188                     if (bDowncaseCharset)
1189                         aCharset.ToLowerAscii();
1190                 }
1191
1192                 if (p == pEnd || *p != '\'')
1193                     break;
1194                 ++p;
1195
1196                 sal_Char const * pLanguageBegin = p;
1197                 bool bDowncaseLanguage = false;
1198                 int nLetters = 0;
1199                 for (; p != pEnd; ++p)
1200                     if (isAlpha(*p))
1201                     {
1202                         if (++nLetters > 8)
1203                             break;
1204                         bDowncaseLanguage = bDowncaseLanguage
1205                                             || isUpperCase(*p);
1206                     }
1207                     else if (*p == '-')
1208                     {
1209                         if (nLetters == 0)
1210                             break;
1211                         nLetters = 0;
1212                     }
1213                     else
1214                         break;
1215                 if (nLetters == 0 || nLetters > 8)
1216                     break;
1217                 if (pParameters)
1218                 {
1219                     aLanguage = ByteString(
1220                         pLanguageBegin,
1221                         static_cast< xub_StrLen >(p - pLanguageBegin));
1222                     if (bDowncaseLanguage)
1223                         aLanguage.ToLowerAscii();
1224                 }
1225
1226                 if (p == pEnd || *p != '\'')
1227                     break;
1228                 ++p;
1229             }
1230             if (pParameters)
1231                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1232                 {
1233                     if (*p == '%')
1234                     {
1235                         if (p + 2 < pEnd)
1236                         {
1237                             int nWeight1 = getHexWeight(p[1]);
1238                             int nWeight2 = getHexWeight(p[2]);
1239                             if (nWeight1 >= 0 && nWeight2 >= 0)
1240                             {
1241                                 aValue += sal_Char(nWeight1 << 4 | nWeight2);
1242                                 p += 3;
1243                                 continue;
1244                             }
1245                         }
1246                     }
1247                     aValue += *p++;
1248                 }
1249             else
1250                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1251                     ++p;
1252         }
1253         else if (p != pEnd && *p == '"')
1254             if (pParameters)
1255             {
1256                 bool bInvalid = false;
1257                 for (++p;;)
1258                 {
1259                     if (p == pEnd)
1260                     {
1261                         bInvalid = true;
1262                         break;
1263                     }
1264                     else if (*p == '"')
1265                     {
1266                         ++p;
1267                         break;
1268                     }
1269                     else if (*p == 0x0D) // CR
1270                     {
1271                         if (pEnd - p < 3 || p[1] != 0x0A // LF
1272                             || !isWhiteSpace(p[2]))
1273                         {
1274                             bInvalid = true;
1275                             break;
1276                         }
1277                         p += 2;
1278                     }
1279                     else if (*p == '\\' && ++p == pEnd)
1280                     {
1281                         bInvalid = true;
1282                         break;
1283                     }
1284                     aValue += *p++;
1285                 }
1286                 if (bInvalid)
1287                     break;
1288             }
1289             else
1290             {
1291                 sal_Char const * pStringEnd = skipQuotedString(p, pEnd);
1292                 if (p == pStringEnd)
1293                     break;
1294                 p = pStringEnd;
1295             }
1296         else
1297         {
1298             sal_Char const * pTokenBegin = p;
1299             while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1300                 ++p;
1301             if (p == pTokenBegin)
1302                 break;
1303             if (pParameters)
1304                 aValue = ByteString(
1305                     pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin));
1306         }
1307
1308         *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1309                               nSection, bExtended);
1310     }
1311     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1312 }
1313
1314 //============================================================================
1315 // static
1316 sal_Unicode const * INetMIME::scanParameters(sal_Unicode const * pBegin,
1317                                              sal_Unicode const * pEnd,
1318                                              INetContentTypeParameterList *
1319                                                  pParameters)
1320 {
1321     ParameterList aList;
1322     sal_Unicode const * pParameterBegin = pBegin;
1323     for (sal_Unicode const * p = pParameterBegin;; pParameterBegin = p)
1324     {
1325         pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1326         if (pParameterBegin == pEnd || *pParameterBegin != ';')
1327             break;
1328         p = pParameterBegin + 1;
1329
1330         sal_Unicode const * pAttributeBegin
1331             = skipLinearWhiteSpaceComment(p, pEnd);
1332         p = pAttributeBegin;
1333         bool bDowncaseAttribute = false;
1334         while (p != pEnd && isTokenChar(*p) && *p != '*')
1335         {
1336             bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1337             ++p;
1338         }
1339         if (p == pAttributeBegin)
1340             break;
1341         ByteString aAttribute = ByteString(
1342             pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin),
1343             RTL_TEXTENCODING_ASCII_US);
1344         if (bDowncaseAttribute)
1345             aAttribute.ToLowerAscii();
1346
1347         sal_uInt32 nSection = 0;
1348         if (p != pEnd && *p == '*')
1349         {
1350             ++p;
1351             if (p != pEnd && isDigit(*p)
1352                 && !scanUnsigned(p, pEnd, false, nSection))
1353                 break;
1354         }
1355
1356         bool bPresent;
1357         Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1358         if (bPresent)
1359             break;
1360
1361         bool bExtended = false;
1362         if (p != pEnd && *p == '*')
1363         {
1364             ++p;
1365             bExtended = true;
1366         }
1367
1368         p = skipLinearWhiteSpaceComment(p, pEnd);
1369
1370         if (p == pEnd || *p != '=')
1371             break;
1372
1373         p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1374
1375         ByteString aCharset;
1376         ByteString aLanguage;
1377          ByteString aValue;
1378         if (bExtended)
1379         {
1380             if (nSection == 0)
1381             {
1382                 sal_Unicode const * pCharsetBegin = p;
1383                 bool bDowncaseCharset = false;
1384                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1385                 {
1386                     bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1387                     ++p;
1388                 }
1389                 if (p == pCharsetBegin)
1390                     break;
1391                 if (pParameters)
1392                 {
1393                     aCharset = ByteString(
1394                         pCharsetBegin,
1395                         static_cast< xub_StrLen >(p - pCharsetBegin),
1396                         RTL_TEXTENCODING_ASCII_US);
1397                     if (bDowncaseCharset)
1398                         aCharset.ToLowerAscii();
1399                 }
1400
1401                 if (p == pEnd || *p != '\'')
1402                     break;
1403                 ++p;
1404
1405                 sal_Unicode const * pLanguageBegin = p;
1406                 bool bDowncaseLanguage = false;
1407                 int nLetters = 0;
1408                 for (; p != pEnd; ++p)
1409                     if (isAlpha(*p))
1410                     {
1411                         if (++nLetters > 8)
1412                             break;
1413                         bDowncaseLanguage = bDowncaseLanguage
1414                                             || isUpperCase(*p);
1415                     }
1416                     else if (*p == '-')
1417                     {
1418                         if (nLetters == 0)
1419                             break;
1420                         nLetters = 0;
1421                     }
1422                     else
1423                         break;
1424                 if (nLetters == 0 || nLetters > 8)
1425                     break;
1426                 if (pParameters)
1427                 {
1428                     aLanguage = ByteString(
1429                         pLanguageBegin,
1430                         static_cast< xub_StrLen >(p - pLanguageBegin),
1431                         RTL_TEXTENCODING_ASCII_US);
1432                     if (bDowncaseLanguage)
1433                         aLanguage.ToLowerAscii();
1434                 }
1435
1436                 if (p == pEnd || *p != '\'')
1437                     break;
1438                 ++p;
1439             }
1440             if (pParameters)
1441             {
1442                 INetMIMEStringOutputSink
1443                     aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1444                 while (p != pEnd)
1445                 {
1446                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1447                     if (isUSASCII(nChar) && !isTokenChar(nChar))
1448                         break;
1449                     if (nChar == '%' && p + 1 < pEnd)
1450                     {
1451                         int nWeight1 = getHexWeight(p[0]);
1452                         int nWeight2 = getHexWeight(p[1]);
1453                         if (nWeight1 >= 0 && nWeight2 >= 0)
1454                         {
1455                             aSink << sal_Char(nWeight1 << 4 | nWeight2);
1456                             p += 2;
1457                             continue;
1458                         }
1459                     }
1460                     INetMIME::writeUTF8(aSink, nChar);
1461                 }
1462                 aValue = aSink.takeBuffer();
1463             }
1464             else
1465                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1466                     ++p;
1467         }
1468         else if (p != pEnd && *p == '"')
1469             if (pParameters)
1470             {
1471                 INetMIMEStringOutputSink
1472                     aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1473                 bool bInvalid = false;
1474                 for (++p;;)
1475                 {
1476                     if (p == pEnd)
1477                     {
1478                         bInvalid = true;
1479                         break;
1480                     }
1481                     sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1482                     if (nChar == '"')
1483                         break;
1484                     else if (nChar == 0x0D) // CR
1485                     {
1486                         if (pEnd - p < 2 || *p++ != 0x0A // LF
1487                             || !isWhiteSpace(*p))
1488                         {
1489                             bInvalid = true;
1490                             break;
1491                         }
1492                         nChar = sal_uChar(*p++);
1493                     }
1494                     else if (nChar == '\\')
1495                     {
1496                         if (p == pEnd)
1497                         {
1498                             bInvalid = true;
1499                             break;
1500                         }
1501                         nChar = INetMIME::getUTF32Character(p, pEnd);
1502                     }
1503                     INetMIME::writeUTF8(aSink, nChar);
1504                 }
1505                 if (bInvalid)
1506                     break;
1507                 aValue = aSink.takeBuffer();
1508             }
1509             else
1510             {
1511                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
1512                 if (p == pStringEnd)
1513                     break;
1514                 p = pStringEnd;
1515             }
1516         else
1517         {
1518             sal_Unicode const * pTokenBegin = p;
1519             while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1520                 ++p;
1521             if (p == pTokenBegin)
1522                 break;
1523             if (pParameters)
1524                 aValue = ByteString(
1525                     pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin),
1526                     RTL_TEXTENCODING_UTF8);
1527         }
1528
1529         *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1530                               nSection, bExtended);
1531     }
1532     return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1533 }
1534
1535 //============================================================================
1536 // static
1537 const sal_Char * INetMIME::getCharsetName(rtl_TextEncoding eEncoding)
1538 {
1539     if (rtl_isOctetTextEncoding(eEncoding))
1540     {
1541         char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding);
1542         DBG_ASSERT(p, "INetMIME::getCharsetName(): Unsupported encoding");
1543         return p;
1544     }
1545     else
1546         switch (eEncoding)
1547         {
1548             case RTL_TEXTENCODING_UCS4:
1549                 return "ISO-10646-UCS-4";
1550
1551             case RTL_TEXTENCODING_UCS2:
1552                 return "ISO-10646-UCS-2";
1553
1554             default:
1555                 DBG_ERROR("INetMIME::getCharsetName(): Unsupported encoding");
1556                 return 0;
1557         }
1558 }
1559
1560 //============================================================================
1561 namespace unnamed_tools_inetmime {
1562
1563 struct EncodingEntry
1564 {
1565     sal_Char const * m_aName;
1566     rtl_TextEncoding m_eEncoding;
1567 };
1568
1569 //============================================================================
1570 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
1571 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
1572 // noted:
1573 EncodingEntry const aEncodingMap[]
1574     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
1575         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
1576         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
1577         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
1578         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
1579         { "ASCII", RTL_TEXTENCODING_ASCII_US },
1580         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
1581         { "US", RTL_TEXTENCODING_ASCII_US },
1582         { "IBM367", RTL_TEXTENCODING_ASCII_US },
1583         { "CP367", RTL_TEXTENCODING_ASCII_US },
1584         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
1585         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1586         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
1587         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
1588         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1589         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1590         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
1591         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
1592         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
1593         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1594         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1595         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
1596         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
1597         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1598         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1599         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
1600         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1601         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1602         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
1603         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
1604         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1605         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1606         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
1607         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1608         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1609         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
1610         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
1611         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1612         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1613         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
1614         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1615         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1616         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
1617         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
1618         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1619         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1620         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1621         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1622         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
1623         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
1624         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1625         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
1626         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
1627         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1628         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1629         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1630         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
1631         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
1632         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1633         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
1634         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
1635         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
1636         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
1637         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
1638         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1639         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
1640         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
1641         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1642         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1643         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1644         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1645         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
1646         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
1647         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1648         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1649         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
1650         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1651         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
1652         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
1653         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
1654         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1655         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
1656         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1657         { "IBM437", RTL_TEXTENCODING_IBM_437 },
1658         { "CP437", RTL_TEXTENCODING_IBM_437 },
1659         { "437", RTL_TEXTENCODING_IBM_437 },
1660         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
1661         { "IBM850", RTL_TEXTENCODING_IBM_850 },
1662         { "CP850", RTL_TEXTENCODING_IBM_850 },
1663         { "850", RTL_TEXTENCODING_IBM_850 },
1664         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
1665         { "IBM860", RTL_TEXTENCODING_IBM_860 },
1666         { "CP860", RTL_TEXTENCODING_IBM_860 },
1667         { "860", RTL_TEXTENCODING_IBM_860 },
1668         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
1669         { "IBM861", RTL_TEXTENCODING_IBM_861 },
1670         { "CP861", RTL_TEXTENCODING_IBM_861 },
1671         { "861", RTL_TEXTENCODING_IBM_861 },
1672         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
1673         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
1674         { "IBM863", RTL_TEXTENCODING_IBM_863 },
1675         { "CP863", RTL_TEXTENCODING_IBM_863 },
1676         { "863", RTL_TEXTENCODING_IBM_863 },
1677         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
1678         { "IBM865", RTL_TEXTENCODING_IBM_865 },
1679         { "CP865", RTL_TEXTENCODING_IBM_865 },
1680         { "865", RTL_TEXTENCODING_IBM_865 },
1681         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
1682         { "IBM775", RTL_TEXTENCODING_IBM_775 },
1683         { "CP775", RTL_TEXTENCODING_IBM_775 },
1684         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
1685         { "IBM852", RTL_TEXTENCODING_IBM_852 },
1686         { "CP852", RTL_TEXTENCODING_IBM_852 },
1687         { "852", RTL_TEXTENCODING_IBM_852 },
1688         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
1689         { "IBM855", RTL_TEXTENCODING_IBM_855 },
1690         { "CP855", RTL_TEXTENCODING_IBM_855 },
1691         { "855", RTL_TEXTENCODING_IBM_855 },
1692         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
1693         { "IBM857", RTL_TEXTENCODING_IBM_857 },
1694         { "CP857", RTL_TEXTENCODING_IBM_857 },
1695         { "857", RTL_TEXTENCODING_IBM_857 },
1696         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
1697         { "IBM862", RTL_TEXTENCODING_IBM_862 },
1698         { "CP862", RTL_TEXTENCODING_IBM_862 },
1699         { "862", RTL_TEXTENCODING_IBM_862 },
1700         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
1701         { "IBM864", RTL_TEXTENCODING_IBM_864 },
1702         { "CP864", RTL_TEXTENCODING_IBM_864 },
1703         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
1704         { "IBM866", RTL_TEXTENCODING_IBM_866 },
1705         { "CP866", RTL_TEXTENCODING_IBM_866 },
1706         { "866", RTL_TEXTENCODING_IBM_866 },
1707         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
1708         { "IBM869", RTL_TEXTENCODING_IBM_869 },
1709         { "CP869", RTL_TEXTENCODING_IBM_869 },
1710         { "869", RTL_TEXTENCODING_IBM_869 },
1711         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
1712         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
1713         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
1714         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
1715         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
1716         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
1717         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
1718         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
1719         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
1720         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
1721         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
1722         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
1723         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
1724         { "GB2312", RTL_TEXTENCODING_GB_2312 },
1725         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
1726         { "BIG5", RTL_TEXTENCODING_BIG5 },
1727         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
1728         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
1729         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
1730           RTL_TEXTENCODING_EUC_JP },
1731         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
1732         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
1733         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
1734         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
1735         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
1736         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
1737         { "UTF-7", RTL_TEXTENCODING_UTF7 },
1738         { "UTF-8", RTL_TEXTENCODING_UTF8 },
1739         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
1740         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
1741         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
1742         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
1743         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
1744         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
1745         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
1746         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
1747         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
1748         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
1749
1750 //============================================================================
1751 template< typename T >
1752 inline rtl_TextEncoding getCharsetEncoding_Impl(T const * pBegin,
1753                                                 T const * pEnd)
1754 {
1755     for (sal_Size i = 0; i < sizeof aEncodingMap / sizeof (EncodingEntry);
1756          ++i)
1757         if (INetMIME::equalIgnoreCase(pBegin, pEnd, aEncodingMap[i].m_aName))
1758             return aEncodingMap[i].m_eEncoding;
1759     return RTL_TEXTENCODING_DONTKNOW;
1760 }
1761
1762 }
1763
1764 //============================================================================
1765 // static
1766 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Char const * pBegin,
1767                                               sal_Char const * pEnd)
1768 {
1769     return getCharsetEncoding_Impl(pBegin, pEnd);
1770 }
1771
1772 //============================================================================
1773 // static
1774 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Unicode const * pBegin,
1775                                               sal_Unicode const * pEnd)
1776 {
1777     return getCharsetEncoding_Impl(pBegin, pEnd);
1778 }
1779
1780 //============================================================================
1781 // static
1782 INetMIMECharsetList_Impl *
1783 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding)
1784 {
1785     static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) };
1786
1787     static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) };
1788         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version
1789         // 1.0 of 1999 July 27
1790
1791     static const sal_uInt32 aISO88592Ranges[]
1792         = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1793             0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7,
1794             0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7,
1795             0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4,
1796             0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4,
1797             0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111,
1798             0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144,
1799             0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B,
1800             0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7,
1801             0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) };
1802         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version
1803         // 1.0 of 1999 July 27
1804
1805     static const sal_uInt32 aISO88593Ranges[]
1806         = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1807             0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4,
1808             0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2,
1809             0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC,
1810             0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131,
1811             0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C,
1812             0x2D8, 0x2D9, sal_uInt32(-1) };
1813         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version
1814         // 1.0 of 1999 July 27
1815
1816     static const sal_uInt32 aISO88594Ranges[]
1817         = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0,
1818             0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB,
1819             0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6,
1820             0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC,
1821             0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113,
1822             0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F,
1823             0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D,
1824             0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173,
1825             0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB,
1826             sal_uInt32(-1) };
1827         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version
1828         // 1.0 of 1999 July 27
1829
1830     static const sal_uInt32 aISO88595Ranges[]
1831         = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F,
1832             0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) };
1833         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version
1834         // 1.0 of 1999 July 27
1835
1836     static const sal_uInt32 aISO88596Ranges[]
1837         = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B,
1838             0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) };
1839         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version
1840         // 1.0 of 1999 July 27
1841
1842     static const sal_uInt32 aISO88597Ranges[]
1843         = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3,
1844             0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A,
1845             0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015,
1846             0x2018, 0x2019, sal_uInt32(-1) };
1847         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version
1848         // 1.0 of 1999 July 27
1849
1850     static const sal_uInt32 aISO88598Ranges[]
1851         = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7,
1852             0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017,
1853             sal_uInt32(-1) };
1854         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version
1855         // 1.1 of 2000-Jan-03
1856
1857     static const sal_uInt32 aISO88599Ranges[]
1858         = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF,
1859             0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) };
1860         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version
1861         // 1.0 of 1999 July 27
1862
1863     static const sal_uInt32 aISO885910Ranges[]
1864         = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7,
1865             0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6,
1866             0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB,
1867             0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101,
1868             0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119,
1869             0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138,
1870             0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161,
1871             0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015,
1872             sal_uInt32(-1) };
1873         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version
1874         // 1.1 of 1999 October 11
1875
1876     static const sal_uInt32 aISO885913Ranges[]
1877         = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE,
1878             0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6,
1879             0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF,
1880             0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC,
1881             0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113,
1882             0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F,
1883             0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D,
1884             0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B,
1885             0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E,
1886             sal_uInt32(-1) };
1887         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version
1888         // 1.0 of 1999 July 27
1889
1890     static const sal_uInt32 aISO885914Ranges[]
1891         = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE,
1892             0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF,
1893             0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121,
1894             0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F,
1895             0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B,
1896             0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) };
1897         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version
1898         // 1.0 of 1999 July 27
1899
1900     static const sal_uInt32 aISO885915Ranges[]
1901         = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7,
1902             0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1903             0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) };
1904         // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version
1905         // 1.0 of 1999 July 27
1906
1907     static const sal_uInt32 aKOI8RRanges[]
1908         = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2,
1909             0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451,
1910             0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321,
1911             0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510,
1912             0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524,
1913             0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C,
1914             0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C,
1915             0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) };
1916         // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT>
1917         // version 1.0 of 18 August 1999
1918
1919 #if defined WNT
1920     static const sal_uInt32 aWindows1252Ranges[]
1921         = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1922             0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC,
1923             0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022,
1924             0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC,
1925             0x2122, 0x2122, sal_uInt32(-1) };
1926         // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
1927         // CP1252.TXT> version 2.01 of 04/15/98
1928 #endif // WNT
1929
1930     INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl;
1931     switch (eEncoding)
1932     {
1933         case RTL_TEXTENCODING_MS_1252:
1934 #if defined WNT
1935             pList->prepend(Charset(RTL_TEXTENCODING_MS_1252,
1936                                    aWindows1252Ranges));
1937 #endif // WNT
1938         case RTL_TEXTENCODING_ISO_8859_1:
1939         case RTL_TEXTENCODING_UTF7:
1940         case RTL_TEXTENCODING_UTF8:
1941             break;
1942
1943         case RTL_TEXTENCODING_ISO_8859_2:
1944             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1945                                    aISO88592Ranges));
1946             break;
1947
1948         case RTL_TEXTENCODING_ISO_8859_3:
1949             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3,
1950                                    aISO88593Ranges));
1951             break;
1952
1953         case RTL_TEXTENCODING_ISO_8859_4:
1954             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
1955                                    aISO88594Ranges));
1956             break;
1957
1958         case RTL_TEXTENCODING_ISO_8859_5:
1959             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1960                                    aISO88595Ranges));
1961             break;
1962
1963         case RTL_TEXTENCODING_ISO_8859_6:
1964             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
1965                                    aISO88596Ranges));
1966             break;
1967
1968         case RTL_TEXTENCODING_ISO_8859_7:
1969             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
1970                                    aISO88597Ranges));
1971             break;
1972
1973         case RTL_TEXTENCODING_ISO_8859_8:
1974             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
1975                                    aISO88598Ranges));
1976             break;
1977
1978         case RTL_TEXTENCODING_ISO_8859_9:
1979             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
1980                                    aISO88599Ranges));
1981             break;
1982
1983         case RTL_TEXTENCODING_ISO_8859_10:
1984             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10,
1985                                    aISO885910Ranges));
1986             break;
1987
1988         case RTL_TEXTENCODING_ISO_8859_13:
1989             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13,
1990                                    aISO885913Ranges));
1991             break;
1992
1993         case RTL_TEXTENCODING_ISO_8859_14:
1994             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14,
1995                                    aISO885914Ranges));
1996             break;
1997
1998         case RTL_TEXTENCODING_ISO_8859_15:
1999             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15,
2000                                    aISO885915Ranges));
2001             break;
2002
2003         case RTL_TEXTENCODING_MS_1250:
2004             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
2005                                    aISO88592Ranges));
2006             break;
2007
2008         case RTL_TEXTENCODING_MS_1251:
2009             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2010                                    aISO88595Ranges));
2011             break;
2012
2013         case RTL_TEXTENCODING_MS_1253:
2014             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
2015                                    aISO88597Ranges));
2016             break;
2017
2018         case RTL_TEXTENCODING_MS_1254:
2019             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
2020                                    aISO88599Ranges));
2021             break;
2022
2023         case RTL_TEXTENCODING_MS_1255:
2024             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
2025                                    aISO88598Ranges));
2026             break;
2027
2028         case RTL_TEXTENCODING_MS_1256:
2029             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
2030                                    aISO88596Ranges));
2031             break;
2032
2033         case RTL_TEXTENCODING_MS_1257:
2034             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
2035                                    aISO88594Ranges));
2036             break;
2037
2038         case RTL_TEXTENCODING_KOI8_R:
2039             pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2040                                    aISO88595Ranges));
2041             pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges));
2042             break;
2043
2044         default: //@@@ more cases are missing!
2045             DBG_ERROR("INetMIME::createPreferredCharsetList():"
2046                           " Unsupported encoding");
2047             break;
2048     }
2049     pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges));
2050     pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges));
2051     return pList;
2052 }
2053
2054 //============================================================================
2055 // static
2056 sal_Unicode * INetMIME::convertToUnicode(const sal_Char * pBegin,
2057                                          const sal_Char * pEnd,
2058                                          rtl_TextEncoding eEncoding,
2059                                          sal_Size & rSize)
2060 {
2061     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2062         return 0;
2063     rtl_TextToUnicodeConverter hConverter
2064         = rtl_createTextToUnicodeConverter(eEncoding);
2065     rtl_TextToUnicodeContext hContext
2066         = rtl_createTextToUnicodeContext(hConverter);
2067     sal_Unicode * pBuffer;
2068     sal_uInt32 nInfo;
2069     for (sal_Size nBufferSize = pEnd - pBegin;;
2070          nBufferSize += nBufferSize / 3 + 1)
2071     {
2072         pBuffer = new sal_Unicode[nBufferSize];
2073         sal_Size nSrcCvtBytes;
2074         rSize = rtl_convertTextToUnicode(
2075                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2076                     nBufferSize,
2077                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
2078                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
2079                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
2080                     &nInfo, &nSrcCvtBytes);
2081         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
2082             break;
2083         delete[] pBuffer;
2084         rtl_resetTextToUnicodeContext(hConverter, hContext);
2085     }
2086     rtl_destroyTextToUnicodeContext(hConverter, hContext);
2087     rtl_destroyTextToUnicodeConverter(hConverter);
2088     if (nInfo != 0)
2089     {
2090         delete[] pBuffer;
2091         pBuffer = 0;
2092     }
2093     return pBuffer;
2094 }
2095
2096 //============================================================================
2097 // static
2098 sal_Char * INetMIME::convertFromUnicode(const sal_Unicode * pBegin,
2099                                         const sal_Unicode * pEnd,
2100                                         rtl_TextEncoding eEncoding,
2101                                         sal_Size & rSize)
2102 {
2103     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2104         return 0;
2105     rtl_UnicodeToTextConverter hConverter
2106         = rtl_createUnicodeToTextConverter(eEncoding);
2107     rtl_UnicodeToTextContext hContext
2108         = rtl_createUnicodeToTextContext(hConverter);
2109     sal_Char * pBuffer;
2110     sal_uInt32 nInfo;
2111     for (sal_Size nBufferSize = pEnd - pBegin;;
2112          nBufferSize += nBufferSize / 3 + 1)
2113     {
2114         pBuffer = new sal_Char[nBufferSize];
2115         sal_Size nSrcCvtBytes;
2116         rSize = rtl_convertUnicodeToText(
2117                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2118                     nBufferSize,
2119                     RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
2120                         | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
2121                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
2122                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
2123                     &nInfo, &nSrcCvtBytes);
2124         if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
2125             break;
2126         delete[] pBuffer;
2127         rtl_resetUnicodeToTextContext(hConverter, hContext);
2128     }
2129     rtl_destroyUnicodeToTextContext(hConverter, hContext);
2130     rtl_destroyUnicodeToTextConverter(hConverter);
2131     if (nInfo != 0)
2132     {
2133         delete[] pBuffer;
2134         pBuffer = 0;
2135     }
2136     return pBuffer;
2137 }
2138
2139 //============================================================================
2140 // static
2141 void INetMIME::writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
2142 {
2143     // See RFC 2279 for a discussion of UTF-8.
2144     DBG_ASSERT(nChar < 0x80000000, "INetMIME::writeUTF8(): Bad char");
2145
2146     if (nChar < 0x80)
2147         rSink << sal_Char(nChar);
2148     else if (nChar < 0x800)
2149         rSink << sal_Char(nChar >> 6 | 0xC0)
2150               << sal_Char((nChar & 0x3F) | 0x80);
2151     else if (nChar < 0x10000)
2152         rSink << sal_Char(nChar >> 12 | 0xE0)
2153               << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2154               << sal_Char((nChar & 0x3F) | 0x80);
2155     else if (nChar < 0x200000)
2156         rSink << sal_Char(nChar >> 18 | 0xF0)
2157               << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2158               << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2159               << sal_Char((nChar & 0x3F) | 0x80);
2160     else if (nChar < 0x4000000)
2161         rSink << sal_Char(nChar >> 24 | 0xF8)
2162               << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2163               << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2164               << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2165               << sal_Char((nChar & 0x3F) | 0x80);
2166     else
2167         rSink << sal_Char(nChar >> 30 | 0xFC)
2168               << sal_Char((nChar >> 24 & 0x3F) | 0x80)
2169               << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2170               << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2171               << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2172               << sal_Char((nChar & 0x3F) | 0x80);
2173 }
2174
2175 //============================================================================
2176 // static
2177 void INetMIME::writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
2178                              int nMinDigits)
2179 {
2180     sal_Char aBuffer[10];
2181         // max unsigned 32 bit value (4294967295) has 10 places
2182     sal_Char * p = aBuffer;
2183     for (; nValue > 0; nValue /= 10)
2184         *p++ = sal_Char(getDigit(nValue % 10));
2185     nMinDigits -= p - aBuffer;
2186     while (nMinDigits-- > 0)
2187         rSink << '0';
2188     while (p != aBuffer)
2189         rSink << *--p;
2190 }
2191
2192 //============================================================================
2193 // static
2194 void INetMIME::writeDateTime(INetMIMEOutputSink & rSink,
2195                              const DateTime & rUTC)
2196 {
2197     static const sal_Char aDay[7][3]
2198         = { { 'M', 'o', 'n' },
2199             { 'T', 'u', 'e' },
2200             { 'W', 'e', 'd' },
2201             { 'T', 'h', 'u' },
2202             { 'F', 'r', 'i' },
2203             { 'S', 'a', 't' },
2204             { 'S', 'u', 'n' } };
2205     const sal_Char * pTheDay = aDay[rUTC.GetDayOfWeek()];
2206     rSink.write(pTheDay, pTheDay + 3);
2207     rSink << ", ";
2208     writeUnsigned(rSink, rUTC.GetDay());
2209     rSink << ' ';
2210     static const sal_Char aMonth[12][3]
2211         = { { 'J', 'a', 'n' },
2212             { 'F', 'e', 'b' },
2213             { 'M', 'a', 'r' },
2214             { 'A', 'p', 'r' },
2215             { 'M', 'a', 'y' },
2216             { 'J', 'u', 'n' },
2217             { 'J', 'u', 'l' },
2218             { 'A', 'u', 'g' },
2219             { 'S', 'e', 'p' },
2220             { 'O', 'c', 't' },
2221             { 'N', 'o', 'v' },
2222             { 'D', 'e', 'c' } };
2223     const sal_Char * pTheMonth = aMonth[rUTC.GetMonth() - 1];
2224     rSink.write(pTheMonth, pTheMonth + 3);
2225     rSink << ' ';
2226     writeUnsigned(rSink, rUTC.GetYear());
2227     rSink << ' ';
2228     writeUnsigned(rSink, rUTC.GetHour(), 2);
2229     rSink << ':';
2230     writeUnsigned(rSink, rUTC.GetMin(), 2);
2231     rSink << ':';
2232     writeUnsigned(rSink, rUTC.GetSec(), 2);
2233     rSink << " +0000";
2234 }
2235
2236 //============================================================================
2237 // static
2238 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2239                                     HeaderFieldType eType,
2240                                     const ByteString & rBody,
2241                                     rtl_TextEncoding ePreferredEncoding,
2242                                     bool bInitialSpace)
2243 {
2244     writeHeaderFieldBody(rSink, eType,
2245                          UniString(rBody, RTL_TEXTENCODING_UTF8),
2246                          ePreferredEncoding, bInitialSpace);
2247 }
2248
2249 //============================================================================
2250 // static
2251 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2252                                     HeaderFieldType eType,
2253                                     const UniString & rBody,
2254                                     rtl_TextEncoding ePreferredEncoding,
2255                                     bool bInitialSpace)
2256 {
2257     if (eType == HEADER_FIELD_TEXT)
2258     {
2259         INetMIMEEncodedWordOutputSink
2260             aOutput(rSink, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT,
2261                     bInitialSpace ?
2262                         INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2263                         INetMIMEEncodedWordOutputSink::SPACE_NO,
2264                     ePreferredEncoding);
2265         aOutput.write(rBody.GetBuffer(), rBody.GetBuffer() + rBody.Len());
2266         aOutput.flush();
2267     }
2268     else
2269     {
2270         enum Brackets { BRACKETS_OUTSIDE, BRACKETS_OPENING, BRACKETS_INSIDE };
2271         Brackets eBrackets = BRACKETS_OUTSIDE;
2272
2273         const sal_Unicode * pBodyPtr = rBody.GetBuffer();
2274         const sal_Unicode * pBodyEnd = pBodyPtr + rBody.Len();
2275         while (pBodyPtr != pBodyEnd)
2276             switch (*pBodyPtr)
2277             {
2278                 case '\t':
2279                 case ' ':
2280                     // A WSP adds to accumulated space:
2281                     bInitialSpace = true;
2282                     ++pBodyPtr;
2283                     break;
2284
2285                 case '(':
2286                 {
2287                     // Write a pending '<' if necessary:
2288                     if (eBrackets == BRACKETS_OPENING)
2289                     {
2290                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2291                                 >= rSink.getLineLengthLimit())
2292                             rSink << INetMIMEOutputSink::endl << ' ';
2293                         else if (bInitialSpace)
2294                             rSink << ' ';
2295                         rSink << '<';
2296                         bInitialSpace = false;
2297                         eBrackets = BRACKETS_INSIDE;
2298                     }
2299
2300                     // Write the comment, introducing encoded-words where
2301                     // necessary:
2302                     int nLevel = 0;
2303                     INetMIMEEncodedWordOutputSink
2304                         aOutput(
2305                             rSink,
2306                             INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT,
2307                             INetMIMEEncodedWordOutputSink::SPACE_NO,
2308                             ePreferredEncoding);
2309                     while (pBodyPtr != pBodyEnd)
2310                         switch (*pBodyPtr)
2311                         {
2312                             case '(':
2313                                 aOutput.flush();
2314                                 if (rSink.getColumn()
2315                                             + (bInitialSpace ? 1 : 0)
2316                                         >= rSink.getLineLengthLimit())
2317                                     rSink << INetMIMEOutputSink::endl << ' ';
2318                                 else if (bInitialSpace)
2319                                     rSink << ' ';
2320                                 rSink << '(';
2321                                 bInitialSpace = false;
2322                                 ++nLevel;
2323                                 ++pBodyPtr;
2324                                 break;
2325
2326                             case ')':
2327                                 aOutput.flush();
2328                                 if (rSink.getColumn()
2329                                         >= rSink.getLineLengthLimit())
2330                                     rSink << INetMIMEOutputSink::endl << ' ';
2331                                 rSink << ')';
2332                                 ++pBodyPtr;
2333                                 if (--nLevel == 0)
2334                                     goto comment_done;
2335                                 break;
2336
2337                             case '\\':
2338                                 if (++pBodyPtr == pBodyEnd)
2339                                     break;
2340                             default:
2341                                 aOutput << *pBodyPtr++;
2342                                 break;
2343                         }
2344                 comment_done:
2345                     break;
2346                 }
2347
2348                 case '<':
2349                     // Write an already pending '<' if necessary:
2350                     if (eBrackets == BRACKETS_OPENING)
2351                     {
2352                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2353                                 >= rSink.getLineLengthLimit())
2354                             rSink << INetMIMEOutputSink::endl << ' ';
2355                         else if (bInitialSpace)
2356                             rSink << ' ';
2357                         rSink << '<';
2358                         bInitialSpace = false;
2359                     }
2360
2361                     // Remember this '<' as pending, and open a bracketed
2362                     // block:
2363                     eBrackets = BRACKETS_OPENING;
2364                     ++pBodyPtr;
2365                     break;
2366
2367                 case '>':
2368                     // Write a pending '<' if necessary:
2369                     if (eBrackets == BRACKETS_OPENING)
2370                     {
2371                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2372                                 >= rSink.getLineLengthLimit())
2373                             rSink << INetMIMEOutputSink::endl << ' ';
2374                         else if (bInitialSpace)
2375                             rSink << ' ';
2376                         rSink << '<';
2377                         bInitialSpace = false;
2378                     }
2379
2380                     // Write this '>', and close any bracketed block:
2381                     if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2382                             >= rSink.getLineLengthLimit())
2383                         rSink << INetMIMEOutputSink::endl << ' ';
2384                     else if (bInitialSpace)
2385                         rSink << ' ';
2386                     rSink << '>';
2387                     bInitialSpace = false;
2388                     eBrackets = BRACKETS_OUTSIDE;
2389                     ++pBodyPtr;
2390                     break;
2391
2392                 case ',':
2393                 case ':':
2394                 case ';':
2395                 case '\\':
2396                 case ']':
2397                     // Write a pending '<' if necessary:
2398                     if (eBrackets == BRACKETS_OPENING)
2399                     {
2400                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2401                                 >= rSink.getLineLengthLimit())
2402                             rSink << INetMIMEOutputSink::endl << ' ';
2403                         else if (bInitialSpace)
2404                             rSink << ' ';
2405                         rSink << '<';
2406                         bInitialSpace = false;
2407                         eBrackets = BRACKETS_INSIDE;
2408                     }
2409
2410                     // Write this specials:
2411                     if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2412                             >= rSink.getLineLengthLimit())
2413                         rSink << INetMIMEOutputSink::endl << ' ';
2414                     else if (bInitialSpace)
2415                         rSink << ' ';
2416                     rSink << sal_Char(*pBodyPtr++);
2417                     bInitialSpace = false;
2418                     break;
2419
2420                 case '\x0D': // CR
2421                     // A <CRLF WSP> adds to accumulated space, a <CR> not
2422                     // followed by <LF WSP> starts 'junk':
2423                     if (startsWithLineFolding(pBodyPtr, pBodyEnd))
2424                     {
2425                         bInitialSpace = true;
2426                         pBodyPtr += 3;
2427                         break;
2428                     }
2429                 default:
2430                 {
2431                     // The next token is either one of <"." / "@" / atom /
2432                     // quoted-string / domain-literal>, or it's 'junk'; if it
2433                     // is not 'junk', it is either a 'phrase' (i.e., it may
2434                     // contain encoded-words) or a 'non-phrase' (i.e., it may
2435                     // not contain encoded-words):
2436                     enum Entity { ENTITY_JUNK, ENTITY_NON_PHRASE,
2437                                   ENTITY_PHRASE };
2438                     Entity eEntity = ENTITY_JUNK;
2439                     switch (*pBodyPtr)
2440                     {
2441                         case '.':
2442                         case '@':
2443                         case '[':
2444                             // A token of <"." / "@" / domain-literal> always
2445                             // starts a 'non-phrase':
2446                             eEntity = ENTITY_NON_PHRASE;
2447                             break;
2448
2449                         default:
2450                             if (isUSASCII(*pBodyPtr)
2451                                 && !isAtomChar(*pBodyPtr))
2452                             {
2453                                 eEntity = ENTITY_JUNK;
2454                                 break;
2455                             }
2456                         case '"':
2457                             // A token of <atom / quoted-string> can either be
2458                             // a 'phrase' or a 'non-phrase':
2459                             switch (eType)
2460                             {
2461                                 case HEADER_FIELD_STRUCTURED:
2462                                     eEntity = ENTITY_NON_PHRASE;
2463                                     break;
2464
2465                                 case HEADER_FIELD_PHRASE:
2466                                     eEntity = ENTITY_PHRASE;
2467                                     break;
2468
2469                                 case HEADER_FIELD_MESSAGE_ID:
2470                                     // A 'phrase' if and only if outside any
2471                                     // bracketed block:
2472                                     eEntity
2473                                         = eBrackets == BRACKETS_OUTSIDE ?
2474                                               ENTITY_PHRASE :
2475                                               ENTITY_NON_PHRASE;
2476                                     break;
2477
2478                                 case HEADER_FIELD_ADDRESS:
2479                                 {
2480                                     // A 'non-phrase' if and only if, after
2481                                     // skipping this token and any following
2482                                     // <linear-white-space> and <comment>s,
2483                                     // there is no token left, or the next
2484                                     // token is any of <"." / "@" / ">" / ","
2485                                     // / ";">, or the next token is <":"> and
2486                                     // is within a bracketed block:
2487                                     const sal_Unicode * pLookAhead = pBodyPtr;
2488                                     if (*pLookAhead == '"')
2489                                     {
2490                                         pLookAhead
2491                                             = skipQuotedString(pLookAhead,
2492                                                                pBodyEnd);
2493                                         if (pLookAhead == pBodyPtr)
2494                                             pLookAhead = pBodyEnd;
2495                                     }
2496                                     else
2497                                         while (pLookAhead != pBodyEnd
2498                                                && (isAtomChar(*pLookAhead)
2499                                                    || !isUSASCII(
2500                                                            *pLookAhead)))
2501                                             ++pLookAhead;
2502                                     while (pLookAhead != pBodyEnd)
2503                                         switch (*pLookAhead)
2504                                         {
2505                                             case '\t':
2506                                             case ' ':
2507                                                 ++pLookAhead;
2508                                                 break;
2509
2510                                             case '(':
2511                                             {
2512                                                 const sal_Unicode * pPast
2513                                                     = skipComment(pLookAhead,
2514                                                                   pBodyEnd);
2515                                                 pLookAhead
2516                                                     = pPast == pLookAhead ?
2517                                                           pBodyEnd : pPast;
2518                                                 break;
2519                                             }
2520
2521                                             case ',':
2522                                             case '.':
2523                                             case ';':
2524                                             case '>':
2525                                             case '@':
2526                                                 eEntity = ENTITY_NON_PHRASE;
2527                                                 goto entity_determined;
2528
2529                                             case ':':
2530                                                 eEntity
2531                                                     = eBrackets
2532                                                          == BRACKETS_OUTSIDE ?
2533                                                           ENTITY_PHRASE :
2534                                                           ENTITY_NON_PHRASE;
2535                                                 goto entity_determined;
2536
2537                                             case '\x0D': // CR
2538                                                 if (startsWithLineFolding(
2539                                                         pLookAhead, pBodyEnd))
2540                                                 {
2541                                                     pLookAhead += 3;
2542                                                     break;
2543                                                 }
2544                                             default:
2545                                                 eEntity = ENTITY_PHRASE;
2546                                                 goto entity_determined;
2547                                         }
2548                                     eEntity = ENTITY_NON_PHRASE;
2549                                 entity_determined:
2550                                     break;
2551                                 }
2552
2553                                 case HEADER_FIELD_TEXT:
2554                                     OSL_ASSERT(false);
2555                                     break;
2556                             }
2557
2558                             // In a 'non-phrase', a non-US-ASCII character
2559                             // cannot be part of an <atom>, but instead the
2560                             // whole entity is 'junk' rather than 'non-
2561                             // phrase':
2562                             if (eEntity == ENTITY_NON_PHRASE
2563                                 && !isUSASCII(*pBodyPtr))
2564                                 eEntity = ENTITY_JUNK;
2565                             break;
2566                     }
2567
2568                     switch (eEntity)
2569                     {
2570                         case ENTITY_JUNK:
2571                         {
2572                             // Write a pending '<' if necessary:
2573                             if (eBrackets == BRACKETS_OPENING)
2574                             {
2575                                 if (rSink.getColumn()
2576                                             + (bInitialSpace ? 1 : 0)
2577                                         >= rSink.getLineLengthLimit())
2578                                     rSink << INetMIMEOutputSink::endl << ' ';
2579                                 else if (bInitialSpace)
2580                                     rSink << ' ';
2581                                 rSink << '<';
2582                                 bInitialSpace = false;
2583                                 eBrackets = BRACKETS_INSIDE;
2584                             }
2585
2586                             // Calculate the length of in- and output:
2587                             const sal_Unicode * pStart = pBodyPtr;
2588                             sal_Size nLength = 0;
2589                             bool bModify = false;
2590                             bool bEnd = false;
2591                             while (pBodyPtr != pBodyEnd && !bEnd)
2592                                 switch (*pBodyPtr)
2593                                 {
2594                                     case '\x0D': // CR
2595                                         if (startsWithLineFolding(pBodyPtr,
2596                                                                   pBodyEnd))
2597                                             bEnd = true;
2598                                         else if (startsWithLineBreak(
2599                                                      pBodyPtr, pBodyEnd))
2600                                         {
2601                                             nLength += 3;
2602                                             bModify = true;
2603                                             pBodyPtr += 2;
2604                                         }
2605                                         else
2606                                         {
2607                                             ++nLength;
2608                                             ++pBodyPtr;
2609                                         }
2610                                         break;
2611
2612                                     case '\t':
2613                                     case ' ':
2614                                         bEnd = true;
2615                                         break;
2616
2617                                     default:
2618                                         if (isVisible(*pBodyPtr))
2619                                             bEnd = true;
2620                                         else if (isUSASCII(*pBodyPtr))
2621                                         {
2622                                             ++nLength;
2623                                             ++pBodyPtr;
2624                                         }
2625                                         else
2626                                         {
2627                                             nLength += getUTF8OctetCount(
2628                                                            *pBodyPtr++);
2629                                             bModify = true;
2630                                         }
2631                                         break;
2632                                 }
2633
2634                             // Write the output:
2635                             if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2636                                         + nLength
2637                                     > rSink.getLineLengthLimit())
2638                                 rSink << INetMIMEOutputSink::endl << ' ';
2639                             else if (bInitialSpace)
2640                                 rSink << ' ';
2641                             bInitialSpace = false;
2642                             if (bModify)
2643                                 while (pStart != pBodyPtr)
2644                                     if (startsWithLineBreak(pStart, pBodyPtr))
2645                                     {
2646                                         rSink << "\x0D\\\x0A"; // CR, '\', LF
2647                                         pStart += 2;
2648                                     }
2649                                     else
2650                                         writeUTF8(rSink, *pStart++);
2651                             else
2652                                 rSink.write(pStart, pBodyPtr);
2653                             break;
2654                         }
2655
2656                         case ENTITY_NON_PHRASE:
2657                         {
2658                             // Calculate the length of in- and output:
2659                             const sal_Unicode * pStart = pBodyPtr;
2660                             sal_Size nLength = 0;
2661                             bool bBracketedBlock = false;
2662                             bool bSymbol = *pStart != '.' && *pStart != '@';
2663                             bool bModify = false;
2664                             bool bEnd = false;
2665                             while (pBodyPtr != pBodyEnd && !bEnd)
2666                                 switch (*pBodyPtr)
2667                                 {
2668                                     case '\t':
2669                                     case ' ':
2670                                     case '\x0D': // CR
2671                                     {
2672                                         const sal_Unicode * pLookAhead
2673                                             = skipLinearWhiteSpace(pBodyPtr,
2674                                                                    pBodyEnd);
2675                                         if (pLookAhead < pBodyEnd
2676                                             && (bSymbol ?
2677                                                     isAtomChar(*pLookAhead)
2678                                                     || *pLookAhead == '"'
2679                                                     || *pLookAhead == '[' :
2680                                                     *pLookAhead == '.'
2681                                                     || *pLookAhead == '@'
2682                                                     || (*pLookAhead == '>'
2683                                                        && eType
2684                                                     >= HEADER_FIELD_MESSAGE_ID
2685                                                        && eBrackets
2686                                                          == BRACKETS_OPENING)))
2687                                         {
2688                                             bModify = true;
2689                                             pBodyPtr = pLookAhead;
2690                                         }
2691                                         else
2692                                             bEnd = true;
2693                                         break;
2694                                     }
2695
2696                                     case '"':
2697                                         if (bSymbol)
2698                                         {
2699                                             pBodyPtr
2700                                                 = scanQuotedBlock(pBodyPtr,
2701                                                                   pBodyEnd,
2702                                                                   '"', '"',
2703                                                                   nLength,
2704                                                                   bModify);
2705                                             bSymbol = false;
2706                                         }
2707                                         else
2708                                             bEnd = true;
2709                                         break;
2710
2711                                     case '[':
2712                                         if (bSymbol)
2713                                         {
2714                                             pBodyPtr
2715                                                 = scanQuotedBlock(pBodyPtr,
2716                                                                   pBodyEnd,
2717                                                                   '[', ']',
2718                                                                   nLength,
2719                                                                   bModify);
2720                                             bSymbol = false;
2721                                         }
2722                                         else
2723                                             bEnd = true;
2724                                         break;
2725
2726                                     case '.':
2727                                     case '@':
2728                                         if (bSymbol)
2729                                             bEnd = true;
2730                                         else
2731                                         {
2732                                             ++nLength;
2733                                             bSymbol = true;
2734                                             ++pBodyPtr;
2735                                         }
2736                                         break;
2737
2738                                     case '>':
2739                                         if (eBrackets == BRACKETS_OPENING
2740                                             && eType
2741                                                    >= HEADER_FIELD_MESSAGE_ID)
2742                                         {
2743                                             ++nLength;
2744                                             bBracketedBlock = true;
2745                                             ++pBodyPtr;
2746                                         }
2747                                         bEnd = true;
2748                                         break;
2749
2750                                     default:
2751                                         if (isAtomChar(*pBodyPtr) && bSymbol)
2752                                         {
2753                                             while (pBodyPtr != pBodyEnd
2754                                                    && isAtomChar(*pBodyPtr))
2755                                             {
2756                                                 ++nLength;
2757                                                 ++pBodyPtr;
2758                                             }
2759                                             bSymbol = false;
2760                                         }
2761                                         else
2762                                         {
2763                                             if (!isUSASCII(*pBodyPtr))
2764                                                 bModify = true;
2765                                             bEnd = true;
2766                                         }
2767                                         break;
2768                                 }
2769
2770                             // Write a pending '<' if necessary:
2771                             if (eBrackets == BRACKETS_OPENING
2772                                 && !bBracketedBlock)
2773                             {
2774                                 if (rSink.getColumn()
2775                                             + (bInitialSpace ? 1 : 0)
2776                                         >= rSink.getLineLengthLimit())
2777                                     rSink << INetMIMEOutputSink::endl << ' ';
2778                                 else if (bInitialSpace)
2779                                     rSink << ' ';
2780                                 rSink << '<';
2781                                 bInitialSpace = false;
2782                                 eBrackets = BRACKETS_INSIDE;
2783                             }
2784
2785                             // Write the output:
2786                             if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2787                                         + nLength
2788                                     > rSink.getLineLengthLimit())
2789                                 rSink << INetMIMEOutputSink::endl << ' ';
2790                             else if (bInitialSpace)
2791                                 rSink << ' ';
2792                             bInitialSpace = false;
2793                             if (bBracketedBlock)
2794                             {
2795                                 rSink << '<';
2796                                 eBrackets = BRACKETS_OUTSIDE;
2797                             }
2798                             if (bModify)
2799                             {
2800                                 enum Mode { MODE_PLAIN, MODE_QUOTED_STRING,
2801                                             MODE_DOMAIN_LITERAL };
2802                                 Mode eMode = MODE_PLAIN;
2803                                 while (pStart != pBodyPtr)
2804                                     switch (*pStart)
2805                                     {
2806                                         case '\x0D': // CR
2807                                             if (startsWithLineFolding(
2808                                                     pStart, pBodyPtr))
2809                                             {
2810                                                 if (eMode != MODE_PLAIN)
2811                                                     rSink << sal_Char(
2812                                                                  pStart[2]);
2813                                                 pStart += 3;
2814                                             }
2815                                             else if (startsWithLineBreak(
2816                                                          pStart, pBodyPtr))
2817                                             {
2818                                                 rSink << "\x0D\\\x0A";
2819                                                     // CR, '\', LF
2820                                                 pStart += 2;
2821                                             }
2822                                             else
2823                                             {
2824                                                 rSink << '\x0D'; // CR
2825                                                 ++pStart;
2826                                             }
2827                                             break;
2828
2829                                         case '\t':
2830                                         case ' ':
2831                                             if (eMode != MODE_PLAIN)
2832                                                 rSink << sal_Char(*pStart);
2833                                             ++pStart;
2834                                             break;
2835
2836                                         case '"':
2837                                             if (eMode == MODE_PLAIN)
2838                                                 eMode = MODE_QUOTED_STRING;
2839                                             else if (eMode
2840                                                         == MODE_QUOTED_STRING)
2841                                                 eMode = MODE_PLAIN;
2842                                             rSink << '"';
2843                                             ++pStart;
2844                                             break;
2845
2846                                         case '[':
2847                                             if (eMode == MODE_PLAIN)
2848                                                 eMode = MODE_DOMAIN_LITERAL;
2849                                             rSink << '[';
2850                                             ++pStart;
2851                                             break;
2852
2853                                         case ']':
2854                                             if (eMode == MODE_DOMAIN_LITERAL)
2855                                                 eMode = MODE_PLAIN;
2856                                             rSink << ']';
2857                                             ++pStart;
2858                                             break;
2859
2860                                         case '\\':
2861                                             rSink << '\\';
2862                                             if (++pStart < pBodyPtr)
2863                                                 writeUTF8(rSink, *pStart++);
2864                                             break;
2865
2866                                         default:
2867                                             writeUTF8(rSink, *pStart++);
2868                                             break;
2869                                     }
2870                             }
2871                             else
2872                                 rSink.write(pStart, pBodyPtr);
2873                             break;
2874                         }
2875
2876                         case ENTITY_PHRASE:
2877                         {
2878                             // Write a pending '<' if necessary:
2879                             if (eBrackets == BRACKETS_OPENING)
2880                             {
2881                                 if (rSink.getColumn()
2882                                             + (bInitialSpace ? 1 : 0)
2883                                         >= rSink.getLineLengthLimit())
2884                                     rSink << INetMIMEOutputSink::endl << ' ';
2885                                 else if (bInitialSpace)
2886                                     rSink << ' ';
2887                                 rSink << '<';
2888                                 bInitialSpace = false;
2889                                 eBrackets = BRACKETS_INSIDE;
2890                             }
2891
2892                             // Calculate the length of in- and output:
2893                             const sal_Unicode * pStart = pBodyPtr;
2894                             bool bQuotedString = false;
2895                             bool bEnd = false;
2896                             while (pBodyPtr != pBodyEnd && !bEnd)
2897                                 switch (*pBodyPtr)
2898                                 {
2899                                     case '\t':
2900                                     case ' ':
2901                                     case '\x0D': // CR
2902                                         if (bQuotedString)
2903                                             ++pBodyPtr;
2904                                         else
2905                                         {
2906                                             const sal_Unicode * pLookAhead
2907                                                 = skipLinearWhiteSpace(
2908                                                       pBodyPtr, pBodyEnd);
2909                                             if (pLookAhead != pBodyEnd
2910                                                 && (isAtomChar(*pLookAhead)
2911                                                     || !isUSASCII(*pLookAhead)
2912                                                     || *pLookAhead == '"'))
2913                                                 pBodyPtr = pLookAhead;
2914                                             else
2915                                                 bEnd = true;
2916                                         }
2917                                         break;
2918
2919                                     case '"':
2920                                         bQuotedString = !bQuotedString;
2921                                         ++pBodyPtr;
2922                                         break;
2923
2924                                     case '\\':
2925                                         if (bQuotedString)
2926                                         {
2927                                             if (++pBodyPtr != pBodyEnd)
2928                                                 ++pBodyPtr;
2929                                         }
2930                                         else
2931                                             bEnd = true;
2932                                         break;
2933
2934                                     default:
2935                                         if (bQuotedString
2936                                             || isAtomChar(*pBodyPtr)
2937                                             || !isUSASCII(*pBodyPtr))
2938                                             ++pBodyPtr;
2939                                         else
2940                                             bEnd = true;
2941                                         break;
2942                                 }
2943
2944                             // Write the phrase, introducing encoded-words
2945                             // where necessary:
2946                             INetMIMEEncodedWordOutputSink
2947                                 aOutput(
2948                                     rSink,
2949                                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,
2950                                     bInitialSpace ?
2951                                  INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2952                                  INetMIMEEncodedWordOutputSink::SPACE_ENCODED,
2953                                ePreferredEncoding);
2954                             while (pStart != pBodyPtr)
2955                                 switch (*pStart)
2956                                 {
2957                                     case '"':
2958                                         ++pStart;
2959                                         break;
2960
2961                                     case '\\':
2962                                         if (++pStart != pBodyPtr)
2963                                             aOutput << *pStart++;
2964                                         break;
2965
2966                                     case '\x0D': // CR
2967                                         pStart += 2;
2968                                         aOutput << *pStart++;
2969                                         break;
2970
2971                                     default:
2972                                         aOutput << *pStart++;
2973                                         break;
2974                                 }
2975                             bInitialSpace = aOutput.flush();
2976                             break;
2977                         }
2978                     }
2979                     break;
2980                 }
2981             }
2982     }
2983 }
2984
2985 //============================================================================
2986 // static
2987 bool INetMIME::translateUTF8Char(const sal_Char *& rBegin,
2988                                  const sal_Char * pEnd,
2989                                  rtl_TextEncoding eEncoding,
2990                                  sal_uInt32 & rCharacter)
2991 {
2992     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
2993         || static_cast< unsigned char >(*rBegin) >= 0xFE)
2994         return false;
2995
2996     int nCount;
2997     sal_uInt32 nMin;
2998     sal_uInt32 nUCS4;
2999     const sal_Char * p = rBegin;
3000     if (static_cast< unsigned char >(*p) < 0xE0)
3001     {
3002         nCount = 1;
3003         nMin = 0x80;
3004         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
3005     }
3006     else if (static_cast< unsigned char >(*p) < 0xF0)
3007     {
3008         nCount = 2;
3009         nMin = 0x800;
3010         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
3011     }
3012     else if (static_cast< unsigned char >(*p) < 0xF8)
3013     {
3014         nCount = 3;
3015         nMin = 0x10000;
3016         nUCS4 = static_cast< unsigned char >(*p) & 7;
3017     }
3018     else if (static_cast< unsigned char >(*p) < 0xFC)
3019     {
3020         nCount = 4;
3021         nMin = 0x200000;
3022         nUCS4 = static_cast< unsigned char >(*p) & 3;
3023     }
3024     else
3025     {
3026         nCount = 5;
3027         nMin = 0x4000000;
3028         nUCS4 = static_cast< unsigned char >(*p) & 1;
3029     }
3030     ++p;
3031
3032     for (; nCount-- > 0; ++p)
3033         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
3034             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
3035         else
3036             return false;
3037
3038     if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
3039         return false;
3040
3041     if (eEncoding >= RTL_TEXTENCODING_UCS4)
3042         rCharacter = nUCS4;
3043     else
3044     {
3045         sal_Unicode aUTF16[2];
3046         const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
3047         sal_Size nSize;
3048         sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
3049                                                 nSize);
3050         if (!pBuffer)
3051             return false;
3052         DBG_ASSERT(nSize == 1,
3053                    "INetMIME::translateUTF8Char(): Bad conversion");
3054         rCharacter = *pBuffer;
3055         delete[] pBuffer;
3056     }
3057     rBegin = p;
3058     return true;
3059 }
3060
3061 //============================================================================
3062 // static
3063 ByteString INetMIME::decodeUTF8(const ByteString & rText,
3064                                 rtl_TextEncoding eEncoding)
3065 {
3066     const sal_Char * p = rText.GetBuffer();
3067     const sal_Char * pEnd = p + rText.Len();
3068     ByteString sDecoded;
3069     while (p != pEnd)
3070     {
3071                 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3072                 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3073                 sal_uInt32 nCharacter=0;
3074         if (translateUTF8Char(p, pEnd, eEncoding, nCharacter))
3075             sDecoded += sal_Char(nCharacter);
3076         else
3077             sDecoded += sal_Char(*p++);
3078     }
3079     return sDecoded;
3080 }
3081
3082 //============================================================================
3083 // static
3084 UniString INetMIME::decodeHeaderFieldBody(HeaderFieldType eType,
3085                                           const ByteString & rBody)
3086 {
3087     // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
3088     // versions of StarOffice send mails with header fields where encoded
3089     // words can be preceded by '=', ',', '.', '"', or '(', and followed by
3090     // '=', ',', '.', '"', ')', without any required white space in between.
3091     // And there appear to exist some broken mailers that only encode single
3092     // letters within words, like "Appel
3093     // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
3094     // detect encoded words even when not propperly surrounded by white space.
3095     //
3096     // Non US-ASCII characters in rBody are treated as ISO-8859-1.
3097     //
3098     // encoded-word = "=?"
3099     //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
3100     //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
3101     //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
3102     //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
3103     //     "?="
3104     //
3105     // base64 = ALPHA / DIGIT / "+" / "/"
3106
3107     const sal_Char * pBegin = rBody.GetBuffer();
3108     const sal_Char * pEnd = pBegin + rBody.Len();
3109
3110     UniString sDecoded;
3111     const sal_Char * pCopyBegin = pBegin;
3112
3113     /* bool bStartEncodedWord = true; */
3114     const sal_Char * pWSPBegin = pBegin;
3115     UniString sEncodedText;
3116     bool bQuotedEncodedText = false;
3117     sal_uInt32 nCommentLevel = 0;
3118
3119     for (const sal_Char * p = pBegin; p != pEnd;)
3120     {
3121         if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
3122         {
3123             const sal_Char * q = p + 1;
3124             bool bEncodedWord = q != pEnd && *q++ == '?';
3125
3126             rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
3127             if (bEncodedWord)
3128             {
3129                 const sal_Char * pCharsetBegin = q;
3130                 const sal_Char * pLanguageBegin = 0;
3131                 int nAlphaCount = 0;
3132                 for (bool bDone = false; !bDone;)
3133                     if (q == pEnd)
3134                     {
3135                         bEncodedWord = false;
3136                         bDone = true;
3137                     }
3138                     else
3139                     {
3140                         sal_Char cChar = *q++;
3141                         switch (cChar)
3142                         {
3143                             case '*':
3144                                 pLanguageBegin = q - 1;
3145                                 nAlphaCount = 0;
3146                                 break;
3147
3148                             case '-':
3149                                 if (pLanguageBegin != 0)
3150                                 {
3151                                     if (nAlphaCount == 0)
3152                                         pLanguageBegin = 0;
3153                                     else
3154                                         nAlphaCount = 0;
3155                                 }
3156                                 break;
3157
3158                             case '?':
3159                                 if (pCharsetBegin == q - 1)
3160                                     bEncodedWord = false;
3161                                 else
3162                                 {
3163                                     eCharsetEncoding
3164                                         = getCharsetEncoding(
3165                                               pCharsetBegin,
3166                                               pLanguageBegin == 0
3167                                               || nAlphaCount == 0 ?
3168                                                   q - 1 : pLanguageBegin);
3169                                     bEncodedWord = isMIMECharsetEncoding(
3170                                                        eCharsetEncoding);
3171                                     eCharsetEncoding
3172                                         = translateFromMIME(eCharsetEncoding);
3173                                 }
3174                                 bDone = true;
3175                                 break;
3176
3177                             default:
3178                                 if (pLanguageBegin != 0
3179                                     && (!isAlpha(cChar) || ++nAlphaCount > 8))
3180                                     pLanguageBegin = 0;
3181                                 break;
3182                         }
3183                     }
3184             }
3185
3186             bool bEncodingB = false;
3187             if (bEncodedWord)
3188             {
3189                 if (q == pEnd)
3190                     bEncodedWord = false;
3191                 else
3192                 {
3193                     switch (*q++)
3194                     {
3195                         case 'B':
3196                         case 'b':
3197                             bEncodingB = true;
3198                             break;
3199
3200                         case 'Q':
3201                         case 'q':
3202                             bEncodingB = false;
3203                             break;
3204
3205                         default:
3206                             bEncodedWord = false;
3207                             break;
3208                     }
3209                 }
3210             }
3211
3212             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
3213
3214             ByteString sText;
3215             if (bEncodedWord)
3216             {
3217                 if (bEncodingB)
3218                 {
3219                     for (bool bDone = false; !bDone;)
3220                     {
3221                         if (pEnd - q < 4)
3222                         {
3223                             bEncodedWord = false;
3224                             bDone = true;
3225                         }
3226                         else
3227                         {
3228                             bool bFinal = false;
3229                             int nCount = 3;
3230                             sal_uInt32 nValue = 0;
3231                             for (int nShift = 18; nShift >= 0; nShift -= 6)
3232                             {
3233                                 int nWeight = getBase64Weight(*q++);
3234                                 if (nWeight == -2)
3235                                 {
3236                                     bEncodedWord = false;
3237                                     bDone = true;
3238                                     break;
3239                                 }
3240                                 if (nWeight == -1)
3241                                 {
3242                                     if (!bFinal)
3243                                     {
3244                                         if (nShift >= 12)
3245                                         {
3246                                             bEncodedWord = false;
3247                                             bDone = true;
3248                                             break;
3249                                         }
3250                                         bFinal = true;
3251                                         nCount = nShift == 6 ? 1 : 2;
3252                                     }
3253                                 }
3254                                 else
3255                                     nValue |= nWeight << nShift;
3256                             }
3257                             if (bEncodedWord)
3258                             {
3259                                 for (int nShift = 16; nCount-- > 0;
3260                                      nShift -= 8)
3261                                     sText += sal_Char(nValue >> nShift
3262                                                           & 0xFF);
3263                                 if (*q == '?')
3264                                 {
3265                                     ++q;
3266                                     bDone = true;
3267                                 }
3268                                 if (bFinal && !bDone)
3269                                 {
3270                                     bEncodedWord = false;
3271                                     bDone = true;
3272                                 }
3273                             }
3274                         }
3275                     }
3276                 }
3277                 else
3278                 {
3279                     const sal_Char * pEncodedTextBegin = q;
3280                     const sal_Char * pEncodedTextCopyBegin = q;
3281                     for (bool bDone = false; !bDone;)
3282                         if (q == pEnd)
3283                         {
3284                             bEncodedWord = false;
3285                             bDone = true;
3286                         }
3287                         else
3288                         {
3289                             sal_uInt32 nChar = *q++;
3290                             switch (nChar)
3291                             {
3292                                 case '=':
3293                                 {
3294                                     if (pEnd - q < 2)
3295                                     {
3296                                         bEncodedWord = false;
3297                                         bDone = true;
3298                                         break;
3299                                     }
3300                                     int nDigit1 = getHexWeight(q[0]);
3301                                     int nDigit2 = getHexWeight(q[1]);
3302                                     if (nDigit1 < 0 || nDigit2 < 0)
3303                                     {
3304                                         bEncodedWord = false;
3305                                         bDone = true;
3306                                         break;
3307                                     }
3308                                     sText += rBody.Copy(
3309                                         static_cast< xub_StrLen >(
3310                                             pEncodedTextCopyBegin - pBegin),
3311                                         static_cast< xub_StrLen >(
3312                                             q - 1 - pEncodedTextCopyBegin));
3313                                     sText += sal_Char(nDigit1 << 4 | nDigit2);
3314                                     q += 2;
3315                                     pEncodedTextCopyBegin = q;
3316                                     break;
3317                                 }
3318
3319                                 case '?':
3320                                     if (q - pEncodedTextBegin > 1)
3321                                         sText += rBody.Copy(
3322                                             static_cast< xub_StrLen >(
3323                                                 pEncodedTextCopyBegin - pBegin),
3324                                             static_cast< xub_StrLen >(
3325                                                 q - 1 - pEncodedTextCopyBegin));
3326                                     else
3327                                         bEncodedWord = false;
3328                                     bDone = true;
3329                                     break;
3330
3331                                 case '_':
3332                                     sText += rBody.Copy(
3333                                         static_cast< xub_StrLen >(
3334                                             pEncodedTextCopyBegin - pBegin),
3335                                         static_cast< xub_StrLen >(
3336                                             q - 1 - pEncodedTextCopyBegin));
3337                                     sText += ' ';
3338                                     pEncodedTextCopyBegin = q;
3339                                     break;
3340
3341                                 default:
3342                                     if (!isVisible(nChar))
3343                                     {
3344                                         bEncodedWord = false;
3345                                         bDone = true;
3346                                     }
3347                                     break;
3348                             }
3349                         }
3350                 }
3351             }
3352
3353             bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
3354
3355 //                      if (bEncodedWord && q != pEnd)
3356 //                              switch (*q)
3357 //                              {
3358 //                                      case '\t':
3359 //                                      case ' ':
3360 //                                      case '"':
3361 //                                      case ')':
3362 //                                      case ',':
3363 //                                      case '.':
3364 //                                      case '=':
3365 //                                              break;
3366 //
3367 //                                      default:
3368 //                                              bEncodedWord = false;
3369 //                                              break;
3370 //                              }
3371
3372             sal_Unicode * pUnicodeBuffer = 0;
3373             sal_Size nUnicodeSize = 0;
3374             if (bEncodedWord)
3375             {
3376                 pUnicodeBuffer
3377                     = convertToUnicode(sText.GetBuffer(),
3378                                        sText.GetBuffer() + sText.Len(),
3379                                        eCharsetEncoding, nUnicodeSize);
3380                 if (pUnicodeBuffer == 0)
3381                     bEncodedWord = false;
3382             }
3383
3384             if (bEncodedWord)
3385             {
3386                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
3387                 if (eType == HEADER_FIELD_TEXT)
3388                     sDecoded.Append(
3389                         pUnicodeBuffer,
3390                         static_cast< xub_StrLen >(nUnicodeSize));
3391                 else if (nCommentLevel == 0)
3392                 {
3393                     sEncodedText.Append(
3394                         pUnicodeBuffer,
3395                         static_cast< xub_StrLen >(nUnicodeSize));
3396                     if (!bQuotedEncodedText)
3397                     {
3398                         const sal_Unicode * pTextPtr = pUnicodeBuffer;
3399                         const sal_Unicode * pTextEnd = pTextPtr
3400                                                            + nUnicodeSize;
3401                         for (; pTextPtr != pTextEnd; ++pTextPtr)
3402                             if (!isEncodedWordTokenChar(*pTextPtr))
3403                             {
3404                                 bQuotedEncodedText = true;
3405                                 break;
3406                             }
3407                     }
3408                 }
3409                 else
3410                 {
3411                     const sal_Unicode * pTextPtr = pUnicodeBuffer;
3412                     const sal_Unicode * pTextEnd = pTextPtr + nUnicodeSize;
3413                     for (; pTextPtr != pTextEnd; ++pTextPtr)
3414                     {
3415                         switch (*pTextPtr)
3416                         {
3417                             case '(':
3418                             case ')':
3419                             case '\\':
3420                             case '\x0D':
3421                             case '=':
3422                                 sDecoded += '\\';
3423                                 break;
3424                         }
3425                         sDecoded += *pTextPtr;
3426                     }
3427                 }
3428                 delete[] pUnicodeBuffer;
3429                 p = q;
3430                 pCopyBegin = p;
3431
3432                 pWSPBegin = p;
3433                 while (p != pEnd && isWhiteSpace(*p))
3434                     ++p;
3435                 /* bStartEncodedWord = p != pWSPBegin; */
3436                 continue;
3437             }
3438         }
3439
3440         if (sEncodedText.Len() != 0)
3441         {
3442             if (bQuotedEncodedText)
3443             {
3444                 sDecoded += '"';
3445                 const sal_Unicode * pTextPtr = sEncodedText.GetBuffer();
3446                 const sal_Unicode * pTextEnd = pTextPtr + sEncodedText.Len();
3447                 for (;pTextPtr != pTextEnd; ++pTextPtr)
3448                 {
3449                     switch (*pTextPtr)
3450                     {
3451                         case '"':
3452                         case '\\':
3453                         case '\x0D':
3454                             sDecoded += '\\';
3455                             break;
3456                     }
3457                     sDecoded += *pTextPtr;
3458                 }
3459                 sDecoded += '"';
3460             }
3461             else
3462                 sDecoded += sEncodedText;
3463             sEncodedText.Erase();
3464             bQuotedEncodedText = false;
3465         }
3466
3467         if (p == pEnd)
3468             break;
3469
3470         switch (*p++)
3471         {
3472 //                      case '\t':
3473 //                      case ' ':
3474 //                      case ',':
3475 //                      case '.':
3476 //                      case '=':
3477 //                              bStartEncodedWord = true;
3478 //                              break;
3479
3480             case '"':
3481                 if (eType != HEADER_FIELD_TEXT && nCommentLevel == 0)
3482                 {
3483                     const sal_Char * pQuotedStringEnd
3484                         = skipQuotedString(p - 1, pEnd);
3485                     p = pQuotedStringEnd == p - 1 ? pEnd : pQuotedStringEnd;
3486                 }
3487                 /* bStartEncodedWord = true; */
3488                 break;
3489
3490             case '(':
3491                 if (eType != HEADER_FIELD_TEXT)
3492                     ++nCommentLevel;
3493                 /* bStartEncodedWord = true; */
3494                 break;
3495
3496             case ')':
3497                 if (nCommentLevel > 0)
3498                     --nCommentLevel;
3499                 /* bStartEncodedWord = false; */
3500                 break;
3501
3502             default:
3503             {
3504                 const sal_Char * pUTF8Begin = p - 1;
3505                 const sal_Char * pUTF8End = pUTF8Begin;
3506                                 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3507                                 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3508                                 sal_uInt32 nCharacter=0;
3509                 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
3510                                       nCharacter))
3511                 {
3512                     appendISO88591(sDecoded, pCopyBegin, p - 1);
3513                     sal_Unicode aUTF16Buf[2];
3514                     xub_StrLen nUTF16Len = static_cast< xub_StrLen >(
3515                         putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf);
3516                     sDecoded.Append(aUTF16Buf, nUTF16Len);
3517                     p = pUTF8End;
3518                     pCopyBegin = p;
3519                 }
3520                 /* bStartEncodedWord = false; */
3521                 break;
3522             }
3523         }
3524         pWSPBegin = p;
3525     }
3526
3527     appendISO88591(sDecoded, pCopyBegin, pEnd);
3528     return sDecoded;
3529 }
3530
3531 //============================================================================
3532 //
3533 //  INetMIMEOutputSink
3534 //
3535 //============================================================================
3536
3537 // virtual
3538 sal_Size INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
3539 {
3540     sal_Size nLength = rtl_str_getLength(pSequence);
3541     writeSequence(pSequence, pSequence + nLength);
3542     return nLength;
3543 }
3544
3545 //============================================================================
3546 // virtual
3547 void INetMIMEOutputSink::writeSequence(const sal_uInt32 * pBegin,
3548                                        const sal_uInt32 * pEnd)
3549 {
3550     DBG_ASSERT(pBegin && pBegin <= pEnd,
3551                "INetMIMEOutputSink::writeSequence(): Bad sequence");
3552
3553     sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3554     sal_Char * pBufferEnd = pBufferBegin;
3555     while (pBegin != pEnd)
3556     {
3557         DBG_ASSERT(*pBegin < 256,
3558                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3559         *pBufferEnd++ = sal_Char(*pBegin++);
3560     }
3561     writeSequence(pBufferBegin, pBufferEnd);
3562     delete[] pBufferBegin;
3563 }
3564
3565 //============================================================================
3566 // virtual
3567 void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
3568                                        const sal_Unicode * pEnd)
3569 {
3570     DBG_ASSERT(pBegin && pBegin <= pEnd,
3571                "INetMIMEOutputSink::writeSequence(): Bad sequence");
3572
3573     sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3574     sal_Char * pBufferEnd = pBufferBegin;
3575     while (pBegin != pEnd)
3576     {
3577         DBG_ASSERT(*pBegin < 256,
3578                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3579         *pBufferEnd++ = sal_Char(*pBegin++);
3580     }
3581     writeSequence(pBufferBegin, pBufferEnd);
3582     delete[] pBufferBegin;
3583 }
3584
3585 //============================================================================
// virtual
// Reports the error state of this sink.  This implementation unconditionally
// returns ERRCODE_NONE.
ErrCode INetMIMEOutputSink::getError() const
{
    return ERRCODE_NONE;
}
3591
3592 //============================================================================
3593 void INetMIMEOutputSink::writeLineEnd()
3594 {
3595     static const sal_Char aCRLF[2] = { 0x0D, 0x0A };
3596     writeSequence(aCRLF, aCRLF + 2);
3597     m_nColumn = 0;
3598 }
3599
3600 //============================================================================
3601 //
3602 //  INetMIMEStringOutputSink
3603 //
3604 //============================================================================
3605
3606 // virtual
3607 void INetMIMEStringOutputSink::writeSequence(const sal_Char * pBegin,
3608                                              const sal_Char * pEnd)
3609 {
3610     DBG_ASSERT(pBegin && pBegin <= pEnd,
3611                "INetMIMEStringOutputSink::writeSequence(): Bad sequence");
3612
3613     m_bOverflow = m_bOverflow
3614                   || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3615     if (!m_bOverflow)
3616         m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3617 }
3618
3619 //============================================================================
3620 // virtual
3621 ErrCode INetMIMEStringOutputSink::getError() const
3622 {
3623     return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3624 }
3625
3626 //============================================================================
3627 //
3628 //  INetMIMEUnicodeOutputSink
3629 //
3630 //============================================================================
3631
3632 // virtual
3633 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Char * pBegin,
3634                                               const sal_Char * pEnd)
3635 {
3636     DBG_ASSERT(pBegin && pBegin <= pEnd,
3637                "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3638
3639     sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3640     sal_Unicode * pBufferEnd = pBufferBegin;
3641     while (pBegin != pEnd)
3642         *pBufferEnd++ = sal_uChar(*pBegin++);
3643     writeSequence(pBufferBegin, pBufferEnd);
3644     delete[] pBufferBegin;
3645 }
3646
3647 //============================================================================
3648 // virtual
3649 void INetMIMEUnicodeOutputSink::writeSequence(const sal_uInt32 * pBegin,
3650                                               const sal_uInt32 * pEnd)
3651 {
3652     DBG_ASSERT(pBegin && pBegin <= pEnd,
3653                "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3654
3655     sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3656     sal_Unicode * pBufferEnd = pBufferBegin;
3657     while (pBegin != pEnd)
3658     {
3659         DBG_ASSERT(*pBegin < 256,
3660                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3661         *pBufferEnd++ = sal_Unicode(*pBegin++);
3662     }
3663     writeSequence(pBufferBegin, pBufferEnd);
3664     delete[] pBufferBegin;
3665 }
3666
3667 //============================================================================
3668 // virtual
3669 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Unicode * pBegin,
3670                                               const sal_Unicode * pEnd)
3671 {
3672     DBG_ASSERT(pBegin && pBegin <= pEnd,
3673                "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3674
3675     m_bOverflow = m_bOverflow
3676                   || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3677     if (!m_bOverflow)
3678         m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3679 }
3680
3681 //============================================================================
3682 // virtual
3683 ErrCode INetMIMEUnicodeOutputSink::getError() const
3684 {
3685     return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3686 }
3687
3688 //============================================================================
3689 //
3690 //  INetMIMEEncodedWordOutputSink
3691 //
3692 //============================================================================
3693
3694 static const sal_Char aEscape[128]
3695     = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x00
3696         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x01
3697         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x02
3698         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x03
3699         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x04
3700         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x05
3701         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x06
3702         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x07
3703         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x08
3704         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x09
3705         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0A
3706         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0B
3707         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0C
3708         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0D
3709         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0E
3710         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0F
3711         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x10
3712         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x11
3713         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x12
3714         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x13
3715         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x14
3716         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x15
3717         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x16
3718         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x17
3719         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x18
3720         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x19
3721         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1A
3722         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1B
3723         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1C
3724         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1D
3725         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1E
3726         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1F
3727         0,   // ' '
3728         0,   // '!'
3729         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '"'
3730         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '#'
3731         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '$'
3732         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '%'
3733         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '&'
3734         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '''
3735         INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '('
3736         INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ')'
3737         0,   // '*'
3738         0,   // '+'
3739         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ','
3740         0,   // '-'
3741         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '.'
3742         0,   // '/'
3743         0,   // '0'
3744         0,   // '1'
3745         0,   // '2'
3746         0,   // '3'
3747         0,   // '4'
3748         0,   // '5'
3749         0,   // '6'
3750         0,   // '7'
3751         0,   // '8'
3752         0,   // '9'
3753         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ':'
3754         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ';'
3755         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '<'
3756         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '='
3757         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '>'
3758         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '?'
3759         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '@'
3760         0,   // 'A'
3761         0,   // 'B'
3762         0,   // 'C'
3763         0,   // 'D'
3764         0,   // 'E'
3765         0,   // 'F'
3766         0,   // 'G'
3767         0,   // 'H'
3768         0,   // 'I'
3769         0,   // 'J'
3770         0,   // 'K'
3771         0,   // 'L'
3772         0,   // 'M'
3773         0,   // 'N'
3774         0,   // 'O'
3775         0,   // 'P'
3776         0,   // 'Q'
3777         0,   // 'R'
3778         0,   // 'S'
3779         0,   // 'T'
3780         0,   // 'U'
3781         0,   // 'V'
3782         0,   // 'W'
3783         0,   // 'X'
3784         0,   // 'Y'
3785         0,   // 'Z'
3786         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '['
3787         INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '\'
3788         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ']'
3789         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '^'
3790         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '_'
3791         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '`'
3792         0,   // 'a'
3793         0,   // 'b'
3794         0,   // 'c'
3795         0,   // 'd'
3796         0,   // 'e'
3797         0,   // 'f'
3798         0,   // 'g'
3799         0,   // 'h'
3800         0,   // 'i'
3801         0,   // 'j'
3802         0,   // 'k'
3803         0,   // 'l'
3804         0,   // 'm'
3805         0,   // 'n'
3806         0,   // 'o'
3807         0,   // 'p'
3808         0,   // 'q'
3809         0,   // 'r'
3810         0,   // 's'
3811         0,   // 't'
3812         0,   // 'u'
3813         0,   // 'v'
3814         0,   // 'w'
3815         0,   // 'x'
3816         0,   // 'y'
3817         0,   // 'z'
3818         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '{'
3819         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '|'
3820         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '}'
3821         INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '~'
3822         INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE }; // DEL
3823
3824 inline bool
3825 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar) const
3826 {
3827     return !INetMIME::isUSASCII(nChar) || aEscape[nChar] & m_eContext;
3828 }
3829
3830 //============================================================================
3831 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
3832 {
3833     if (m_eInitialSpace == SPACE_ALWAYS && m_nExtraSpaces == 0)
3834         m_nExtraSpaces = 1;
3835
3836     if (m_eEncodedWordState == STATE_SECOND_EQUALS)
3837     {
3838         // If the text is already an encoded word, copy it verbatim:
3839         sal_uInt32 nSize = m_pBufferEnd - m_pBuffer;
3840         switch (m_ePrevCoding)
3841         {
3842             case CODING_QUOTED:
3843                 m_rSink << '"';
3844             case CODING_NONE:
3845                 if (m_eInitialSpace == SPACE_ENCODED && m_nExtraSpaces == 0)
3846                     m_nExtraSpaces = 1;
3847                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3848                 {
3849                     if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3850                         m_rSink << INetMIMEOutputSink::endl;
3851                     m_rSink << ' ';
3852                 }
3853                 if (m_nExtraSpaces == 1)
3854                 {
3855                     if (m_rSink.getColumn() + nSize
3856                             >= m_rSink.getLineLengthLimit())
3857                         m_rSink << INetMIMEOutputSink::endl;
3858                     m_rSink << ' ';
3859                 }
3860                 break;
3861
3862             case CODING_ENCODED:
3863             {
3864                 const sal_Char * pCharsetName
3865                  = INetMIME::getCharsetName(m_ePrevMIMEEncoding);
3866                 while (m_nExtraSpaces-- > 0)
3867                 {
3868                     if (m_rSink.getColumn()
3869                             > m_rSink.getLineLengthLimit() - 3)
3870                         m_rSink << "?=" << INetMIMEOutputSink::endl << " =?"
3871                                 << pCharsetName << "?Q?";
3872                     m_rSink << '_';
3873                 }
3874                 m_rSink << "?=";
3875             }
3876             case CODING_ENCODED_TERMINATED:
3877                 if (m_rSink.getColumn() + nSize
3878                         > m_rSink.getLineLengthLimit() - 1)
3879                     m_rSink << INetMIMEOutputSink::endl;
3880                 m_rSink << ' ';
3881                 break;
3882         }
3883         m_rSink.write(m_pBuffer, m_pBufferEnd);
3884         m_eCoding = CODING_ENCODED_TERMINATED;
3885     }
3886     else
3887     {
3888         // If the text itself is too long to fit into a single line, make it
3889         // into multiple encoded words:
3890         switch (m_eCoding)
3891         {
3892             case CODING_NONE:
3893                 if (m_nExtraSpaces == 0)
3894                 {
3895                     DBG_ASSERT(m_ePrevCoding == CODING_NONE
3896                                || m_pBuffer == m_pBufferEnd,
3897                                "INetMIMEEncodedWordOutputSink::finish():"
3898                                    " Bad state");
3899                     if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3900                             > m_rSink.getLineLengthLimit())
3901                         m_eCoding = CODING_ENCODED;
3902                 }
3903                 else
3904                 {
3905                     OSL_ASSERT(m_pBufferEnd >= m_pBuffer);
3906                     if (static_cast< std::size_t >(m_pBufferEnd - m_pBuffer)
3907                         > m_rSink.getLineLengthLimit() - 1)
3908                     {
3909                         m_eCoding = CODING_ENCODED;
3910                     }
3911                 }
3912                 break;
3913
3914             case CODING_QUOTED:
3915                 if (m_nExtraSpaces == 0)
3916                 {
3917                     DBG_ASSERT(m_ePrevCoding == CODING_NONE,
3918                                "INetMIMEEncodedWordOutputSink::finish():"
3919                                    " Bad state");
3920                     if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3921                                 + m_nQuotedEscaped
3922                             > m_rSink.getLineLengthLimit() - 2)
3923                         m_eCoding = CODING_ENCODED;
3924                 }
3925                 else if ((m_pBufferEnd - m_pBuffer) + m_nQuotedEscaped
3926                              > m_rSink.getLineLengthLimit() - 3)
3927                     m_eCoding = CODING_ENCODED;
3928                 break;
3929
3930             default:
3931                 break;
3932         }
3933
3934         switch (m_eCoding)
3935         {
3936             case CODING_NONE:
3937                 switch (m_ePrevCoding)
3938                 {
3939                     case CODING_QUOTED:
3940                         if (m_rSink.getColumn() + m_nExtraSpaces
3941                                     + (m_pBufferEnd - m_pBuffer)
3942                                 < m_rSink.getLineLengthLimit())
3943                             m_eCoding = CODING_QUOTED;
3944                         else
3945                             m_rSink << '"';
3946                         break;
3947
3948                     case CODING_ENCODED:
3949                         m_rSink << "?=";
3950                         break;
3951
3952                     default:
3953                         break;
3954                 }
3955                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3956                 {
3957                     if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3958                         m_rSink << INetMIMEOutputSink::endl;
3959                     m_rSink << ' ';
3960                 }
3961                 if (m_nExtraSpaces == 1)
3962                 {
3963                     if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3964                             >= m_rSink.getLineLengthLimit())
3965                         m_rSink << INetMIMEOutputSink::endl;
3966                     m_rSink << ' ';
3967                 }
3968                 m_rSink.write(m_pBuffer, m_pBufferEnd);
3969                 if (m_eCoding == CODING_QUOTED && bWriteTrailer)
3970                 {
3971                     m_rSink << '"';
3972                     m_eCoding = CODING_NONE;
3973                 }
3974                 break;
3975
3976             case CODING_QUOTED:
3977             {
3978                 bool bInsertLeadingQuote = true;
3979                 sal_uInt32 nSize = (m_pBufferEnd - m_pBuffer)
3980                                        + m_nQuotedEscaped + 2;
3981                 switch (m_ePrevCoding)
3982                 {
3983                     case CODING_QUOTED:
3984                         if (m_rSink.getColumn() + m_nExtraSpaces + nSize - 1
3985                                 < m_rSink.getLineLengthLimit())
3986                         {
3987                             bInsertLeadingQuote = false;
3988                             --nSize;
3989                         }
3990                         else
3991                             m_rSink << '"';
3992                         break;
3993
3994                     case CODING_ENCODED:
3995                         m_rSink << "?=";
3996                         break;
3997
3998                     default:
3999                         break;
4000                 }
4001                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
4002                 {
4003                     if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
4004                         m_rSink << INetMIMEOutputSink::endl;
4005                     m_rSink << ' ';
4006                 }
4007                 if (m_nExtraSpaces == 1)
4008                 {
4009                     if (m_rSink.getColumn() + nSize
4010                             >= m_rSink.getLineLengthLimit())
4011                         m_rSink << INetMIMEOutputSink::endl;
4012                     m_rSink << ' ';
4013                 }
4014                 if (bInsertLeadingQuote)
4015                     m_rSink << '"';
4016                 for (const sal_Unicode * p = m_pBuffer; p != m_pBufferEnd;
4017                      ++p)
4018                 {
4019                     if (INetMIME::needsQuotedStringEscape(*p))
4020                         m_rSink << '\\';
4021                     m_rSink << sal_Char(*p);
4022                 }
4023                 if (bWriteTrailer)
4024                 {
4025                     m_rSink << '"';
4026                     m_eCoding = CODING_NONE;
4027                 }
4028                 break;
4029             }
4030
4031             case CODING_ENCODED:
4032             {
4033                 rtl_TextEncoding eCharsetEncoding
4034                     = m_pEncodingList->
4035                           getPreferredEncoding(RTL_TEXTENCODING_UTF8);
4036                 rtl_TextEncoding eMIMEEncoding
4037                     = INetMIME::translateToMIME(eCharsetEncoding);
4038
4039                 // The non UTF-8 code will only work for stateless single byte
4040                 // character encodings (see also below):
4041                 sal_Char * pTargetBuffer = NULL;
4042                 sal_Size nTargetSize = 0;
4043                 sal_uInt32 nSize;
4044                 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4045                 {
4046                     nSize = 0;
4047                     for (sal_Unicode const * p = m_pBuffer;
4048                          p != m_pBufferEnd;)
4049                     {
4050                         sal_uInt32 nUTF32
4051                             = INetMIME::getUTF32Character(p, m_pBufferEnd);
4052                         nSize += needsEncodedWordEscape(nUTF32) ?
4053                                      3 * INetMIME::getUTF8OctetCount(nUTF32) :
4054                                      1;
4055                             // only US-ASCII characters (that are converted to
4056                             // a single byte by UTF-8) need no encoded word
4057                             // escapes...
4058                     }
4059                 }
4060                 else
4061                 {
4062                     rtl_UnicodeToTextConverter hConverter
4063                         = rtl_createUnicodeToTextConverter(eCharsetEncoding);
4064                     rtl_UnicodeToTextContext hContext
4065                         = rtl_createUnicodeToTextContext(hConverter);
4066                     for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;;
4067                          nBufferSize += nBufferSize / 3 + 1)
4068                     {
4069                         pTargetBuffer = new sal_Char[nBufferSize];
4070                         sal_uInt32 nInfo;
4071                         sal_Size nSrcCvtBytes;
4072                         nTargetSize
4073                             = rtl_convertUnicodeToText(
4074                                   hConverter, hContext, m_pBuffer,
4075                                   m_pBufferEnd - m_pBuffer, pTargetBuffer,
4076                                   nBufferSize,
4077                                   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE
4078                                      | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE,
4079                                   &nInfo, &nSrcCvtBytes);
4080                         if (!(nInfo
4081                                   & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
4082                             break;
4083                         delete[] pTargetBuffer;
4084                         pTargetBuffer = NULL;
4085                         rtl_resetUnicodeToTextContext(hConverter, hContext);
4086                     }
4087                     rtl_destroyUnicodeToTextContext(hConverter, hContext);
4088                     rtl_destroyUnicodeToTextConverter(hConverter);
4089
4090                     nSize = nTargetSize;
4091                     for (sal_Size k = 0; k < nTargetSize; ++k)
4092                         if (needsEncodedWordEscape(sal_uChar(
4093                                                        pTargetBuffer[k])))
4094                             nSize += 2;
4095                 }
4096
4097                 const sal_Char * pCharsetName
4098                     = INetMIME::getCharsetName(eMIMEEncoding);
4099                 sal_uInt32 nWrapperSize = rtl_str_getLength(pCharsetName) + 7;
4100                     // '=?', '?Q?', '?='
4101
4102                 switch (m_ePrevCoding)
4103                 {
4104                     case CODING_QUOTED:
4105                         m_rSink << '"';
4106                     case CODING_NONE:
4107                         if (m_eInitialSpace == SPACE_ENCODED
4108                             && m_nExtraSpaces == 0)
4109                             m_nExtraSpaces = 1;
4110                         nSize += nWrapperSize;
4111                         for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
4112                         {
4113                             if (m_rSink.getColumn()
4114                                     >= m_rSink.getLineLengthLimit())
4115                                 m_rSink << INetMIMEOutputSink::endl;
4116                             m_rSink << ' ';
4117                         }
4118                         if (m_nExtraSpaces == 1)
4119                         {
4120                             if (m_rSink.getColumn() + nSize
4121                                     >= m_rSink.getLineLengthLimit())
4122                             m_rSink << INetMIMEOutputSink::endl;
4123                             m_rSink << ' ';
4124                         }
4125                         m_rSink << "=?" << pCharsetName << "?Q?";
4126                         break;
4127
4128                     case CODING_ENCODED:
4129                         if (m_ePrevMIMEEncoding != eMIMEEncoding
4130                             || m_rSink.getColumn() + m_nExtraSpaces + nSize
4131                                    > m_rSink.getLineLengthLimit() - 2)
4132                         {
4133                             m_rSink << "?=";
4134                             if (m_rSink.getColumn() + nWrapperSize
4135                                         + m_nExtraSpaces + nSize
4136                                     > m_rSink.getLineLengthLimit() - 1)
4137                                 m_rSink << INetMIMEOutputSink::endl;
4138                             m_rSink << " =?" << pCharsetName << "?Q?";
4139                         }
4140                         while (m_nExtraSpaces-- > 0)
4141                         {
4142                             if (m_rSink.getColumn()
4143                                     > m_rSink.getLineLengthLimit() - 3)
4144                                 m_rSink << "?=" << INetMIMEOutputSink::endl
4145                                         << " =?" << pCharsetName << "?Q?";
4146                             m_rSink << '_';
4147                         }
4148                         break;
4149
4150                     case CODING_ENCODED_TERMINATED:
4151                         if (m_rSink.getColumn() + nWrapperSize
4152                                     + m_nExtraSpaces + nSize
4153                                 > m_rSink.getLineLengthLimit() - 1)
4154                             m_rSink << INetMIMEOutputSink::endl;
4155                         m_rSink << " =?" << pCharsetName << "?Q?";
4156                         while (m_nExtraSpaces-- > 0)
4157                         {
4158                             if (m_rSink.getColumn()
4159                                     > m_rSink.getLineLengthLimit() - 3)
4160                                 m_rSink << "?=" << INetMIMEOutputSink::endl
4161                                         << " =?" << pCharsetName << "?Q?";
4162                             m_rSink << '_';
4163                         }
4164                         break;
4165                 }
4166
4167                 // The non UTF-8 code will only work for stateless single byte
4168                 // character encodings (see also above):
4169                 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4170                 {
4171                     bool bInitial = true;
4172                     for (sal_Unicode const * p = m_pBuffer;
4173                          p != m_pBufferEnd;)
4174                     {
4175                         sal_uInt32 nUTF32
4176                             = INetMIME::getUTF32Character(p, m_pBufferEnd);
4177                         bool bEscape = needsEncodedWordEscape(nUTF32);
4178                         sal_uInt32 nWidth
4179                             = bEscape ?
4180                                   3 * INetMIME::getUTF8OctetCount(nUTF32) : 1;
4181                             // only US-ASCII characters (that are converted to
4182                             // a single byte by UTF-8) need no encoded word
4183                             // escapes...
4184                         if (!bInitial
4185                             && m_rSink.getColumn() + nWidth + 2
4186                                    > m_rSink.getLineLengthLimit())
4187                             m_rSink << "?=" << INetMIMEOutputSink::endl
4188                                     << " =?" << pCharsetName << "?Q?";
4189                         if (bEscape)
4190                         {
4191                             DBG_ASSERT(
4192                                 nUTF32 < 0x10FFFF,
4193                                 "INetMIMEEncodedWordOutputSink::finish():"
4194                                     " Bad char");
4195                             if (nUTF32 < 0x80)
4196                                 INetMIME::writeEscapeSequence(m_rSink,
4197                                                               nUTF32);
4198                             else if (nUTF32 < 0x800)
4199                             {
4200                                 INetMIME::writeEscapeSequence(m_rSink,
4201                                                               (nUTF32 >> 6)
4202                                                                   | 0xC0);
4203                                 INetMIME::writeEscapeSequence(m_rSink,
4204                                                               (nUTF32 & 0x3F)
4205                                                                   | 0x80);
4206                             }
4207                             else if (nUTF32 < 0x10000)
4208                             {
4209                                 INetMIME::writeEscapeSequence(m_rSink,
4210                                                               (nUTF32 >> 12)
4211                                                                   | 0xE0);
4212                                 INetMIME::writeEscapeSequence(m_rSink,
4213                                                               ((nUTF32 >> 6)
4214                                                                       & 0x3F)
4215                                                                   | 0x80);
4216                                 INetMIME::writeEscapeSequence(m_rSink,
4217                                                               (nUTF32 & 0x3F)
4218                                                                   | 0x80);
4219                             }
4220                             else
4221                             {
4222                                 INetMIME::writeEscapeSequence(m_rSink,
4223                                                               (nUTF32 >> 18)
4224                                                                   | 0xF0);
4225                                 INetMIME::writeEscapeSequence(m_rSink,
4226                                                               ((nUTF32 >> 12)
4227                                                                       & 0x3F)
4228                                                                   | 0x80);
4229                                 INetMIME::writeEscapeSequence(m_rSink,
4230                                                               ((nUTF32 >> 6)
4231                                                                       & 0x3F)
4232                                                                   | 0x80);
4233                                 INetMIME::writeEscapeSequence(m_rSink,
4234                                                               (nUTF32 & 0x3F)
4235                                                                   | 0x80);
4236                             }
4237                         }
4238                         else
4239                             m_rSink << sal_Char(nUTF32);
4240                         bInitial = false;
4241                     }
4242                 }
4243                 else
4244                 {
4245                     for (sal_Size k = 0; k < nTargetSize; ++k)
4246                     {
4247                         sal_uInt32 nUCS4 = sal_uChar(pTargetBuffer[k]);
4248                         bool bEscape = needsEncodedWordEscape(nUCS4);
4249                         if (k > 0
4250                             && m_rSink.getColumn() + (bEscape ? 5 : 3)
4251                                    > m_rSink.getLineLengthLimit())
4252                             m_rSink << "?=" << INetMIMEOutputSink::endl
4253                                     << " =?" << pCharsetName << "?Q?";
4254                         if (bEscape)
4255                             INetMIME::writeEscapeSequence(m_rSink, nUCS4);
4256                         else
4257                             m_rSink << sal_Char(nUCS4);
4258                     }
4259                     delete[] pTargetBuffer;
4260                 }
4261
4262                 if (bWriteTrailer)
4263                 {
4264                     m_rSink << "?=";
4265                     m_eCoding = CODING_ENCODED_TERMINATED;
4266                 }
4267
4268                 m_ePrevMIMEEncoding = eMIMEEncoding;
4269                 break;
4270             }
4271
4272             default:
4273                 OSL_ASSERT(false);
4274                 break;
4275         }
4276     }
4277
4278     m_eInitialSpace = SPACE_NO;
4279     m_nExtraSpaces = 0;
4280     m_pEncodingList->reset();
4281     m_pBufferEnd = m_pBuffer;
4282     m_ePrevCoding = m_eCoding;
4283     m_eCoding = CODING_NONE;
4284     m_nQuotedEscaped = 0;
4285     m_eEncodedWordState = STATE_INITIAL;
4286 }
4287
4288 //============================================================================
4289 INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink()
4290 {
4291     rtl_freeMemory(m_pBuffer);
4292     delete m_pEncodingList;
4293 }
4294
4295 //============================================================================
4296 INetMIMEEncodedWordOutputSink &
4297 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar)
4298 {
4299     if (nChar == ' ')
4300     {
4301         if (m_pBufferEnd != m_pBuffer)
4302             finish(false);
4303         ++m_nExtraSpaces;
4304     }
4305     else
4306     {
4307         // Check for an already encoded word:
4308         switch (m_eEncodedWordState)
4309         {
4310             case STATE_INITIAL:
4311                 if (nChar == '=')
4312                     m_eEncodedWordState = STATE_FIRST_EQUALS;
4313                 else
4314                     m_eEncodedWordState = STATE_BAD;
4315                 break;
4316
4317             case STATE_FIRST_EQUALS:
4318                 if (nChar == '?')
4319                     m_eEncodedWordState = STATE_FIRST_EQUALS;
4320                 else
4321                     m_eEncodedWordState = STATE_BAD;
4322                 break;
4323
4324             case STATE_FIRST_QUESTION:
4325                 if (INetMIME::isEncodedWordTokenChar(nChar))
4326                     m_eEncodedWordState = STATE_CHARSET;
4327                 else
4328                     m_eEncodedWordState = STATE_BAD;
4329                 break;
4330
4331             case STATE_CHARSET:
4332                 if (nChar == '?')
4333                     m_eEncodedWordState = STATE_SECOND_QUESTION;
4334                 else if (!INetMIME::isEncodedWordTokenChar(nChar))
4335                     m_eEncodedWordState = STATE_BAD;
4336                 break;
4337
4338             case STATE_SECOND_QUESTION:
4339                 if (nChar == 'B' || nChar == 'Q'
4340                     || nChar == 'b' || nChar == 'q')
4341                     m_eEncodedWordState = STATE_ENCODING;
4342                 else
4343                     m_eEncodedWordState = STATE_BAD;
4344                 break;
4345
4346             case STATE_ENCODING:
4347                 if (nChar == '?')
4348                     m_eEncodedWordState = STATE_THIRD_QUESTION;
4349                 else
4350                     m_eEncodedWordState = STATE_BAD;
4351                 break;
4352
4353             case STATE_THIRD_QUESTION:
4354                 if (INetMIME::isVisible(nChar) && nChar != '?')
4355                     m_eEncodedWordState = STATE_ENCODED_TEXT;
4356                 else
4357                     m_eEncodedWordState = STATE_BAD;
4358                 break;
4359
4360             case STATE_ENCODED_TEXT:
4361                 if (nChar == '?')
4362                     m_eEncodedWordState = STATE_FOURTH_QUESTION;
4363                 else if (!INetMIME::isVisible(nChar))
4364                     m_eEncodedWordState = STATE_BAD;
4365                 break;
4366
4367             case STATE_FOURTH_QUESTION:
4368                 if (nChar == '=')
4369                     m_eEncodedWordState = STATE_SECOND_EQUALS;
4370                 else
4371                     m_eEncodedWordState = STATE_BAD;
4372                 break;
4373
4374             case STATE_SECOND_EQUALS:
4375                 m_eEncodedWordState = STATE_BAD;
4376                 break;
4377
4378             case STATE_BAD:
4379                 break;
4380         }
4381
4382         // Update encoding:
4383         m_pEncodingList->includes(nChar);
4384
4385         // Update coding:
4386         enum { TENQ = 1,   // CONTEXT_TEXT, CODING_ENCODED
4387                CENQ = 2,   // CONTEXT_COMMENT, CODING_ENCODED
4388                PQTD = 4,   // CONTEXT_PHRASE, CODING_QUOTED
4389                PENQ = 8 }; // CONTEXT_PHRASE, CODING_ENCODED
4390         static const sal_Char aMinimal[128]
4391             = { TENQ | CENQ        | PENQ,   // 0x00
4392                 TENQ | CENQ        | PENQ,   // 0x01
4393                 TENQ | CENQ        | PENQ,   // 0x02
4394                 TENQ | CENQ        | PENQ,   // 0x03
4395                 TENQ | CENQ        | PENQ,   // 0x04
4396                 TENQ | CENQ        | PENQ,   // 0x05
4397                 TENQ | CENQ        | PENQ,   // 0x06
4398                 TENQ | CENQ        | PENQ,   // 0x07
4399                 TENQ | CENQ        | PENQ,   // 0x08
4400                 TENQ | CENQ        | PENQ,   // 0x09
4401                 TENQ | CENQ        | PENQ,   // 0x0A
4402                 TENQ | CENQ        | PENQ,   // 0x0B
4403                 TENQ | CENQ        | PENQ,   // 0x0C
4404                 TENQ | CENQ        | PENQ,   // 0x0D
4405                 TENQ | CENQ        | PENQ,   // 0x0E
4406                 TENQ | CENQ        | PENQ,   // 0x0F
4407                 TENQ | CENQ        | PENQ,   // 0x10
4408                 TENQ | CENQ        | PENQ,   // 0x11
4409                 TENQ | CENQ        | PENQ,   // 0x12
4410                 TENQ | CENQ        | PENQ,   // 0x13
4411                 TENQ | CENQ        | PENQ,   // 0x14
4412                 TENQ | CENQ        | PENQ,   // 0x15
4413                 TENQ | CENQ        | PENQ,   // 0x16
4414                 TENQ | CENQ        | PENQ,   // 0x17
4415                 TENQ | CENQ        | PENQ,   // 0x18
4416                 TENQ | CENQ        | PENQ,   // 0x19
4417                 TENQ | CENQ        | PENQ,   // 0x1A
4418                 TENQ | CENQ        | PENQ,   // 0x1B
4419                 TENQ | CENQ        | PENQ,   // 0x1C
4420                 TENQ | CENQ        | PENQ,   // 0x1D
4421                 TENQ | CENQ        | PENQ,   // 0x1E
4422                 TENQ | CENQ        | PENQ,   // 0x1F
4423                                         0,   // ' '
4424                                         0,   // '!'
4425                               PQTD       ,   // '"'
4426                                         0,   // '#'
4427                                         0,   // '$'
4428                                         0,   // '%'
4429                                         0,   // '&'
4430                                         0,   // '''
4431                        CENQ | PQTD       ,   // '('
4432                        CENQ | PQTD       ,   // ')'
4433                                         0,   // '*'
4434                                         0,   // '+'
4435                               PQTD       ,   // ','
4436                                         0,   // '-'
4437                               PQTD       ,   // '.'
4438                                         0,   // '/'
4439                                         0,   // '0'
4440                                         0,   // '1'
4441                                         0,   // '2'
4442                                         0,   // '3'
4443                                         0,   // '4'
4444                                         0,   // '5'
4445                                         0,   // '6'
4446                                         0,   // '7'
4447                                         0,   // '8'
4448                                         0,   // '9'
4449                               PQTD       ,   // ':'
4450                               PQTD       ,   // ';'
4451                               PQTD       ,   // '<'
4452                                         0,   // '='
4453                               PQTD       ,   // '>'
4454                                         0,   // '?'
4455                               PQTD       ,   // '@'
4456                                         0,   // 'A'
4457                                         0,   // 'B'
4458                                         0,   // 'C'
4459                                         0,   // 'D'
4460                                         0,   // 'E'
4461                                         0,   // 'F'
4462                                         0,   // 'G'
4463                                         0,   // 'H'
4464                                         0,   // 'I'
4465                                         0,   // 'J'
4466                                         0,   // 'K'
4467                                         0,   // 'L'
4468                                         0,   // 'M'
4469                                         0,   // 'N'
4470                                         0,   // 'O'
4471                                         0,   // 'P'
4472                                         0,   // 'Q'
4473                                         0,   // 'R'
4474                                         0,   // 'S'
4475                                         0,   // 'T'
4476                                         0,   // 'U'
4477                                         0,   // 'V'
4478                                         0,   // 'W'
4479                                         0,   // 'X'
4480                                         0,   // 'Y'
4481                                         0,   // 'Z'
4482                               PQTD       ,   // '['
4483                        CENQ | PQTD       ,   // '\'
4484                               PQTD       ,   // ']'
4485                                         0,   // '^'
4486                                         0,   // '_'
4487                                         0,   // '`'
4488                                         0,   // 'a'
4489                                         0,   // 'b'
4490                                         0,   // 'c'
4491                                         0,   // 'd'
4492                                         0,   // 'e'
4493                                         0,   // 'f'
4494                                         0,   // 'g'
4495                                         0,   // 'h'
4496                                         0,   // 'i'
4497                                         0,   // 'j'
4498                                         0,   // 'k'
4499                                         0,   // 'l'
4500                                         0,   // 'm'
4501                                         0,   // 'n'
4502                                         0,   // 'o'
4503                                         0,   // 'p'
4504                                         0,   // 'q'
4505                                         0,   // 'r'
4506                                         0,   // 's'
4507                                         0,   // 't'
4508                                         0,   // 'u'
4509                                         0,   // 'v'
4510                                         0,   // 'w'
4511                                         0,   // 'x'
4512                                         0,   // 'y'
4513                                         0,   // 'z'
4514                                         0,   // '{'
4515                                         0,   // '|'
4516                                         0,   // '}'
4517                                         0,   // '~'
4518                 TENQ | CENQ        | PENQ }; // DEL
4519         Coding eNewCoding = !INetMIME::isUSASCII(nChar) ? CODING_ENCODED :
4520                             m_eContext == CONTEXT_PHRASE ?
4521                                 Coding(aMinimal[nChar] >> 2) :
4522                             aMinimal[nChar] & m_eContext ? CODING_ENCODED :
4523                                                            CODING_NONE;
4524         if (eNewCoding > m_eCoding)
4525             m_eCoding = eNewCoding;
4526         if (m_eCoding == CODING_QUOTED
4527             && INetMIME::needsQuotedStringEscape(nChar))
4528             ++m_nQuotedEscaped;
4529
4530         // Append to buffer:
4531         if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize)
4532         {
4533             m_pBuffer
4534                 = static_cast< sal_Unicode * >(
4535                       rtl_reallocateMemory(m_pBuffer,
4536                                            (m_nBufferSize + BUFFER_SIZE)
4537                                                * sizeof (sal_Unicode)));
4538             m_pBufferEnd = m_pBuffer + m_nBufferSize;
4539             m_nBufferSize += BUFFER_SIZE;
4540         }
4541         *m_pBufferEnd++ = sal_Unicode(nChar);
4542     }
4543     return *this;
4544 }
4545
4546 //============================================================================
4547 //
4548 //  INetContentTypeParameterList
4549 //
4550 //============================================================================
4551
4552 void INetContentTypeParameterList::Clear()
4553 {
4554     while (Count() > 0)
4555         delete static_cast< INetContentTypeParameter * >(Remove(Count() - 1));
4556 }
4557
4558 //============================================================================
4559 const INetContentTypeParameter *
4560 INetContentTypeParameterList::find(const ByteString & rAttribute) const
4561 {
4562     for (ULONG i = 0; i < Count(); ++i)
4563     {
4564         const INetContentTypeParameter * pParameter = GetObject(i);
4565         if (pParameter->m_sAttribute.EqualsIgnoreCaseAscii(rAttribute))
4566             return pParameter;
4567     }
4568     return 0;
4569 }
4570