tools/source/inet/inetmime.cxx

   1 /*************************************************************************
   2  *
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * Copyright 2008 by Sun Microsystems, Inc.
   6  *
   7  * OpenOffice.org - a multi-platform office productivity suite
   8  *
   9  * $RCSfile: inetmime.cxx,v $
  10  * $Revision: 1.14 $
  11  *
  12  * This file is part of OpenOffice.org.
  13  *
  14  * OpenOffice.org is free software: you can redistribute it and/or modify
  15  * it under the terms of the GNU Lesser General Public License version 3
  16  * only, as published by the Free Software Foundation.
  17  *
  18  * OpenOffice.org is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU Lesser General Public License version 3 for more details
  22  * (a copy is included in the LICENSE file that accompanied this code).
  23  *
  24  * You should have received a copy of the GNU Lesser General Public License
  25  * version 3 along with OpenOffice.org.  If not, see
  26  * <http://www.openoffice.org/license.html>
  27  * for a copy of the LGPLv3 License.
  28  *
  29  ************************************************************************/
  30
  31 // MARKER(update_precomp.py): autogen include statement, do not remove
  32 #include "precompiled_tools.hxx"
  33
  34 #include <cstddef>
  35 #include <limits>
  36
  37 #include "rtl/tencinfo.h"
  38 #include <tools/datetime.hxx>
  39 #include <tools/inetmime.hxx>
  40
  41 namespace unnamed_tools_inetmime {} using namespace unnamed_tools_inetmime;
  42         // unnamed namespaces don't work well yet
  43
  44 //============================================================================
  45 namespace unnamed_tools_inetmime {
  46
  47 class Charset
  48 {
  49         rtl_TextEncoding m_eEncoding;
  50         const sal_uInt32 * m_pRanges;
  51
  52 public:
  53         inline Charset(rtl_TextEncoding eTheEncoding,
  54                                    const sal_uInt32 * pTheRanges);
  55
  56         rtl_TextEncoding getEncoding() const { return m_eEncoding; }
  57
  58         bool contains(sal_uInt32 nChar) const;
  59 };
  60
  61 inline Charset::Charset(rtl_TextEncoding eTheEncoding,
  62                                                 const sal_uInt32 * pTheRanges):
  63         m_eEncoding(eTheEncoding),
  64         m_pRanges(pTheRanges)
  65 {
  66         DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges");
  67 }
  68
  69 //============================================================================
// Forward declaration: appends the bytes in [pBegin, pEnd) to rText, widening
// each byte (taken as unsigned) to one sal_Unicode.  Defined further down in
// this file.
void appendISO88591(UniString & rText, sal_Char const * pBegin,
                                        sal_Char const * pEnd);
  72
  73 }
  74
  75 //============================================================================
  76 class INetMIMECharsetList_Impl
  77 {
  78         struct Node
  79         {
  80                 Charset m_aCharset;
  81                 bool m_bDisabled;
  82                 Node * m_pNext;
  83
  84                 inline Node(const Charset & rTheCharset, bool bTheDisabled,
  85                                         Node * pTheNext);
  86         };
  87
  88         Node * m_pFirst;
  89
  90 public:
  91         INetMIMECharsetList_Impl(): m_pFirst(0) {}
  92
  93         ~INetMIMECharsetList_Impl();
  94
  95         void prepend(const Charset & rCharset)
  96         { m_pFirst = new Node(rCharset, false, m_pFirst); }
  97
  98         void includes(sal_uInt32 nChar);
  99
 100         rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault
 101                                                                                       = RTL_TEXTENCODING_DONTKNOW)
 102                 const;
 103
 104         void reset();
 105 };
 106
 107 inline INetMIMECharsetList_Impl::Node::Node(const Charset & rTheCharset,
 108                                                                                         bool bTheDisabled,
 109                                                                                         Node * pTheNext):
 110         m_aCharset(rTheCharset),
 111         m_bDisabled(bTheDisabled),
 112         m_pNext(pTheNext)
 113 {}
 114
 115 //============================================================================
 116 namespace unnamed_tools_inetmime {
 117
 118 struct Parameter
 119 {
 120         Parameter * m_pNext;
 121         ByteString m_aAttribute;
 122         ByteString m_aCharset;
 123         ByteString m_aLanguage;
 124         ByteString m_aValue;
 125         sal_uInt32 m_nSection;
 126         bool m_bExtended;
 127
 128         inline Parameter(Parameter * pTheNext, ByteString const & rTheAttribute,
 129                                          ByteString const & rTheCharset,
 130                                          ByteString const & rTheLanguage,
 131                                          ByteString const & rTheValue, sal_uInt32 nTheSection,
 132                                          bool bTheExtended);
 133 };
 134
 135 inline Parameter::Parameter(Parameter * pTheNext,
 136                                                         ByteString const & rTheAttribute,
 137                                                         ByteString const & rTheCharset,
 138                                                         ByteString const & rTheLanguage,
 139                                                         ByteString const & rTheValue,
 140                                                         sal_uInt32 nTheSection, bool bTheExtended):
 141         m_pNext(pTheNext),
 142         m_aAttribute(rTheAttribute),
 143         m_aCharset(rTheCharset),
 144         m_aLanguage(rTheLanguage),
 145         m_aValue(rTheValue),
 146         m_nSection(nTheSection),
 147         m_bExtended(bTheExtended)
 148 {}
 149
 150 //============================================================================
 151 struct ParameterList
 152 {
 153         Parameter * m_pList;
 154
 155         ParameterList(): m_pList(0) {}
 156
 157         inline ~ParameterList();
 158
 159         Parameter ** find(ByteString const & rAttribute, sal_uInt32 nSection,
 160                                           bool & rPresent);
 161 };
 162
 163 inline ParameterList::~ParameterList()
 164 {
 165         while (m_pList)
 166         {
 167                 Parameter * pNext = m_pList->m_pNext;
 168                 delete m_pList;
 169                 m_pList = pNext;
 170         }
 171 }
 172
 173 //============================================================================
// Forward declaration: checks the section numbering of rInput and, when
// pOutput is non-null, fills it with the decoded parameters.  Returns false
// if the section numbering is inconsistent.  Defined further down in this
// file.
bool parseParameters(ParameterList const & rInput,
                                         INetContentTypeParameterList * pOutput);
 176
 177 }
 178
 179 //============================================================================
 180 //
 181 //  Charset
 182 //
 183 //============================================================================
 184
 185 bool Charset::contains(sal_uInt32 nChar) const
 186 {
 187         for (const sal_uInt32 * p = m_pRanges;;)
 188         {
 189                 if (nChar < *p++)
 190                         return false;
 191                 if (nChar <= *p++)
 192                         return true;
 193         }
 194 }
 195
 196 //============================================================================
 197 //
 198 //  appendISO88591
 199 //
 200 //============================================================================
 201
 202 namespace unnamed_tools_inetmime {
 203
 204 void appendISO88591(UniString & rText, sal_Char const * pBegin,
 205                                         sal_Char const * pEnd)
 206 {
 207         xub_StrLen nLength = static_cast< xub_StrLen >(pEnd - pBegin);
 208         sal_Unicode * pBuffer = new sal_Unicode[nLength];
 209         for (sal_Unicode * p = pBuffer; pBegin != pEnd;)
 210                 *p++ = sal_uChar(*pBegin++);
 211         rText.Append(pBuffer, nLength);
 212         delete[] pBuffer;
 213 }
 214
 215 }
 216
 217 //============================================================================
 218 //
 219 //  INetMIMECharsetList_Impl
 220 //
 221 //============================================================================
 222
 223 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl()
 224 {
 225         while (m_pFirst)
 226         {
 227                 Node * pRemove = m_pFirst;
 228                 m_pFirst = m_pFirst->m_pNext;
 229                 delete pRemove;
 230         }
 231 }
 232
 233 //============================================================================
 234 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar)
 235 {
 236         for (Node * p = m_pFirst; p; p = p->m_pNext)
 237                 if (!(p->m_bDisabled || p->m_aCharset.contains(nChar)))
 238                         p->m_bDisabled = true;
 239 }
 240
 241 //============================================================================
 242 rtl_TextEncoding
 243 INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault)
 244         const
 245 {
 246         for (Node * p = m_pFirst; p; p = p->m_pNext)
 247                 if (!p->m_bDisabled)
 248                         return p->m_aCharset.getEncoding();
 249         return eDefault;
 250 }
 251
 252 //============================================================================
 253 void INetMIMECharsetList_Impl::reset()
 254 {
 255         for (Node * p = m_pFirst; p; p = p->m_pNext)
 256                 p->m_bDisabled = false;
 257 }
 258
 259 //============================================================================
 260 //
 261 //  ParameterList
 262 //
 263 //============================================================================
 264
 265 Parameter ** ParameterList::find(ByteString const & rAttribute,
 266                                                                  sal_uInt32 nSection, bool & rPresent)
 267 {
 268         Parameter ** p = &m_pList;
 269         for (; *p; p = &(*p)->m_pNext)
 270         {
 271                 StringCompare eCompare = rAttribute.CompareTo((*p)->m_aAttribute);
 272                 if (eCompare == COMPARE_GREATER)
 273                         break;
 274                 else if (eCompare == COMPARE_EQUAL)
 275         {
 276                         if (nSection > (*p)->m_nSection)
 277                                 break;
 278                         else if (nSection == (*p)->m_nSection)
 279                         {
 280                                 rPresent = true;
 281                                 return p;
 282                         }
 283         }
 284         }
 285         rPresent = false;
 286         return p;
 287 }
 288
 289 //============================================================================
 290 //
 291 //  parseParameters
 292 //
 293 //============================================================================
 294
 295 namespace unnamed_tools_inetmime {
 296
 297 bool parseParameters(ParameterList const & rInput,
 298                                          INetContentTypeParameterList * pOutput)
 299 {
 300         if (pOutput)
 301                 pOutput->Clear();
 302
 303         Parameter * pPrev = 0;
 304         for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
 305         {
 306                 if (p->m_nSection > 0
 307                         && (!pPrev
 308                                 || pPrev->m_nSection != p->m_nSection - 1
 309                                 || pPrev->m_aAttribute != p->m_aAttribute))
 310                         return false;
 311                 pPrev = p;
 312         }
 313
 314         if (pOutput)
 315                 for (Parameter * p = rInput.m_pList; p;)
 316                 {
 317                         bool bCharset = p->m_aCharset.Len() != 0;
 318                         rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
 319                         if (bCharset)
 320                                 eEncoding
 321                                         = INetMIME::getCharsetEncoding(p->m_aCharset.GetBuffer(),
 322                                                                                                    p->m_aCharset.GetBuffer()
 323                                                                                                    + rInput.m_pList->
 324                                                                                                              m_aCharset.
 325                                                                                                                  Len());
 326                         UniString aValue;
 327                         bool bBadEncoding = false;
 328                         Parameter * pNext = p;
 329                         do
 330                         {
 331                                 sal_Size nSize;
 332                                 sal_Unicode * pUnicode
 333                                         = INetMIME::convertToUnicode(pNext->m_aValue.GetBuffer(),
 334                                                                                                  pNext->m_aValue.GetBuffer()
 335                                                                                                      + pNext->m_aValue.Len(),
 336                                                                                                  bCharset && p->m_bExtended ?
 337                                                                                                      eEncoding :
 338                                                                                                      RTL_TEXTENCODING_UTF8,
 339                                                                                                  nSize);
 340                                 if (!pUnicode && !(bCharset && p->m_bExtended))
 341                                         pUnicode = INetMIME::convertToUnicode(
 342                                                            pNext->m_aValue.GetBuffer(),
 343                                                                    pNext->m_aValue.GetBuffer()
 344                                                                        + pNext->m_aValue.Len(),
 345                                                                    RTL_TEXTENCODING_ISO_8859_1, nSize);
 346                                 if (!pUnicode)
 347                                 {
 348                                         bBadEncoding = true;
 349                                         break;
 350                                 }
 351                                 aValue += UniString(pUnicode, static_cast< xub_StrLen >(nSize));
 352                                 delete[] pUnicode;
 353                                 pNext = pNext->m_pNext;
 354                         }
 355                         while (pNext && pNext->m_nSection > 0);
 356                         if (bBadEncoding)
 357                         {
 358                                 aValue.Erase();
 359                                 for (pNext = p;;)
 360                                 {
 361                                         if (pNext->m_bExtended)
 362                                                 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
 363                                                         aValue += sal_Unicode(
 364                                 sal_Unicode(
 365                                     sal_uChar(pNext->m_aValue.GetChar(i)))
 366                                 | 0xF800);
 367                                         else
 368                                                 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
 369                                                         aValue
 370                                                                 += sal_Unicode(sal_uChar
 371                                                                                                (pNext->
 372                                                                                                             m_aValue.GetChar(i)));
 373                                         pNext = pNext->m_pNext;
 374                                         if (!pNext || pNext->m_nSection == 0)
 375                                                 break;
 376                                 };
 377                         }
 378                         pOutput->Insert(new INetContentTypeParameter(p->m_aAttribute,
 379                                                                                                                          p->m_aCharset,
 380                                                                                                                          p->m_aLanguage,
 381                                                                                                                          aValue,
 382                                                                                                                          !bBadEncoding),
 383                                                                 LIST_APPEND);
 384                         p = pNext;
 385                 }
 386         return true;
 387 }
 388
 389 }
 390
 391 //============================================================================
 392 //
 393 //  INetMIME
 394 //
 395 //============================================================================
 396
 397 // static
 398 bool INetMIME::isAtomChar(sal_uInt32 nChar)
 399 {
 400         static const bool aMap[128]
 401                 = { false, false, false, false, false, false, false, false,
 402                         false, false, false, false, false, false, false, false,
 403                         false, false, false, false, false, false, false, false,
 404                         false, false, false, false, false, false, false, false,
 405                         false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 406                         false, false,  true,  true, false,  true, false,  true, //()*+,-./
 407                          true,  true,  true,  true,  true,  true,  true,  true, //01234567
 408                          true,  true, false, false, false,  true, false,  true, //89:;<=>?
 409                         false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 410                          true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 411                          true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 412                          true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 413                          true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 414                          true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 415                          true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 416                          true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 417                   };
 418         return isUSASCII(nChar) && aMap[nChar];
 419 }
 420
 421 //============================================================================
 422 // static
 423 bool INetMIME::isTokenChar(sal_uInt32 nChar)
 424 {
 425         static const sal_Char aMap[128]
 426                 = { false, false, false, false, false, false, false, false,
 427                         false, false, false, false, false, false, false, false,
 428                         false, false, false, false, false, false, false, false,
 429                         false, false, false, false, false, false, false, false,
 430                         false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 431                         false, false,  true,  true, false,  true,  true, false, //()*+,-./
 432                          true,  true,  true,  true,  true,  true,  true,  true, //01234567
 433                          true,  true, false, false, false, false, false, false, //89:;<=>?
 434                         false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 435                          true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 436                          true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 437                          true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 438                          true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 439                          true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 440                          true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 441                          true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 442                   };
 443         return isUSASCII(nChar) && aMap[nChar];
 444 }
 445
 446 //============================================================================
 447 // static
 448 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar)
 449 {
 450         static const sal_Char aMap[128]
 451                 = { false, false, false, false, false, false, false, false,
 452                         false, false, false, false, false, false, false, false,
 453                         false, false, false, false, false, false, false, false,
 454                         false, false, false, false, false, false, false, false,
 455                         false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
 456                         false, false,  true,  true, false,  true, false, false, //()*+,-./
 457                          true,  true,  true,  true,  true,  true,  true,  true, //01234567
 458                          true,  true, false, false, false, false, false, false, //89:;<=>?
 459                         false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 460                          true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 461                          true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 462                          true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
 463                          true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 464                          true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 465                          true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 466                          true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
 467                   };
 468         return isUSASCII(nChar) && aMap[nChar];
 469 }
 470
 471 //============================================================================
 472 // static
 473 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
 474 {
 475         static const sal_Char aMap[128]
 476                 = { false, false, false, false, false, false, false, false,
 477                         false, false, false, false, false, false, false, false,
 478                         false, false, false, false, false, false, false, false,
 479                         false, false, false, false, false, false, false, false,
 480                         false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
 481                         false, false, false,  true,  true,  true,  true,  true, //()*+,-./
 482                          true,  true,  true,  true,  true,  true,  true,  true, //01234567
 483                          true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
 484                          true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
 485                          true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
 486                          true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
 487                          true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
 488                          true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
 489                          true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
 490                          true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
 491                          true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
 492                   };
 493         return isUSASCII(nChar) && aMap[nChar];
 494 }
 495
 496 //============================================================================
 497 // static
 498 sal_uInt32 INetMIME::getDigit(int nWeight)
 499 {
 500         DBG_ASSERT(nWeight >= 0 && nWeight < 10,
 501                            "INetMIME::getDigit(): Bad weight");
 502
 503         static const sal_Char aDigits[16]
 504                 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
 505         return aDigits[nWeight];
 506 }
 507
 508 //============================================================================
 509 // static
 510 sal_uInt32 INetMIME::getHexDigit(int nWeight)
 511 {
 512         DBG_ASSERT(nWeight >= 0 && nWeight < 16,
 513                            "INetMIME::getHexDigit(): Bad weight");
 514
 515         static const sal_Char aDigits[16]
 516                 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
 517                         'D', 'E', 'F' };
 518         return aDigits[nWeight];
 519 }
 520
 521 //============================================================================
 522 // static
 523 sal_uInt32 INetMIME::getBase64Digit(int nWeight)
 524 {
 525         DBG_ASSERT(nWeight >= 0 && nWeight < 64,
 526                            "INetMIME::getBase64Digit(): Bad weight");
 527
 528         static const sal_Char aDigits[64]
 529                 = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 530                         'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 531                         'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 532                         'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
 533                         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
 534         return aDigits[nWeight];
 535 }
 536
 537 //============================================================================
 538 // static
 539 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
 540                                                            const sal_Char * pEnd1,
 541                                                            const sal_Char * pBegin2,
 542                                                            const sal_Char * pEnd2)
 543 {
 544         DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pBegin2 && pBegin2 <= pEnd2,
 545                            "INetMIME::equalIgnoreCase(): Bad sequences");
 546
 547         if (pEnd1 - pBegin1 != pEnd2 - pBegin2)
 548                 return false;
 549         while (pBegin1 != pEnd1)
 550                 if (toUpperCase(*pBegin1++) != toUpperCase(*pBegin2++))
 551                         return false;
 552         return true;
 553 }
 554
 555 //============================================================================
 556 // static
 557 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
 558                                                            const sal_Char * pEnd1,
 559                                                            const sal_Char * pString2)
 560 {
 561         DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 562                            "INetMIME::equalIgnoreCase(): Bad sequences");
 563
 564         while (*pString2 != 0)
 565                 if (pBegin1 == pEnd1
 566                         || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
 567                         return false;
 568         return pBegin1 == pEnd1;
 569 }
 570
 571 //============================================================================
 572 // static
 573 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
 574                                                            const sal_Unicode * pEnd1,
 575                                                            const sal_Char * pString2)
 576 {
 577         DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
 578                            "INetMIME::equalIgnoreCase(): Bad sequences");
 579
 580         while (*pString2 != 0)
 581                 if (pBegin1 == pEnd1
 582                         || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
 583                         return false;
 584         return pBegin1 == pEnd1;
 585 }
 586
 587 //============================================================================
 588 // static
 589 const sal_Char * INetMIME::skipLinearWhiteSpace(const sal_Char * pBegin,
 590                                                                                                 const sal_Char * pEnd)
 591 {
 592         DBG_ASSERT(pBegin && pBegin <= pEnd,
 593                            "INetMIME::skipLinearWhiteSpace(): Bad sequence");
 594
 595         while (pBegin != pEnd)
 596                 switch (*pBegin)
 597                 {
 598                         case '\t':
 599                         case ' ':
 600                                 ++pBegin;
 601                                 break;
 602
 603                         case 0x0D: // CR
 604                                 if (startsWithLineFolding(pBegin, pEnd))
 605                                         pBegin += 3;
 606                                 else
 607                                         return pBegin;
 608                                 break;
 609
 610                         default:
 611                                 return pBegin;
 612                 }
 613         return pBegin;
 614 }
 615
 616 //============================================================================
 617 // static
 618 const sal_Unicode * INetMIME::skipLinearWhiteSpace(const sal_Unicode * pBegin,
 619                                                                                                    const sal_Unicode * pEnd)
 620 {
 621         DBG_ASSERT(pBegin && pBegin <= pEnd,
 622                            "INetMIME::skipLinearWhiteSpace(): Bad sequence");
 623
 624         while (pBegin != pEnd)
 625                 switch (*pBegin)
 626                 {
 627                         case '\t':
 628                         case ' ':
 629                                 ++pBegin;
 630                                 break;
 631
 632                         case 0x0D: // CR
 633                                 if (startsWithLineFolding(pBegin, pEnd))
 634                                         pBegin += 3;
 635                                 else
 636                                         return pBegin;
 637                                 break;
 638
 639                         default:
 640                                 return pBegin;
 641                 }
 642         return pBegin;
 643 }
 644
 645 //============================================================================
 646 // static
 647 const sal_Char * INetMIME::skipComment(const sal_Char * pBegin,
 648                                                                            const sal_Char * pEnd)
 649 {
 650         DBG_ASSERT(pBegin && pBegin <= pEnd,
 651                            "INetMIME::skipComment(): Bad sequence");
 652
 653         if (pBegin != pEnd && *pBegin == '(')
 654         {
 655                 sal_uInt32 nLevel = 0;
 656                 for (const sal_Char * p = pBegin; p != pEnd;)
 657                         switch (*p++)
 658                         {
 659                                 case '(':
 660                                         ++nLevel;
 661                                         break;
 662
 663                                 case ')':
 664                                         if (--nLevel == 0)
 665                                                 return p;
 666                                         break;
 667
 668                                 case '\\':
 669                                         if (p != pEnd)
 670                                                 ++p;
 671                                         break;
 672                         }
 673         }
 674         return pBegin;
 675 }
 676
 677 //============================================================================
 678 // static
 679 const sal_Unicode * INetMIME::skipComment(const sal_Unicode * pBegin,
 680                                                                                   const sal_Unicode * pEnd)
 681 {
 682         DBG_ASSERT(pBegin && pBegin <= pEnd,
 683                            "INetMIME::skipComment(): Bad sequence");
 684
 685         if (pBegin != pEnd && *pBegin == '(')
 686         {
 687                 sal_uInt32 nLevel = 0;
 688                 for (const sal_Unicode * p = pBegin; p != pEnd;)
 689                         switch (*p++)
 690                         {
 691                                 case '(':
 692                                         ++nLevel;
 693                                         break;
 694
 695                                 case ')':
 696                                         if (--nLevel == 0)
 697                                                 return p;
 698                                         break;
 699
 700                                 case '\\':
 701                                         if (p != pEnd)
 702                                                 ++p;
 703                                         break;
 704                         }
 705         }
 706         return pBegin;
 707 }
 708
 709 //============================================================================
 710 // static
 711 const sal_Char * INetMIME::skipLinearWhiteSpaceComment(const sal_Char *
 712                                                                                                                pBegin,
 713                                                                                                            const sal_Char * pEnd)
 714 {
 715         DBG_ASSERT(pBegin && pBegin <= pEnd,
 716                            "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
 717
 718         while (pBegin != pEnd)
 719                 switch (*pBegin)
 720                 {
 721                         case '\t':
 722                         case ' ':
 723                                 ++pBegin;
 724                                 break;
 725
 726                         case 0x0D: // CR
 727                                 if (startsWithLineFolding(pBegin, pEnd))
 728                                         pBegin += 3;
 729                                 else
 730                                         return pBegin;
 731                                 break;
 732
 733                         case '(':
 734                         {
 735                                 const sal_Char * p = skipComment(pBegin, pEnd);
 736                                 if (p == pBegin)
 737                                         return pBegin;
 738                                 pBegin = p;
 739                                 break;
 740                         }
 741
 742                         default:
 743                                 return pBegin;
 744                 }
 745         return pBegin;
 746 }
 747
 748 //============================================================================
 749 // static
 750 const sal_Unicode * INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode *
 751                                                                                                                       pBegin,
 752                                                                                                                   const sal_Unicode *
 753                                                                                                                       pEnd)
 754 {
 755         DBG_ASSERT(pBegin && pBegin <= pEnd,
 756                            "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
 757
 758         while (pBegin != pEnd)
 759                 switch (*pBegin)
 760                 {
 761                         case '\t':
 762                         case ' ':
 763                                 ++pBegin;
 764                                 break;
 765
 766                         case 0x0D: // CR
 767                                 if (startsWithLineFolding(pBegin, pEnd))
 768                                         pBegin += 3;
 769                                 else
 770                                         return pBegin;
 771                                 break;
 772
 773                         case '(':
 774                         {
 775                                 const sal_Unicode * p = skipComment(pBegin, pEnd);
 776                                 if (p == pBegin)
 777                                         return pBegin;
 778                                 pBegin = p;
 779                                 break;
 780                         }
 781
 782                         default:
 783                                 return pBegin;
 784                 }
 785         return pBegin;
 786 }
 787
 788 //============================================================================
 789 // static
 790 const sal_Char * INetMIME::skipQuotedString(const sal_Char * pBegin,
 791                                                                                         const sal_Char * pEnd)
 792 {
 793         DBG_ASSERT(pBegin && pBegin <= pEnd,
 794                            "INetMIME::skipQuotedString(): Bad sequence");
 795
 796         if (pBegin != pEnd && *pBegin == '"')
 797                 for (const sal_Char * p = pBegin + 1; p != pEnd;)
 798                         switch (*p++)
 799                         {
 800                                 case 0x0D: // CR
 801                                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 802                                                 || !isWhiteSpace(*p++))
 803                                                 return pBegin;
 804                                         break;
 805
 806                                 case '"':
 807                                         return p;
 808
 809                                 case '\\':
 810                                         if (p != pEnd)
 811                                                 ++p;
 812                                         break;
 813                         }
 814         return pBegin;
 815 }
 816
 817 //============================================================================
 818 // static
 819 const sal_Unicode * INetMIME::skipQuotedString(const sal_Unicode * pBegin,
 820                                                                                            const sal_Unicode * pEnd)
 821 {
 822         DBG_ASSERT(pBegin && pBegin <= pEnd,
 823                            "INetMIME::skipQuotedString(): Bad sequence");
 824
 825         if (pBegin != pEnd && *pBegin == '"')
 826                 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
 827                         switch (*p++)
 828                         {
 829                                 case 0x0D: // CR
 830                                         if (pEnd - p < 2 || *p++ != 0x0A // LF
 831                                                 || !isWhiteSpace(*p++))
 832                                                 return pBegin;
 833                                         break;
 834
 835                                 case '"':
 836                                         return p;
 837
 838                                 case '\\':
 839                                         if (p != pEnd)
 840                                                 ++p;
 841                                         break;
 842                         }
 843         return pBegin;
 844 }
 845
 846 //============================================================================
 847 // static
 848 const sal_Char * INetMIME::scanAtom(const sal_Char * pBegin,
 849                                                                         const sal_Char * pEnd)
 850 {
 851         while (pBegin != pEnd && isAtomChar(*pBegin))
 852                 ++pBegin;
 853         return pBegin;
 854 }
 855
 856 //============================================================================
 857 // static
 858 const sal_Unicode * INetMIME::scanAtom(const sal_Unicode * pBegin,
 859                                                                            const sal_Unicode * pEnd)
 860 {
 861         while (pBegin != pEnd && isAtomChar(*pBegin))
 862                 ++pBegin;
 863         return pBegin;
 864 }
 865
 866 //============================================================================
 867 // static
 868 bool INetMIME::scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
 869                                                         bool bLeadingZeroes, sal_uInt32 & rValue)
 870 {
 871         sal_uInt64 nTheValue = 0;
 872         const sal_Char * p = rBegin;
 873         for ( ; p != pEnd; ++p)
 874         {
 875                 int nWeight = getWeight(*p);
 876                 if (nWeight < 0)
 877                         break;
 878                 nTheValue = 10 * nTheValue + nWeight;
 879                 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 880                         return false;
 881         }
 882         if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 883                 return false;
 884         rBegin = p;
 885         rValue = sal_uInt32(nTheValue);
 886         return true;
 887 }
 888
 889 //============================================================================
 890 // static
 891 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
 892                                                         const sal_Unicode * pEnd, bool bLeadingZeroes,
 893                                                         sal_uInt32 & rValue)
 894 {
 895         sal_uInt64 nTheValue = 0;
 896         const sal_Unicode * p = rBegin;
 897         for ( ; p != pEnd; ++p)
 898         {
 899                 int nWeight = getWeight(*p);
 900                 if (nWeight < 0)
 901                         break;
 902                 nTheValue = 10 * nTheValue + nWeight;
 903                 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 904                         return false;
 905         }
 906         if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 907                 return false;
 908         rBegin = p;
 909         rValue = sal_uInt32(nTheValue);
 910         return true;
 911 }
 912
 913 //============================================================================
 914 // static
 915 bool INetMIME::scanUnsignedHex(const sal_Char *& rBegin,
 916                                                            const sal_Char * pEnd, bool bLeadingZeroes,
 917                                                            sal_uInt32 & rValue)
 918 {
 919         sal_uInt64 nTheValue = 0;
 920         const sal_Char * p = rBegin;
 921         for ( p = rBegin; p != pEnd; ++p)
 922         {
 923                 int nWeight = getHexWeight(*p);
 924                 if (nWeight < 0)
 925                         break;
 926                 nTheValue = nTheValue << 4 | nWeight;
 927                 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 928                         return false;
 929         }
 930         if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 931                 return false;
 932         rBegin = p;
 933         rValue = sal_uInt32(nTheValue);
 934         return true;
 935 }
 936
 937 //============================================================================
 938 // static
 939 bool INetMIME::scanUnsignedHex(const sal_Unicode *& rBegin,
 940                                                            const sal_Unicode * pEnd, bool bLeadingZeroes,
 941                                                            sal_uInt32 & rValue)
 942 {
 943         sal_uInt64 nTheValue = 0;
 944         const sal_Unicode * p = rBegin;
 945         for ( ; p != pEnd; ++p)
 946         {
 947                 int nWeight = getHexWeight(*p);
 948                 if (nWeight < 0)
 949                         break;
 950                 nTheValue = nTheValue << 4 | nWeight;
 951                 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
 952                         return false;
 953         }
 954         if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
 955                 return false;
 956         rBegin = p;
 957         rValue = sal_uInt32(nTheValue);
 958         return true;
 959 }
 960
 961 //============================================================================
 962 // static
 963 const sal_Char * INetMIME::scanQuotedBlock(const sal_Char * pBegin,
 964                                                                                    const sal_Char * pEnd,
 965                                                                                    sal_uInt32 nOpening,
 966                                                                                    sal_uInt32 nClosing,
 967                                                                                    sal_Size & rLength,
 968                                                                                    bool & rModify)
 969 {
 970         DBG_ASSERT(pBegin && pBegin <= pEnd,
 971                            "INetMIME::scanQuotedBlock(): Bad sequence");
 972
 973         if (pBegin != pEnd && static_cast< unsigned char >(*pBegin) == nOpening)
 974         {
 975                 ++rLength;
 976                 ++pBegin;
 977                 while (pBegin != pEnd)
 978                         if (static_cast< unsigned char >(*pBegin) == nClosing)
 979                         {
 980                                 ++rLength;
 981                                 return ++pBegin;
 982                         }
 983                         else
 984                         {
 985                                 sal_uInt32 c = *pBegin++;
 986                                 switch (c)
 987                                 {
 988                                         case 0x0D: // CR
 989                                                 if (pBegin != pEnd && *pBegin == 0x0A) // LF
 990                                                         if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
 991                                                         {
 992                                                                 ++rLength;
 993                                                                 rModify = true;
 994                                                                 pBegin += 2;
 995                                                         }
 996                                                         else
 997                                                         {
 998                                                                 rLength += 3;
 999                                                                 rModify = true;
1000                                                                 ++pBegin;
1001                                                         }
1002                                                 else
1003                                                         ++rLength;
1004                                                 break;
1005
1006                                         case '\\':
1007                                                 ++rLength;
1008                                                 if (pBegin != pEnd)
1009                         {
1010                                                         if (startsWithLineBreak(pBegin, pEnd)
1011                                                                 && (pEnd - pBegin < 3
1012                                                                         || !isWhiteSpace(pBegin[2])))
1013                                                         {
1014                                                                 rLength += 3;
1015                                                                 rModify = true;
1016                                                                 pBegin += 2;
1017                                                         }
1018                                                         else
1019                                                                 ++pBegin;
1020                         }
1021                                                 break;
1022
1023                                         default:
1024                                                 ++rLength;
1025                                                 if (!isUSASCII(c))
1026                                                         rModify = true;
1027                                                 break;
1028                                 }
1029                         }
1030         }
1031         return pBegin;
1032 }
1033
1034 //============================================================================
1035 // static
1036 const sal_Unicode * INetMIME::scanQuotedBlock(const sal_Unicode * pBegin,
1037                                                                                           const sal_Unicode * pEnd,
1038                                                                                           sal_uInt32 nOpening,
1039                                                                                           sal_uInt32 nClosing,
1040                                                                                           sal_Size & rLength,
1041                                                                                           bool & rModify)
1042 {
1043         DBG_ASSERT(pBegin && pBegin <= pEnd,
1044                            "INetMIME::scanQuotedBlock(): Bad sequence");
1045
1046         if (pBegin != pEnd && *pBegin == nOpening)
1047         {
1048                 ++rLength;
1049                 ++pBegin;
1050                 while (pBegin != pEnd)
1051                         if (*pBegin == nClosing)
1052                         {
1053                                 ++rLength;
1054                                 return ++pBegin;
1055                         }
1056                         else
1057                         {
1058                                 sal_uInt32 c = *pBegin++;
1059                                 switch (c)
1060                                 {
1061                                         case 0x0D: // CR
1062                                                 if (pBegin != pEnd && *pBegin == 0x0A) // LF
1063                                                         if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
1064                                                         {
1065                                                                 ++rLength;
1066                                                                 rModify = true;
1067                                                                 pBegin += 2;
1068                                                         }
1069                                                         else
1070                                                         {
1071                                                                 rLength += 3;
1072                                                                 rModify = true;
1073                                                                 ++pBegin;
1074                                                         }
1075                                                 else
1076                                                         ++rLength;
1077                                                 break;
1078
1079                                         case '\\':
1080                                                 ++rLength;
1081                                                 if (pBegin != pEnd)
1082                         {
1083                                                         if (startsWithLineBreak(pBegin, pEnd)
1084                                                                 && (pEnd - pBegin < 3
1085                                                                         || !isWhiteSpace(pBegin[2])))
1086                                                         {
1087                                                                 rLength += 3;
1088                                                                 rModify = true;
1089                                                                 pBegin += 2;
1090                                                         }
1091                                                         else
1092                                                                 ++pBegin;
1093                         }
1094                                                 break;
1095
1096                                         default:
1097                                                 ++rLength;
1098                                                 if (!isUSASCII(c))
1099                                                         rModify = true;
1100                                                 break;
1101                                 }
1102                         }
1103         }
1104         return pBegin;
1105 }
1106
1107 //============================================================================
1108 // static
1109 sal_Char const * INetMIME::scanParameters(sal_Char const * pBegin,
1110                                                                                   sal_Char const * pEnd,
1111                                                                                   INetContentTypeParameterList *
1112                                                                                       pParameters)
1113 {
1114         ParameterList aList;
1115         sal_Char const * pParameterBegin = pBegin;
1116         for (sal_Char const * p = pParameterBegin;; pParameterBegin = p)
1117         {
1118                 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1119                 if (pParameterBegin == pEnd || *pParameterBegin != ';')
1120                         break;
1121                 p = pParameterBegin + 1;
1122
1123                 sal_Char const * pAttributeBegin = skipLinearWhiteSpaceComment(p,
1124                                                                                                                                            pEnd);
1125                 p = pAttributeBegin;
1126                 bool bDowncaseAttribute = false;
1127                 while (p != pEnd && isTokenChar(*p) && *p != '*')
1128                 {
1129                         bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1130                         ++p;
1131                 }
1132                 if (p == pAttributeBegin)
1133                         break;
1134                 ByteString aAttribute(
1135             pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin));
1136                 if (bDowncaseAttribute)
1137                         aAttribute.ToLowerAscii();
1138
1139                 sal_uInt32 nSection = 0;
1140                 if (p != pEnd && *p == '*')
1141                 {
1142                         ++p;
1143                         if (p != pEnd && isDigit(*p)
1144                                 && !scanUnsigned(p, pEnd, false, nSection))
1145                                 break;
1146                 }
1147
1148                 bool bPresent;
1149                 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1150                 if (bPresent)
1151                         break;
1152
1153                 bool bExtended = false;
1154                 if (p != pEnd && *p == '*')
1155                 {
1156                         ++p;
1157                         bExtended = true;
1158                 }
1159
1160                 p = skipLinearWhiteSpaceComment(p, pEnd);
1161
1162                 if (p == pEnd || *p != '=')
1163                         break;
1164
1165                 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1166
1167                 ByteString aCharset;
1168                 ByteString aLanguage;
1169                 ByteString aValue;
1170                 if (bExtended)
1171                 {
1172                         if (nSection == 0)
1173                         {
1174                                 sal_Char const * pCharsetBegin = p;
1175                                 bool bDowncaseCharset = false;
1176                                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1177                                 {
1178                                         bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1179                                         ++p;
1180                                 }
1181                                 if (p == pCharsetBegin)
1182                                         break;
1183                                 if (pParameters)
1184                                 {
1185                                         aCharset = ByteString(
1186                         pCharsetBegin,
1187                         static_cast< xub_StrLen >(p - pCharsetBegin));
1188                                         if (bDowncaseCharset)
1189                                                 aCharset.ToLowerAscii();
1190                                 }
1191
1192                                 if (p == pEnd || *p != '\'')
1193                                         break;
1194                                 ++p;
1195
1196                                 sal_Char const * pLanguageBegin = p;
1197                                 bool bDowncaseLanguage = false;
1198                                 int nLetters = 0;
1199                                 for (; p != pEnd; ++p)
1200                                         if (isAlpha(*p))
1201                                         {
1202                                                 if (++nLetters > 8)
1203                                                         break;
1204                                                 bDowncaseLanguage = bDowncaseLanguage
1205                                                                         || isUpperCase(*p);
1206                                         }
1207                                         else if (*p == '-')
1208                                         {
1209                                                 if (nLetters == 0)
1210                                                         break;
1211                                                 nLetters = 0;
1212                                         }
1213                                         else
1214                                                 break;
1215                                 if (nLetters == 0 || nLetters > 8)
1216                                         break;
1217                                 if (pParameters)
1218                                 {
1219                                         aLanguage = ByteString(
1220                         pLanguageBegin,
1221                         static_cast< xub_StrLen >(p - pLanguageBegin));
1222                                         if (bDowncaseLanguage)
1223                                                 aLanguage.ToLowerAscii();
1224                                 }
1225
1226                                 if (p == pEnd || *p != '\'')
1227                                         break;
1228                                 ++p;
1229                         }
1230                         if (pParameters)
1231                                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1232                                 {
1233                                         if (*p == '%')
1234                                         {
1235                                                 if (p + 2 < pEnd)
1236                                                 {
1237                                                         int nWeight1 = getHexWeight(p[1]);
1238                                                         int nWeight2 = getHexWeight(p[2]);
1239                                                         if (nWeight1 >= 0 && nWeight2 >= 0)
1240                                                         {
1241                                                                 aValue += sal_Char(nWeight1 << 4 | nWeight2);
1242                                                                 p += 3;
1243                                                                 continue;
1244                                                         }
1245                                                 }
1246                                         }
1247                                         aValue += *p++;
1248                                 }
1249                         else
1250                                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1251                                         ++p;
1252                 }
1253                 else if (p != pEnd && *p == '"')
1254                         if (pParameters)
1255                         {
1256                                 bool bInvalid = false;
1257                                 for (++p;;)
1258                                 {
1259                                         if (p == pEnd)
1260                                         {
1261                                                 bInvalid = true;
1262                                                 break;
1263                                         }
1264                                         else if (*p == '"')
1265                                         {
1266                                                 ++p;
1267                                                 break;
1268                                         }
1269                                         else if (*p == 0x0D) // CR
1270                                         {
1271                                                 if (pEnd - p < 3 || p[1] != 0x0A // LF
1272                                                         || !isWhiteSpace(p[2]))
1273                                                 {
1274                                                         bInvalid = true;
1275                                                         break;
1276                                                 }
1277                                                 p += 2;
1278                                         }
1279                                         else if (*p == '\\' && ++p == pEnd)
1280                                         {
1281                                                 bInvalid = true;
1282                                                 break;
1283                                         }
1284                                         aValue += *p++;
1285                                 }
1286                                 if (bInvalid)
1287                                         break;
1288                         }
1289                         else
1290                         {
1291                                 sal_Char const * pStringEnd = skipQuotedString(p, pEnd);
1292                                 if (p == pStringEnd)
1293                                         break;
1294                                 p = pStringEnd;
1295                         }
1296                 else
1297                 {
1298                         sal_Char const * pTokenBegin = p;
1299                         while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1300                                 ++p;
1301                         if (p == pTokenBegin)
1302                                 break;
1303                         if (pParameters)
1304                                 aValue = ByteString(
1305                     pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin));
1306                 }
1307
1308                 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1309                                                           nSection, bExtended);
1310         }
1311         return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1312 }
1313
1314 //============================================================================
1315 // static
1316 sal_Unicode const * INetMIME::scanParameters(sal_Unicode const * pBegin,
1317                                                                                          sal_Unicode const * pEnd,
1318                                                                                          INetContentTypeParameterList *
1319                                                                                              pParameters)
1320 {
1321         ParameterList aList;
1322         sal_Unicode const * pParameterBegin = pBegin;
1323         for (sal_Unicode const * p = pParameterBegin;; pParameterBegin = p)
1324         {
1325                 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1326                 if (pParameterBegin == pEnd || *pParameterBegin != ';')
1327                         break;
1328                 p = pParameterBegin + 1;
1329
1330                 sal_Unicode const * pAttributeBegin
1331                         = skipLinearWhiteSpaceComment(p, pEnd);
1332                 p = pAttributeBegin;
1333                 bool bDowncaseAttribute = false;
1334                 while (p != pEnd && isTokenChar(*p) && *p != '*')
1335                 {
1336                         bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1337                         ++p;
1338                 }
1339                 if (p == pAttributeBegin)
1340                         break;
1341                 ByteString aAttribute = ByteString(
1342             pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin),
1343             RTL_TEXTENCODING_ASCII_US);
1344                 if (bDowncaseAttribute)
1345                         aAttribute.ToLowerAscii();
1346
1347                 sal_uInt32 nSection = 0;
1348                 if (p != pEnd && *p == '*')
1349                 {
1350                         ++p;
1351                         if (p != pEnd && isDigit(*p)
1352                                 && !scanUnsigned(p, pEnd, false, nSection))
1353                                 break;
1354                 }
1355
1356                 bool bPresent;
1357                 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1358                 if (bPresent)
1359                         break;
1360
1361                 bool bExtended = false;
1362                 if (p != pEnd && *p == '*')
1363                 {
1364                         ++p;
1365                         bExtended = true;
1366                 }
1367
1368                 p = skipLinearWhiteSpaceComment(p, pEnd);
1369
1370                 if (p == pEnd || *p != '=')
1371                         break;
1372
1373                 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1374
1375                 ByteString aCharset;
1376                 ByteString aLanguage;
1377                 ByteString aValue;
1378                 if (bExtended)
1379                 {
1380                         if (nSection == 0)
1381                         {
1382                                 sal_Unicode const * pCharsetBegin = p;
1383                                 bool bDowncaseCharset = false;
1384                                 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1385                                 {
1386                                         bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1387                                         ++p;
1388                                 }
1389                                 if (p == pCharsetBegin)
1390                                         break;
1391                                 if (pParameters)
1392                                 {
1393                                         aCharset = ByteString(
1394                         pCharsetBegin,
1395                         static_cast< xub_StrLen >(p - pCharsetBegin),
1396                         RTL_TEXTENCODING_ASCII_US);
1397                                         if (bDowncaseCharset)
1398                                                 aCharset.ToLowerAscii();
1399                                 }
1400
1401                                 if (p == pEnd || *p != '\'')
1402                                         break;
1403                                 ++p;
1404
1405                                 sal_Unicode const * pLanguageBegin = p;
1406                                 bool bDowncaseLanguage = false;
1407                                 int nLetters = 0;
1408                                 for (; p != pEnd; ++p)
1409                                         if (isAlpha(*p))
1410                                         {
1411                                                 if (++nLetters > 8)
1412                                                         break;
1413                                                 bDowncaseLanguage = bDowncaseLanguage
1414                                                                         || isUpperCase(*p);
1415                                         }
1416                                         else if (*p == '-')
1417                                         {
1418                                                 if (nLetters == 0)
1419                                                         break;
1420                                                 nLetters = 0;
1421                                         }
1422                                         else
1423                                                 break;
1424                                 if (nLetters == 0 || nLetters > 8)
1425                                         break;
1426                                 if (pParameters)
1427                                 {
1428                                         aLanguage = ByteString(
1429                         pLanguageBegin,
1430                         static_cast< xub_StrLen >(p - pLanguageBegin),
1431                         RTL_TEXTENCODING_ASCII_US);
1432                                         if (bDowncaseLanguage)
1433                                                 aLanguage.ToLowerAscii();
1434                                 }
1435
1436                                 if (p == pEnd || *p != '\'')
1437                                         break;
1438                                 ++p;
1439                         }
1440                         if (pParameters)
1441                         {
1442                                 INetMIMEStringOutputSink
1443                                         aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1444                                 while (p != pEnd)
1445                                 {
1446                                         sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1447                                         if (isUSASCII(nChar) && !isTokenChar(nChar))
1448                                                 break;
1449                                         if (nChar == '%' && p + 1 < pEnd)
1450                                         {
1451                                                 int nWeight1 = getHexWeight(p[0]);
1452                                                 int nWeight2 = getHexWeight(p[1]);
1453                                                 if (nWeight1 >= 0 && nWeight2 >= 0)
1454                                                 {
1455                                                         aSink << sal_Char(nWeight1 << 4 | nWeight2);
1456                                                         p += 2;
1457                                                         continue;
1458                                                 }
1459                                         }
1460                                         INetMIME::writeUTF8(aSink, nChar);
1461                                 }
1462                                 aValue = aSink.takeBuffer();
1463                         }
1464                         else
1465                                 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1466                                         ++p;
1467                 }
1468                 else if (p != pEnd && *p == '"')
1469                         if (pParameters)
1470                         {
1471                                 INetMIMEStringOutputSink
1472                                         aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1473                                 bool bInvalid = false;
1474                                 for (++p;;)
1475                                 {
1476                                         if (p == pEnd)
1477                                         {
1478                                                 bInvalid = true;
1479                                                 break;
1480                                         }
1481                                         sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1482                                         if (nChar == '"')
1483                                                 break;
1484                                         else if (nChar == 0x0D) // CR
1485                                         {
1486                                                 if (pEnd - p < 2 || *p++ != 0x0A // LF
1487                                                         || !isWhiteSpace(*p))
1488                                                 {
1489                                                         bInvalid = true;
1490                                                         break;
1491                                                 }
1492                                                 nChar = sal_uChar(*p++);
1493                                         }
1494                                         else if (nChar == '\\')
1495                                         {
1496                                                 if (p == pEnd)
1497                                                 {
1498                                                         bInvalid = true;
1499                                                         break;
1500                                                 }
1501                                                 nChar = INetMIME::getUTF32Character(p, pEnd);
1502                                         }
1503                                         INetMIME::writeUTF8(aSink, nChar);
1504                                 }
1505                                 if (bInvalid)
1506                                         break;
1507                                 aValue = aSink.takeBuffer();
1508                         }
1509                         else
1510                         {
1511                                 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
1512                                 if (p == pStringEnd)
1513                                         break;
1514                                 p = pStringEnd;
1515                         }
1516                 else
1517                 {
1518                         sal_Unicode const * pTokenBegin = p;
1519                         while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1520                                 ++p;
1521                         if (p == pTokenBegin)
1522                                 break;
1523                         if (pParameters)
1524                                 aValue = ByteString(
1525                     pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin),
1526                     RTL_TEXTENCODING_UTF8);
1527                 }
1528
1529                 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1530                                                           nSection, bExtended);
1531         }
1532         return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1533 }
1534
1535 //============================================================================
1536 // static
1537 const sal_Char * INetMIME::getCharsetName(rtl_TextEncoding eEncoding)
1538 {
1539         if (rtl_isOctetTextEncoding(eEncoding))
1540         {
1541         char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding);
1542                 DBG_ASSERT(p, "INetMIME::getCharsetName(): Unsupported encoding");
1543                 return p;
1544         }
1545         else
1546                 switch (eEncoding)
1547                 {
1548                         case RTL_TEXTENCODING_UCS4:
1549                                 return "ISO-10646-UCS-4";
1550
1551                         case RTL_TEXTENCODING_UCS2:
1552                                 return "ISO-10646-UCS-2";
1553
1554                         default:
1555                                 DBG_ERROR("INetMIME::getCharsetName(): Unsupported encoding");
1556                                 return 0;
1557                 }
1558 }
1559
1560 //============================================================================
1561 namespace unnamed_tools_inetmime {
1562
1563 struct EncodingEntry
1564 {
1565         sal_Char const * m_aName;
1566         rtl_TextEncoding m_eEncoding;
1567 };
1568
1569 //============================================================================
1570 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
1571 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
1572 // noted:
1573 EncodingEntry const aEncodingMap[]
1574         = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
1575                 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
1576                 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
1577                 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
1578                 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
1579                 { "ASCII", RTL_TEXTENCODING_ASCII_US },
1580                 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
1581                 { "US", RTL_TEXTENCODING_ASCII_US },
1582                 { "IBM367", RTL_TEXTENCODING_ASCII_US },
1583                 { "CP367", RTL_TEXTENCODING_ASCII_US },
1584                 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
1585                 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1586                 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
1587                 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
1588                 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1589                 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1590                 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
1591                 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
1592                 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
1593                 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1594                 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1595                 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
1596                 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
1597                 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1598                 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1599                 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
1600                 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1601                 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1602                 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
1603                 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
1604                 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1605                 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1606                 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
1607                 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1608                 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1609                 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
1610                 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
1611                 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1612                 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1613                 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
1614                 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1615                 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1616                 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
1617                 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
1618                 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1619                 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1620                 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1621                 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1622                 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
1623                 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
1624                 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1625                 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
1626                 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
1627                 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1628                 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1629                 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1630                 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
1631                 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
1632                 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1633                 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
1634                 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
1635                 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
1636                 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
1637                 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
1638                 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1639                 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
1640                 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
1641                 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1642                 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1643                 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1644                 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1645                 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
1646                 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
1647                 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1648                 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1649                 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
1650                 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1651                 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
1652                 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
1653                 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
1654                 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1655                 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
1656                 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1657                 { "IBM437", RTL_TEXTENCODING_IBM_437 },
1658                 { "CP437", RTL_TEXTENCODING_IBM_437 },
1659                 { "437", RTL_TEXTENCODING_IBM_437 },
1660                 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
1661                 { "IBM850", RTL_TEXTENCODING_IBM_850 },
1662                 { "CP850", RTL_TEXTENCODING_IBM_850 },
1663                 { "850", RTL_TEXTENCODING_IBM_850 },
1664                 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
1665                 { "IBM860", RTL_TEXTENCODING_IBM_860 },
1666                 { "CP860", RTL_TEXTENCODING_IBM_860 },
1667                 { "860", RTL_TEXTENCODING_IBM_860 },
1668                 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
1669                 { "IBM861", RTL_TEXTENCODING_IBM_861 },
1670                 { "CP861", RTL_TEXTENCODING_IBM_861 },
1671                 { "861", RTL_TEXTENCODING_IBM_861 },
1672                 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
1673                 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
1674                 { "IBM863", RTL_TEXTENCODING_IBM_863 },
1675                 { "CP863", RTL_TEXTENCODING_IBM_863 },
1676                 { "863", RTL_TEXTENCODING_IBM_863 },
1677                 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
1678                 { "IBM865", RTL_TEXTENCODING_IBM_865 },
1679                 { "CP865", RTL_TEXTENCODING_IBM_865 },
1680                 { "865", RTL_TEXTENCODING_IBM_865 },
1681                 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
1682                 { "IBM775", RTL_TEXTENCODING_IBM_775 },
1683                 { "CP775", RTL_TEXTENCODING_IBM_775 },
1684                 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
1685                 { "IBM852", RTL_TEXTENCODING_IBM_852 },
1686                 { "CP852", RTL_TEXTENCODING_IBM_852 },
1687                 { "852", RTL_TEXTENCODING_IBM_852 },
1688                 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
1689                 { "IBM855", RTL_TEXTENCODING_IBM_855 },
1690                 { "CP855", RTL_TEXTENCODING_IBM_855 },
1691                 { "855", RTL_TEXTENCODING_IBM_855 },
1692                 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
1693                 { "IBM857", RTL_TEXTENCODING_IBM_857 },
1694                 { "CP857", RTL_TEXTENCODING_IBM_857 },
1695                 { "857", RTL_TEXTENCODING_IBM_857 },
1696                 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
1697                 { "IBM862", RTL_TEXTENCODING_IBM_862 },
1698                 { "CP862", RTL_TEXTENCODING_IBM_862 },
1699                 { "862", RTL_TEXTENCODING_IBM_862 },
1700                 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
1701                 { "IBM864", RTL_TEXTENCODING_IBM_864 },
1702                 { "CP864", RTL_TEXTENCODING_IBM_864 },
1703                 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
1704                 { "IBM866", RTL_TEXTENCODING_IBM_866 },
1705                 { "CP866", RTL_TEXTENCODING_IBM_866 },
1706                 { "866", RTL_TEXTENCODING_IBM_866 },
1707                 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
1708                 { "IBM869", RTL_TEXTENCODING_IBM_869 },
1709                 { "CP869", RTL_TEXTENCODING_IBM_869 },
1710                 { "869", RTL_TEXTENCODING_IBM_869 },
1711                 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
1712                 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
1713                 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
1714                 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
1715                 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
1716                 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
1717                 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
1718                 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
1719                 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
1720                 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
1721                 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
1722                 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
1723                 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
1724                 { "GB2312", RTL_TEXTENCODING_GB_2312 },
1725                 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
1726                 { "BIG5", RTL_TEXTENCODING_BIG5 },
1727                 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
1728                 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
1729                 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
1730                   RTL_TEXTENCODING_EUC_JP },
1731                 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
1732                 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
1733                 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
1734                 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
1735                 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
1736                 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
1737                 { "UTF-7", RTL_TEXTENCODING_UTF7 },
1738                 { "UTF-8", RTL_TEXTENCODING_UTF8 },
1739                 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
1740                 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
1741                 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
1742                 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
1743                 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
1744                 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
1745                 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
1746                 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
1747                 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
1748                 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
1749
1750 //============================================================================
1751 template< typename T >
1752 inline rtl_TextEncoding getCharsetEncoding_Impl(T const * pBegin,
1753                                                                                                 T const * pEnd)
1754 {
1755         for (sal_Size i = 0; i < sizeof aEncodingMap / sizeof (EncodingEntry);
1756                  ++i)
1757                 if (INetMIME::equalIgnoreCase(pBegin, pEnd, aEncodingMap[i].m_aName))
1758                         return aEncodingMap[i].m_eEncoding;
1759         return RTL_TEXTENCODING_DONTKNOW;
1760 }
1761
1762 }
1763
1764 //============================================================================
1765 // static
1766 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Char const * pBegin,
1767                                                                                           sal_Char const * pEnd)
1768 {
1769         return getCharsetEncoding_Impl(pBegin, pEnd);
1770 }
1771
1772 //============================================================================
1773 // static
1774 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Unicode const * pBegin,
1775                                                                                           sal_Unicode const * pEnd)
1776 {
1777         return getCharsetEncoding_Impl(pBegin, pEnd);
1778 }
1779
1780 //============================================================================
1781 // static
1782 INetMIMECharsetList_Impl *
1783 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding)
1784 {
1785         static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) };
1786
1787         static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) };
1788                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version
1789                 // 1.0 of 1999 July 27
1790
1791         static const sal_uInt32 aISO88592Ranges[]
1792                 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1793                         0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7,
1794                         0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7,
1795                         0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4,
1796                         0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4,
1797                         0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111,
1798                         0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144,
1799                         0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B,
1800                         0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7,
1801                         0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) };
1802                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version
1803                 // 1.0 of 1999 July 27
1804
1805         static const sal_uInt32 aISO88593Ranges[]
1806                 = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1807                         0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4,
1808                         0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2,
1809                         0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC,
1810                         0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131,
1811                         0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C,
1812                         0x2D8, 0x2D9, sal_uInt32(-1) };
1813                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version
1814                 // 1.0 of 1999 July 27
1815
1816         static const sal_uInt32 aISO88594Ranges[]
1817                 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0,
1818                         0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB,
1819                         0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6,
1820                         0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC,
1821                         0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113,
1822                         0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F,
1823                         0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D,
1824                         0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173,
1825                         0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB,
1826                         sal_uInt32(-1) };
1827                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version
1828                 // 1.0 of 1999 July 27
1829
1830         static const sal_uInt32 aISO88595Ranges[]
1831                 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F,
1832                         0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) };
1833                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version
1834                 // 1.0 of 1999 July 27
1835
1836         static const sal_uInt32 aISO88596Ranges[]
1837                 = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B,
1838                         0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) };
1839                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version
1840                 // 1.0 of 1999 July 27
1841
1842         static const sal_uInt32 aISO88597Ranges[]
1843                 = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3,
1844                         0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A,
1845                         0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015,
1846                         0x2018, 0x2019, sal_uInt32(-1) };
1847                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version
1848                 // 1.0 of 1999 July 27
1849
1850         static const sal_uInt32 aISO88598Ranges[]
1851                 = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7,
1852                         0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017,
1853                         sal_uInt32(-1) };
1854                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version
1855                 // 1.1 of 2000-Jan-03
1856
1857         static const sal_uInt32 aISO88599Ranges[]
1858                 = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF,
1859                         0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) };
1860                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version
1861                 // 1.0 of 1999 July 27
1862
1863         static const sal_uInt32 aISO885910Ranges[]
1864                 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7,
1865                         0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6,
1866                         0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB,
1867                         0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101,
1868                         0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119,
1869                         0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138,
1870                         0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161,
1871                         0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015,
1872                         sal_uInt32(-1) };
1873                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version
1874                 // 1.1 of 1999 October 11
1875
1876         static const sal_uInt32 aISO885913Ranges[]
1877                 = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE,
1878                         0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6,
1879                         0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF,
1880                         0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC,
1881                         0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113,
1882                         0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F,
1883                         0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D,
1884                         0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B,
1885                         0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E,
1886                         sal_uInt32(-1) };
1887                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version
1888                 // 1.0 of 1999 July 27
1889
1890         static const sal_uInt32 aISO885914Ranges[]
1891                 = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE,
1892                         0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF,
1893                         0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121,
1894                         0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F,
1895                         0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B,
1896                         0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) };
1897                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version
1898                 // 1.0 of 1999 July 27
1899
1900         static const sal_uInt32 aISO885915Ranges[]
1901                 = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7,
1902                         0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1903                         0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) };
1904                 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version
1905                 // 1.0 of 1999 July 27
1906
1907         static const sal_uInt32 aKOI8RRanges[]
1908                 = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2,
1909                         0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451,
1910                         0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321,
1911                         0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510,
1912                         0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524,
1913                         0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C,
1914                         0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C,
1915                         0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) };
1916                 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT>
1917                 // version 1.0 of 18 August 1999
1918
1919 #if defined WNT
1920         static const sal_uInt32 aWindows1252Ranges[]
1921                 = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1922                         0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC,
1923                         0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022,
1924                         0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC,
1925                         0x2122, 0x2122, sal_uInt32(-1) };
1926                 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
1927                 // CP1252.TXT> version 2.01 of 04/15/98
1928 #endif // WNT
1929
1930         INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl;
1931         switch (eEncoding)
1932         {
1933                 case RTL_TEXTENCODING_MS_1252:
1934 #if defined WNT
1935                         pList->prepend(Charset(RTL_TEXTENCODING_MS_1252,
1936                                                                    aWindows1252Ranges));
1937 #endif // WNT
1938                 case RTL_TEXTENCODING_ISO_8859_1:
1939                 case RTL_TEXTENCODING_UTF7:
1940                 case RTL_TEXTENCODING_UTF8:
1941                         break;
1942
1943                 case RTL_TEXTENCODING_ISO_8859_2:
1944                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1945                                                                    aISO88592Ranges));
1946                         break;
1947
1948                 case RTL_TEXTENCODING_ISO_8859_3:
1949                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3,
1950                                                                    aISO88593Ranges));
1951                         break;
1952
1953                 case RTL_TEXTENCODING_ISO_8859_4:
1954                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
1955                                                                    aISO88594Ranges));
1956                         break;
1957
1958                 case RTL_TEXTENCODING_ISO_8859_5:
1959                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1960                                                                    aISO88595Ranges));
1961                         break;
1962
1963                 case RTL_TEXTENCODING_ISO_8859_6:
1964                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
1965                                                                    aISO88596Ranges));
1966                         break;
1967
1968                 case RTL_TEXTENCODING_ISO_8859_7:
1969                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
1970                                                                    aISO88597Ranges));
1971                         break;
1972
1973                 case RTL_TEXTENCODING_ISO_8859_8:
1974                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
1975                                                                    aISO88598Ranges));
1976                         break;
1977
1978                 case RTL_TEXTENCODING_ISO_8859_9:
1979                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
1980                                                                    aISO88599Ranges));
1981                         break;
1982
1983                 case RTL_TEXTENCODING_ISO_8859_10:
1984                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10,
1985                                                                    aISO885910Ranges));
1986                         break;
1987
1988                 case RTL_TEXTENCODING_ISO_8859_13:
1989                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13,
1990                                                                    aISO885913Ranges));
1991                         break;
1992
1993                 case RTL_TEXTENCODING_ISO_8859_14:
1994                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14,
1995                                                                    aISO885914Ranges));
1996                         break;
1997
1998                 case RTL_TEXTENCODING_ISO_8859_15:
1999                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15,
2000                                                                    aISO885915Ranges));
2001                         break;
2002
2003                 case RTL_TEXTENCODING_MS_1250:
2004                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
2005                                                                    aISO88592Ranges));
2006                         break;
2007
2008                 case RTL_TEXTENCODING_MS_1251:
2009                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2010                                                                    aISO88595Ranges));
2011                         break;
2012
2013                 case RTL_TEXTENCODING_MS_1253:
2014                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
2015                                                                    aISO88597Ranges));
2016                         break;
2017
2018                 case RTL_TEXTENCODING_MS_1254:
2019                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
2020                                                                    aISO88599Ranges));
2021                         break;
2022
2023                 case RTL_TEXTENCODING_MS_1255:
2024                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
2025                                                                    aISO88598Ranges));
2026                         break;
2027
2028                 case RTL_TEXTENCODING_MS_1256:
2029                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
2030                                                                    aISO88596Ranges));
2031                         break;
2032
2033                 case RTL_TEXTENCODING_MS_1257:
2034                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
2035                                                                    aISO88594Ranges));
2036                         break;
2037
2038                 case RTL_TEXTENCODING_KOI8_R:
2039                         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2040                                                                    aISO88595Ranges));
2041                         pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges));
2042                         break;
2043
2044                 default: //@@@ more cases are missing!
2045                         DBG_ERROR("INetMIME::createPreferredCharsetList():"
2046                                               " Unsupported encoding");
2047                         break;
2048         }
2049         pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges));
2050         pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges));
2051         return pList;
2052 }
2053
2054 //============================================================================
2055 // static
2056 sal_Unicode * INetMIME::convertToUnicode(const sal_Char * pBegin,
2057                                                                                  const sal_Char * pEnd,
2058                                                                                  rtl_TextEncoding eEncoding,
2059                                                                                  sal_Size & rSize)
2060 {
2061         if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2062                 return 0;
2063         rtl_TextToUnicodeConverter hConverter
2064                 = rtl_createTextToUnicodeConverter(eEncoding);
2065         rtl_TextToUnicodeContext hContext
2066                 = rtl_createTextToUnicodeContext(hConverter);
2067         sal_Unicode * pBuffer;
2068         sal_uInt32 nInfo;
2069         for (sal_Size nBufferSize = pEnd - pBegin;;
2070                  nBufferSize += nBufferSize / 3 + 1)
2071         {
2072                 pBuffer = new sal_Unicode[nBufferSize];
2073                 sal_Size nSrcCvtBytes;
2074                 rSize = rtl_convertTextToUnicode(
2075                                 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2076                                         nBufferSize,
2077                                         RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
2078                                             | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
2079                                             | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
2080                                         &nInfo, &nSrcCvtBytes);
2081                 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
2082                         break;
2083                 delete[] pBuffer;
2084                 rtl_resetTextToUnicodeContext(hConverter, hContext);
2085         }
2086         rtl_destroyTextToUnicodeContext(hConverter, hContext);
2087         rtl_destroyTextToUnicodeConverter(hConverter);
2088         if (nInfo != 0)
2089         {
2090                 delete[] pBuffer;
2091                 pBuffer = 0;
2092         }
2093         return pBuffer;
2094 }
2095
2096 //============================================================================
2097 // static
2098 sal_Char * INetMIME::convertFromUnicode(const sal_Unicode * pBegin,
2099                                                                                 const sal_Unicode * pEnd,
2100                                                                                 rtl_TextEncoding eEncoding,
2101                                                                                 sal_Size & rSize)
2102 {
2103         if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2104                 return 0;
2105         rtl_UnicodeToTextConverter hConverter
2106                 = rtl_createUnicodeToTextConverter(eEncoding);
2107         rtl_UnicodeToTextContext hContext
2108                 = rtl_createUnicodeToTextContext(hConverter);
2109         sal_Char * pBuffer;
2110         sal_uInt32 nInfo;
2111         for (sal_Size nBufferSize = pEnd - pBegin;;
2112                  nBufferSize += nBufferSize / 3 + 1)
2113         {
2114                 pBuffer = new sal_Char[nBufferSize];
2115                 sal_Size nSrcCvtBytes;
2116                 rSize = rtl_convertUnicodeToText(
2117                                 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2118                                         nBufferSize,
2119                                         RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
2120                                             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
2121                                             | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
2122                                             | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
2123                                         &nInfo, &nSrcCvtBytes);
2124                 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
2125                         break;
2126                 delete[] pBuffer;
2127                 rtl_resetUnicodeToTextContext(hConverter, hContext);
2128         }
2129         rtl_destroyUnicodeToTextContext(hConverter, hContext);
2130         rtl_destroyUnicodeToTextConverter(hConverter);
2131         if (nInfo != 0)
2132         {
2133                 delete[] pBuffer;
2134                 pBuffer = 0;
2135         }
2136         return pBuffer;
2137 }
2138
2139 //============================================================================
2140 // static
2141 void INetMIME::writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
2142 {
2143         // See RFC 2279 for a discussion of UTF-8.
2144         DBG_ASSERT(nChar < 0x80000000, "INetMIME::writeUTF8(): Bad char");
2145
2146         if (nChar < 0x80)
2147                 rSink << sal_Char(nChar);
2148         else if (nChar < 0x800)
2149                 rSink << sal_Char(nChar >> 6 | 0xC0)
2150                           << sal_Char((nChar & 0x3F) | 0x80);
2151         else if (nChar < 0x10000)
2152                 rSink << sal_Char(nChar >> 12 | 0xE0)
2153                           << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2154                           << sal_Char((nChar & 0x3F) | 0x80);
2155         else if (nChar < 0x200000)
2156                 rSink << sal_Char(nChar >> 18 | 0xF0)
2157                           << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2158                           << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2159                           << sal_Char((nChar & 0x3F) | 0x80);
2160         else if (nChar < 0x4000000)
2161                 rSink << sal_Char(nChar >> 24 | 0xF8)
2162                           << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2163                           << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2164                           << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2165                           << sal_Char((nChar & 0x3F) | 0x80);
2166         else
2167                 rSink << sal_Char(nChar >> 30 | 0xFC)
2168                           << sal_Char((nChar >> 24 & 0x3F) | 0x80)
2169                           << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2170                           << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2171                           << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2172                           << sal_Char((nChar & 0x3F) | 0x80);
2173 }
2174
2175 //============================================================================
2176 // static
2177 void INetMIME::writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
2178                                                          int nMinDigits)
2179 {
2180         sal_Char aBuffer[10];
2181             // max unsigned 32 bit value (4294967295) has 10 places
2182         sal_Char * p = aBuffer;
2183         for (; nValue > 0; nValue /= 10)
2184                 *p++ = sal_Char(getDigit(nValue % 10));
2185         nMinDigits -= p - aBuffer;
2186         while (nMinDigits-- > 0)
2187                 rSink << '0';
2188         while (p != aBuffer)
2189                 rSink << *--p;
2190 }
2191
2192 //============================================================================
2193 // static
2194 void INetMIME::writeDateTime(INetMIMEOutputSink & rSink,
2195                                                          const DateTime & rUTC)
2196 {
2197         static const sal_Char aDay[7][3]
2198                 = { { 'M', 'o', 'n' },
2199                         { 'T', 'u', 'e' },
2200                         { 'W', 'e', 'd' },
2201                         { 'T', 'h', 'u' },
2202                         { 'F', 'r', 'i' },
2203                         { 'S', 'a', 't' },
2204                         { 'S', 'u', 'n' } };
2205         const sal_Char * pTheDay = aDay[rUTC.GetDayOfWeek()];
2206         rSink.write(pTheDay, pTheDay + 3);
2207         rSink << ", ";
2208         writeUnsigned(rSink, rUTC.GetDay());
2209         rSink << ' ';
2210         static const sal_Char aMonth[12][3]
2211                 = { { 'J', 'a', 'n' },
2212                         { 'F', 'e', 'b' },
2213                         { 'M', 'a', 'r' },
2214                         { 'A', 'p', 'r' },
2215                         { 'M', 'a', 'y' },
2216                         { 'J', 'u', 'n' },
2217                         { 'J', 'u', 'l' },
2218                         { 'A', 'u', 'g' },
2219                         { 'S', 'e', 'p' },
2220                         { 'O', 'c', 't' },
2221                         { 'N', 'o', 'v' },
2222                         { 'D', 'e', 'c' } };
2223         const sal_Char * pTheMonth = aMonth[rUTC.GetMonth() - 1];
2224         rSink.write(pTheMonth, pTheMonth + 3);
2225         rSink << ' ';
2226         writeUnsigned(rSink, rUTC.GetYear());
2227         rSink << ' ';
2228         writeUnsigned(rSink, rUTC.GetHour(), 2);
2229         rSink << ':';
2230         writeUnsigned(rSink, rUTC.GetMin(), 2);
2231         rSink << ':';
2232         writeUnsigned(rSink, rUTC.GetSec(), 2);
2233         rSink << " +0000";
2234 }
2235
2236 //============================================================================
2237 // static
2238 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2239                                                                         HeaderFieldType eType,
2240                                                                         const ByteString & rBody,
2241                                                                         rtl_TextEncoding ePreferredEncoding,
2242                                                                         bool bInitialSpace)
2243 {
2244         writeHeaderFieldBody(rSink, eType,
2245                                                  UniString(rBody, RTL_TEXTENCODING_UTF8),
2246                                                  ePreferredEncoding, bInitialSpace);
2247 }
2248
2249 //============================================================================
2250 // static
2251 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2252                                                                         HeaderFieldType eType,
2253                                                                         const UniString & rBody,
2254                                                                         rtl_TextEncoding ePreferredEncoding,
2255                                                                         bool bInitialSpace)
2256 {
2257         if (eType == HEADER_FIELD_TEXT)
2258         {
2259                 INetMIMEEncodedWordOutputSink
2260                         aOutput(rSink, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT,
2261                                         bInitialSpace ?
2262                                             INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2263                                             INetMIMEEncodedWordOutputSink::SPACE_NO,
2264                                         ePreferredEncoding);
2265                 aOutput.write(rBody.GetBuffer(), rBody.GetBuffer() + rBody.Len());
2266                 aOutput.flush();
2267         }
2268         else
2269         {
2270                 enum Brackets { BRACKETS_OUTSIDE, BRACKETS_OPENING, BRACKETS_INSIDE };
2271                 Brackets eBrackets = BRACKETS_OUTSIDE;
2272
2273                 const sal_Unicode * pBodyPtr = rBody.GetBuffer();
2274                 const sal_Unicode * pBodyEnd = pBodyPtr + rBody.Len();
2275                 while (pBodyPtr != pBodyEnd)
2276                         switch (*pBodyPtr)
2277                         {
2278                                 case '\t':
2279                                 case ' ':
2280                                         // A WSP adds to accumulated space:
2281                                         bInitialSpace = true;
2282                                         ++pBodyPtr;
2283                                         break;
2284
2285                                 case '(':
2286                                 {
2287                                         // Write a pending '<' if necessary:
2288                                         if (eBrackets == BRACKETS_OPENING)
2289                                         {
2290                                                 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2291                                                             >= rSink.getLineLengthLimit())
2292                                                         rSink << INetMIMEOutputSink::endl << ' ';
2293                                                 else if (bInitialSpace)
2294                                                         rSink << ' ';
2295                                                 rSink << '<';
2296                                                 bInitialSpace = false;
2297                                                 eBrackets = BRACKETS_INSIDE;
2298                                         }
2299
2300                                         // Write the comment, introducing encoded-words where
2301                                         // necessary:
2302                                         int nLevel = 0;
2303                                         INetMIMEEncodedWordOutputSink
2304                                                 aOutput(
2305                                                         rSink,
2306                                                         INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT,
2307                                                         INetMIMEEncodedWordOutputSink::SPACE_NO,
2308                                                         ePreferredEncoding);
2309                                         while (pBodyPtr != pBodyEnd)
2310                                                 switch (*pBodyPtr)
2311                                                 {
2312                                                         case '(':
2313                                                                 aOutput.flush();
2314                                                                 if (rSink.getColumn()
2315                                                                                 + (bInitialSpace ? 1 : 0)
2316                                                                             >= rSink.getLineLengthLimit())
2317                                                                         rSink << INetMIMEOutputSink::endl << ' ';
2318                                                                 else if (bInitialSpace)
2319                                                                         rSink << ' ';
2320                                                                 rSink << '(';
2321                                                                 bInitialSpace = false;
2322                                                                 ++nLevel;
2323                                                                 ++pBodyPtr;
2324                                                                 break;
2325
2326                                                         case ')':
2327                                                                 aOutput.flush();
2328                                                                 if (rSink.getColumn()
2329                                                                             >= rSink.getLineLengthLimit())
2330                                                                         rSink << INetMIMEOutputSink::endl << ' ';
2331                                                                 rSink << ')';
2332                                                                 ++pBodyPtr;
2333                                                                 if (--nLevel == 0)
2334                                                                         goto comment_done;
2335                                                                 break;
2336
2337                                                         case '\\':
2338                                                                 if (++pBodyPtr == pBodyEnd)
2339                                                                         break;
2340                                                         default:
2341                                                                 aOutput << *pBodyPtr++;
2342                                                                 break;
2343                                                 }
2344                                 comment_done:
2345                                         break;
2346                                 }
2347
2348                                 case '<':
2349                                         // Write an already pending '<' if necessary:
2350                                         if (eBrackets == BRACKETS_OPENING)
2351                                         {
2352                                                 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2353                                                             >= rSink.getLineLengthLimit())
2354                                                         rSink << INetMIMEOutputSink::endl << ' ';
2355                                                 else if (bInitialSpace)
2356                                                         rSink << ' ';
2357                                                 rSink << '<';
2358                                                 bInitialSpace = false;
2359                                         }
2360
2361                                         // Remember this '<' as pending, and open a bracketed
2362                                         // block:
2363                                         eBrackets = BRACKETS_OPENING;
2364                                         ++pBodyPtr;
2365                                         break;
2366
2367                                 case '>':
2368                                         // Write a pending '<' if necessary:
2369                                         if (eBrackets == BRACKETS_OPENING)
2370                                         {
2371                                                 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2372                                                             >= rSink.getLineLengthLimit())
2373                                                         rSink << INetMIMEOutputSink::endl << ' ';
2374                                                 else if (bInitialSpace)
2375                                                         rSink << ' ';
2376                                                 rSink << '<';
2377                                                 bInitialSpace = false;
2378                                         }
2379
2380                                         // Write this '>', and close any bracketed block:
2381                                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2382                                                     >= rSink.getLineLengthLimit())
2383                                                 rSink << INetMIMEOutputSink::endl << ' ';
2384                                         else if (bInitialSpace)
2385                                                 rSink << ' ';
2386                                         rSink << '>';
2387                                         bInitialSpace = false;
2388                                         eBrackets = BRACKETS_OUTSIDE;
2389                                         ++pBodyPtr;
2390                                         break;
2391
2392                                 case ',':
2393                                 case ':':
2394                                 case ';':
2395                                 case '\\':
2396                                 case ']':
2397                                         // Write a pending '<' if necessary:
2398                                         if (eBrackets == BRACKETS_OPENING)
2399                                         {
2400                                                 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2401                                                             >= rSink.getLineLengthLimit())
2402                                                         rSink << INetMIMEOutputSink::endl << ' ';
2403                                                 else if (bInitialSpace)
2404                                                         rSink << ' ';
2405                                                 rSink << '<';
2406                                                 bInitialSpace = false;
2407                                                 eBrackets = BRACKETS_INSIDE;
2408                                         }
2409
2410                                         // Write this specials:
2411                                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2412                                                     >= rSink.getLineLengthLimit())
2413                                                 rSink << INetMIMEOutputSink::endl << ' ';
2414                                         else if (bInitialSpace)
2415                                                 rSink << ' ';
2416                                         rSink << sal_Char(*pBodyPtr++);
2417                                         bInitialSpace = false;
2418                                         break;
2419
2420                                 case '\x0D': // CR
2421                                         // A <CRLF WSP> adds to accumulated space, a <CR> not
2422                                         // followed by <LF WSP> starts 'junk':
2423                                         if (startsWithLineFolding(pBodyPtr, pBodyEnd))
2424                                         {
2425                                                 bInitialSpace = true;
2426                                                 pBodyPtr += 3;
2427                                                 break;
2428                                         }
2429                                 default:
2430                                 {
2431                                         // The next token is either one of <"." / "@" / atom /
2432                                         // quoted-string / domain-literal>, or it's 'junk'; if it
2433                                         // is not 'junk', it is either a 'phrase' (i.e., it may
2434                                         // contain encoded-words) or a 'non-phrase' (i.e., it may
2435                                         // not contain encoded-words):
2436                                         enum Entity { ENTITY_JUNK, ENTITY_NON_PHRASE,
2437                                                                   ENTITY_PHRASE };
2438                                         Entity eEntity = ENTITY_JUNK;
2439                                         switch (*pBodyPtr)
2440                                         {
2441                                                 case '.':
2442                                                 case '@':
2443                                                 case '[':
2444                                                         // A token of <"." / "@" / domain-literal> always
2445                                                         // starts a 'non-phrase':
2446                                                         eEntity = ENTITY_NON_PHRASE;
2447                                                         break;
2448
2449                                                 default:
2450                                                         if (isUSASCII(*pBodyPtr)
2451                                                                 && !isAtomChar(*pBodyPtr))
2452                                                         {
2453                                                                 eEntity = ENTITY_JUNK;
2454                                                                 break;
2455                                                         }
2456                                                 case '"':
2457                                                         // A token of <atom / quoted-string> can either be
2458                                                         // a 'phrase' or a 'non-phrase':
2459                                                         switch (eType)
2460                                                         {
2461                                                                 case HEADER_FIELD_STRUCTURED:
2462                                                                         eEntity = ENTITY_NON_PHRASE;
2463                                                                         break;
2464
2465                                                                 case HEADER_FIELD_PHRASE:
2466                                                                         eEntity = ENTITY_PHRASE;
2467                                                                         break;
2468
2469                                                                 case HEADER_FIELD_MESSAGE_ID:
2470                                                                         // A 'phrase' if and only if outside any
2471                                                                         // bracketed block:
2472                                                                         eEntity
2473                                                                                 = eBrackets == BRACKETS_OUTSIDE ?
2474                                                                                   ENTITY_PHRASE :
2475                                                                                           ENTITY_NON_PHRASE;
2476                                                                         break;
2477
2478                                                                 case HEADER_FIELD_ADDRESS:
2479                                                                 {
2480                                                                         // A 'non-phrase' if and only if, after
2481                                                                         // skipping this token and any following
2482                                                                         // <linear-white-space> and <comment>s,
2483                                                                         // there is no token left, or the next
2484                                                                         // token is any of <"." / "@" / ">" / ","
2485                                                                         // / ";">, or the next token is <":"> and
2486                                                                         // is within a bracketed block:
2487                                                                         const sal_Unicode * pLookAhead = pBodyPtr;
2488                                                                         if (*pLookAhead == '"')
2489                                                                         {
2490                                                                                 pLookAhead
2491                                                                                         = skipQuotedString(pLookAhead,
2492                                                                                                                            pBodyEnd);
2493                                                                                 if (pLookAhead == pBodyPtr)
2494                                                                                         pLookAhead = pBodyEnd;
2495                                                                         }
2496                                                                         else
2497                                                                                 while (pLookAhead != pBodyEnd
2498                                                                                            && (isAtomChar(*pLookAhead)
2499                                                                                                    || !isUSASCII(
2500                                                                                                                *pLookAhead)))
2501                                                                                         ++pLookAhead;
2502                                                                         while (pLookAhead != pBodyEnd)
2503                                                                                 switch (*pLookAhead)
2504                                                                                 {
2505                                                                                         case '\t':
2506                                                                                         case ' ':
2507                                                                                                 ++pLookAhead;
2508                                                                                                 break;
2509
2510                                                                                         case '(':
2511                                                                                         {
2512                                                                                                 const sal_Unicode * pPast
2513                                                                                                         = skipComment(pLookAhead,
2514                                                                                                                                   pBodyEnd);
2515                                                                                                 pLookAhead
2516                                                                                                         = pPast == pLookAhead ?
2517                                                                                                               pBodyEnd : pPast;
2518                                                                                                 break;
2519                                                                                         }
2520
2521                                                                                         case ',':
2522                                                                                         case '.':
2523                                                                                         case ';':
2524                                                                                         case '>':
2525                                                                                         case '@':
2526                                                                                                 eEntity = ENTITY_NON_PHRASE;
2527                                                                                                 goto entity_determined;
2528
2529                                                                                         case ':':
2530                                                                                                 eEntity
2531                                                                                                         = eBrackets
2532                                                                                                              == BRACKETS_OUTSIDE ?
2533                                                                                                           ENTITY_PHRASE :
2534                                                                                                                   ENTITY_NON_PHRASE;
2535                                                                                                 goto entity_determined;
2536
2537                                                                                         case '\x0D': // CR
2538                                                                                                 if (startsWithLineFolding(
2539                                                                                                             pLookAhead, pBodyEnd))
2540                                                                                                 {
2541                                                                                                         pLookAhead += 3;
2542                                                                                                         break;
2543                                                                                                 }
2544                                                                                         default:
2545                                                                                                 eEntity = ENTITY_PHRASE;
2546                                                                                                 goto entity_determined;
2547                                                                                 }
2548                                                                         eEntity = ENTITY_NON_PHRASE;
2549                                                                 entity_determined:
2550                                                                         break;
2551                                                                 }
2552
2553                                 case HEADER_FIELD_TEXT:
2554                                     OSL_ASSERT(false);
2555                                     break;
2556                                                         }
2557
2558                                                         // In a 'non-phrase', a non-US-ASCII character
2559                                                         // cannot be part of an <atom>, but instead the
2560                                                         // whole entity is 'junk' rather than 'non-
2561                                                         // phrase':
2562                                                         if (eEntity == ENTITY_NON_PHRASE
2563                                                                 && !isUSASCII(*pBodyPtr))
2564                                                                 eEntity = ENTITY_JUNK;
2565                                                         break;
2566                                         }
2567
2568                                         switch (eEntity)
2569                                         {
2570                                                 case ENTITY_JUNK:
2571                                                 {
2572                                                         // Write a pending '<' if necessary:
2573                                                         if (eBrackets == BRACKETS_OPENING)
2574                                                         {
2575                                                                 if (rSink.getColumn()
2576                                                                                 + (bInitialSpace ? 1 : 0)
2577                                                                             >= rSink.getLineLengthLimit())
2578                                                                         rSink << INetMIMEOutputSink::endl << ' ';
2579                                                                 else if (bInitialSpace)
2580                                                                         rSink << ' ';
2581                                                                 rSink << '<';
2582                                                                 bInitialSpace = false;
2583                                                                 eBrackets = BRACKETS_INSIDE;
2584                                                         }
2585
2586                                                         // Calculate the length of in- and output:
2587                                                         const sal_Unicode * pStart = pBodyPtr;
2588                                                         sal_Size nLength = 0;
2589                                                         bool bModify = false;
2590                                                         bool bEnd = false;
2591                                                         while (pBodyPtr != pBodyEnd && !bEnd)
2592                                                                 switch (*pBodyPtr)
2593                                                                 {
2594                                                                         case '\x0D': // CR
2595                                                                                 if (startsWithLineFolding(pBodyPtr,
2596                                                                                                                                   pBodyEnd))
2597                                                                                         bEnd = true;
2598                                                                                 else if (startsWithLineBreak(
2599                                                                                                  pBodyPtr, pBodyEnd))
2600                                                                                 {
2601                                                                                         nLength += 3;
2602                                                                                         bModify = true;
2603                                                                                         pBodyPtr += 2;
2604                                                                                 }
2605                                                                                 else
2606                                                                                 {
2607                                                                                         ++nLength;
2608                                                                                         ++pBodyPtr;
2609                                                                                 }
2610                                                                                 break;
2611
2612                                                                         case '\t':
2613                                                                         case ' ':
2614                                                                                 bEnd = true;
2615                                                                                 break;
2616
2617                                                                         default:
2618                                                                                 if (isVisible(*pBodyPtr))
2619                                                                                         bEnd = true;
2620                                                                                 else if (isUSASCII(*pBodyPtr))
2621                                                                                 {
2622                                                                                         ++nLength;
2623                                                                                         ++pBodyPtr;
2624                                                                                 }
2625                                                                                 else
2626                                                                                 {
2627                                                                                         nLength += getUTF8OctetCount(
2628                                                                                                            *pBodyPtr++);
2629                                                                                         bModify = true;
2630                                                                                 }
2631                                                                                 break;
2632                                                                 }
2633
2634                                                         // Write the output:
2635                                                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2636                                                                         + nLength
2637                                                                     > rSink.getLineLengthLimit())
2638                                                                 rSink << INetMIMEOutputSink::endl << ' ';
2639                                                         else if (bInitialSpace)
2640                                                                 rSink << ' ';
2641                                                         bInitialSpace = false;
2642                                                         if (bModify)
2643                                                                 while (pStart != pBodyPtr)
2644                                                                         if (startsWithLineBreak(pStart, pBodyPtr))
2645                                                                         {
2646                                                                                 rSink << "\x0D\\\x0A"; // CR, '\', LF
2647                                                                                 pStart += 2;
2648                                                                         }
2649                                                                         else
2650                                                                                 writeUTF8(rSink, *pStart++);
2651                                                         else
2652                                                                 rSink.write(pStart, pBodyPtr);
2653                                                         break;
2654                                                 }
2655
2656                                                 case ENTITY_NON_PHRASE:
2657                                                 {
2658                                                         // Calculate the length of in- and output:
2659                                                         const sal_Unicode * pStart = pBodyPtr;
2660                                                         sal_Size nLength = 0;
2661                                                         bool bBracketedBlock = false;
2662                                                         bool bSymbol = *pStart != '.' && *pStart != '@';
2663                                                         bool bModify = false;
2664                                                         bool bEnd = false;
2665                                                         while (pBodyPtr != pBodyEnd && !bEnd)
2666                                                                 switch (*pBodyPtr)
2667                                                                 {
2668                                                                         case '\t':
2669                                                                         case ' ':
2670                                                                         case '\x0D': // CR
2671                                                                         {
2672                                                                                 const sal_Unicode * pLookAhead
2673                                                                                         = skipLinearWhiteSpace(pBodyPtr,
2674                                                                                                                                    pBodyEnd);
2675                                                                                 if (pLookAhead < pBodyEnd
2676                                                                                         && (bSymbol ?
2677                                                                                                     isAtomChar(*pLookAhead)
2678                                                                                                     || *pLookAhead == '"'
2679                                                                                                     || *pLookAhead == '[' :
2680                                                                                                     *pLookAhead == '.'
2681                                                                                                     || *pLookAhead == '@'
2682                                                                                                     || (*pLookAhead == '>'
2683                                                                                                        && eType
2684                                                                                                     >= HEADER_FIELD_MESSAGE_ID
2685                                                                                                        && eBrackets
2686                                                                                                          == BRACKETS_OPENING)))
2687                                                                                 {
2688                                                                                         bModify = true;
2689                                                                                         pBodyPtr = pLookAhead;
2690                                                                                 }
2691                                                                                 else
2692                                                                                         bEnd = true;
2693                                                                                 break;
2694                                                                         }
2695
2696                                                                         case '"':
2697                                                                                 if (bSymbol)
2698                                                                                 {
2699                                                                                         pBodyPtr
2700                                                                                                 = scanQuotedBlock(pBodyPtr,
2701                                                                                                                                   pBodyEnd,
2702                                                                                                                                   '"', '"',
2703                                                                                                                                   nLength,
2704                                                                                                                                   bModify);
2705                                                                                         bSymbol = false;
2706                                                                                 }
2707                                                                                 else
2708                                                                                         bEnd = true;
2709                                                                                 break;
2710
2711                                                                         case '[':
2712                                                                                 if (bSymbol)
2713                                                                                 {
2714                                                                                         pBodyPtr
2715                                                                                                 = scanQuotedBlock(pBodyPtr,
2716                                                                                                                                   pBodyEnd,
2717                                                                                                                                   '[', ']',
2718                                                                                                                                   nLength,
2719                                                                                                                                   bModify);
2720                                                                                         bSymbol = false;
2721                                                                                 }
2722                                                                                 else
2723                                                                                         bEnd = true;
2724                                                                                 break;
2725
2726                                                                         case '.':
2727                                                                         case '@':
2728                                                                                 if (bSymbol)
2729                                                                                         bEnd = true;
2730                                                                                 else
2731                                                                                 {
2732                                                                                         ++nLength;
2733                                                                                         bSymbol = true;
2734                                                                                         ++pBodyPtr;
2735                                                                                 }
2736                                                                                 break;
2737
2738                                                                         case '>':
2739                                                                                 if (eBrackets == BRACKETS_OPENING
2740                                                                                         && eType
2741                                                                                                >= HEADER_FIELD_MESSAGE_ID)
2742                                                                                 {
2743                                                                                         ++nLength;
2744                                                                                         bBracketedBlock = true;
2745                                                                                         ++pBodyPtr;
2746                                                                                 }
2747                                                                                 bEnd = true;
2748                                                                                 break;
2749
2750                                                                         default:
2751                                                                                 if (isAtomChar(*pBodyPtr) && bSymbol)
2752                                                                                 {
2753                                                                                         while (pBodyPtr != pBodyEnd
2754                                                                                                    && isAtomChar(*pBodyPtr))
2755                                                                                         {
2756                                                                                                 ++nLength;
2757                                                                                                 ++pBodyPtr;
2758                                                                                         }
2759                                                                                         bSymbol = false;
2760                                                                                 }
2761                                                                                 else
2762                                                                                 {
2763                                                                                         if (!isUSASCII(*pBodyPtr))
2764                                                                                                 bModify = true;
2765                                                                                         bEnd = true;
2766                                                                                 }
2767                                                                                 break;
2768                                                                 }
2769
2770                                                         // Write a pending '<' if necessary:
2771                                                         if (eBrackets == BRACKETS_OPENING
2772                                                                 && !bBracketedBlock)
2773                                                         {
2774                                                                 if (rSink.getColumn()
2775                                                                                 + (bInitialSpace ? 1 : 0)
2776                                                                             >= rSink.getLineLengthLimit())
2777                                                                         rSink << INetMIMEOutputSink::endl << ' ';
2778                                                                 else if (bInitialSpace)
2779                                                                         rSink << ' ';
2780                                                                 rSink << '<';
2781                                                                 bInitialSpace = false;
2782                                                                 eBrackets = BRACKETS_INSIDE;
2783                                                         }
2784
2785                                                         // Write the output:
2786                                                         if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2787                                                                         + nLength
2788                                                                     > rSink.getLineLengthLimit())
2789                                                                 rSink << INetMIMEOutputSink::endl << ' ';
2790                                                         else if (bInitialSpace)
2791                                                                 rSink << ' ';
2792                                                         bInitialSpace = false;
2793                                                         if (bBracketedBlock)
2794                                                         {
2795                                                                 rSink << '<';
2796                                                                 eBrackets = BRACKETS_OUTSIDE;
2797                                                         }
2798                                                         if (bModify)
2799                                                         {
2800                                                                 enum Mode { MODE_PLAIN, MODE_QUOTED_STRING,
2801                                                                                         MODE_DOMAIN_LITERAL };
2802                                                                 Mode eMode = MODE_PLAIN;
2803                                                                 while (pStart != pBodyPtr)
2804                                                                         switch (*pStart)
2805                                                                         {
2806                                                                                 case '\x0D': // CR
2807                                                                                         if (startsWithLineFolding(
2808                                                                                                     pStart, pBodyPtr))
2809                                                                                         {
2810                                                                                                 if (eMode != MODE_PLAIN)
2811                                                                                                         rSink << sal_Char(
2812                                                                                                                          pStart[2]);
2813                                                                                                 pStart += 3;
2814                                                                                         }
2815                                                                                         else if (startsWithLineBreak(
2816                                                                                                          pStart, pBodyPtr))
2817                                                                                         {
2818                                                                                                 rSink << "\x0D\\\x0A";
2819                                                                                                     // CR, '\', LF
2820                                                                                                 pStart += 2;
2821                                                                                         }
2822                                                                                         else
2823                                                                                         {
2824                                                                                                 rSink << '\x0D'; // CR
2825                                                                                                 ++pStart;
2826                                                                                         }
2827                                                                                         break;
2828
2829                                                                                 case '\t':
2830                                                                                 case ' ':
2831                                                                                         if (eMode != MODE_PLAIN)
2832                                                                                                 rSink << sal_Char(*pStart);
2833                                                                                         ++pStart;
2834                                                                                         break;
2835
2836                                                                                 case '"':
2837                                                                                         if (eMode == MODE_PLAIN)
2838                                                                                                 eMode = MODE_QUOTED_STRING;
2839                                                                                         else if (eMode
2840                                                                                                             == MODE_QUOTED_STRING)
2841                                                                                                 eMode = MODE_PLAIN;
2842                                                                                         rSink << '"';
2843                                                                                         ++pStart;
2844                                                                                         break;
2845
2846                                                                                 case '[':
2847                                                                                         if (eMode == MODE_PLAIN)
2848                                                                                                 eMode = MODE_DOMAIN_LITERAL;
2849                                                                                         rSink << '[';
2850                                                                                         ++pStart;
2851                                                                                         break;
2852
2853                                                                                 case ']':
2854                                                                                         if (eMode == MODE_DOMAIN_LITERAL)
2855                                                                                                 eMode = MODE_PLAIN;
2856                                                                                         rSink << ']';
2857                                                                                         ++pStart;
2858                                                                                         break;
2859
2860                                                                                 case '\\':
2861                                                                                         rSink << '\\';
2862                                                                                         if (++pStart < pBodyPtr)
2863                                                                                                 writeUTF8(rSink, *pStart++);
2864                                                                                         break;
2865
2866                                                                                 default:
2867                                                                                         writeUTF8(rSink, *pStart++);
2868                                                                                         break;
2869                                                                         }
2870                                                         }
2871                                                         else
2872                                                                 rSink.write(pStart, pBodyPtr);
2873                                                         break;
2874                                                 }
2875
2876                                                 case ENTITY_PHRASE:
2877                                                 {
2878                                                         // Write a pending '<' if necessary:
2879                                                         if (eBrackets == BRACKETS_OPENING)
2880                                                         {
2881                                                                 if (rSink.getColumn()
2882                                                                                 + (bInitialSpace ? 1 : 0)
2883                                                                             >= rSink.getLineLengthLimit())
2884                                                                         rSink << INetMIMEOutputSink::endl << ' ';
2885                                                                 else if (bInitialSpace)
2886                                                                         rSink << ' ';
2887                                                                 rSink << '<';
2888                                                                 bInitialSpace = false;
2889                                                                 eBrackets = BRACKETS_INSIDE;
2890                                                         }
2891
2892                                                         // Calculate the length of in- and output:
2893                                                         const sal_Unicode * pStart = pBodyPtr;
2894                                                         bool bQuotedString = false;
2895                                                         bool bEnd = false;
2896                                                         while (pBodyPtr != pBodyEnd && !bEnd)
2897                                                                 switch (*pBodyPtr)
2898                                                                 {
2899                                                                         case '\t':
2900                                                                         case ' ':
2901                                                                         case '\x0D': // CR
2902                                                                                 if (bQuotedString)
2903                                                                                         ++pBodyPtr;
2904                                                                                 else
2905                                                                                 {
2906                                                                                         const sal_Unicode * pLookAhead
2907                                                                                                 = skipLinearWhiteSpace(
2908                                                                                                           pBodyPtr, pBodyEnd);
2909                                                                                         if (pLookAhead != pBodyEnd
2910                                                                                                 && (isAtomChar(*pLookAhead)
2911                                                                                                         || !isUSASCII(*pLookAhead)
2912                                                                                                         || *pLookAhead == '"'))
2913                                                                                                 pBodyPtr = pLookAhead;
2914                                                                                         else
2915                                                                                                 bEnd = true;
2916                                                                                 }
2917                                                                                 break;
2918
2919                                                                         case '"':
2920                                                                                 bQuotedString = !bQuotedString;
2921                                                                                 ++pBodyPtr;
2922                                                                                 break;
2923
2924                                                                         case '\\':
2925                                                                                 if (bQuotedString)
2926                                                                                 {
2927                                                                                         if (++pBodyPtr != pBodyEnd)
2928                                                                                                 ++pBodyPtr;
2929                                                                                 }
2930                                                                                 else
2931                                                                                         bEnd = true;
2932                                                                                 break;
2933
2934                                                                         default:
2935                                                                                 if (bQuotedString
2936                                                                                         || isAtomChar(*pBodyPtr)
2937                                                                                         || !isUSASCII(*pBodyPtr))
2938                                                                                         ++pBodyPtr;
2939                                                                                 else
2940                                                                                         bEnd = true;
2941                                                                                 break;
2942                                                                 }
2943
2944                                                         // Write the phrase, introducing encoded-words
2945                                                         // where necessary:
2946                                                         INetMIMEEncodedWordOutputSink
2947                                                                 aOutput(
2948                                                                         rSink,
2949                                                                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,
2950                                                                         bInitialSpace ?
2951                                                                  INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2952                                                                  INetMIMEEncodedWordOutputSink::SPACE_ENCODED,
2953                                                            ePreferredEncoding);
2954                                                         while (pStart != pBodyPtr)
2955                                                                 switch (*pStart)
2956                                                                 {
2957                                                                         case '"':
2958                                                                                 ++pStart;
2959                                                                                 break;
2960
2961                                                                         case '\\':
2962                                                                                 if (++pStart != pBodyPtr)
2963                                                                                         aOutput << *pStart++;
2964                                                                                 break;
2965
2966                                                                         case '\x0D': // CR
2967                                                                                 pStart += 2;
2968                                                                                 aOutput << *pStart++;
2969                                                                                 break;
2970
2971                                                                         default:
2972                                                                                 aOutput << *pStart++;
2973                                                                                 break;
2974                                                                 }
2975                                                         bInitialSpace = aOutput.flush();
2976                                                         break;
2977                                                 }
2978                                         }
2979                                         break;
2980                                 }
2981                         }
2982         }
2983 }
2984
2985 //============================================================================
2986 // static
2987 bool INetMIME::translateUTF8Char(const sal_Char *& rBegin,
2988                                                                  const sal_Char * pEnd,
2989                                                                  rtl_TextEncoding eEncoding,
2990                                                                  sal_uInt32 & rCharacter)
2991 {
2992         if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
2993         || static_cast< unsigned char >(*rBegin) >= 0xFE)
2994                 return false;
2995
2996         int nCount;
2997         sal_uInt32 nMin;
2998         sal_uInt32 nUCS4;
2999         const sal_Char * p = rBegin;
3000         if (static_cast< unsigned char >(*p) < 0xE0)
3001         {
3002                 nCount = 1;
3003                 nMin = 0x80;
3004                 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
3005         }
3006         else if (static_cast< unsigned char >(*p) < 0xF0)
3007         {
3008                 nCount = 2;
3009                 nMin = 0x800;
3010                 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
3011         }
3012         else if (static_cast< unsigned char >(*p) < 0xF8)
3013         {
3014                 nCount = 3;
3015                 nMin = 0x10000;
3016                 nUCS4 = static_cast< unsigned char >(*p) & 7;
3017         }
3018         else if (static_cast< unsigned char >(*p) < 0xFC)
3019         {
3020                 nCount = 4;
3021                 nMin = 0x200000;
3022                 nUCS4 = static_cast< unsigned char >(*p) & 3;
3023         }
3024         else
3025         {
3026                 nCount = 5;
3027                 nMin = 0x4000000;
3028                 nUCS4 = static_cast< unsigned char >(*p) & 1;
3029         }
3030         ++p;
3031
3032         for (; nCount-- > 0; ++p)
3033                 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
3034                         nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
3035                 else
3036                         return false;
3037
3038         if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
3039                 return false;
3040
3041         if (eEncoding >= RTL_TEXTENCODING_UCS4)
3042                 rCharacter = nUCS4;
3043         else
3044         {
3045                 sal_Unicode aUTF16[2];
3046                 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
3047                 sal_Size nSize;
3048                 sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
3049                                                                                                 nSize);
3050                 if (!pBuffer)
3051                         return false;
3052                 DBG_ASSERT(nSize == 1,
3053                                    "INetMIME::translateUTF8Char(): Bad conversion");
3054                 rCharacter = *pBuffer;
3055                 delete[] pBuffer;
3056         }
3057         rBegin = p;
3058         return true;
3059 }
3060
3061 //============================================================================
3062 // static
3063 ByteString INetMIME::decodeUTF8(const ByteString & rText,
3064                                                                 rtl_TextEncoding eEncoding)
3065 {
3066         const sal_Char * p = rText.GetBuffer();
3067         const sal_Char * pEnd = p + rText.Len();
3068         ByteString sDecoded;
3069         while (p != pEnd)
3070         {
3071                 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3072                 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3073                 sal_uInt32 nCharacter=0;
3074                 if (translateUTF8Char(p, pEnd, eEncoding, nCharacter))
3075                         sDecoded += sal_Char(nCharacter);
3076                 else
3077                         sDecoded += sal_Char(*p++);
3078         }
3079         return sDecoded;
3080 }
3081
3082 //============================================================================
3083 // static
3084 UniString INetMIME::decodeHeaderFieldBody(HeaderFieldType eType,
3085                                                                                   const ByteString & rBody)
3086 {
3087         // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
3088         // versions of StarOffice send mails with header fields where encoded
3089         // words can be preceded by '=', ',', '.', '"', or '(', and followed by
3090         // '=', ',', '.', '"', ')', without any required white space in between.
3091         // And there appear to exist some broken mailers that only encode single
3092         // letters within words, like "Appel
3093         // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
3094         // detect encoded words even when not properly surrounded by white space.
3095         //
3096         // Non US-ASCII characters in rBody are treated as ISO-8859-1.
3097         //
3098         // encoded-word = "=?"
3099         //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
3100         //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
3101         //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
3102         //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
3103         //     "?="
3104         //
3105         // base64 = ALPHA / DIGIT / "+" / "/"
3106
3107         const sal_Char * pBegin = rBody.GetBuffer();
3108         const sal_Char * pEnd = pBegin + rBody.Len();
3109
3110         UniString sDecoded;
3111         const sal_Char * pCopyBegin = pBegin;
3112
3113         /* bool bStartEncodedWord = true; */
3114         const sal_Char * pWSPBegin = pBegin;
3115         UniString sEncodedText;
3116         bool bQuotedEncodedText = false;
3117         sal_uInt32 nCommentLevel = 0;
3118
3119         for (const sal_Char * p = pBegin; p != pEnd;)
3120         {
3121                 if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
3122                 {
3123                         const sal_Char * q = p + 1;
3124                         bool bEncodedWord = q != pEnd && *q++ == '?';
3125
3126                         rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
3127                         if (bEncodedWord)
3128                         {
3129                                 const sal_Char * pCharsetBegin = q;
3130                                 const sal_Char * pLanguageBegin = 0;
3131                                 int nAlphaCount = 0;
3132                                 for (bool bDone = false; !bDone;)
3133                                         if (q == pEnd)
3134                                         {
3135                                                 bEncodedWord = false;
3136                                                 bDone = true;
3137                                         }
3138                                         else
3139                                         {
3140                                                 sal_Char cChar = *q++;
3141                                                 switch (cChar)
3142                                                 {
3143                                                         case '*':
3144                                                                 pLanguageBegin = q - 1;
3145                                                                 nAlphaCount = 0;
3146                                                                 break;
3147
3148                                                         case '-':
3149                                                                 if (pLanguageBegin != 0)
3150                                                                 {
3151                                                                         if (nAlphaCount == 0)
3152                                                                                 pLanguageBegin = 0;
3153                                                                         else
3154                                                                                 nAlphaCount = 0;
3155                                                                 }
3156                                                                 break;
3157
3158                                                         case '?':
3159                                                                 if (pCharsetBegin == q - 1)
3160                                                                         bEncodedWord = false;
3161                                                                 else
3162                                                                 {
3163                                                                         eCharsetEncoding
3164                                                                                 = getCharsetEncoding(
3165                                                                                           pCharsetBegin,
3166                                                                                           pLanguageBegin == 0
3167                                                                                           || nAlphaCount == 0 ?
3168                                                                                               q - 1 : pLanguageBegin);
3169                                                                         bEncodedWord = isMIMECharsetEncoding(
3170                                                                                                eCharsetEncoding);
3171                                                                         eCharsetEncoding
3172                                                                                 = translateFromMIME(eCharsetEncoding);
3173                                                                 }
3174                                                                 bDone = true;
3175                                                                 break;
3176
3177                                                         default:
3178                                                                 if (pLanguageBegin != 0
3179                                                                         && (!isAlpha(cChar) || ++nAlphaCount > 8))
3180                                                                         pLanguageBegin = 0;
3181                                                                 break;
3182                                                 }
3183                                         }
3184                         }
3185
3186                         bool bEncodingB = false;
3187                         if (bEncodedWord)
3188             {
3189                                 if (q == pEnd)
3190                                         bEncodedWord = false;
3191                                 else
3192                 {
3193                                         switch (*q++)
3194                                         {
3195                                                 case 'B':
3196                                                 case 'b':
3197                                                         bEncodingB = true;
3198                                                         break;
3199
3200                                                 case 'Q':
3201                                                 case 'q':
3202                                                         bEncodingB = false;
3203                                                         break;
3204
3205                                                 default:
3206                                                         bEncodedWord = false;
3207                                                         break;
3208                                         }
3209                 }
3210             }
3211
3212                         bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
3213
3214                         ByteString sText;
3215                         if (bEncodedWord)
3216             {
3217                                 if (bEncodingB)
3218                 {
3219                                         for (bool bDone = false; !bDone;)
3220                                         {
3221                                                 if (pEnd - q < 4)
3222                                                 {
3223                                                         bEncodedWord = false;
3224                                                         bDone = true;
3225                                                 }
3226                                                 else
3227                                                 {
3228                                                         bool bFinal = false;
3229                                                         int nCount = 3;
3230                                                         sal_uInt32 nValue = 0;
3231                                                         for (int nShift = 18; nShift >= 0; nShift -= 6)
3232                                                         {
3233                                                                 int nWeight = getBase64Weight(*q++);
3234                                                                 if (nWeight == -2)
3235                                                                 {
3236                                                                         bEncodedWord = false;
3237                                                                         bDone = true;
3238                                                                         break;
3239                                                                 }
3240                                                                 if (nWeight == -1)
3241                                                                 {
3242                                                                         if (!bFinal)
3243                                                                         {
3244                                                                                 if (nShift >= 12)
3245                                                                                 {
3246                                                                                         bEncodedWord = false;
3247                                                                                         bDone = true;
3248                                                                                         break;
3249                                                                                 }
3250                                                                                 bFinal = true;
3251                                                                                 nCount = nShift == 6 ? 1 : 2;
3252                                                                         }
3253                                                                 }
3254                                                                 else
3255                                                                         nValue |= nWeight << nShift;
3256                                                         }
3257                                                         if (bEncodedWord)
3258                                                         {
3259                                                                 for (int nShift = 16; nCount-- > 0;
3260                                                                          nShift -= 8)
3261                                                                         sText += sal_Char(nValue >> nShift
3262                                                                                                               & 0xFF);
3263                                                                 if (*q == '?')
3264                                                                 {
3265                                                                         ++q;
3266                                                                         bDone = true;
3267                                                                 }
3268                                                                 if (bFinal && !bDone)
3269                                                                 {
3270                                                                         bEncodedWord = false;
3271                                                                         bDone = true;
3272                                                                 }
3273                                                         }
3274                                                 }
3275                                         }
3276                 }
3277                                 else
3278                                 {
3279                                         const sal_Char * pEncodedTextBegin = q;
3280                                         const sal_Char * pEncodedTextCopyBegin = q;
3281                                         for (bool bDone = false; !bDone;)
3282                                                 if (q == pEnd)
3283                                                 {
3284                                                         bEncodedWord = false;
3285                                                         bDone = true;
3286                                                 }
3287                                                 else
3288                                                 {
3289                                                         sal_uInt32 nChar = *q++;
3290                                                         switch (nChar)
3291                                                         {
3292                                                                 case '=':
3293                                                                 {
3294                                                                         if (pEnd - q < 2)
3295                                                                         {
3296                                                                                 bEncodedWord = false;
3297                                                                                 bDone = true;
3298                                                                                 break;
3299                                                                         }
3300                                                                         int nDigit1 = getHexWeight(q[0]);
3301                                                                         int nDigit2 = getHexWeight(q[1]);
3302                                                                         if (nDigit1 < 0 || nDigit2 < 0)
3303                                                                         {
3304                                                                                 bEncodedWord = false;
3305                                                                                 bDone = true;
3306                                                                                 break;
3307                                                                         }
3308                                                                         sText += rBody.Copy(
3309                                         static_cast< xub_StrLen >(
3310                                             pEncodedTextCopyBegin - pBegin),
3311                                         static_cast< xub_StrLen >(
3312                                             q - 1 - pEncodedTextCopyBegin));
3313                                                                         sText += sal_Char(nDigit1 << 4 | nDigit2);
3314                                                                         q += 2;
3315                                                                         pEncodedTextCopyBegin = q;
3316                                                                         break;
3317                                                                 }
3318
3319                                                                 case '?':
3320                                                                         if (q - pEncodedTextBegin > 1)
3321                                                                                 sText += rBody.Copy(
3322                                             static_cast< xub_StrLen >(
3323                                                 pEncodedTextCopyBegin - pBegin),
3324                                             static_cast< xub_StrLen >(
3325                                                 q - 1 - pEncodedTextCopyBegin));
3326                                                                         else
3327                                                                                 bEncodedWord = false;
3328                                                                         bDone = true;
3329                                                                         break;
3330
3331                                                                 case '_':
3332                                                                         sText += rBody.Copy(
3333                                         static_cast< xub_StrLen >(
3334                                             pEncodedTextCopyBegin - pBegin),
3335                                         static_cast< xub_StrLen >(
3336                                             q - 1 - pEncodedTextCopyBegin));
3337                                                                         sText += ' ';
3338                                                                         pEncodedTextCopyBegin = q;
3339                                                                         break;
3340
3341                                                                 default:
3342                                                                         if (!isVisible(nChar))
3343                                                                         {
3344                                                                                 bEncodedWord = false;
3345                                                                                 bDone = true;
3346                                                                         }
3347                                                                         break;
3348                                                         }
3349                                                 }
3350                                 }
3351             }
3352
3353                         bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
3354
3355 //                      if (bEncodedWord && q != pEnd)
3356 //                              switch (*q)
3357 //                              {
3358 //                                      case '\t':
3359 //                                      case ' ':
3360 //                                      case '"':
3361 //                                      case ')':
3362 //                                      case ',':
3363 //                                      case '.':
3364 //                                      case '=':
3365 //                                              break;
3366 //
3367 //                                      default:
3368 //                                              bEncodedWord = false;
3369 //                                              break;
3370 //                              }
3371
3372                         sal_Unicode * pUnicodeBuffer = 0;
3373                         sal_Size nUnicodeSize = 0;
3374                         if (bEncodedWord)
3375                         {
3376                                 pUnicodeBuffer
3377                                         = convertToUnicode(sText.GetBuffer(),
3378                                                                            sText.GetBuffer() + sText.Len(),
3379                                                                            eCharsetEncoding, nUnicodeSize);
3380                                 if (pUnicodeBuffer == 0)
3381                                         bEncodedWord = false;
3382                         }
3383
3384                         if (bEncodedWord)
3385                         {
3386                                 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
3387                                 if (eType == HEADER_FIELD_TEXT)
3388                                         sDecoded.Append(
3389                         pUnicodeBuffer,
3390                         static_cast< xub_StrLen >(nUnicodeSize));
3391                                 else if (nCommentLevel == 0)
3392                                 {
3393                                         sEncodedText.Append(
3394                         pUnicodeBuffer,
3395                         static_cast< xub_StrLen >(nUnicodeSize));
3396                                         if (!bQuotedEncodedText)
3397                                         {
3398                                                 const sal_Unicode * pTextPtr = pUnicodeBuffer;
3399                                                 const sal_Unicode * pTextEnd = pTextPtr
3400                                                                                        + nUnicodeSize;
3401                                                 for (; pTextPtr != pTextEnd; ++pTextPtr)
3402                                                         if (!isEncodedWordTokenChar(*pTextPtr))
3403                                                         {
3404                                                                 bQuotedEncodedText = true;
3405                                                                 break;
3406                                                         }
3407                                         }
3408                                 }
3409                                 else
3410                                 {
3411                                         const sal_Unicode * pTextPtr = pUnicodeBuffer;
3412                                         const sal_Unicode * pTextEnd = pTextPtr + nUnicodeSize;
3413                                         for (; pTextPtr != pTextEnd; ++pTextPtr)
3414                                         {
3415                                                 switch (*pTextPtr)
3416                                                 {
3417                                                         case '(':
3418                                                         case ')':
3419                                                         case '\\':
3420                                                         case '\x0D':
3421                                                         case '=':
3422                                                                 sDecoded += '\\';
3423                                                                 break;
3424                                                 }
3425                                                 sDecoded += *pTextPtr;
3426                                         }
3427                                 }
3428                                 delete[] pUnicodeBuffer;
3429                                 p = q;
3430                                 pCopyBegin = p;
3431
3432                                 pWSPBegin = p;
3433                                 while (p != pEnd && isWhiteSpace(*p))
3434                                         ++p;
3435                                 /* bStartEncodedWord = p != pWSPBegin; */
3436                                 continue;
3437                         }
3438                 }
3439
3440                 if (sEncodedText.Len() != 0)
3441                 {
3442                         if (bQuotedEncodedText)
3443                         {
3444                                 sDecoded += '"';
3445                                 const sal_Unicode * pTextPtr = sEncodedText.GetBuffer();
3446                                 const sal_Unicode * pTextEnd = pTextPtr + sEncodedText.Len();
3447                                 for (;pTextPtr != pTextEnd; ++pTextPtr)
3448                                 {
3449                                         switch (*pTextPtr)
3450                                         {
3451                                                 case '"':
3452                                                 case '\\':
3453                                                 case '\x0D':
3454                                                         sDecoded += '\\';
3455                                                         break;
3456                                         }
3457                                         sDecoded += *pTextPtr;
3458                                 }
3459                                 sDecoded += '"';
3460                         }
3461                         else
3462                                 sDecoded += sEncodedText;
3463                         sEncodedText.Erase();
3464                         bQuotedEncodedText = false;
3465                 }
3466
3467                 if (p == pEnd)
3468                         break;
3469
3470                 switch (*p++)
3471                 {
3472 //                      case '\t':
3473 //                      case ' ':
3474 //                      case ',':
3475 //                      case '.':
3476 //                      case '=':
3477 //                              bStartEncodedWord = true;
3478 //                              break;
3479
3480                         case '"':
3481                                 if (eType != HEADER_FIELD_TEXT && nCommentLevel == 0)
3482                                 {
3483                                         const sal_Char * pQuotedStringEnd
3484                                                 = skipQuotedString(p - 1, pEnd);
3485                                         p = pQuotedStringEnd == p - 1 ? pEnd : pQuotedStringEnd;
3486                                 }
3487                                 /* bStartEncodedWord = true; */
3488                                 break;
3489
3490                         case '(':
3491                                 if (eType != HEADER_FIELD_TEXT)
3492                                         ++nCommentLevel;
3493                                 /* bStartEncodedWord = true; */
3494                                 break;
3495
3496                         case ')':
3497                                 if (nCommentLevel > 0)
3498                                         --nCommentLevel;
3499                                 /* bStartEncodedWord = false; */
3500                                 break;
3501
3502                         default:
3503                         {
3504                                 const sal_Char * pUTF8Begin = p - 1;
3505                                 const sal_Char * pUTF8End = pUTF8Begin;
3506                                 // the value is initialized just to avoid warning: ‘nCharacter’ is used uninitialized in this function
3507                                 // there seems to be a bug in gcc-4.1 because this value is used only if it is initialized
3508                                 sal_uInt32 nCharacter=0;
3509                                 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
3510                                                                           nCharacter))
3511                                 {
3512                                         appendISO88591(sDecoded, pCopyBegin, p - 1);
3513                                         sal_Unicode aUTF16Buf[2];
3514                                         xub_StrLen nUTF16Len = static_cast< xub_StrLen >(
3515                         putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf);
3516                                         sDecoded.Append(aUTF16Buf, nUTF16Len);
3517                                         p = pUTF8End;
3518                                         pCopyBegin = p;
3519                                 }
3520                                 /* bStartEncodedWord = false; */
3521                                 break;
3522                         }
3523                 }
3524                 pWSPBegin = p;
3525         }
3526
3527         appendISO88591(sDecoded, pCopyBegin, pEnd);
3528         return sDecoded;
3529 }
3530
3531 //============================================================================
3532 //
3533 //  INetMIMEOutputSink
3534 //
3535 //============================================================================
3536
3537 // virtual
3538 sal_Size INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
3539 {
3540         sal_Size nLength = rtl_str_getLength(pSequence);
3541         writeSequence(pSequence, pSequence + nLength);
3542         return nLength;
3543 }
3544
3545 //============================================================================
3546 // virtual
3547 void INetMIMEOutputSink::writeSequence(const sal_uInt32 * pBegin,
3548                                                                            const sal_uInt32 * pEnd)
3549 {
3550         DBG_ASSERT(pBegin && pBegin <= pEnd,
3551                            "INetMIMEOutputSink::writeSequence(): Bad sequence");
3552
3553         sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3554         sal_Char * pBufferEnd = pBufferBegin;
3555         while (pBegin != pEnd)
3556         {
3557                 DBG_ASSERT(*pBegin < 256,
3558                                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3559                 *pBufferEnd++ = sal_Char(*pBegin++);
3560         }
3561         writeSequence(pBufferBegin, pBufferEnd);
3562         delete[] pBufferBegin;
3563 }
3564
3565 //============================================================================
3566 // virtual
3567 void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
3568                                                                            const sal_Unicode * pEnd)
3569 {
3570         DBG_ASSERT(pBegin && pBegin <= pEnd,
3571                            "INetMIMEOutputSink::writeSequence(): Bad sequence");
3572
3573         sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3574         sal_Char * pBufferEnd = pBufferBegin;
3575         while (pBegin != pEnd)
3576         {
3577                 DBG_ASSERT(*pBegin < 256,
3578                                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3579                 *pBufferEnd++ = sal_Char(*pBegin++);
3580         }
3581         writeSequence(pBufferBegin, pBufferEnd);
3582         delete[] pBufferBegin;
3583 }
3584
3585 //============================================================================
3586 // virtual
3587 ErrCode INetMIMEOutputSink::getError() const
3588 {
3589         return ERRCODE_NONE;
3590 }
3591
3592 //============================================================================
3593 void INetMIMEOutputSink::writeLineEnd()
3594 {
3595         static const sal_Char aCRLF[2] = { 0x0D, 0x0A };
3596         writeSequence(aCRLF, aCRLF + 2);
3597         m_nColumn = 0;
3598 }
3599
3600 //============================================================================
3601 //
3602 //  INetMIMEStringOutputSink
3603 //
3604 //============================================================================
3605
3606 // virtual
3607 void INetMIMEStringOutputSink::writeSequence(const sal_Char * pBegin,
3608                                                                                          const sal_Char * pEnd)
3609 {
3610         DBG_ASSERT(pBegin && pBegin <= pEnd,
3611                            "INetMIMEStringOutputSink::writeSequence(): Bad sequence");
3612
3613         m_bOverflow = m_bOverflow
3614                           || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3615         if (!m_bOverflow)
3616                 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3617 }
3618
3619 //============================================================================
3620 // virtual
3621 ErrCode INetMIMEStringOutputSink::getError() const
3622 {
3623         return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3624 }
3625
3626 //============================================================================
3627 //
3628 //  INetMIMEUnicodeOutputSink
3629 //
3630 //============================================================================
3631
3632 // virtual
3633 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Char * pBegin,
3634                                                                                           const sal_Char * pEnd)
3635 {
3636         DBG_ASSERT(pBegin && pBegin <= pEnd,
3637                            "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3638
3639         sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3640         sal_Unicode * pBufferEnd = pBufferBegin;
3641         while (pBegin != pEnd)
3642                 *pBufferEnd++ = sal_uChar(*pBegin++);
3643         writeSequence(pBufferBegin, pBufferEnd);
3644         delete[] pBufferBegin;
3645 }
3646
3647 //============================================================================
3648 // virtual
3649 void INetMIMEUnicodeOutputSink::writeSequence(const sal_uInt32 * pBegin,
3650                                                                                           const sal_uInt32 * pEnd)
3651 {
3652         DBG_ASSERT(pBegin && pBegin <= pEnd,
3653                            "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3654
3655         sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3656         sal_Unicode * pBufferEnd = pBufferBegin;
3657         while (pBegin != pEnd)
3658         {
3659                 DBG_ASSERT(*pBegin < 256,
3660                                    "INetMIMEOutputSink::writeSequence(): Bad octet");
3661                 *pBufferEnd++ = sal_Unicode(*pBegin++);
3662         }
3663         writeSequence(pBufferBegin, pBufferEnd);
3664         delete[] pBufferBegin;
3665 }
3666
3667 //============================================================================
3668 // virtual
3669 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Unicode * pBegin,
3670                                                                                           const sal_Unicode * pEnd)
3671 {
3672         DBG_ASSERT(pBegin && pBegin <= pEnd,
3673                            "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3674
3675         m_bOverflow = m_bOverflow
3676                           || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3677         if (!m_bOverflow)
3678                 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3679 }
3680
3681 //============================================================================
3682 // virtual
3683 ErrCode INetMIMEUnicodeOutputSink::getError() const
3684 {
3685         return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3686 }
3687
3688 //============================================================================
3689 //
3690 //  INetMIMEEncodedWordOutputSink
3691 //
3692 //============================================================================
3693
3694 static const sal_Char aEscape[128]
3695         = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x00
3696                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x01
3697                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x02
3698                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x03
3699                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x04
3700                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x05
3701                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x06
3702                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x07
3703                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x08
3704                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x09
3705                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0A
3706                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0B
3707                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0C
3708                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0D
3709                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0E
3710                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x0F
3711                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x10
3712                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x11
3713                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x12
3714                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x13
3715                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x14
3716                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x15
3717                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x16
3718                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x17
3719                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x18
3720                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x19
3721                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1A
3722                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1B
3723                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1C
3724                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1D
3725                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1E
3726                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // 0x1F
3727                 0,   // ' '
3728                 0,   // '!'
3729                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '"'
3730                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '#'
3731                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '$'
3732                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '%'
3733                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '&'
3734                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '''
3735                 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '('
3736                 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ')'
3737                 0,   // '*'
3738                 0,   // '+'
3739                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ','
3740                 0,   // '-'
3741                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '.'
3742                 0,   // '/'
3743                 0,   // '0'
3744                 0,   // '1'
3745                 0,   // '2'
3746                 0,   // '3'
3747                 0,   // '4'
3748                 0,   // '5'
3749                 0,   // '6'
3750                 0,   // '7'
3751                 0,   // '8'
3752                 0,   // '9'
3753                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ':'
3754                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ';'
3755                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '<'
3756                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '='
3757                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '>'
3758                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '?'
3759                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '@'
3760                 0,   // 'A'
3761                 0,   // 'B'
3762                 0,   // 'C'
3763                 0,   // 'D'
3764                 0,   // 'E'
3765                 0,   // 'F'
3766                 0,   // 'G'
3767                 0,   // 'H'
3768                 0,   // 'I'
3769                 0,   // 'J'
3770                 0,   // 'K'
3771                 0,   // 'L'
3772                 0,   // 'M'
3773                 0,   // 'N'
3774                 0,   // 'O'
3775                 0,   // 'P'
3776                 0,   // 'Q'
3777                 0,   // 'R'
3778                 0,   // 'S'
3779                 0,   // 'T'
3780                 0,   // 'U'
3781                 0,   // 'V'
3782                 0,   // 'W'
3783                 0,   // 'X'
3784                 0,   // 'Y'
3785                 0,   // 'Z'
3786                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '['
3787                 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '\'
3788                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // ']'
3789                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '^'
3790                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '_'
3791                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '`'
3792                 0,   // 'a'
3793                 0,   // 'b'
3794                 0,   // 'c'
3795                 0,   // 'd'
3796                 0,   // 'e'
3797                 0,   // 'f'
3798                 0,   // 'g'
3799                 0,   // 'h'
3800                 0,   // 'i'
3801                 0,   // 'j'
3802                 0,   // 'k'
3803                 0,   // 'l'
3804                 0,   // 'm'
3805                 0,   // 'n'
3806                 0,   // 'o'
3807                 0,   // 'p'
3808                 0,   // 'q'
3809                 0,   // 'r'
3810                 0,   // 's'
3811                 0,   // 't'
3812                 0,   // 'u'
3813                 0,   // 'v'
3814                 0,   // 'w'
3815                 0,   // 'x'
3816                 0,   // 'y'
3817                 0,   // 'z'
3818                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '{'
3819                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '|'
3820                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '}'
3821                 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,   // '~'
3822                 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE }; // DEL
3823
3824 inline bool
3825 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar) const
3826 {
3827         return !INetMIME::isUSASCII(nChar) || aEscape[nChar] & m_eContext;
3828 }
3829
3830 //============================================================================
3831 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
3832 {
3833         if (m_eInitialSpace == SPACE_ALWAYS && m_nExtraSpaces == 0)
3834                 m_nExtraSpaces = 1;
3835
3836         if (m_eEncodedWordState == STATE_SECOND_EQUALS)
3837         {
3838                 // If the text is already an encoded word, copy it verbatim:
3839                 sal_uInt32 nSize = m_pBufferEnd - m_pBuffer;
3840                 switch (m_ePrevCoding)
3841                 {
3842                         case CODING_QUOTED:
3843                                 m_rSink << '"';
3844                         case CODING_NONE:
3845                                 if (m_eInitialSpace == SPACE_ENCODED && m_nExtraSpaces == 0)
3846                                         m_nExtraSpaces = 1;
3847                                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3848                                 {
3849                                         if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3850                                                 m_rSink << INetMIMEOutputSink::endl;
3851                                         m_rSink << ' ';
3852                                 }
3853                                 if (m_nExtraSpaces == 1)
3854                                 {
3855                                         if (m_rSink.getColumn() + nSize
3856                                                     >= m_rSink.getLineLengthLimit())
3857                                                 m_rSink << INetMIMEOutputSink::endl;
3858                                         m_rSink << ' ';
3859                                 }
3860                                 break;
3861
3862                         case CODING_ENCODED:
3863                         {
3864                                 const sal_Char * pCharsetName
3865                                  = INetMIME::getCharsetName(m_ePrevMIMEEncoding);
3866                                 while (m_nExtraSpaces-- > 0)
3867                                 {
3868                                         if (m_rSink.getColumn()
3869                                                     > m_rSink.getLineLengthLimit() - 3)
3870                                                 m_rSink << "?=" << INetMIMEOutputSink::endl << " =?"
3871                                                                 << pCharsetName << "?Q?";
3872                                         m_rSink << '_';
3873                                 }
3874                                 m_rSink << "?=";
3875                         }
3876                         case CODING_ENCODED_TERMINATED:
3877                                 if (m_rSink.getColumn() + nSize
3878                                             > m_rSink.getLineLengthLimit() - 1)
3879                                         m_rSink << INetMIMEOutputSink::endl;
3880                                 m_rSink << ' ';
3881                                 break;
3882                 }
3883                 m_rSink.write(m_pBuffer, m_pBufferEnd);
3884                 m_eCoding = CODING_ENCODED_TERMINATED;
3885         }
3886         else
3887         {
3888                 // If the text itself is too long to fit into a single line, make it
3889                 // into multiple encoded words:
3890                 switch (m_eCoding)
3891                 {
3892                         case CODING_NONE:
3893                                 if (m_nExtraSpaces == 0)
3894                                 {
3895                                         DBG_ASSERT(m_ePrevCoding == CODING_NONE
3896                                                            || m_pBuffer == m_pBufferEnd,
3897                                                            "INetMIMEEncodedWordOutputSink::finish():"
3898                                                                " Bad state");
3899                                         if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3900                                                     > m_rSink.getLineLengthLimit())
3901                                                 m_eCoding = CODING_ENCODED;
3902                                 }
3903                                 else
3904                 {
3905                     OSL_ASSERT(m_pBufferEnd >= m_pBuffer);
3906                     if (static_cast< std::size_t >(m_pBufferEnd - m_pBuffer)
3907                         > m_rSink.getLineLengthLimit() - 1)
3908                     {
3909                         m_eCoding = CODING_ENCODED;
3910                     }
3911                 }
3912                                 break;
3913
3914                         case CODING_QUOTED:
3915                                 if (m_nExtraSpaces == 0)
3916                                 {
3917                                         DBG_ASSERT(m_ePrevCoding == CODING_NONE,
3918                                                            "INetMIMEEncodedWordOutputSink::finish():"
3919                                                                " Bad state");
3920                                         if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3921                                                         + m_nQuotedEscaped
3922                                                     > m_rSink.getLineLengthLimit() - 2)
3923                                                 m_eCoding = CODING_ENCODED;
3924                                 }
3925                                 else if ((m_pBufferEnd - m_pBuffer) + m_nQuotedEscaped
3926                                                      > m_rSink.getLineLengthLimit() - 3)
3927                                         m_eCoding = CODING_ENCODED;
3928                                 break;
3929
3930             default:
3931                 break;
3932                 }
3933
3934                 switch (m_eCoding)
3935                 {
3936                         case CODING_NONE:
3937                                 switch (m_ePrevCoding)
3938                                 {
3939                                         case CODING_QUOTED:
3940                                                 if (m_rSink.getColumn() + m_nExtraSpaces
3941                                                                 + (m_pBufferEnd - m_pBuffer)
3942                                                             < m_rSink.getLineLengthLimit())
3943                                                         m_eCoding = CODING_QUOTED;
3944                                                 else
3945                                                         m_rSink << '"';
3946                                                 break;
3947
3948                                         case CODING_ENCODED:
3949                                                 m_rSink << "?=";
3950                                                 break;
3951
3952                     default:
3953                         break;
3954                                 }
3955                                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3956                                 {
3957                                         if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3958                                                 m_rSink << INetMIMEOutputSink::endl;
3959                                         m_rSink << ' ';
3960                                 }
3961                                 if (m_nExtraSpaces == 1)
3962                                 {
3963                                         if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3964                                                     >= m_rSink.getLineLengthLimit())
3965                                                 m_rSink << INetMIMEOutputSink::endl;
3966                                         m_rSink << ' ';
3967                                 }
3968                                 m_rSink.write(m_pBuffer, m_pBufferEnd);
3969                                 if (m_eCoding == CODING_QUOTED && bWriteTrailer)
3970                                 {
3971                                         m_rSink << '"';
3972                                         m_eCoding = CODING_NONE;
3973                                 }
3974                                 break;
3975
3976                         case CODING_QUOTED:
3977                         {
3978                                 bool bInsertLeadingQuote = true;
3979                                 sal_uInt32 nSize = (m_pBufferEnd - m_pBuffer)
3980                                                            + m_nQuotedEscaped + 2;
3981                                 switch (m_ePrevCoding)
3982                                 {
3983                                         case CODING_QUOTED:
3984                                                 if (m_rSink.getColumn() + m_nExtraSpaces + nSize - 1
3985                                                             < m_rSink.getLineLengthLimit())
3986                                                 {
3987                                                         bInsertLeadingQuote = false;
3988                                                         --nSize;
3989                                                 }
3990                                                 else
3991                                                         m_rSink << '"';
3992                                                 break;
3993
3994                                         case CODING_ENCODED:
3995                                                 m_rSink << "?=";
3996                                                 break;
3997
3998                     default:
3999                         break;
4000                                 }
4001                                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
4002                                 {
4003                                         if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
4004                                                 m_rSink << INetMIMEOutputSink::endl;
4005                                         m_rSink << ' ';
4006                                 }
4007                                 if (m_nExtraSpaces == 1)
4008                                 {
4009                                         if (m_rSink.getColumn() + nSize
4010                                                     >= m_rSink.getLineLengthLimit())
4011                                                 m_rSink << INetMIMEOutputSink::endl;
4012                                         m_rSink << ' ';
4013                                 }
4014                                 if (bInsertLeadingQuote)
4015                                         m_rSink << '"';
4016                                 for (const sal_Unicode * p = m_pBuffer; p != m_pBufferEnd;
4017                                          ++p)
4018                                 {
4019                                         if (INetMIME::needsQuotedStringEscape(*p))
4020                                                 m_rSink << '\\';
4021                                         m_rSink << sal_Char(*p);
4022                                 }
4023                                 if (bWriteTrailer)
4024                                 {
4025                                         m_rSink << '"';
4026                                         m_eCoding = CODING_NONE;
4027                                 }
4028                                 break;
4029                         }
4030
4031                         case CODING_ENCODED:
4032                         {
4033                                 rtl_TextEncoding eCharsetEncoding
4034                                         = m_pEncodingList->
4035                                               getPreferredEncoding(RTL_TEXTENCODING_UTF8);
4036                                 rtl_TextEncoding eMIMEEncoding
4037                                         = INetMIME::translateToMIME(eCharsetEncoding);
4038
4039                                 // The non UTF-8 code will only work for stateless single byte
4040                                 // character encodings (see also below):
4041                                 sal_Char * pTargetBuffer = NULL;
4042                                 sal_Size nTargetSize = 0;
4043                                 sal_uInt32 nSize;
4044                                 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4045                                 {
4046                                         nSize = 0;
4047                                         for (sal_Unicode const * p = m_pBuffer;
4048                                                  p != m_pBufferEnd;)
4049                                         {
4050                                                 sal_uInt32 nUTF32
4051                                                         = INetMIME::getUTF32Character(p, m_pBufferEnd);
4052                                                 nSize += needsEncodedWordEscape(nUTF32) ?
4053                                                                  3 * INetMIME::getUTF8OctetCount(nUTF32) :
4054                                                                  1;
4055                                                         // only US-ASCII characters (that are converted to
4056                                                         // a single byte by UTF-8) need no encoded word
4057                                                         // escapes...
4058                                         }
4059                                 }
4060                                 else
4061                                 {
4062                                         rtl_UnicodeToTextConverter hConverter
4063                                                 = rtl_createUnicodeToTextConverter(eCharsetEncoding);
4064                                         rtl_UnicodeToTextContext hContext
4065                                                 = rtl_createUnicodeToTextContext(hConverter);
4066                                         for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;;
4067                                                  nBufferSize += nBufferSize / 3 + 1)
4068                                         {
4069                                                 pTargetBuffer = new sal_Char[nBufferSize];
4070                                                 sal_uInt32 nInfo;
4071                                                 sal_Size nSrcCvtBytes;
4072                                                 nTargetSize
4073                                                         = rtl_convertUnicodeToText(
4074                                                                   hConverter, hContext, m_pBuffer,
4075                                                                   m_pBufferEnd - m_pBuffer, pTargetBuffer,
4076                                                                   nBufferSize,
4077                                                                   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE
4078                                                                  | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE,
4079                                                                   &nInfo, &nSrcCvtBytes);
4080                                                 if (!(nInfo
4081                                                               & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
4082                                                         break;
4083                                                 delete[] pTargetBuffer;
4084                                                 pTargetBuffer = NULL;
4085                                                 rtl_resetUnicodeToTextContext(hConverter, hContext);
4086                                         }
4087                                         rtl_destroyUnicodeToTextContext(hConverter, hContext);
4088                                         rtl_destroyUnicodeToTextConverter(hConverter);
4089
4090                                         nSize = nTargetSize;
4091                                         for (sal_Size k = 0; k < nTargetSize; ++k)
4092                                                 if (needsEncodedWordEscape(sal_uChar(
4093                                                                                    pTargetBuffer[k])))
4094                                                         nSize += 2;
4095                                 }
4096
4097                                 const sal_Char * pCharsetName
4098                                         = INetMIME::getCharsetName(eMIMEEncoding);
4099                                 sal_uInt32 nWrapperSize = rtl_str_getLength(pCharsetName) + 7;
4100                                         // '=?', '?Q?', '?='
4101
4102                                 switch (m_ePrevCoding)
4103                                 {
4104                                         case CODING_QUOTED:
4105                                                 m_rSink << '"';
4106                                         case CODING_NONE:
4107                                                 if (m_eInitialSpace == SPACE_ENCODED
4108                                                         && m_nExtraSpaces == 0)
4109                                                         m_nExtraSpaces = 1;
4110                                                 nSize += nWrapperSize;
4111                                                 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
4112                                                 {
4113                                                         if (m_rSink.getColumn()
4114                                                                     >= m_rSink.getLineLengthLimit())
4115                                                                 m_rSink << INetMIMEOutputSink::endl;
4116                                                         m_rSink << ' ';
4117                                                 }
4118                                                 if (m_nExtraSpaces == 1)
4119                                                 {
4120                                                         if (m_rSink.getColumn() + nSize
4121                                                                     >= m_rSink.getLineLengthLimit())
4122                                                         m_rSink << INetMIMEOutputSink::endl;
4123                                                         m_rSink << ' ';
4124                                                 }
4125                                                 m_rSink << "=?" << pCharsetName << "?Q?";
4126                                                 break;
4127
4128                                         case CODING_ENCODED:
4129                                                 if (m_ePrevMIMEEncoding != eMIMEEncoding
4130                                                         || m_rSink.getColumn() + m_nExtraSpaces + nSize
4131                                                                > m_rSink.getLineLengthLimit() - 2)
4132                                                 {
4133                                                         m_rSink << "?=";
4134                                                         if (m_rSink.getColumn() + nWrapperSize
4135                                                                         + m_nExtraSpaces + nSize
4136                                                                     > m_rSink.getLineLengthLimit() - 1)
4137                                                                 m_rSink << INetMIMEOutputSink::endl;
4138                                                         m_rSink << " =?" << pCharsetName << "?Q?";
4139                                                 }
4140                                                 while (m_nExtraSpaces-- > 0)
4141                                                 {
4142                                                         if (m_rSink.getColumn()
4143                                                                     > m_rSink.getLineLengthLimit() - 3)
4144                                                                 m_rSink << "?=" << INetMIMEOutputSink::endl
4145                                                                                 << " =?" << pCharsetName << "?Q?";
4146                                                         m_rSink << '_';
4147                                                 }
4148                                                 break;
4149
4150                                         case CODING_ENCODED_TERMINATED:
4151                                                 if (m_rSink.getColumn() + nWrapperSize
4152                                                                 + m_nExtraSpaces + nSize
4153                                                             > m_rSink.getLineLengthLimit() - 1)
4154                                                         m_rSink << INetMIMEOutputSink::endl;
4155                                                 m_rSink << " =?" << pCharsetName << "?Q?";
4156                                                 while (m_nExtraSpaces-- > 0)
4157                                                 {
4158                                                         if (m_rSink.getColumn()
4159                                                                     > m_rSink.getLineLengthLimit() - 3)
4160                                                                 m_rSink << "?=" << INetMIMEOutputSink::endl
4161                                                                                 << " =?" << pCharsetName << "?Q?";
4162                                                         m_rSink << '_';
4163                                                 }
4164                                                 break;
4165                                 }
4166
4167                                 // The non UTF-8 code will only work for stateless single byte
4168                                 // character encodings (see also above):
4169                                 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4170                                 {
4171                                         bool bInitial = true;
4172                                         for (sal_Unicode const * p = m_pBuffer;
4173                                                  p != m_pBufferEnd;)
4174                                         {
4175                                                 sal_uInt32 nUTF32
4176                                                         = INetMIME::getUTF32Character(p, m_pBufferEnd);
4177                                                 bool bEscape = needsEncodedWordEscape(nUTF32);
4178                                                 sal_uInt32 nWidth
4179                                                         = bEscape ?
4180                                                               3 * INetMIME::getUTF8OctetCount(nUTF32) : 1;
4181                                                         // only US-ASCII characters (that are converted to
4182                                                         // a single byte by UTF-8) need no encoded word
4183                                                         // escapes...
4184                                                 if (!bInitial
4185                                                         && m_rSink.getColumn() + nWidth + 2
4186                                                                > m_rSink.getLineLengthLimit())
4187                                                         m_rSink << "?=" << INetMIMEOutputSink::endl
4188                                                                         << " =?" << pCharsetName << "?Q?";
4189                                                 if (bEscape)
4190                                                 {
4191                                                         DBG_ASSERT(
4192                                                                 nUTF32 < 0x10FFFF,
4193                                                                 "INetMIMEEncodedWordOutputSink::finish():"
4194                                                                     " Bad char");
4195                                                         if (nUTF32 < 0x80)
4196                                                                 INetMIME::writeEscapeSequence(m_rSink,
4197                                                                                                                           nUTF32);
4198                                                         else if (nUTF32 < 0x800)
4199                                                         {
4200                                                                 INetMIME::writeEscapeSequence(m_rSink,
4201                                                                                                                           (nUTF32 >> 6)
4202                                                                                                                               | 0xC0);
4203                                                                 INetMIME::writeEscapeSequence(m_rSink,
4204                                                                                                                           (nUTF32 & 0x3F)
4205                                                                                                                               | 0x80);
4206                                                         }
4207                                                         else if (nUTF32 < 0x10000)
4208                                                         {
4209                                                                 INetMIME::writeEscapeSequence(m_rSink,
4210                                                                                                                           (nUTF32 >> 12)
4211                                                                                                                               | 0xE0);
4212                                                                 INetMIME::writeEscapeSequence(m_rSink,
4213                                                                                                                           ((nUTF32 >> 6)
4214                                                                                                                                   & 0x3F)
4215                                                                                                                               | 0x80);
4216                                                                 INetMIME::writeEscapeSequence(m_rSink,
4217                                                                                                                           (nUTF32 & 0x3F)
4218                                                                                                                               | 0x80);
4219                                                         }
4220                                                         else
4221                                                         {
4222                                                                 INetMIME::writeEscapeSequence(m_rSink,
4223                                                                                                                           (nUTF32 >> 18)
4224                                                                                                                               | 0xF0);
4225                                                                 INetMIME::writeEscapeSequence(m_rSink,
4226                                                                                                                           ((nUTF32 >> 12)
4227                                                                                                                                   & 0x3F)
4228                                                                                                                               | 0x80);
4229                                                                 INetMIME::writeEscapeSequence(m_rSink,
4230                                                                                                                           ((nUTF32 >> 6)
4231                                                                                                                                   & 0x3F)
4232                                                                                                                               | 0x80);
4233                                                                 INetMIME::writeEscapeSequence(m_rSink,
4234                                                                                                                           (nUTF32 & 0x3F)
4235                                                                                                                               | 0x80);
4236                                                         }
4237                                                 }
4238                                                 else
4239                                                         m_rSink << sal_Char(nUTF32);
4240                                                 bInitial = false;
4241                                         }
4242                                 }
4243                                 else
4244                                 {
4245                                         for (sal_Size k = 0; k < nTargetSize; ++k)
4246                                         {
4247                                                 sal_uInt32 nUCS4 = sal_uChar(pTargetBuffer[k]);
4248                                                 bool bEscape = needsEncodedWordEscape(nUCS4);
4249                                                 if (k > 0
4250                                                         && m_rSink.getColumn() + (bEscape ? 5 : 3)
4251                                                                > m_rSink.getLineLengthLimit())
4252                                                         m_rSink << "?=" << INetMIMEOutputSink::endl
4253                                                                         << " =?" << pCharsetName << "?Q?";
4254                                                 if (bEscape)
4255                                                         INetMIME::writeEscapeSequence(m_rSink, nUCS4);
4256                                                 else
4257                                                         m_rSink << sal_Char(nUCS4);
4258                                         }
4259                                         delete[] pTargetBuffer;
4260                                 }
4261
4262                                 if (bWriteTrailer)
4263                                 {
4264                                         m_rSink << "?=";
4265                                         m_eCoding = CODING_ENCODED_TERMINATED;
4266                                 }
4267
4268                                 m_ePrevMIMEEncoding = eMIMEEncoding;
4269                                 break;
4270                         }
4271
4272             default:
4273                 OSL_ASSERT(false);
4274                 break;
4275                 }
4276         }
4277
4278         m_eInitialSpace = SPACE_NO;
4279         m_nExtraSpaces = 0;
4280         m_pEncodingList->reset();
4281         m_pBufferEnd = m_pBuffer;
4282         m_ePrevCoding = m_eCoding;
4283         m_eCoding = CODING_NONE;
4284         m_nQuotedEscaped = 0;
4285         m_eEncodedWordState = STATE_INITIAL;
4286 }
4287
4288 //============================================================================
// Release what the sink holds: the character buffer is handed back through
// rtl_freeMemory (the buffer is grown with rtl_reallocateMemory in
// operator <<, so it belongs to the rtl allocator, not to new/delete), and the
// encoding list object is destroyed with delete.
INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink()
{
        rtl_freeMemory(m_pBuffer);
        delete m_pEncodingList;
}
4294
4295 //============================================================================
4296 INetMIMEEncodedWordOutputSink &
4297 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar)
4298 {
4299         if (nChar == ' ')
4300         {
4301                 if (m_pBufferEnd != m_pBuffer)
4302                         finish(false);
4303                 ++m_nExtraSpaces;
4304         }
4305         else
4306         {
4307                 // Check for an already encoded word:
4308                 switch (m_eEncodedWordState)
4309                 {
4310                         case STATE_INITIAL:
4311                                 if (nChar == '=')
4312                                         m_eEncodedWordState = STATE_FIRST_EQUALS;
4313                                 else
4314                                         m_eEncodedWordState = STATE_BAD;
4315                                 break;
4316
4317                         case STATE_FIRST_EQUALS:
4318                                 if (nChar == '?')
4319                                         m_eEncodedWordState = STATE_FIRST_EQUALS;
4320                                 else
4321                                         m_eEncodedWordState = STATE_BAD;
4322                                 break;
4323
4324                         case STATE_FIRST_QUESTION:
4325                                 if (INetMIME::isEncodedWordTokenChar(nChar))
4326                                         m_eEncodedWordState = STATE_CHARSET;
4327                                 else
4328                                         m_eEncodedWordState = STATE_BAD;
4329                                 break;
4330
4331                         case STATE_CHARSET:
4332                                 if (nChar == '?')
4333                                         m_eEncodedWordState = STATE_SECOND_QUESTION;
4334                                 else if (!INetMIME::isEncodedWordTokenChar(nChar))
4335                                         m_eEncodedWordState = STATE_BAD;
4336                                 break;
4337
4338                         case STATE_SECOND_QUESTION:
4339                                 if (nChar == 'B' || nChar == 'Q'
4340                                         || nChar == 'b' || nChar == 'q')
4341                                         m_eEncodedWordState = STATE_ENCODING;
4342                                 else
4343                                         m_eEncodedWordState = STATE_BAD;
4344                                 break;
4345
4346                         case STATE_ENCODING:
4347                                 if (nChar == '?')
4348                                         m_eEncodedWordState = STATE_THIRD_QUESTION;
4349                                 else
4350                                         m_eEncodedWordState = STATE_BAD;
4351                                 break;
4352
4353                         case STATE_THIRD_QUESTION:
4354                                 if (INetMIME::isVisible(nChar) && nChar != '?')
4355                                         m_eEncodedWordState = STATE_ENCODED_TEXT;
4356                                 else
4357                                         m_eEncodedWordState = STATE_BAD;
4358                                 break;
4359
4360                         case STATE_ENCODED_TEXT:
4361                                 if (nChar == '?')
4362                                         m_eEncodedWordState = STATE_FOURTH_QUESTION;
4363                                 else if (!INetMIME::isVisible(nChar))
4364                                         m_eEncodedWordState = STATE_BAD;
4365                                 break;
4366
4367                         case STATE_FOURTH_QUESTION:
4368                                 if (nChar == '=')
4369                                         m_eEncodedWordState = STATE_SECOND_EQUALS;
4370                                 else
4371                                         m_eEncodedWordState = STATE_BAD;
4372                                 break;
4373
4374                         case STATE_SECOND_EQUALS:
4375                                 m_eEncodedWordState = STATE_BAD;
4376                                 break;
4377
4378             case STATE_BAD:
4379                 break;
4380                 }
4381
4382                 // Update encoding:
4383                 m_pEncodingList->includes(nChar);
4384
4385                 // Update coding:
4386                 enum { TENQ = 1,   // CONTEXT_TEXT, CODING_ENCODED
4387                            CENQ = 2,   // CONTEXT_COMMENT, CODING_ENCODED
4388                            PQTD = 4,   // CONTEXT_PHRASE, CODING_QUOTED
4389                            PENQ = 8 }; // CONTEXT_PHRASE, CODING_ENCODED
4390                 static const sal_Char aMinimal[128]
4391                         = { TENQ | CENQ        | PENQ,   // 0x00
4392                                 TENQ | CENQ        | PENQ,   // 0x01
4393                                 TENQ | CENQ        | PENQ,   // 0x02
4394                                 TENQ | CENQ        | PENQ,   // 0x03
4395                                 TENQ | CENQ        | PENQ,   // 0x04
4396                                 TENQ | CENQ        | PENQ,   // 0x05
4397                                 TENQ | CENQ        | PENQ,   // 0x06
4398                                 TENQ | CENQ        | PENQ,   // 0x07
4399                                 TENQ | CENQ        | PENQ,   // 0x08
4400                                 TENQ | CENQ        | PENQ,   // 0x09
4401                                 TENQ | CENQ        | PENQ,   // 0x0A
4402                                 TENQ | CENQ        | PENQ,   // 0x0B
4403                                 TENQ | CENQ        | PENQ,   // 0x0C
4404                                 TENQ | CENQ        | PENQ,   // 0x0D
4405                                 TENQ | CENQ        | PENQ,   // 0x0E
4406                                 TENQ | CENQ        | PENQ,   // 0x0F
4407                                 TENQ | CENQ        | PENQ,   // 0x10
4408                                 TENQ | CENQ        | PENQ,   // 0x11
4409                                 TENQ | CENQ        | PENQ,   // 0x12
4410                                 TENQ | CENQ        | PENQ,   // 0x13
4411                                 TENQ | CENQ        | PENQ,   // 0x14
4412                                 TENQ | CENQ        | PENQ,   // 0x15
4413                                 TENQ | CENQ        | PENQ,   // 0x16
4414                                 TENQ | CENQ        | PENQ,   // 0x17
4415                                 TENQ | CENQ        | PENQ,   // 0x18
4416                                 TENQ | CENQ        | PENQ,   // 0x19
4417                                 TENQ | CENQ        | PENQ,   // 0x1A
4418                                 TENQ | CENQ        | PENQ,   // 0x1B
4419                                 TENQ | CENQ        | PENQ,   // 0x1C
4420                                 TENQ | CENQ        | PENQ,   // 0x1D
4421                                 TENQ | CENQ        | PENQ,   // 0x1E
4422                                 TENQ | CENQ        | PENQ,   // 0x1F
4423                                                                                 0,   // ' '
4424                                                                                 0,   // '!'
4425                                                           PQTD       ,   // '"'
4426                                                                                 0,   // '#'
4427                                                                                 0,   // '$'
4428                                                                                 0,   // '%'
4429                                                                                 0,   // '&'
4430                                                                                 0,   // '''
4431                                            CENQ | PQTD       ,   // '('
4432                                            CENQ | PQTD       ,   // ')'
4433                                                                                 0,   // '*'
4434                                                                                 0,   // '+'
4435                                                           PQTD       ,   // ','
4436                                                                                 0,   // '-'
4437                                                           PQTD       ,   // '.'
4438                                                                                 0,   // '/'
4439                                                                                 0,   // '0'
4440                                                                                 0,   // '1'
4441                                                                                 0,   // '2'
4442                                                                                 0,   // '3'
4443                                                                                 0,   // '4'
4444                                                                                 0,   // '5'
4445                                                                                 0,   // '6'
4446                                                                                 0,   // '7'
4447                                                                                 0,   // '8'
4448                                                                                 0,   // '9'
4449                                                           PQTD       ,   // ':'
4450                                                           PQTD       ,   // ';'
4451                                                           PQTD       ,   // '<'
4452                                                                                 0,   // '='
4453                                                           PQTD       ,   // '>'
4454                                                                                 0,   // '?'
4455                                                           PQTD       ,   // '@'
4456                                                                                 0,   // 'A'
4457                                                                                 0,   // 'B'
4458                                                                                 0,   // 'C'
4459                                                                                 0,   // 'D'
4460                                                                                 0,   // 'E'
4461                                                                                 0,   // 'F'
4462                                                                                 0,   // 'G'
4463                                                                                 0,   // 'H'
4464                                                                                 0,   // 'I'
4465                                                                                 0,   // 'J'
4466                                                                                 0,   // 'K'
4467                                                                                 0,   // 'L'
4468                                                                                 0,   // 'M'
4469                                                                                 0,   // 'N'
4470                                                                                 0,   // 'O'
4471                                                                                 0,   // 'P'
4472                                                                                 0,   // 'Q'
4473                                                                                 0,   // 'R'
4474                                                                                 0,   // 'S'
4475                                                                                 0,   // 'T'
4476                                                                                 0,   // 'U'
4477                                                                                 0,   // 'V'
4478                                                                                 0,   // 'W'
4479                                                                                 0,   // 'X'
4480                                                                                 0,   // 'Y'
4481                                                                                 0,   // 'Z'
4482                                                           PQTD       ,   // '['
4483                                            CENQ | PQTD       ,   // '\'
4484                                                           PQTD       ,   // ']'
4485                                                                                 0,   // '^'
4486                                                                                 0,   // '_'
4487                                                                                 0,   // '`'
4488                                                                                 0,   // 'a'
4489                                                                                 0,   // 'b'
4490                                                                                 0,   // 'c'
4491                                                                                 0,   // 'd'
4492                                                                                 0,   // 'e'
4493                                                                                 0,   // 'f'
4494                                                                                 0,   // 'g'
4495                                                                                 0,   // 'h'
4496                                                                                 0,   // 'i'
4497                                                                                 0,   // 'j'
4498                                                                                 0,   // 'k'
4499                                                                                 0,   // 'l'
4500                                                                                 0,   // 'm'
4501                                                                                 0,   // 'n'
4502                                                                                 0,   // 'o'
4503                                                                                 0,   // 'p'
4504                                                                                 0,   // 'q'
4505                                                                                 0,   // 'r'
4506                                                                                 0,   // 's'
4507                                                                                 0,   // 't'
4508                                                                                 0,   // 'u'
4509                                                                                 0,   // 'v'
4510                                                                                 0,   // 'w'
4511                                                                                 0,   // 'x'
4512                                                                                 0,   // 'y'
4513                                                                                 0,   // 'z'
4514                                                                                 0,   // '{'
4515                                                                                 0,   // '|'
4516                                                                                 0,   // '}'
4517                                                                                 0,   // '~'
4518                                 TENQ | CENQ        | PENQ }; // DEL
4519                 Coding eNewCoding = !INetMIME::isUSASCII(nChar) ? CODING_ENCODED :
4520                                     m_eContext == CONTEXT_PHRASE ?
4521                                             Coding(aMinimal[nChar] >> 2) :
4522                                     aMinimal[nChar] & m_eContext ? CODING_ENCODED :
4523                                                                    CODING_NONE;
4524                 if (eNewCoding > m_eCoding)
4525                         m_eCoding = eNewCoding;
4526                 if (m_eCoding == CODING_QUOTED
4527                         && INetMIME::needsQuotedStringEscape(nChar))
4528                         ++m_nQuotedEscaped;
4529
4530                 // Append to buffer:
4531                 if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize)
4532                 {
4533                         m_pBuffer
4534                                 = static_cast< sal_Unicode * >(
4535                                           rtl_reallocateMemory(m_pBuffer,
4536                                                                                    (m_nBufferSize + BUFFER_SIZE)
4537                                                                                        * sizeof (sal_Unicode)));
4538                         m_pBufferEnd = m_pBuffer + m_nBufferSize;
4539                         m_nBufferSize += BUFFER_SIZE;
4540                 }
4541                 *m_pBufferEnd++ = sal_Unicode(nChar);
4542         }
4543         return *this;
4544 }
4545
4546 //============================================================================
4547 //
4548 //  INetContentTypeParameterList
4549 //
4550 //============================================================================
4551
4552 void INetContentTypeParameterList::Clear()
4553 {
4554         while (Count() > 0)
4555                 delete static_cast< INetContentTypeParameter * >(Remove(Count() - 1));
4556 }
4557
4558 //============================================================================
4559 const INetContentTypeParameter *
4560 INetContentTypeParameterList::find(const ByteString & rAttribute) const
4561 {
4562         for (ULONG i = 0; i < Count(); ++i)
4563         {
4564                 const INetContentTypeParameter * pParameter = GetObject(i);
4565                 if (pParameter->m_sAttribute.EqualsIgnoreCaseAscii(rAttribute))
4566                         return pParameter;
4567         }
4568         return 0;
4569 }
4570