include/tools/inetmime.hxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19 #ifndef TOOLS_INETMIME_HXX
  20 #define TOOLS_INETMIME_HXX
  21
  22 #include <boost/ptr_container/ptr_vector.hpp>
  23
  24 #include "tools/toolsdllapi.h"
  25 #include <rtl/alloc.h>
  26 #include <rtl/character.hxx>
  27 #include <rtl/string.hxx>
  28 #include <rtl/strbuf.hxx>
  29 #include <rtl/tencinfo.h>
  30 #include <tools/debug.hxx>
  31 #include <tools/errcode.hxx>
  32 #include <tools/string.hxx>
  33
  34 class DateTime;
  35 class INetContentTypeParameterList;
  36 class INetMIMECharsetList_Impl;
  37 class INetMIMEOutputSink;
  38
  39 class TOOLS_DLLPUBLIC INetMIME
  40 {
  41 public:
  42     enum { SOFT_LINE_LENGTH_LIMIT = 76,
  43            HARD_LINE_LENGTH_LIMIT = 998 };
  44
  45     /** The various types of message header field bodies, with respect to
  46         encoding and decoding them.
  47
  48         @descr  At the moment, five different types of header fields suffice
        to describe how to encode and decode any known message header field
  50         body, but need for more types may arise in the future as new header
  51         fields are introduced.
  52
  53         @descr  The following is an exhaustive list of all the header fields
  54         currently known to our implementation.  For every header field, it
  55         includes a 'canonic' (with regard to capitalization) name, a grammar
  56         rule for the body (using RFC 822 and RFC 2234 conventions), a list of
  57         relevant sources of information, and the HeaderFieldType value to use
  58         with that header field.  The list is based on RFC 2076 and draft-
  59         palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
  60         ietf/jp-ietf-home.html#anchor1003783>).
  61
  62         Approved: address  ;RFC 1036; HEADER_FIELD_ADDRESS
  63         bcc: #address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
  64         cc: 1#address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
  65         Comments: *text  ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
  66         Content-Base: absoluteURI  ;RFC 2110; HEADER_FIELD_TEXT
  67         Content-Description: *text  ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
  68         Content-Disposition: disposition-type *(";" disposition-parm)
  69             ;RFC 1806; HEADER_FIELD_STRUCTURED
  70         Content-ID: msg-id  ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
  71         Content-Location: absoluteURI / relativeURI  ;RFC 2110;
  72             HEADER_FIELD_TEXT
  73         Content-Transfer-Encoding: mechanism  ;RFC 2045, RFC 2047;
  74             HEADER_FIELD_STRUCTURED
  75         Content-Type: type "/" subtype *(";" parameter)  ;RFC 2045, RFC 2047;
  76             HEADER_FIELD_STRUCTURED
  77         Control:  *text ;RFC 1036; HEADER_FIELD_TEXT
  78         Date: date-time  ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
  79         Distribution: 1#atom  ;RFC 1036; HEADER_FIELD_STRUCTURED
  80         Encrypted: 1#2word  ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
  81         Expires: date-time  ;RFC 1036; HEADER_FIELD_STRUCTURED
  82         Followup-To: 1#(atom *("." atom))  ;RFC 1036; HEADER_FIELD_STRUCTURED
  83         From: mailbox / 1#mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
  84         In-Reply-To: *(phrase / msg-id)  ;RFC 822, RFC 2047;
  85             HEADER_FIELD_ADDRESS
  86         Keywords: #phrase  ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
  87         MIME-Version: 1*DIGIT "." 1*DIGIT  ;RFC 2045, RFC 2047;
  88             HEADER_FIELD_STRUCTURED
  89         Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
  90         Newsgroups: 1#(atom *("." atom))  ;RFC 1036, RFC 2047;
  91             HEADER_FIELD_STRUCTURED
  92         Organization: *text  ;RFC 1036; HEADER_FIELD_TEXT
  93         Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
  94             ["id" msg-id] ["for" addr-spec] ";" date-time  ;RFC 822, RFC 1123,
  95             RFC 2047; HEADER_FIELD_STRUCTURED
  96         References: *(phrase / msg-id)  ;RFC 822, RFC 2047;
  97             HEADER_FIELD_ADDRESS
  98         Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
  99         Resent-Date: date-time  ;RFC 822, RFC 1123, RFC 2047;
 100             HEADER_FIELD_STRUCTURED
 101         Resent-From: mailbox / 1#mailbox  ;RFC 822, RFC 2047;
 102             HEADER_FIELD_ADDRESS
 103         Resent-Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
 104         Resent-Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 105         Resent-Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 106         Resent-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 107         Resent-bcc: #address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 108         Resent-cc: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 109         Return-path: route-addr / ("<" ">")  ;RFC 822, RFC 1123, RFC 2047;
 110             HEADER_FIELD_STRUCTURED
 111         Return-Receipt-To: address  ;Not Internet standard;
            HEADER_FIELD_ADDRESS
 113         Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 114         Subject: *text  ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
 115         Summary: *text  ;RFC 1036; HEADER_FIELD_TEXT
 116         To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
 117         X-CHAOS-Marked: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
 118         X-CHAOS-Read: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
 119         X-CHAOS-Recipients: #*("<" atom word ">")  ;local;
 120             HEADER_FIELD_STRUCTURED
 121         X-CHAOS-Size: 1*DIGIT  ;local; HEADER_FIELD_STRUCTURED
 122         X-Mailer: *text  ;Not Internet standard; HEADER_FIELD_TEXT
 123         X-Mozilla-Status: 4HEXDIG  ;Mozilla; HEADER_FIELD_STRUCTURED
 124         X-Newsreader: *text  ;Not Internet standard; HEADER_FIELD_TEXT
 125         X-Priority: "1" / "2" / "3" / "4" / "5"  ;Not Internet standard;
 126             HEADER_FIELD_STRUCTURED
 127         Xref: sub-domain
 128             1*((atom / string) *("." (atom / string)) ":" msg-number)
 129             ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
 130      */
 131     enum HeaderFieldType
 132     {
 133         HEADER_FIELD_TEXT,
 134         HEADER_FIELD_STRUCTURED,
 135         HEADER_FIELD_PHRASE,
 136         HEADER_FIELD_MESSAGE_ID,
 137         HEADER_FIELD_ADDRESS
 138     };
 139
 140     /** Check for US-ASCII character.
 141
 142         @param nChar  Some UCS-4 character.
 143
 144         @return  True if nChar is a US-ASCII character (0x00--0x7F).
 145      */
 146     static inline bool isUSASCII(sal_uInt32 nChar);
 147
 148     /** Check for ISO 8859-1 character.
 149
 150         @param nChar  Some UCS-4 character.
 151
        @return  True if nChar is an ISO 8859-1 character (0x00--0xFF).
 153      */
 154     static inline bool isISO88591(sal_uInt32 nChar);
 155
 156     /** Check for US-ASCII control character.
 157
 158         @param nChar  Some UCS-4 character.
 159
 160         @return  True if nChar is a US-ASCII control character (US-ASCII
 161         0x00--0x1F or 0x7F).
 162      */
 163     static inline bool isControl(sal_uInt32 nChar);
 164
 165     /** Check for US-ASCII white space character.
 166
 167         @param nChar  Some UCS-4 character.
 168
 169         @return  True if nChar is a US-ASCII white space character (US-ASCII
 170         0x09 or 0x20).
 171      */
 172     static inline bool isWhiteSpace(sal_uInt32 nChar);
 173
 174     /** Check for US-ASCII visible character.
 175
 176         @param nChar  Some UCS-4 character.
 177
 178         @return  True if nChar is a US-ASCII visible character (US-ASCII
 179         0x21--0x7E).
 180      */
 181     static inline bool isVisible(sal_uInt32 nChar);
 182
 183     /** Check for US-ASCII digit character.
 184
 185         @param nChar  Some UCS-4 character.
 186
 187         @return  True if nChar is a US-ASCII (decimal) digit character (US-
 188         ASCII '0'--'9').
 189      */
 190     static inline bool isDigit(sal_uInt32 nChar);
 191
 192     /** Check for US-ASCII canonic hexadecimal digit character.
 193
 194         @param nChar  Some UCS-4 character.
 195
 196         @return  True if nChar is a US-ASCII canonic (i.e., upper case)
 197         hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
 198      */
 199     static inline bool isCanonicHexDigit(sal_uInt32 nChar);
 200
 201     /** Check for US-ASCII hexadecimal digit character.
 202
 203         @param nChar  Some UCS-4 character.
 204
 205         @return  True if nChar is a US-ASCII hexadecimal digit character (US-
 206         ASCII '0'--'9', 'A'--'F', 'a'--'f').
 207      */
 208     static inline bool isHexDigit(sal_uInt32 nChar);
 209
 210     /** Check for US-ASCII upper case character.
 211
 212         @param nChar  Some UCS-4 character.
 213
 214         @return  True if nChar is a US-ASCII upper case alphabetic character
 215         (US-ASCII 'A'--'Z').
 216      */
 217     static inline bool isUpperCase(sal_uInt32 nChar);
 218
 219     /** Check for US-ASCII lower case character.
 220
 221         @param nChar  Some UCS-4 character.
 222
 223         @return  True if nChar is a US-ASCII lower case alphabetic character
 224         (US-ASCII 'a'--'z').
 225      */
 226     static inline bool isLowerCase(sal_uInt32 nChar);
 227
 228     /** Check for US-ASCII alphabetic character.
 229
 230         @param nChar  Some UCS-4 character.
 231
 232         @return  True if nChar is a US-ASCII alphabetic character (US-ASCII
 233         'A'--'Z' or 'a'--'z').
 234      */
 235     static inline bool isAlpha(sal_uInt32 nChar);
 236
 237     /** Check for US-ASCII alphanumeric character.
 238
 239         @param nChar  Some UCS-4 character.
 240
 241         @return  True if nChar is a US-ASCII alphanumeric character (US-ASCII
 242         '0'--'9', 'A'--'Z' or 'a'--'z').
 243      */
 244     static inline bool isAlphanumeric(sal_uInt32 nChar);
 245
 246     /** Check for US-ASCII Base 64 digit character.
 247
 248         @param nChar  Some UCS-4 character.
 249
 250         @return  True if nChar is a US-ASCII Base 64 digit character (US-ASCII
 251         'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
 252      */
 253     static inline bool isBase64Digit(sal_uInt32 nChar);
 254
 255     /** Check whether some character is valid within an RFC 822 <atom>.
 256
 257         @param nChar  Some UCS-4 character.
 258
 259         @return  True if nChar is valid within an RFC 822 <atom> (US-ASCII
 260         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 261         '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
 262      */
 263     static bool isAtomChar(sal_uInt32 nChar);
 264
 265     /** Check whether some character is valid within an RFC 2045 <token>.
 266
 267         @param nChar  Some UCS-4 character.
 268
        @return  True if nChar is valid within an RFC 2045 <token> (US-ASCII
 270         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 271         '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 272      */
 273     static bool isTokenChar(sal_uInt32 nChar);
 274
 275     /** Check whether some character is valid within an RFC 2047 <token>.
 276
 277         @param nChar  Some UCS-4 character.
 278
 279         @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
 280         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
 281         '-', '^', '_', '`', '{', '|', '}', or '~').
 282      */
 283     static bool isEncodedWordTokenChar(sal_uInt32 nChar);
 284
 285     /** Check whether some character is valid within an RFC 2060 <atom>.
 286
 287         @param nChar  Some UCS-4 character.
 288
 289         @return  True if nChar is valid within an RFC 2060 <atom> (US-ASCII
 290         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
 291         '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
 292         '|', '}', or '~').
 293      */
 294     static bool isIMAPAtomChar(sal_uInt32 nChar);
 295
    /** Translate a US-ASCII character to upper case.

        @param nChar  Some UCS-4 character.

        @return  If nChar is a US-ASCII lower case character (US-ASCII
        'a'--'z'), return the corresponding US-ASCII upper case character (US-
        ASCII 'A'--'Z'); otherwise, return nChar unchanged.
 303      */
 304     static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
 305
    /** Translate a US-ASCII character to lower case.

        @param nChar  Some UCS-4 character.

        @return  If nChar is a US-ASCII upper case character (US-ASCII
        'A'--'Z'), return the corresponding US-ASCII lower case character (US-
        ASCII 'a'--'z'); otherwise, return nChar unchanged.
 313      */
 314     static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
 315
 316     /** Get the digit weight of a US-ASCII character.
 317
 318         @param nChar  Some UCS-4 character.
 319
 320         @return  If nChar is a US-ASCII (decimal) digit character (US-ASCII
 321         '0'--'9'), return the corresponding weight (0--9); otherwise,
 322         return -1.
 323      */
 324     static inline int getWeight(sal_uInt32 nChar);
 325
 326     /** Get the hexadecimal digit weight of a US-ASCII character.
 327
 328         @param nChar  Some UCS-4 character.
 329
 330         @return  If nChar is a US-ASCII hexadecimal digit character (US-ASCII
 331         '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
 332         (0--15); otherwise, return -1.
 333      */
 334     static inline int getHexWeight(sal_uInt32 nChar);
 335
 336     /** Get the Base 64 digit weight of a US-ASCII character.
 337
 338         @param nChar  Some UCS-4 character.
 339
        @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
        'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
 342         corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
 343         character (US-ASCII '='), return -1; otherwise, return -2.
 344      */
 345     static inline int getBase64Weight(sal_uInt32 nChar);
 346
 347     /** Get a hexadecimal digit encoded as US-ASCII.
 348
 349         @param nWeight  Must be in the range 0--15, inclusive.
 350
 351         @return  The canonic (i.e., upper case) hexadecimal digit
 352         corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
 353      */
 354     static sal_uInt32 getHexDigit(int nWeight);
 355
 356     static inline bool isHighSurrogate(sal_uInt32 nUTF16);
 357
 358     static inline bool isLowSurrogate(sal_uInt32 nUTF16);
 359
 360     static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
 361                                      sal_Unicode cLowSurrogate);
 362
 363     /** Check two US-ASCII strings for equality, ignoring case.
 364
 365         @param pBegin1  Points to the start of the first string, must not be
 366         null.
 367
 368         @param pEnd1  Points past the end of the first string, must be >=
 369         pBegin1.
 370
 371         @param pString2  Points to the start of the null terminated second
 372         string, must not be null.
 373
 374         @return  True if the two strings are equal, ignoring the case of US-
 375         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
 376      */
 377     static bool equalIgnoreCase(const sal_Char * pBegin1,
 378                                 const sal_Char * pEnd1,
 379                                 const sal_Char * pString2);
 380
 381     /** Check two US-ASCII strings for equality, ignoring case.
 382
 383         @param pBegin1  Points to the start of the first string, must not be
 384         null.
 385
 386         @param pEnd1  Points past the end of the first string, must be >=
 387         pBegin1.
 388
 389         @param pString2  Points to the start of the null terminated second
 390         string, must not be null.
 391
 392         @return  True if the two strings are equal, ignoring the case of US-
 393         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
 394      */
 395     static bool equalIgnoreCase(const sal_Unicode * pBegin1,
 396                                 const sal_Unicode * pEnd1,
 397                                 const sal_Char * pString2);
 398
 399     static inline bool startsWithLineBreak(const sal_Char * pBegin,
 400                                            const sal_Char * pEnd);
 401
 402     static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
 403                                            const sal_Unicode * pEnd);
 404
 405     static inline bool startsWithLineFolding(const sal_Char * pBegin,
 406                                              const sal_Char * pEnd);
 407
 408     static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
 409                                              const sal_Unicode * pEnd);
 410
 411     static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
 412                                            const sal_Char * pEnd);
 413
 414     static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
 415                                                         pBegin,
 416                                                     const sal_Unicode * pEnd);
 417
 418     static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
 419                                            const sal_Unicode * pEnd);
 420
 421     static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
 422                                                                pBegin,
 423                                                            const sal_Unicode *
 424                                                                pEnd);
 425
 426     static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
 427
 428     static const sal_Char * skipQuotedString(const sal_Char * pBegin,
 429                                              const sal_Char * pEnd);
 430
 431     static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
 432                                                 const sal_Unicode * pEnd);
 433
 434     static bool scanUnsigned(const sal_Unicode *& rBegin,
 435                              const sal_Unicode * pEnd, bool bLeadingZeroes,
 436                              sal_uInt32 & rValue);
 437
 438     static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
 439                                                const sal_Unicode * pEnd,
 440                                                sal_uInt32 nOpening,
 441                                                sal_uInt32 nClosing,
 442                                                sal_Size & rLength,
 443                                                bool & rModify);
 444
 445     static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
 446                                               sal_Unicode const * pEnd,
 447                                               INetContentTypeParameterList *
 448                                                   pParameters);
 449
 450     static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
 451                                                        eEncoding);
 452
 453     static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
 454                                                          eEncoding);
 455
 456     static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
 457
 458     static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
 459                                                const sal_Char * pEnd);
 460
 461     static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
 462
 463     static INetMIMECharsetList_Impl *
 464     createPreferredCharsetList(rtl_TextEncoding eEncoding);
 465
 466     static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
 467                                           const sal_Char * pEnd,
 468                                           rtl_TextEncoding eEncoding,
 469                                           sal_Size & rSize);
 470
 471     static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
 472                                          const sal_Unicode * pEnd,
 473                                          rtl_TextEncoding eEncoding,
 474                                          sal_Size & rSize);
 475
 476     /** Get the number of octets required to encode an UCS-4 character using
 477         UTF-8 encoding.
 478
 479         @param nChar  Some UCS-4 character.
 480
 481         @return  The number of octets required (in the range 1--6, inclusive).
 482      */
 483     static inline int getUTF8OctetCount(sal_uInt32 nChar);
 484
 485     static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
 486                                            sal_uInt32 nChar);
 487
 488     static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
 489
 490     static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
 491                                      HeaderFieldType eType,
 492                                      const OUString& rBody,
 493                                      rtl_TextEncoding ePreferredEncoding,
 494                                      bool bInitialSpace = true);
 495
 496     static bool translateUTF8Char(const sal_Char *& rBegin,
 497                                   const sal_Char * pEnd,
 498                                   rtl_TextEncoding eEncoding,
 499                                   sal_uInt32 & rCharacter);
 500
 501     static OUString decodeHeaderFieldBody(HeaderFieldType eType,
 502                                            const OString& rBody);
 503
 504 // #i70651#: Prevent warnings on Mac OS X.
 505 #ifdef MACOSX
 506 #pragma GCC system_header
 507 #endif
 508
 509     /** Get the UTF-32 character at the head of a UTF-16 encoded string.
 510
 511         @param rBegin  Points to the start of the UTF-16 encoded string, must
 512         not be null.  On exit, it points past the first UTF-32 character's
 513         encoding.
 514
 515         @param pEnd  Points past the end of the UTF-16 encoded string, must be
 516         strictly greater than rBegin.
 517
 518         @return  The UCS-4 character at the head of the UTF-16 encoded string.
        If the string does not start with the UTF-16 encoding of a UCS-4
 520         character, the first UTF-16 value is returned.
 521      */
 522     static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
 523                                                const sal_Unicode * pEnd);
 524
 525     /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
 526
 527         @param pBuffer  Points to a buffer, must not be null.
 528
 529         @param nUTF32  An UTF-32 character, must be in the range 0..0x10FFFF.
 530
 531         @return  A pointer past the UTF-16 characters put into the buffer
 532         (i.e., pBuffer + 1 or pBuffer + 2).
 533      */
 534     static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
 535                                                   sal_uInt32 nUTF32);
 536 };
 537
 538 // static
 539 inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
 540 {
 541     return rtl::isAscii(nChar);
 542 }
 543
 544 // static
 545 inline bool INetMIME::isISO88591(sal_uInt32 nChar)
 546 {
 547     return nChar <= 0xFF;
 548 }
 549
 550 // static
 551 inline bool INetMIME::isControl(sal_uInt32 nChar)
 552 {
 553     return nChar <= 0x1F || nChar == 0x7F;
 554 }
 555
 556 // static
 557 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
 558 {
 559     return nChar == '\t' || nChar == ' ';
 560 }
 561
 562 // static
 563 inline bool INetMIME::isVisible(sal_uInt32 nChar)
 564 {
 565     return nChar >= '!' && nChar <= '~';
 566 }
 567
 568 // static
 569 inline bool INetMIME::isDigit(sal_uInt32 nChar)
 570 {
 571     return rtl::isAsciiDigit(nChar);
 572 }
 573
 574 // static
 575 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
 576 {
 577     return rtl::isAsciiCanonicHexDigit(nChar);
 578 }
 579
 580 // static
 581 inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
 582 {
 583     return rtl::isAsciiHexDigit(nChar);
 584 }
 585
 586 // static
 587 inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
 588 {
 589     return rtl::isAsciiUpperCase(nChar);
 590 }
 591
 592 // static
 593 inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
 594 {
 595     return rtl::isAsciiLowerCase(nChar);
 596 }
 597
 598 // static
 599 inline bool INetMIME::isAlpha(sal_uInt32 nChar)
 600 {
 601     return rtl::isAsciiAlpha(nChar);
 602 }
 603
 604 // static
 605 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
 606 {
 607     return rtl::isAsciiAlphanumeric(nChar);
 608 }
 609
 610 // static
 611 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
 612 {
 613     return rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar) || rtl::isAsciiDigit(nChar)
 614            || nChar == '+' || nChar == '/';
 615 }
 616
 617 // static
 618 inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
 619 {
 620     return rtl::isAsciiLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
 621 }
 622
 623 // static
 624 inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
 625 {
 626     return rtl::isAsciiUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
 627 }
 628
 629 // static
 630 inline int INetMIME::getWeight(sal_uInt32 nChar)
 631 {
 632     return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : -1;
 633 }
 634
 635 // static
 636 inline int INetMIME::getHexWeight(sal_uInt32 nChar)
 637 {
 638     return rtl::isAsciiDigit(nChar) ? int(nChar - '0') :
 639            nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
 640            nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
 641 }
 642
 643 // static
 644 inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
 645 {
 646     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
 647            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
 648            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
 649            nChar == '+' ? 62 :
 650            nChar == '/' ? 63 :
 651            nChar == '=' ? -1 : -2;
 652 }
 653
 654 // static
 655 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
 656 {
 657     return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
 658 }
 659
 660 // static
 661 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
 662 {
 663     return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
 664 }
 665
 666 // static
 667 inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
 668                                     sal_Unicode cLowSurrogate)
 669 {
 670     DBG_ASSERT(isHighSurrogate(cHighSurrogate)
 671                && isLowSurrogate(cLowSurrogate),
 672                "INetMIME::toUTF32(): Bad chars");
 673     return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
 674                | (sal_uInt32(cLowSurrogate) & 0x3FF);
 675 }
 676
 677 // static
 678 inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
 679                                           const sal_Char * pEnd)
 680 {
 681     DBG_ASSERT(pBegin && pBegin <= pEnd,
 682                "INetMIME::startsWithLineBreak(): Bad sequence");
 683
 684     return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
 685         // CR, LF
 686 }
 687
 688 // static
 689 inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
 690                                               const sal_Unicode * pEnd)
 691 {
 692     DBG_ASSERT(pBegin && pBegin <= pEnd,
 693                "INetMIME::startsWithLineBreak(): Bad sequence");
 694
 695     return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
 696         // CR, LF
 697 }
 698
 699 // static
 700 inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
 701                                             const sal_Char * pEnd)
 702 {
 703     DBG_ASSERT(pBegin && pBegin <= pEnd,
 704                "INetMIME::startsWithLineFolding(): Bad sequence");
 705
 706     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
 707            && isWhiteSpace(pBegin[2]); // CR, LF
 708 }
 709
 710 // static
 711 inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
 712                                             const sal_Unicode * pEnd)
 713 {
 714     DBG_ASSERT(pBegin && pBegin <= pEnd,
 715                "INetMIME::startsWithLineFolding(): Bad sequence");
 716
 717     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
 718            && isWhiteSpace(pBegin[2]); // CR, LF
 719 }
 720
 721 // static
 722 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
 723                                                  const sal_Char * pEnd)
 724 {
 725     DBG_ASSERT(pBegin && pBegin <= pEnd,
 726                "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
 727
 728     return pBegin != pEnd
 729            && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
 730 }
 731
 732 // static
 733 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
 734 {
 735     return nChar == '"' || nChar == '\\';
 736 }
 737
 738 // static
 739 inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
 740 {
 741 #if defined WNT
 742     return eEncoding == RTL_TEXTENCODING_MS_1252 ?
 743                RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
 744 #else // WNT
 745     return eEncoding;
 746 #endif // WNT
 747 }
 748
 749 // static
 750 inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
 751                                                         eEncoding)
 752 {
 753 #if defined WNT
 754     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
 755                RTL_TEXTENCODING_MS_1252 : eEncoding;
 756 #else
 757     return eEncoding;
 758 #endif
 759 }
 760
 761 // static
 762 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
 763 {
 764     return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
 765 }
 766
 767 // static
 768 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
 769 {
 770     DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
 771
 772     return nChar < 0x80 ? 1 :
 773            nChar < 0x800 ? 2 :
 774            nChar <= 0x10000 ? 3 :
 775            nChar <= 0x200000 ? 4 :
 776            nChar <= 0x4000000 ? 5 : 6;
 777 }
 778
 779 // static
 780 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
 781                                               const sal_Unicode * pEnd)
 782 {
 783     DBG_ASSERT(rBegin && rBegin < pEnd,
 784                "INetMIME::getUTF32Character(): Bad sequence");
 785     if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
 786         && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
 787     {
 788         sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
 789         return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
 790     }
 791     else
 792         return *rBegin++;
 793 }
 794
 795 // static
 796 inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
 797                                                  sal_uInt32 nUTF32)
 798 {
 799     DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
 800     if (nUTF32 < 0x10000)
 801         *pBuffer++ = sal_Unicode(nUTF32);
 802     else
 803     {
 804         nUTF32 -= 0x10000;
 805         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
 806         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
 807     }
 808     return pBuffer;
 809 }
 810
 811 class INetMIMEOutputSink
 812 {
 813 public:
 814     static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
 815
 816 private:
 817     sal_uInt32 m_nColumn;
 818     sal_uInt32 m_nLineLengthLimit;
 819
 820 protected:
 821     /** Write a sequence of octets.
 822
 823         @param pBegin  Points to the start of the sequence, must not be null.
 824
 825         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 826      */
 827     virtual void writeSequence(const sal_Char * pBegin,
 828                                const sal_Char * pEnd) = 0;
 829
 830     /** Write a null terminated sequence of octets (without the terminating
 831         null).
 832
 833         @param pOctets  A null terminated sequence of octets, must not be
 834         null.
 835
 836         @return  The length of pOctets (without the terminating null).
 837      */
 838     virtual sal_Size writeSequence(const sal_Char * pSequence);
 839
 840     /** Write a sequence of octets.
 841
 842         @descr  The supplied sequence of UCS-4 characters is interpreted as a
 843         sequence of octets.  It is an error if any of the elements of the
 844         sequence has a numerical value greater than 255.
 845
 846         @param pBegin  Points to the start of the sequence, must not be null.
 847
 848         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 849      */
 850     virtual void writeSequence(const sal_uInt32 * pBegin,
 851                                const sal_uInt32 * pEnd);
 852
 853     /** Write a sequence of octets.
 854
 855         @descr  The supplied sequence of Unicode characters is interpreted as
 856         a sequence of octets.  It is an error if any of the elements of the
 857         sequence has a numerical value greater than 255.
 858
 859         @param pBegin  Points to the start of the sequence, must not be null.
 860
 861         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 862      */
 863     virtual void writeSequence(const sal_Unicode * pBegin,
 864                                const sal_Unicode * pEnd);
 865
 866 public:
 867     INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
 868                        sal_uInt32 nTheLineLengthLimit
 869                            = INetMIME::SOFT_LINE_LENGTH_LIMIT):
 870         m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
 871
 872     virtual ~INetMIMEOutputSink() {}
 873
 874     /** Get the current column.
 875
 876         @return  The current column (starting from zero).
 877      */
 878     sal_uInt32 getColumn() const { return m_nColumn; }
 879
 880     sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
 881
 882     void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
 883     { m_nLineLengthLimit = nTheLineLengthLimit; }
 884
 885     virtual ErrCode getError() const;
 886
 887     /** Write a sequence of octets.
 888
 889         @param pBegin  Points to the start of the sequence, must not be null.
 890
 891         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 892      */
 893     inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
 894
 895     /** Write a sequence of octets.
 896
 897         @param pBegin  Points to the start of the sequence, must not be null.
 898
 899         @param nLength  The length of the sequence.
 900      */
 901     void write(const sal_Char * pBegin, sal_Size nLength)
 902     { write(pBegin, pBegin + nLength); }
 903
 904     /** Write a sequence of octets.
 905
 906         @descr  The supplied sequence of UCS-4 characters is interpreted as a
 907         sequence of octets.  It is an error if any of the elements of the
 908         sequence has a numerical value greater than 255.
 909
 910         @param pBegin  Points to the start of the sequence, must not be null.
 911
 912         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 913      */
 914     inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
 915
 916     /** Write a sequence of octets.
 917
 918         @descr  The supplied sequence of Unicode characters is interpreted as
 919         a sequence of octets.  It is an error if any of the elements of the
 920         sequence has a numerical value greater than 255.
 921
 922         @param pBegin  Points to the start of the sequence, must not be null.
 923
 924         @param pEnd  Points past the end of the sequence, must be >= pBegin.
 925      */
 926     inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
 927
 928     /** Write a sequence of octets.
 929
 930         @param rOctets  A OString, interpreted as a sequence of octets.
 931
 932         @param nBegin  The offset of the first character to write.
 933
 934         @param nEnd  The offset past the last character to write.
 935      */
 936     void write(const OString& rOctets, xub_StrLen nBegin,
 937                       xub_StrLen nEnd)
 938     {
 939         writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
 940         m_nColumn += nEnd - nBegin;
 941     }
 942
 943     /** Write a single octet.
 944
 945         @param nOctet  Some octet.
 946
 947         @return  This instance.
 948      */
 949     inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
 950
 951     /** Write a null terminated sequence of octets (without the terminating
 952         null).
 953
 954         @param pOctets  A null terminated sequence of octets, must not be
 955         null.
 956
 957         @return  This instance.
 958      */
 959     inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
 960
 961     /** Write a sequence of octets.
 962
 963         @param rOctets  A OString, interpreted as a sequence of octets.
 964
 965         @return  This instance.
 966      */
 967     INetMIMEOutputSink & operator <<(const OString& rOctets)
 968     {
 969         writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
 970         m_nColumn += rOctets.getLength();
 971         return *this;
 972     }
 973
 974     /** Call a manipulator function.
 975
 976         @param  pManipulator  A manipulator function.
 977
 978         @return  Whatever the manipulator function returns.
 979      */
 980     INetMIMEOutputSink &
 981     operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
 982     { return pManipulator(*this); }
 983
 984     /** Write a line end (CR LF).
 985      */
 986     void writeLineEnd();
 987
 988     /** A manipulator function that writes a line end (CR LF).
 989
 990         @param rSink  Some sink.
 991
 992         @return  The sink rSink.
 993      */
 994     static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
 995 };
 996
 997 inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
 998                                       const sal_Char * pEnd)
 999 {
1000     writeSequence(pBegin, pEnd);
1001     m_nColumn += pEnd - pBegin;
1002 }
1003
1004 inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1005                                       const sal_uInt32 * pEnd)
1006 {
1007     writeSequence(pBegin, pEnd);
1008     m_nColumn += pEnd - pBegin;
1009 }
1010
1011 inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1012                                       const sal_Unicode * pEnd)
1013 {
1014     writeSequence(pBegin, pEnd);
1015     m_nColumn += pEnd - pBegin;
1016 }
1017
1018 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1019 {
1020     writeSequence(&nOctet, &nOctet + 1);
1021     ++m_nColumn;
1022     return *this;
1023 }
1024
1025 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1026                                                                 pOctets)
1027 {
1028     m_nColumn += writeSequence(pOctets);
1029     return *this;
1030 }
1031
1032 // static
1033 inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1034                                                          rSink)
1035 {
1036     rSink.writeLineEnd();
1037     return rSink;
1038 }
1039
1040 // static
1041 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1042                                           sal_uInt32 nChar)
1043 {
1044     DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1045     rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1046           << sal_uInt8(getHexDigit(nChar & 15));
1047 }
1048
1049 class INetMIMEStringOutputSink: public INetMIMEOutputSink
1050 {
1051     OStringBuffer m_aBuffer;
1052
1053     using INetMIMEOutputSink::writeSequence;
1054
1055     virtual void writeSequence(const sal_Char * pBegin,
1056                                const sal_Char * pEnd);
1057
1058 public:
1059     inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1060                                     sal_uInt32 nLineLengthLimit
1061                                         = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1062         INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
1063
1064     virtual ErrCode getError() const;
1065
1066     OString takeBuffer()
1067     {
1068         return m_aBuffer.makeStringAndClear();
1069     }
1070 };
1071
1072 class INetMIMEEncodedWordOutputSink
1073 {
1074 public:
1075     enum Context { CONTEXT_TEXT = 1,
1076                    CONTEXT_COMMENT = 2,
1077                    CONTEXT_PHRASE = 4 };
1078
1079     enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1080
1081 private:
1082     enum { BUFFER_SIZE = 256 };
1083
1084     enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1085                   CODING_ENCODED_TERMINATED };
1086
1087     enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1088                             STATE_FIRST_QUESTION, STATE_CHARSET,
1089                             STATE_SECOND_QUESTION, STATE_ENCODING,
1090                             STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1091                             STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1092                             STATE_BAD };
1093
1094     INetMIMEOutputSink & m_rSink;
1095     Context m_eContext;
1096     Space m_eInitialSpace;
1097     sal_uInt32 m_nExtraSpaces;
1098     INetMIMECharsetList_Impl * m_pEncodingList;
1099     sal_Unicode * m_pBuffer;
1100     sal_uInt32 m_nBufferSize;
1101     sal_Unicode * m_pBufferEnd;
1102     Coding m_ePrevCoding;
1103     rtl_TextEncoding m_ePrevMIMEEncoding;
1104     Coding m_eCoding;
1105     sal_uInt32 m_nQuotedEscaped;
1106     EncodedWordState m_eEncodedWordState;
1107
1108     inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1109
1110     void finish(bool bWriteTrailer);
1111
1112 public:
1113     inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1114                                          Context eTheContext,
1115                                          Space eTheInitialSpace,
1116                                          rtl_TextEncoding ePreferredEncoding);
1117
1118     ~INetMIMEEncodedWordOutputSink();
1119
1120     INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1121
1122     inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1123
1124     inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1125
1126     inline bool flush();
1127 };
1128
1129 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1130            INetMIMEOutputSink & rTheSink, Context eTheContext,
1131            Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1132     m_rSink(rTheSink),
1133     m_eContext(eTheContext),
1134     m_eInitialSpace(eTheInitialSpace),
1135     m_nExtraSpaces(0),
1136     m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1137     m_ePrevCoding(CODING_NONE),
1138     m_eCoding(CODING_NONE),
1139     m_nQuotedEscaped(0),
1140     m_eEncodedWordState(STATE_INITIAL)
1141 {
1142     m_nBufferSize = BUFFER_SIZE;
1143     m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1144                                                  m_nBufferSize
1145                                                      * sizeof (sal_Unicode)));
1146     m_pBufferEnd = m_pBuffer;
1147 }
1148
1149 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1150                                                  const sal_Char * pEnd)
1151 {
1152     DBG_ASSERT(pBegin && pBegin <= pEnd,
1153                "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1154
1155     while (pBegin != pEnd)
1156         operator <<(*pBegin++);
1157 }
1158
1159 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1160                                                  const sal_Unicode * pEnd)
1161 {
1162     DBG_ASSERT(pBegin && pBegin <= pEnd,
1163                "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1164
1165     while (pBegin != pEnd)
1166         operator <<(*pBegin++);
1167 }
1168
1169 inline bool INetMIMEEncodedWordOutputSink::flush()
1170 {
1171     finish(true);
1172     return m_ePrevCoding != CODING_NONE;
1173 }
1174
1175 struct INetContentTypeParameter
1176 {
1177     /** The name of the attribute, in US-ASCII encoding and converted to lower
1178         case.  If a parameter value is split as described in RFC 2231, there
1179         will only be one item for the complete parameter, with the attribute
1180         name lacking any section suffix.
1181      */
1182     const OString m_sAttribute;
1183
1184     /** The optional character set specification (see RFC 2231), in US-ASCII
1185         encoding and converted to lower case.
1186      */
1187     const OString m_sCharset;
1188
1189     /** The optional language specification (see RFC 2231), in US-ASCII
1190         encoding and converted to lower case.
1191      */
1192     const OString m_sLanguage;
1193
1194     /** The attribute value.  If the value is a quoted-string, it is
1195         'unpacked.'  If a character set is specified, and the value can be
1196         converted to Unicode, this is done.  Also, if no character set is
1197         specified, it is first tried to convert the value from UTF-8 encoding
1198         to Unicode, and if that doesn't work (because the value is not in
1199         UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1200         (which will always work).  But if a character set is specified and the
1201         value cannot be converted from that character set to Unicode, special
1202         action is taken to produce a value that can possibly be transformed
1203         back into its original form:  Any 8-bit character from a non-encoded
1204         part of the original value is directly converted to Unicode
1205         (effectively handling it as if it was ISO-8859-1 encoded), and any
1206         8-bit character from an encoded part of the original value is mapped
1207         to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1208         within Unicode's Private Use Area (effectively adding 0xF800 to the
1209         character's numeric value).
1210      */
1211     const OUString m_sValue;
1212
1213     /** This is true if the value is successfully converted to Unicode, and
1214         false if the value is a special mixture of ISO-LATIN-1 characters and
1215         characters from Unicode's Private Use Area.
1216      */
1217     const bool m_bConverted;
1218
1219     INetContentTypeParameter(const OString& rTheAttribute,
1220         const OString& rTheCharset, const OString& rTheLanguage,
1221         const OUString& rTheValue, bool bTheConverted)
1222     : m_sAttribute(rTheAttribute)
1223     , m_sCharset(rTheCharset)
1224     , m_sLanguage(rTheLanguage)
1225     , m_sValue(rTheValue)
1226     , m_bConverted(bTheConverted)
1227     {
1228     }
1229 };
1230
1231 class TOOLS_DLLPUBLIC INetContentTypeParameterList
1232 {
1233 public:
1234
1235     void Clear();
1236
1237     void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1238     {
1239         maEntries.insert(maEntries.begin()+nIndex,pParameter);
1240     }
1241
1242     void Append(INetContentTypeParameter *pParameter)
1243     {
1244         maEntries.push_back(pParameter);
1245     }
1246
1247     inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const
1248     {
1249         return &(maEntries[nIndex]);
1250     }
1251
1252     const INetContentTypeParameter * find(const OString& rAttribute) const;
1253
1254 private:
1255
1256     boost::ptr_vector<INetContentTypeParameter> maEntries;
1257 };
1258
1259 #endif
1260
1261 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */