1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef TOOLS_INETMIME_HXX
20 #define TOOLS_INETMIME_HXX
22 #include <boost/ptr_container/ptr_vector.hpp>
24 #include "tools/toolsdllapi.h"
25 #include <rtl/alloc.h>
26 #include <rtl/character.hxx>
27 #include <rtl/string.hxx>
28 #include <rtl/strbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/debug.hxx>
31 #include <tools/errcode.hxx>
32 #include <tools/string.hxx>
35 class INetContentTypeParameterList
;
36 class INetMIMECharsetList_Impl
;
37 class INetMIMEOutputSink
;
39 class TOOLS_DLLPUBLIC INetMIME
42 enum { SOFT_LINE_LENGTH_LIMIT
= 76,
43 HARD_LINE_LENGTH_LIMIT
= 998 };
45 /** The various types of message header field bodies, with respect to
46 encoding and decoding them.
48 @descr At the moment, five different types of header fields suffice
49 to describe how to encode and decode any known message header field
50 body, but need for more types may arise in the future as new header
51 fields are introduced.
53 @descr The following is an exhaustive list of all the header fields
54 currently known to our implementation. For every header field, it
55 includes a 'canonic' (with regard to capitalization) name, a grammar
56 rule for the body (using RFC 822 and RFC 2234 conventions), a list of
57 relevant sources of information, and the HeaderFieldType value to use
58 with that header field. The list is based on RFC 2076 and draft-
59 palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
60 ietf/jp-ietf-home.html#anchor1003783>).
62 Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
63 bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
64 cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
65 Comments: *text ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
66 Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
67 Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
68 Content-Disposition: disposition-type *(";" disposition-parm)
69 ;RFC 1806; HEADER_FIELD_STRUCTURED
70 Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
71 Content-Location: absoluteURI / relativeURI ;RFC 2110;
73 Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
74 HEADER_FIELD_STRUCTURED
75 Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
76 HEADER_FIELD_STRUCTURED
77 Control: *text ;RFC 1036; HEADER_FIELD_TEXT
78 Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
79 Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
80 Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
81 Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
82 Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
83 From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
84 In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
86 Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
87 MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
88 HEADER_FIELD_STRUCTURED
89 Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
90 Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
91 HEADER_FIELD_STRUCTURED
92 Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
93 Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
94 ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
95 RFC 2047; HEADER_FIELD_STRUCTURED
96 References: *(phrase / msg-id) ;RFC 822, RFC 2047;
98 Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
99 Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
100 HEADER_FIELD_STRUCTURED
101 Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
103 Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
104 Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
105 Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
110 HEADER_FIELD_STRUCTURED
111 Return-Receipt-To: address ;Not Internet standard;
113 Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
114 Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
115 Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
116 To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
117 X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
118 X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
119 X-CHAOS-Recipients: #*("<" atom word ">") ;local;
120 HEADER_FIELD_STRUCTURED
121 X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
122 X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
123 X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
124 X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
125 X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
126 HEADER_FIELD_STRUCTURED
128 1*((atom / string) *("." (atom / string)) ":" msg-number)
129 ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
134 HEADER_FIELD_STRUCTURED
,
136 HEADER_FIELD_MESSAGE_ID
,
140 /** Check for US-ASCII character.
142 @param nChar Some UCS-4 character.
144 @return True if nChar is a US-ASCII character (0x00--0x7F).
146 static inline bool isUSASCII(sal_uInt32 nChar
);
148 /** Check for ISO 8859-1 character.
150 @param nChar Some UCS-4 character.
152 @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
154 static inline bool isISO88591(sal_uInt32 nChar
);
156 /** Check for US-ASCII control character.
158 @param nChar Some UCS-4 character.
160 @return True if nChar is a US-ASCII control character (US-ASCII
163 static inline bool isControl(sal_uInt32 nChar
);
165 /** Check for US-ASCII white space character.
167 @param nChar Some UCS-4 character.
169 @return True if nChar is a US-ASCII white space character (US-ASCII
172 static inline bool isWhiteSpace(sal_uInt32 nChar
);
174 /** Check for US-ASCII visible character.
176 @param nChar Some UCS-4 character.
178 @return True if nChar is a US-ASCII visible character (US-ASCII
181 static inline bool isVisible(sal_uInt32 nChar
);
183 /** Check for US-ASCII digit character.
185 @param nChar Some UCS-4 character.
187 @return True if nChar is a US-ASCII (decimal) digit character (US-
190 static inline bool isDigit(sal_uInt32 nChar
);
192 /** Check for US-ASCII canonic hexadecimal digit character.
194 @param nChar Some UCS-4 character.
196 @return True if nChar is a US-ASCII canonic (i.e., upper case)
197 hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
199 static inline bool isCanonicHexDigit(sal_uInt32 nChar
);
201 /** Check for US-ASCII hexadecimal digit character.
203 @param nChar Some UCS-4 character.
205 @return True if nChar is a US-ASCII hexadecimal digit character (US-
206 ASCII '0'--'9', 'A'--'F', 'a'--'f').
208 static inline bool isHexDigit(sal_uInt32 nChar
);
210 /** Check for US-ASCII upper case character.
212 @param nChar Some UCS-4 character.
214 @return True if nChar is a US-ASCII upper case alphabetic character
217 static inline bool isUpperCase(sal_uInt32 nChar
);
219 /** Check for US-ASCII lower case character.
221 @param nChar Some UCS-4 character.
223 @return True if nChar is a US-ASCII lower case alphabetic character
226 static inline bool isLowerCase(sal_uInt32 nChar
);
228 /** Check for US-ASCII alphabetic character.
230 @param nChar Some UCS-4 character.
232 @return True if nChar is a US-ASCII alphabetic character (US-ASCII
233 'A'--'Z' or 'a'--'z').
235 static inline bool isAlpha(sal_uInt32 nChar
);
237 /** Check for US-ASCII alphanumeric character.
239 @param nChar Some UCS-4 character.
241 @return True if nChar is a US-ASCII alphanumeric character (US-ASCII
242 '0'--'9', 'A'--'Z' or 'a'--'z').
244 static inline bool isAlphanumeric(sal_uInt32 nChar
);
246 /** Check for US-ASCII Base 64 digit character.
248 @param nChar Some UCS-4 character.
250 @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
251 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
253 static inline bool isBase64Digit(sal_uInt32 nChar
);
255 /** Check whether some character is valid within an RFC 822 <atom>.
257 @param nChar Some UCS-4 character.
259 @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
260 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
261 '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
263 static bool isAtomChar(sal_uInt32 nChar
);
265 /** Check whether some character is valid within an RFC 2045 <token>.
267 @param nChar Some UCS-4 character.
269 @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
270 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
271 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
273 static bool isTokenChar(sal_uInt32 nChar
);
275 /** Check whether some character is valid within an RFC 2047 <token>.
277 @param nChar Some UCS-4 character.
279 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
280 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
281 '-', '^', '_', '`', '{', '|', '}', or '~').
283 static bool isEncodedWordTokenChar(sal_uInt32 nChar
);
285 /** Check whether some character is valid within an RFC 2060 <atom>.
287 @param nChar Some UCS-4 character.
289 @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
290 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
291 '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
294 static bool isIMAPAtomChar(sal_uInt32 nChar
);
296 /** Translate an US-ASCII character to upper case.
298 @param nChar Some UCS-4 character.
300 @return If nChar is a US-ASCII lower case character (US-ASCII
301 'a'--'z'), return the corresponding US-ASCII upper case character (US-
302 ASCII 'A'--'Z'); otherwise, return nChar unchanged.
304 static inline sal_uInt32
toUpperCase(sal_uInt32 nChar
);
306 /** Translate an US-ASCII character to lower case.
308 @param nChar Some UCS-4 character.
310 @return If nChar is a US-ASCII upper case character (US-ASCII
311 'A'--'Z'), return the corresponding US-ASCII lower case character (US-
312 ASCII 'a'--'z'); otherwise, return nChar unchanged.
314 static inline sal_uInt32
toLowerCase(sal_uInt32 nChar
);
316 /** Get the digit weight of a US-ASCII character.
318 @param nChar Some UCS-4 character.
320 @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
321 '0'--'9'), return the corresponding weight (0--9); otherwise,
324 static inline int getWeight(sal_uInt32 nChar
);
326 /** Get the hexadecimal digit weight of a US-ASCII character.
328 @param nChar Some UCS-4 character.
330 @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
331 '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
332 (0--15); otherwise, return -1.
334 static inline int getHexWeight(sal_uInt32 nChar
);
336 /** Get the Base 64 digit weight of a US-ASCII character.
338 @param nChar Some UCS-4 character.
340 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
341 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
342 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
343 character (US-ASCII '='), return -1; otherwise, return -2.
345 static inline int getBase64Weight(sal_uInt32 nChar
);
347 /** Get a hexadecimal digit encoded as US-ASCII.
349 @param nWeight Must be in the range 0--15, inclusive.
351 @return The canonic (i.e., upper case) hexadecimal digit
352 corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
354 static sal_uInt32
getHexDigit(int nWeight
);
356 static inline bool isHighSurrogate(sal_uInt32 nUTF16
);
358 static inline bool isLowSurrogate(sal_uInt32 nUTF16
);
360 static inline sal_uInt32
toUTF32(sal_Unicode cHighSurrogate
,
361 sal_Unicode cLowSurrogate
);
363 /** Check two US-ASCII strings for equality, ignoring case.
365 @param pBegin1 Points to the start of the first string, must not be
368 @param pEnd1 Points past the end of the first string, must be >=
371 @param pString2 Points to the start of the null terminated second
372 string, must not be null.
374 @return True if the two strings are equal, ignoring the case of US-
375 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
377 static bool equalIgnoreCase(const sal_Char
* pBegin1
,
378 const sal_Char
* pEnd1
,
379 const sal_Char
* pString2
);
381 /** Check two US-ASCII strings for equality, ignoring case.
383 @param pBegin1 Points to the start of the first string, must not be
386 @param pEnd1 Points past the end of the first string, must be >=
389 @param pString2 Points to the start of the null terminated second
390 string, must not be null.
392 @return True if the two strings are equal, ignoring the case of US-
393 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
395 static bool equalIgnoreCase(const sal_Unicode
* pBegin1
,
396 const sal_Unicode
* pEnd1
,
397 const sal_Char
* pString2
);
399 static inline bool startsWithLineBreak(const sal_Char
* pBegin
,
400 const sal_Char
* pEnd
);
402 static inline bool startsWithLineBreak(const sal_Unicode
* pBegin
,
403 const sal_Unicode
* pEnd
);
405 static inline bool startsWithLineFolding(const sal_Char
* pBegin
,
406 const sal_Char
* pEnd
);
408 static inline bool startsWithLineFolding(const sal_Unicode
* pBegin
,
409 const sal_Unicode
* pEnd
);
411 static bool startsWithLinearWhiteSpace(const sal_Char
* pBegin
,
412 const sal_Char
* pEnd
);
414 static const sal_Unicode
* skipLinearWhiteSpace(const sal_Unicode
*
416 const sal_Unicode
* pEnd
);
418 static const sal_Unicode
* skipComment(const sal_Unicode
* pBegin
,
419 const sal_Unicode
* pEnd
);
421 static const sal_Unicode
* skipLinearWhiteSpaceComment(const sal_Unicode
*
426 static inline bool needsQuotedStringEscape(sal_uInt32 nChar
);
428 static const sal_Char
* skipQuotedString(const sal_Char
* pBegin
,
429 const sal_Char
* pEnd
);
431 static const sal_Unicode
* skipQuotedString(const sal_Unicode
* pBegin
,
432 const sal_Unicode
* pEnd
);
434 static bool scanUnsigned(const sal_Unicode
*& rBegin
,
435 const sal_Unicode
* pEnd
, bool bLeadingZeroes
,
436 sal_uInt32
& rValue
);
438 static const sal_Unicode
* scanQuotedBlock(const sal_Unicode
* pBegin
,
439 const sal_Unicode
* pEnd
,
445 static sal_Unicode
const * scanParameters(sal_Unicode
const * pBegin
,
446 sal_Unicode
const * pEnd
,
447 INetContentTypeParameterList
*
450 static inline rtl_TextEncoding
translateToMIME(rtl_TextEncoding
453 static inline rtl_TextEncoding
translateFromMIME(rtl_TextEncoding
456 static const sal_Char
* getCharsetName(rtl_TextEncoding eEncoding
);
458 static rtl_TextEncoding
getCharsetEncoding(const sal_Char
* pBegin
,
459 const sal_Char
* pEnd
);
461 static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding
);
463 static INetMIMECharsetList_Impl
*
464 createPreferredCharsetList(rtl_TextEncoding eEncoding
);
466 static sal_Unicode
* convertToUnicode(const sal_Char
* pBegin
,
467 const sal_Char
* pEnd
,
468 rtl_TextEncoding eEncoding
,
471 static sal_Char
* convertFromUnicode(const sal_Unicode
* pBegin
,
472 const sal_Unicode
* pEnd
,
473 rtl_TextEncoding eEncoding
,
476 /** Get the number of octets required to encode an UCS-4 character using
479 @param nChar Some UCS-4 character.
481 @return The number of octets required (in the range 1--6, inclusive).
483 static inline int getUTF8OctetCount(sal_uInt32 nChar
);
485 static inline void writeEscapeSequence(INetMIMEOutputSink
& rSink
,
488 static void writeUTF8(INetMIMEOutputSink
& rSink
, sal_uInt32 nChar
);
490 static void writeHeaderFieldBody(INetMIMEOutputSink
& rSink
,
491 HeaderFieldType eType
,
492 const OUString
& rBody
,
493 rtl_TextEncoding ePreferredEncoding
,
494 bool bInitialSpace
= true);
496 static bool translateUTF8Char(const sal_Char
*& rBegin
,
497 const sal_Char
* pEnd
,
498 rtl_TextEncoding eEncoding
,
499 sal_uInt32
& rCharacter
);
501 static OUString
decodeHeaderFieldBody(HeaderFieldType eType
,
502 const OString
& rBody
);
504 // #i70651#: Prevent warnings on Mac OS X.
506 #pragma GCC system_header
509 /** Get the UTF-32 character at the head of a UTF-16 encoded string.
511 @param rBegin Points to the start of the UTF-16 encoded string, must
512 not be null. On exit, it points past the first UTF-32 character's
515 @param pEnd Points past the end of the UTF-16 encoded string, must be
516 strictly greater than rBegin.
518 @return The UCS-4 character at the head of the UTF-16 encoded string.
519 If the string does not start with the UTF-16 encoding of a UCS-4
520 character, the first UTF-16 value is returned.
522 static inline sal_uInt32
getUTF32Character(const sal_Unicode
*& rBegin
,
523 const sal_Unicode
* pEnd
);
525 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
527 @param pBuffer Points to a buffer, must not be null.
529 @param nUTF32 An UTF-32 character, must be in the range 0..0x10FFFF.
531 @return A pointer past the UTF-16 characters put into the buffer
532 (i.e., pBuffer + 1 or pBuffer + 2).
534 static inline sal_Unicode
* putUTF32Character(sal_Unicode
* pBuffer
,
539 inline bool INetMIME::isUSASCII(sal_uInt32 nChar
)
541 return rtl::isAscii(nChar
);
545 inline bool INetMIME::isISO88591(sal_uInt32 nChar
)
547 return nChar
<= 0xFF;
551 inline bool INetMIME::isControl(sal_uInt32 nChar
)
553 return nChar
<= 0x1F || nChar
== 0x7F;
557 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar
)
559 return nChar
== '\t' || nChar
== ' ';
563 inline bool INetMIME::isVisible(sal_uInt32 nChar
)
565 return nChar
>= '!' && nChar
<= '~';
569 inline bool INetMIME::isDigit(sal_uInt32 nChar
)
571 return rtl::isAsciiDigit(nChar
);
575 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar
)
577 return rtl::isAsciiCanonicHexDigit(nChar
);
581 inline bool INetMIME::isHexDigit(sal_uInt32 nChar
)
583 return rtl::isAsciiHexDigit(nChar
);
587 inline bool INetMIME::isUpperCase(sal_uInt32 nChar
)
589 return rtl::isAsciiUpperCase(nChar
);
593 inline bool INetMIME::isLowerCase(sal_uInt32 nChar
)
595 return rtl::isAsciiLowerCase(nChar
);
599 inline bool INetMIME::isAlpha(sal_uInt32 nChar
)
601 return rtl::isAsciiAlpha(nChar
);
605 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar
)
607 return rtl::isAsciiAlphanumeric(nChar
);
611 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar
)
613 return rtl::isAsciiUpperCase(nChar
) || rtl::isAsciiLowerCase(nChar
) || rtl::isAsciiDigit(nChar
)
614 || nChar
== '+' || nChar
== '/';
618 inline sal_uInt32
INetMIME::toUpperCase(sal_uInt32 nChar
)
620 return rtl::isAsciiLowerCase(nChar
) ? nChar
- ('a' - 'A') : nChar
;
624 inline sal_uInt32
INetMIME::toLowerCase(sal_uInt32 nChar
)
626 return rtl::isAsciiUpperCase(nChar
) ? nChar
+ ('a' - 'A') : nChar
;
630 inline int INetMIME::getWeight(sal_uInt32 nChar
)
632 return rtl::isAsciiDigit(nChar
) ? int(nChar
- '0') : -1;
636 inline int INetMIME::getHexWeight(sal_uInt32 nChar
)
638 return rtl::isAsciiDigit(nChar
) ? int(nChar
- '0') :
639 nChar
>= 'A' && nChar
<= 'F' ? int(nChar
- 'A' + 10) :
640 nChar
>= 'a' && nChar
<= 'f' ? int(nChar
- 'a' + 10) : -1;
644 inline int INetMIME::getBase64Weight(sal_uInt32 nChar
)
646 return rtl::isAsciiUpperCase(nChar
) ? int(nChar
- 'A') :
647 rtl::isAsciiLowerCase(nChar
) ? int(nChar
- 'a' + 26) :
648 rtl::isAsciiDigit(nChar
) ? int(nChar
- '0' + 52) :
651 nChar
== '=' ? -1 : -2;
655 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16
)
657 return nUTF16
>= 0xD800 && nUTF16
<= 0xDBFF;
661 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16
)
663 return nUTF16
>= 0xDC00 && nUTF16
<= 0xDFFF;
667 inline sal_uInt32
INetMIME::toUTF32(sal_Unicode cHighSurrogate
,
668 sal_Unicode cLowSurrogate
)
670 DBG_ASSERT(isHighSurrogate(cHighSurrogate
)
671 && isLowSurrogate(cLowSurrogate
),
672 "INetMIME::toUTF32(): Bad chars");
673 return ((sal_uInt32(cHighSurrogate
) & 0x3FF) << 10)
674 | (sal_uInt32(cLowSurrogate
) & 0x3FF);
678 inline bool INetMIME::startsWithLineBreak(const sal_Char
* pBegin
,
679 const sal_Char
* pEnd
)
681 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
682 "INetMIME::startsWithLineBreak(): Bad sequence");
684 return pEnd
- pBegin
>= 2 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A;
689 inline bool INetMIME::startsWithLineBreak(const sal_Unicode
* pBegin
,
690 const sal_Unicode
* pEnd
)
692 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
693 "INetMIME::startsWithLineBreak(): Bad sequence");
695 return pEnd
- pBegin
>= 2 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A;
700 inline bool INetMIME::startsWithLineFolding(const sal_Char
* pBegin
,
701 const sal_Char
* pEnd
)
703 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
704 "INetMIME::startsWithLineFolding(): Bad sequence");
706 return pEnd
- pBegin
>= 3 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A
707 && isWhiteSpace(pBegin
[2]); // CR, LF
711 inline bool INetMIME::startsWithLineFolding(const sal_Unicode
* pBegin
,
712 const sal_Unicode
* pEnd
)
714 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
715 "INetMIME::startsWithLineFolding(): Bad sequence");
717 return pEnd
- pBegin
>= 3 && pBegin
[0] == 0x0D && pBegin
[1] == 0x0A
718 && isWhiteSpace(pBegin
[2]); // CR, LF
722 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char
* pBegin
,
723 const sal_Char
* pEnd
)
725 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
726 "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
728 return pBegin
!= pEnd
729 && (isWhiteSpace(*pBegin
) || startsWithLineFolding(pBegin
, pEnd
));
733 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar
)
735 return nChar
== '"' || nChar
== '\\';
// Map a text encoding for MIME output: the visible statement turns
// RTL_TEXTENCODING_MS_1252 into RTL_TEXTENCODING_ISO_8859_1 and returns any
// other encoding unchanged.
// NOTE(review): the embedded numbering jumps (739 -> 742 and 743 -> 750), so
// lines -- possibly preprocessor conditionals and an alternative return --
// appear to be missing from this view; confirm against the complete file.
739 inline rtl_TextEncoding
INetMIME::translateToMIME(rtl_TextEncoding eEncoding
)
742 return eEncoding
== RTL_TEXTENCODING_MS_1252
?
743 RTL_TEXTENCODING_ISO_8859_1
: eEncoding
;
// Inverse of the mapping in translateToMIME(): the visible statement turns
// RTL_TEXTENCODING_ISO_8859_1 into RTL_TEXTENCODING_MS_1252 and returns any
// other encoding unchanged.
// NOTE(review): the parameter name line and further lines (numbering jumps
// 750 -> 754 and 755 -> 762) are missing from this view; the body refers to
// the parameter as eEncoding -- confirm against the complete file.
750 inline rtl_TextEncoding
INetMIME::translateFromMIME(rtl_TextEncoding
754 return eEncoding
== RTL_TEXTENCODING_ISO_8859_1
?
755 RTL_TEXTENCODING_MS_1252
: eEncoding
;
762 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding
)
764 return ( rtl_isOctetTextEncoding(eEncoding
) == sal_True
);
768 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar
)
770 DBG_ASSERT(nChar
< 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
772 return nChar
< 0x80 ? 1 :
774 nChar
<= 0x10000 ? 3 :
775 nChar
<= 0x200000 ? 4 :
776 nChar
<= 0x4000000 ? 5 : 6;
780 inline sal_uInt32
INetMIME::getUTF32Character(const sal_Unicode
*& rBegin
,
781 const sal_Unicode
* pEnd
)
783 DBG_ASSERT(rBegin
&& rBegin
< pEnd
,
784 "INetMIME::getUTF32Character(): Bad sequence");
785 if (rBegin
+ 1 < pEnd
&& rBegin
[0] >= 0xD800 && rBegin
[0] <= 0xDBFF
786 && rBegin
[1] >= 0xDC00 && rBegin
[1] <= 0xDFFF)
788 sal_uInt32 nUTF32
= sal_uInt32(*rBegin
++ & 0x3FF) << 10;
789 return (nUTF32
| (*rBegin
++ & 0x3FF)) + 0x10000;
// Store the UTF-16 encoding of nUTF32 at pBuffer, advancing pBuffer by one
// code unit for values below 0x10000 and by two (0xD800-based and
// 0xDC00-based units) otherwise.  nUTF32 is asserted to be <= 0x10FFFF.
// NOTE(review): the second parameter declaration, the else branch and the
// return statement are not visible here (numbering jumps 796 -> 799,
// 801 -> 805 and past 806).  As visible, nUTF32 is shifted without first
// subtracting 0x10000 -- confirm that the missing line 804 does that.
796 inline sal_Unicode
* INetMIME::putUTF32Character(sal_Unicode
* pBuffer
,
799 DBG_ASSERT(nUTF32
<= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
800 if (nUTF32
< 0x10000)
801 *pBuffer
++ = sal_Unicode(nUTF32
);
// Surrogate pair: high unit from the bits above the low ten, low unit from
// the low ten bits.
805 *pBuffer
++ = sal_Unicode(0xD800 | (nUTF32
>> 10));
806 *pBuffer
++ = sal_Unicode(0xDC00 | (nUTF32
& 0x3FF));
811 class INetMIMEOutputSink
814 static sal_uInt32
const NO_LINE_LENGTH_LIMIT
= SAL_MAX_UINT32
;
817 sal_uInt32 m_nColumn
;
818 sal_uInt32 m_nLineLengthLimit
;
821 /** Write a sequence of octets.
823 @param pBegin Points to the start of the sequence, must not be null.
825 @param pEnd Points past the end of the sequence, must be >= pBegin.
827 virtual void writeSequence(const sal_Char
* pBegin
,
828 const sal_Char
* pEnd
) = 0;
830 /** Write a null terminated sequence of octets (without the terminating
833 @param pSequence A null terminated sequence of octets, must not be
836 @return The length of pOctets (without the terminating null).
838 virtual sal_Size
writeSequence(const sal_Char
* pSequence
);
840 /** Write a sequence of octets.
842 @descr The supplied sequence of UCS-4 characters is interpreted as a
843 sequence of octets. It is an error if any of the elements of the
844 sequence has a numerical value greater than 255.
846 @param pBegin Points to the start of the sequence, must not be null.
848 @param pEnd Points past the end of the sequence, must be >= pBegin.
850 virtual void writeSequence(const sal_uInt32
* pBegin
,
851 const sal_uInt32
* pEnd
);
853 /** Write a sequence of octets.
855 @descr The supplied sequence of Unicode characters is interpreted as
856 a sequence of octets. It is an error if any of the elements of the
857 sequence has a numerical value greater than 255.
859 @param pBegin Points to the start of the sequence, must not be null.
861 @param pEnd Points past the end of the sequence, must be >= pBegin.
863 virtual void writeSequence(const sal_Unicode
* pBegin
,
864 const sal_Unicode
* pEnd
);
867 INetMIMEOutputSink(sal_uInt32 nTheColumn
= 0,
868 sal_uInt32 nTheLineLengthLimit
869 = INetMIME::SOFT_LINE_LENGTH_LIMIT
):
870 m_nColumn(nTheColumn
), m_nLineLengthLimit(nTheLineLengthLimit
) {}
872 virtual ~INetMIMEOutputSink() {}
874 /** Get the current column.
876 @return The current column (starting from zero).
878 sal_uInt32
getColumn() const { return m_nColumn
; }
880 sal_uInt32
getLineLengthLimit() const { return m_nLineLengthLimit
; }
882 void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit
)
883 { m_nLineLengthLimit
= nTheLineLengthLimit
; }
885 virtual ErrCode
getError() const;
887 /** Write a sequence of octets.
889 @param pBegin Points to the start of the sequence, must not be null.
891 @param pEnd Points past the end of the sequence, must be >= pBegin.
893 inline void write(const sal_Char
* pBegin
, const sal_Char
* pEnd
);
895 /** Write a sequence of octets.
897 @param pBegin Points to the start of the sequence, must not be null.
899 @param nLength The length of the sequence.
901 void write(const sal_Char
* pBegin
, sal_Size nLength
)
902 { write(pBegin
, pBegin
+ nLength
); }
904 /** Write a sequence of octets.
906 @descr The supplied sequence of UCS-4 characters is interpreted as a
907 sequence of octets. It is an error if any of the elements of the
908 sequence has a numerical value greater than 255.
910 @param pBegin Points to the start of the sequence, must not be null.
912 @param pEnd Points past the end of the sequence, must be >= pBegin.
914 inline void write(const sal_uInt32
* pBegin
, const sal_uInt32
* pEnd
);
916 /** Write a sequence of octets.
918 @descr The supplied sequence of Unicode characters is interpreted as
919 a sequence of octets. It is an error if any of the elements of the
920 sequence has a numerical value greater than 255.
922 @param pBegin Points to the start of the sequence, must not be null.
924 @param pEnd Points past the end of the sequence, must be >= pBegin.
926 inline void write(const sal_Unicode
* pBegin
, const sal_Unicode
* pEnd
);
928 /** Write a sequence of octets.
930 @param rOctets A OString, interpreted as a sequence of octets.
932 @param nBegin The offset of the first character to write.
934 @param nEnd The offset past the last character to write.
936 void write(const OString
& rOctets
, xub_StrLen nBegin
,
939 writeSequence(rOctets
.getStr() + nBegin
, rOctets
.getStr() + nEnd
);
940 m_nColumn
+= nEnd
- nBegin
;
943 /** Write a single octet.
945 @param nOctet Some octet.
947 @return This instance.
949 inline INetMIMEOutputSink
& operator <<(sal_Char nOctet
);
951 /** Write a null terminated sequence of octets (without the terminating
954 @param pOctets A null terminated sequence of octets, must not be
957 @return This instance.
959 inline INetMIMEOutputSink
& operator <<(const sal_Char
* pOctets
);
961 /** Write a sequence of octets.
963 @param rOctets A OString, interpreted as a sequence of octets.
965 @return This instance.
967 INetMIMEOutputSink
& operator <<(const OString
& rOctets
)
969 writeSequence(rOctets
.getStr(), rOctets
.getStr() + rOctets
.getLength());
970 m_nColumn
+= rOctets
.getLength();
974 /** Call a manipulator function.
976 @param pManipulator A manipulator function.
978 @return Whatever the manipulator function returns.
981 operator <<(INetMIMEOutputSink
& (* pManipulator
)(INetMIMEOutputSink
&))
982 { return pManipulator(*this); }
984 /** Write a line end (CR LF).
988 /** A manipulator function that writes a line end (CR LF).
990 @param rSink Some sink.
992 @return The sink rSink.
994 static inline INetMIMEOutputSink
& endl(INetMIMEOutputSink
& rSink
);
997 inline void INetMIMEOutputSink::write(const sal_Char
* pBegin
,
998 const sal_Char
* pEnd
)
1000 writeSequence(pBegin
, pEnd
);
1001 m_nColumn
+= pEnd
- pBegin
;
1004 inline void INetMIMEOutputSink::write(const sal_uInt32
* pBegin
,
1005 const sal_uInt32
* pEnd
)
1007 writeSequence(pBegin
, pEnd
);
1008 m_nColumn
+= pEnd
- pBegin
;
1011 inline void INetMIMEOutputSink::write(const sal_Unicode
* pBegin
,
1012 const sal_Unicode
* pEnd
)
1014 writeSequence(pBegin
, pEnd
);
1015 m_nColumn
+= pEnd
- pBegin
;
1018 inline INetMIMEOutputSink
& INetMIMEOutputSink::operator <<(sal_Char nOctet
)
1020 writeSequence(&nOctet
, &nOctet
+ 1);
1025 inline INetMIMEOutputSink
& INetMIMEOutputSink::operator <<(const sal_Char
*
1028 m_nColumn
+= writeSequence(pOctets
);
1033 inline INetMIMEOutputSink
& INetMIMEOutputSink::endl(INetMIMEOutputSink
&
1036 rSink
.writeLineEnd();
// Write nChar to rSink as '=' followed by two hexadecimal digits obtained
// from getHexDigit(): first the high nibble (nChar >> 4), then the low
// nibble (nChar & 15).  nChar is asserted to be <= 0xFF.
// NOTE(review): the declaration of the nChar parameter is not visible in
// this view (numbering jumps 1041 -> 1044); confirm its type against the
// complete file.
1041 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink
& rSink
,
1044 DBG_ASSERT(nChar
<= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1045 rSink
<< '=' << sal_uInt8(getHexDigit(nChar
>> 4))
1046 << sal_uInt8(getHexDigit(nChar
& 15));
// An INetMIMEOutputSink with an OStringBuffer member from which the
// accumulated text can be taken as an OString via takeBuffer().
// NOTE(review): braces and access specifiers are not visible in this view.
1049 class INetMIMEStringOutputSink
: public INetMIMEOutputSink
// Buffer handed out (and cleared) by takeBuffer().
1051 OStringBuffer m_aBuffer
;
// Keep the base class writeSequence() overloads visible next to the
// override declared below.
1053 using INetMIMEOutputSink::writeSequence
;
// Override of the base class octet writer; defined elsewhere.
1055 virtual void writeSequence(const sal_Char
* pBegin
,
1056 const sal_Char
* pEnd
);
// Forwards the start column and the line length limit (default
// INetMIME::SOFT_LINE_LENGTH_LIMIT) to the base class constructor.
1059 inline INetMIMEStringOutputSink(sal_uInt32 nColumn
= 0,
1060 sal_uInt32 nLineLengthLimit
1061 = INetMIME::SOFT_LINE_LENGTH_LIMIT
):
1062 INetMIMEOutputSink(nColumn
, nLineLengthLimit
) {}
// Override of the base class error query; defined elsewhere.
1064 virtual ErrCode
getError() const;
// Returns the buffered content as an OString and clears the buffer.
1066 OString
takeBuffer()
1068 return m_aBuffer
.makeStringAndClear();
// Accepts characters one at a time (operator <<) or as ranges (write) and
// holds a reference to an INetMIMEOutputSink plus a sal_Unicode buffer and
// coding/encoded-word state.
// NOTE(review): this extract is incomplete -- braces, access specifiers, the
// end of the EncodedWordState enumerator list and the declarations of
// m_eContext and m_eCoding (both initialized by the constructor) are not
// visible. Consult the complete header before relying on this listing.
1072 class INetMIMEEncodedWordOutputSink
// Contexts selectable at construction. The values 1, 2 and 4 are distinct
// single bits, presumably so they can be tested as a mask -- TODO confirm.
1075 enum Context
{ CONTEXT_TEXT
= 1,
1076 CONTEXT_COMMENT
= 2,
1077 CONTEXT_PHRASE
= 4 };
// Initial-space policy selectable at construction.
1079 enum Space
{ SPACE_NO
, SPACE_ENCODED
, SPACE_ALWAYS
};
// Number of sal_Unicode elements the constructor stores in m_nBufferSize.
1082 enum { BUFFER_SIZE
= 256 };
// Coding kinds tracked in m_ePrevCoding (and m_eCoding, per the constructor).
1084 enum Coding
{ CODING_NONE
, CODING_QUOTED
, CODING_ENCODED
,
1085 CODING_ENCODED_TERMINATED
};
// State names follow the shape "=?charset?encoding?encoded-text?=" of an
// RFC 2047 encoded word; presumably used to recognize such a sequence in the
// characters written -- TODO confirm in the implementation.
// NOTE(review): the enumerator list is cut off after STATE_SECOND_EQUALS.
1087 enum EncodedWordState
{ STATE_INITIAL
, STATE_FIRST_EQUALS
,
1088 STATE_FIRST_QUESTION
, STATE_CHARSET
,
1089 STATE_SECOND_QUESTION
, STATE_ENCODING
,
1090 STATE_THIRD_QUESTION
, STATE_ENCODED_TEXT
,
1091 STATE_FOURTH_QUESTION
, STATE_SECOND_EQUALS
,
// Sink held by reference; the referenced object must outlive this instance.
1094 INetMIMEOutputSink
& m_rSink
;
// Set from the constructor's eTheInitialSpace argument.
1096 Space m_eInitialSpace
;
1097 sal_uInt32 m_nExtraSpaces
;
// Obtained from INetMIME::createPreferredCharsetList() in the constructor.
1098 INetMIMECharsetList_Impl
* m_pEncodingList
;
// Start of the sal_Unicode buffer allocated with rtl_allocateMemory().
1099 sal_Unicode
* m_pBuffer
;
// Buffer size; the constructor sets it to BUFFER_SIZE.
1100 sal_uInt32 m_nBufferSize
;
// Set equal to m_pBuffer by the constructor.
1101 sal_Unicode
* m_pBufferEnd
;
// flush() reports whether this differs from CODING_NONE.
1102 Coding m_ePrevCoding
;
1103 rtl_TextEncoding m_ePrevMIMEEncoding
;
1105 sal_uInt32 m_nQuotedEscaped
;
1106 EncodedWordState m_eEncodedWordState
;
// Declared here, defined elsewhere.
1108 inline bool needsEncodedWordEscape(sal_uInt32 nChar
) const;
// Declared here, defined elsewhere.
1110 void finish(bool bWriteTrailer
);
// Defined inline further down in this header.
1113 inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink
& rTheSink
,
1114 Context eTheContext
,
1115 Space eTheInitialSpace
,
1116 rtl_TextEncoding ePreferredEncoding
);
// Declared here, defined elsewhere.
1118 ~INetMIMEEncodedWordOutputSink();
// Accepts a single character; returns a reference to allow chaining.
1120 INetMIMEEncodedWordOutputSink
& operator <<(sal_uInt32 nChar
);
// Range overloads: each passes every element of pBegin..pEnd to operator <<.
1122 inline void write(const sal_Char
* pBegin
, const sal_Char
* pEnd
);
1124 inline void write(const sal_Unicode
* pBegin
, const sal_Unicode
* pEnd
);
// Defined inline further down in this header.
1126 inline bool flush();
// Constructor: records the context and initial-space policy, obtains the
// charset list for ePreferredEncoding, resets the coding state and allocates
// the character buffer.
// NOTE(review): this extract is incomplete -- no initializer for the
// reference member m_rSink is visible (rTheSink is otherwise unused here),
// the first operand of the allocation size is missing, and the enclosing
// braces are absent. Confirm against the complete header.
1129 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1130 INetMIMEOutputSink
& rTheSink
, Context eTheContext
,
1131 Space eTheInitialSpace
, rtl_TextEncoding ePreferredEncoding
):
1133 m_eContext(eTheContext
),
1134 m_eInitialSpace(eTheInitialSpace
),
// The charset list is created from the caller's preferred encoding.
1136 m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding
)),
// Both coding fields start at CODING_NONE, the escape counter at 0 and the
// encoded-word state at STATE_INITIAL.
1137 m_ePrevCoding(CODING_NONE
),
1138 m_eCoding(CODING_NONE
),
1139 m_nQuotedEscaped(0),
1140 m_eEncodedWordState(STATE_INITIAL
)
1142 m_nBufferSize
= BUFFER_SIZE
;
// The byte count passed to rtl_allocateMemory() is a multiple of
// sizeof (sal_Unicode). The returned pointer is not checked for null in the
// visible lines.
1143 m_pBuffer
= static_cast< sal_Unicode
* >(rtl_allocateMemory(
1145 * sizeof (sal_Unicode
)));
// End pointer starts at the buffer start; presumably [m_pBuffer, m_pBufferEnd)
// is the filled range, i.e. the buffer is empty -- TODO confirm.
1146 m_pBufferEnd
= m_pBuffer
;
1149 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char
* pBegin
,
1150 const sal_Char
* pEnd
)
1152 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
1153 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1155 while (pBegin
!= pEnd
)
1156 operator <<(*pBegin
++);
1159 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode
* pBegin
,
1160 const sal_Unicode
* pEnd
)
1162 DBG_ASSERT(pBegin
&& pBegin
<= pEnd
,
1163 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1165 while (pBegin
!= pEnd
)
1166 operator <<(*pBegin
++);
// Returns true when m_ePrevCoding differs from CODING_NONE, false otherwise.
// NOTE(review): the embedded line numbers jump from 1169 to 1172, so at least
// one line of the body (and the braces) is not visible in this extract --
// confirm the full body against the complete header.
1169 inline bool INetMIMEEncodedWordOutputSink::flush()
1172 return m_ePrevCoding
!= CODING_NONE
;
1175 struct INetContentTypeParameter
1177 /** The name of the attribute, in US-ASCII encoding and converted to lower
1178 case. If a parameter value is split as described in RFC 2231, there
1179 will only be one item for the complete parameter, with the attribute
1180 name lacking any section suffix.
1182 const OString m_sAttribute
;
1184 /** The optional character set specification (see RFC 2231), in US-ASCII
1185 encoding and converted to lower case.
1187 const OString m_sCharset
;
1189 /** The optional language specification (see RFC 2231), in US-ASCII
1190 encoding and converted to lower case.
1192 const OString m_sLanguage
;
1194 /** The attribute value. If the value is a quoted-string, it is
1195 'unpacked.' If a character set is specified, and the value can be
1196 converted to Unicode, this is done. Also, if no character set is
1197 specified, it is first tried to convert the value from UTF-8 encoding
1198 to Unicode, and if that doesn't work (because the value is not in
1199 UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1200 (which will always work). But if a character set is specified and the
1201 value cannot be converted from that character set to Unicode, special
1202 action is taken to produce a value that can possibly be transformed
1203 back into its original form: Any 8-bit character from a non-encoded
1204 part of the original value is directly converted to Unicode
1205 (effectively handling it as if it was ISO-8859-1 encoded), and any
1206 8-bit character from an encoded part of the original value is mapped
1207 to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1208 within Unicode's Private Use Area (effectively adding 0xF800 to the
1209 character's numeric value).
1211 const OUString m_sValue
;
1213 /** This is true if the value is successfully converted to Unicode, and
1214 false if the value is a special mixture of ISO-LATIN-1 characters and
1215 characters from Unicode's Private Use Area.
1217 const bool m_bConverted
;
1219 INetContentTypeParameter(const OString
& rTheAttribute
,
1220 const OString
& rTheCharset
, const OString
& rTheLanguage
,
1221 const OUString
& rTheValue
, bool bTheConverted
)
1222 : m_sAttribute(rTheAttribute
)
1223 , m_sCharset(rTheCharset
)
1224 , m_sLanguage(rTheLanguage
)
1225 , m_sValue(rTheValue
)
1226 , m_bConverted(bTheConverted
)
// A list of INetContentTypeParameter objects stored in a boost::ptr_vector.
// NOTE(review): braces, access specifiers and the lines between embedded
// numbers 1231 and 1237 are not visible in this extract -- the complete
// header may declare further members.
1231 class TOOLS_DLLPUBLIC INetContentTypeParameterList
// Inserts pParameter before position nIndex. The pointer is handed to the
// boost::ptr_vector, which takes ownership of the object. nIndex is not
// range-checked here; callers must pass a value no greater than the current
// number of entries.
1237 void Insert(INetContentTypeParameter
* pParameter
, sal_uIntPtr nIndex
)
1239 maEntries
.insert(maEntries
.begin()+nIndex
,pParameter
);
// Appends pParameter at the end; the boost::ptr_vector takes ownership.
1242 void Append(INetContentTypeParameter
*pParameter
)
1244 maEntries
.push_back(pParameter
);
// Returns the address of the entry at nIndex. No range check is performed
// here, so nIndex must be a valid index. The object remains owned by the
// list.
1247 inline const INetContentTypeParameter
* GetObject(sal_uIntPtr nIndex
) const
1249 return &(maEntries
[nIndex
]);
// Declared here, defined elsewhere. Presumably looks up an entry by its
// attribute name -- TODO confirm the matching rules and the not-found result
// in the implementation.
1252 const INetContentTypeParameter
* find(const OString
& rAttribute
) const;
// Owning container of the entries.
1256 boost::ptr_vector
<INetContentTypeParameter
> maEntries
;
1261 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */