fdo#74697 Add Bluez 5 support for impress remote.
[LibreOffice.git] / include / tools / inetmime.hxx
blobafe4247c6612e380919afcb20caacec9e04e8bca
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef TOOLS_INETMIME_HXX
20 #define TOOLS_INETMIME_HXX
22 #include <boost/ptr_container/ptr_vector.hpp>
24 #include "tools/toolsdllapi.h"
25 #include <rtl/alloc.h>
26 #include <rtl/character.hxx>
27 #include <rtl/string.hxx>
28 #include <rtl/strbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/debug.hxx>
31 #include <tools/errcode.hxx>
32 #include <tools/string.hxx>
34 class DateTime;
35 class INetContentTypeParameterList;
36 class INetMIMECharsetList_Impl;
37 class INetMIMEOutputSink;
// Collection of static helper functions for working with MIME / RFC 822
// message header data: US-ASCII character classification, digit weights,
// UTF-16 / UTF-8 helpers, white space and quoted-string scanning, and
// charset translation. All members declared here are static.
39 class TOOLS_DLLPUBLIC INetMIME
41 public:
42 enum { SOFT_LINE_LENGTH_LIMIT = 76,
43 HARD_LINE_LENGTH_LIMIT = 998 };
45 /** The various types of message header field bodies, with respect to
46 encoding and decoding them.
48 @descr At the moment, five different types of header fields suffice
49 to describe how to encode and decode any known message header field
50 body, but need for more types may arise in the future as new header
51 fields are introduced.
53 @descr The following is an exhaustive list of all the header fields
54 currently known to our implementation. For every header field, it
55 includes a 'canonic' (with regard to capitalization) name, a grammar
56 rule for the body (using RFC 822 and RFC 2234 conventions), a list of
57 relevant sources of information, and the HeaderFieldType value to use
58 with that header field. The list is based on RFC 2076 and draft-
59 palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
60 ietf/jp-ietf-home.html#anchor1003783>).
62 Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
63 bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
64 cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
65 Comments: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
66 Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
67 Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
68 Content-Disposition: disposition-type *(";" disposition-parm)
69 ;RFC 1806; HEADER_FIELD_STRUCTURED
70 Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
71 Content-Location: absoluteURI / relativeURI ;RFC 2110;
72 HEADER_FIELD_TEXT
73 Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
74 HEADER_FIELD_STRUCTURED
75 Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
76 HEADER_FIELD_STRUCTURED
77 Control: *text ;RFC 1036; HEADER_FIELD_TEXT
78 Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
79 Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
80 Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
81 Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
82 Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
83 From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
84 In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
85 HEADER_FIELD_ADDRESS
86 Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
87 MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
88 HEADER_FIELD_STRUCTURED
89 Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
90 Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
91 HEADER_FIELD_STRUCTURED
92 Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
93 Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
94 ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
95 RFC 2047; HEADER_FIELD_STRUCTURED
96 References: *(phrase / msg-id) ;RFC 822, RFC 2047;
97 HEADER_FIELD_ADDRESS
98 Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
99 Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
100 HEADER_FIELD_STRUCTURED
101 Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
102 HEADER_FIELD_ADDRESS
103 Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
104 Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
105 Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
110 HEADER_FIELD_STRUCTURED
111 Return-Receipt-To: address ;Not Internet standard;
112 HEADER_FIELD_ADDRESS
113 Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
114 Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
115 Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
116 To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
117 X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
118 X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
119 X-CHAOS-Recipients: #*("<" atom word ">") ;local;
120 HEADER_FIELD_STRUCTURED
121 X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
122 X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
123 X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
124 X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
125 X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
126 HEADER_FIELD_STRUCTURED
127 Xref: sub-domain
128 1*((atom / string) *("." (atom / string)) ":" msg-number)
129 ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
131 enum HeaderFieldType
133 HEADER_FIELD_TEXT,
134 HEADER_FIELD_STRUCTURED,
135 HEADER_FIELD_PHRASE,
136 HEADER_FIELD_MESSAGE_ID,
137 HEADER_FIELD_ADDRESS
140 /** Check for US-ASCII character.
142 @param nChar Some UCS-4 character.
144 @return True if nChar is a US-ASCII character (0x00--0x7F).
146 static inline bool isUSASCII(sal_uInt32 nChar);
148 /** Check for ISO 8859-1 character.
150 @param nChar Some UCS-4 character.
152 @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
154 static inline bool isISO88591(sal_uInt32 nChar);
156 /** Check for US-ASCII control character.
158 @param nChar Some UCS-4 character.
160 @return True if nChar is a US-ASCII control character (US-ASCII
161 0x00--0x1F or 0x7F).
163 static inline bool isControl(sal_uInt32 nChar);
165 /** Check for US-ASCII white space character.
167 @param nChar Some UCS-4 character.
169 @return True if nChar is a US-ASCII white space character (US-ASCII
170 0x09 or 0x20).
172 static inline bool isWhiteSpace(sal_uInt32 nChar);
174 /** Check for US-ASCII visible character.
176 @param nChar Some UCS-4 character.
178 @return True if nChar is a US-ASCII visible character (US-ASCII
179 0x21--0x7E).
181 static inline bool isVisible(sal_uInt32 nChar);
183 /** Check for US-ASCII digit character.
185 @param nChar Some UCS-4 character.
187 @return True if nChar is a US-ASCII (decimal) digit character (US-
188 ASCII '0'--'9').
190 static inline bool isDigit(sal_uInt32 nChar);
192 /** Check for US-ASCII canonic hexadecimal digit character.
194 @param nChar Some UCS-4 character.
196 @return True if nChar is a US-ASCII canonic (i.e., upper case)
197 hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
199 static inline bool isCanonicHexDigit(sal_uInt32 nChar);
201 /** Check for US-ASCII hexadecimal digit character.
203 @param nChar Some UCS-4 character.
205 @return True if nChar is a US-ASCII hexadecimal digit character (US-
206 ASCII '0'--'9', 'A'--'F', 'a'--'f').
208 static inline bool isHexDigit(sal_uInt32 nChar);
210 /** Check for US-ASCII upper case character.
212 @param nChar Some UCS-4 character.
214 @return True if nChar is a US-ASCII upper case alphabetic character
215 (US-ASCII 'A'--'Z').
217 static inline bool isUpperCase(sal_uInt32 nChar);
219 /** Check for US-ASCII lower case character.
221 @param nChar Some UCS-4 character.
223 @return True if nChar is a US-ASCII lower case alphabetic character
224 (US-ASCII 'a'--'z').
226 static inline bool isLowerCase(sal_uInt32 nChar);
228 /** Check for US-ASCII alphabetic character.
230 @param nChar Some UCS-4 character.
232 @return True if nChar is a US-ASCII alphabetic character (US-ASCII
233 'A'--'Z' or 'a'--'z').
235 static inline bool isAlpha(sal_uInt32 nChar);
237 /** Check for US-ASCII alphanumeric character.
239 @param nChar Some UCS-4 character.
241 @return True if nChar is a US-ASCII alphanumeric character (US-ASCII
242 '0'--'9', 'A'--'Z' or 'a'--'z').
244 static inline bool isAlphanumeric(sal_uInt32 nChar);
246 /** Check for US-ASCII Base 64 digit character.
248 @param nChar Some UCS-4 character.
250 @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
251 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
253 static inline bool isBase64Digit(sal_uInt32 nChar);
255 /** Check whether some character is valid within an RFC 822 <atom>.
257 @param nChar Some UCS-4 character.
259 @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
260 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
261 '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
263 static bool isAtomChar(sal_uInt32 nChar);
265 /** Check whether some character is valid within an RFC 2045 <token>.
267 @param nChar Some UCS-4 character.
269 @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
270 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
271 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
273 static bool isTokenChar(sal_uInt32 nChar);
275 /** Check whether some character is valid within an RFC 2047 <token>.
277 @param nChar Some UCS-4 character.
279 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
280 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
281 '-', '^', '_', '`', '{', '|', '}', or '~').
283 static bool isEncodedWordTokenChar(sal_uInt32 nChar);
285 /** Check whether some character is valid within an RFC 2060 <atom>.
287 @param nChar Some UCS-4 character.
289 @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
290 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
291 '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
292 '|', '}', or '~').
294 static bool isIMAPAtomChar(sal_uInt32 nChar);
296 /** Translate a US-ASCII character to upper case.
298 @param nChar Some UCS-4 character.
300 @return If nChar is a US-ASCII lower case character (US-ASCII
301 'a'--'z'), return the corresponding US-ASCII upper case character (US-
302 ASCII 'A'--'Z'); otherwise, return nChar unchanged.
304 static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
306 /** Translate a US-ASCII character to lower case.
308 @param nChar Some UCS-4 character.
310 @return If nChar is a US-ASCII upper case character (US-ASCII
311 'A'--'Z'), return the corresponding US-ASCII lower case character (US-
312 ASCII 'a'--'z'); otherwise, return nChar unchanged.
314 static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
316 /** Get the digit weight of a US-ASCII character.
318 @param nChar Some UCS-4 character.
320 @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
321 '0'--'9'), return the corresponding weight (0--9); otherwise,
322 return -1.
324 static inline int getWeight(sal_uInt32 nChar);
326 /** Get the hexadecimal digit weight of a US-ASCII character.
328 @param nChar Some UCS-4 character.
330 @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
331 '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
332 (0--15); otherwise, return -1.
334 static inline int getHexWeight(sal_uInt32 nChar);
336 /** Get the Base 64 digit weight of a US-ASCII character.
338 @param nChar Some UCS-4 character.
340 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
341 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
342 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
343 character (US-ASCII '='), return -1; otherwise, return -2.
345 static inline int getBase64Weight(sal_uInt32 nChar);
347 /** Get a hexadecimal digit encoded as US-ASCII.
349 @param nWeight Must be in the range 0--15, inclusive.
351 @return The canonic (i.e., upper case) hexadecimal digit
352 corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
354 static sal_uInt32 getHexDigit(int nWeight);
// True if nUTF16 lies in the UTF-16 high surrogate range 0xD800--0xDBFF.
356 static inline bool isHighSurrogate(sal_uInt32 nUTF16);
// True if nUTF16 lies in the UTF-16 low surrogate range 0xDC00--0xDFFF.
358 static inline bool isLowSurrogate(sal_uInt32 nUTF16);
// Combine a high and a low surrogate into one value built from the low ten
// bits of each; a debug assertion checks that both arguments really are
// surrogates of the expected kind.
360 static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
361 sal_Unicode cLowSurrogate);
363 /** Check two US-ASCII strings for equality, ignoring case.
365 @param pBegin1 Points to the start of the first string, must not be
366 null.
368 @param pEnd1 Points past the end of the first string, must be >=
369 pBegin1.
371 @param pString2 Points to the start of the null terminated second
372 string, must not be null.
374 @return True if the two strings are equal, ignoring the case of US-
375 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
377 static bool equalIgnoreCase(const sal_Char * pBegin1,
378 const sal_Char * pEnd1,
379 const sal_Char * pString2);
381 /** Check two US-ASCII strings for equality, ignoring case.
383 @param pBegin1 Points to the start of the first string, must not be
384 null.
386 @param pEnd1 Points past the end of the first string, must be >=
387 pBegin1.
389 @param pString2 Points to the start of the null terminated second
390 string, must not be null.
392 @return True if the two strings are equal, ignoring the case of US-
393 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
395 static bool equalIgnoreCase(const sal_Unicode * pBegin1,
396 const sal_Unicode * pEnd1,
397 const sal_Char * pString2);
// True if [pBegin, pEnd) holds at least two elements and starts with CR, LF.
399 static inline bool startsWithLineBreak(const sal_Char * pBegin,
400 const sal_Char * pEnd);
402 static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
403 const sal_Unicode * pEnd);
// True if [pBegin, pEnd) holds at least three elements and starts with CR, LF
// followed by a white space character (see isWhiteSpace()).
405 static inline bool startsWithLineFolding(const sal_Char * pBegin,
406 const sal_Char * pEnd);
408 static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
409 const sal_Unicode * pEnd);
// True if the non-empty range [pBegin, pEnd) starts with a white space
// character or with a line folding (see startsWithLineFolding()).
411 static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
412 const sal_Char * pEnd);
414 static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
415 pBegin,
416 const sal_Unicode * pEnd);
418 static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
419 const sal_Unicode * pEnd);
421 static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
422 pBegin,
423 const sal_Unicode *
424 pEnd);
// True if nChar is '"' or '\\'.
426 static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
428 static const sal_Char * skipQuotedString(const sal_Char * pBegin,
429 const sal_Char * pEnd);
431 static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
432 const sal_Unicode * pEnd);
434 static bool scanUnsigned(const sal_Unicode *& rBegin,
435 const sal_Unicode * pEnd, bool bLeadingZeroes,
436 sal_uInt32 & rValue);
438 static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
439 const sal_Unicode * pEnd,
440 sal_uInt32 nOpening,
441 sal_uInt32 nClosing,
442 sal_Size & rLength,
443 bool & rModify);
445 static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
446 sal_Unicode const * pEnd,
447 INetContentTypeParameterList *
448 pParameters);
// When WNT is defined, maps RTL_TEXTENCODING_MS_1252 to
// RTL_TEXTENCODING_ISO_8859_1; every other encoding (and every encoding on
// other platforms) is returned unchanged.
450 static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
451 eEncoding);
// Inverse of translateToMIME(): when WNT is defined, maps
// RTL_TEXTENCODING_ISO_8859_1 to RTL_TEXTENCODING_MS_1252; otherwise returns
// eEncoding unchanged.
453 static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
454 eEncoding);
456 static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
458 static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
459 const sal_Char * pEnd);
// True if rtl_isOctetTextEncoding() reports sal_True for eEncoding.
461 static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
463 static INetMIMECharsetList_Impl *
464 createPreferredCharsetList(rtl_TextEncoding eEncoding);
466 static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
467 const sal_Char * pEnd,
468 rtl_TextEncoding eEncoding,
469 sal_Size & rSize);
471 static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
472 const sal_Unicode * pEnd,
473 rtl_TextEncoding eEncoding,
474 sal_Size & rSize);
476 /** Get the number of octets required to encode a UCS-4 character using
477 UTF-8 encoding.
479 @param nChar Some UCS-4 character.
481 @return The number of octets required (in the range 1--6, inclusive).
483 static inline int getUTF8OctetCount(sal_uInt32 nChar);
// Writes '=' followed by the two canonic hexadecimal digits of nChar (which
// must be <= 0xFF) to rSink.
485 static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
486 sal_uInt32 nChar);
488 static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
490 static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
491 HeaderFieldType eType,
492 const OUString& rBody,
493 rtl_TextEncoding ePreferredEncoding,
494 bool bInitialSpace = true);
496 static bool translateUTF8Char(const sal_Char *& rBegin,
497 const sal_Char * pEnd,
498 rtl_TextEncoding eEncoding,
499 sal_uInt32 & rCharacter);
501 static OUString decodeHeaderFieldBody(HeaderFieldType eType,
502 const OString& rBody);
504 // #i70651#: Prevent warnings on Mac OS X.
505 #ifdef MACOSX
506 #pragma GCC system_header
507 #endif
509 /** Get the UTF-32 character at the head of a UTF-16 encoded string.
511 @param rBegin Points to the start of the UTF-16 encoded string, must
512 not be null. On exit, it points past the first UTF-32 character's
513 encoding.
515 @param pEnd Points past the end of the UTF-16 encoded string, must be
516 strictly greater than rBegin.
518 @return The UCS-4 character at the head of the UTF-16 encoded string.
519 If the string does not start with a valid UTF-16 surrogate pair, the
520 first UTF-16 value is returned.
522 static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
523 const sal_Unicode * pEnd);
525 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
527 @param pBuffer Points to a buffer, must not be null.
529 @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
531 @return A pointer past the UTF-16 characters put into the buffer
532 (i.e., pBuffer + 1 or pBuffer + 2).
534 static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
535 sal_uInt32 nUTF32);
538 // static
539 inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
541 return rtl::isAscii(nChar);
544 // static
545 inline bool INetMIME::isISO88591(sal_uInt32 nChar)
547 return nChar <= 0xFF;
550 // static
551 inline bool INetMIME::isControl(sal_uInt32 nChar)
553 return nChar <= 0x1F || nChar == 0x7F;
556 // static
557 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
559 return nChar == '\t' || nChar == ' ';
562 // static
563 inline bool INetMIME::isVisible(sal_uInt32 nChar)
565 return nChar >= '!' && nChar <= '~';
568 // static
569 inline bool INetMIME::isDigit(sal_uInt32 nChar)
571 return rtl::isAsciiDigit(nChar);
574 // static
575 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
577 return rtl::isAsciiCanonicHexDigit(nChar);
580 // static
581 inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
583 return rtl::isAsciiHexDigit(nChar);
586 // static
587 inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
589 return rtl::isAsciiUpperCase(nChar);
592 // static
593 inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
595 return rtl::isAsciiLowerCase(nChar);
598 // static
599 inline bool INetMIME::isAlpha(sal_uInt32 nChar)
601 return rtl::isAsciiAlpha(nChar);
604 // static
605 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
607 return rtl::isAsciiAlphanumeric(nChar);
610 // static
611 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
613 return rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar) || rtl::isAsciiDigit(nChar)
614 || nChar == '+' || nChar == '/';
617 // static
618 inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
620 return rtl::isAsciiLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
623 // static
624 inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
626 return rtl::isAsciiUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
629 // static
630 inline int INetMIME::getWeight(sal_uInt32 nChar)
632 return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : -1;
635 // static
636 inline int INetMIME::getHexWeight(sal_uInt32 nChar)
638 return rtl::isAsciiDigit(nChar) ? int(nChar - '0') :
639 nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
640 nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
643 // static
644 inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
646 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
647 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
648 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
649 nChar == '+' ? 62 :
650 nChar == '/' ? 63 :
651 nChar == '=' ? -1 : -2;
654 // static
655 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
657 return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
660 // static
661 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
663 return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
666 // static
667 inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
668 sal_Unicode cLowSurrogate)
670 DBG_ASSERT(isHighSurrogate(cHighSurrogate)
671 && isLowSurrogate(cLowSurrogate),
672 "INetMIME::toUTF32(): Bad chars");
673 return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
674 | (sal_uInt32(cLowSurrogate) & 0x3FF);
677 // static
678 inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
679 const sal_Char * pEnd)
681 DBG_ASSERT(pBegin && pBegin <= pEnd,
682 "INetMIME::startsWithLineBreak(): Bad sequence");
684 return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
685 // CR, LF
688 // static
689 inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
690 const sal_Unicode * pEnd)
692 DBG_ASSERT(pBegin && pBegin <= pEnd,
693 "INetMIME::startsWithLineBreak(): Bad sequence");
695 return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
696 // CR, LF
699 // static
700 inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
701 const sal_Char * pEnd)
703 DBG_ASSERT(pBegin && pBegin <= pEnd,
704 "INetMIME::startsWithLineFolding(): Bad sequence");
706 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
707 && isWhiteSpace(pBegin[2]); // CR, LF
710 // static
711 inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
712 const sal_Unicode * pEnd)
714 DBG_ASSERT(pBegin && pBegin <= pEnd,
715 "INetMIME::startsWithLineFolding(): Bad sequence");
717 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
718 && isWhiteSpace(pBegin[2]); // CR, LF
721 // static
722 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
723 const sal_Char * pEnd)
725 DBG_ASSERT(pBegin && pBegin <= pEnd,
726 "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
728 return pBegin != pEnd
729 && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
732 // static
733 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
735 return nChar == '"' || nChar == '\\';
738 // static
739 inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
741 #if defined WNT
742 return eEncoding == RTL_TEXTENCODING_MS_1252 ?
743 RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
744 #else // WNT
745 return eEncoding;
746 #endif // WNT
749 // static
750 inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
751 eEncoding)
753 #if defined WNT
754 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
755 RTL_TEXTENCODING_MS_1252 : eEncoding;
756 #else
757 return eEncoding;
758 #endif
761 // static
762 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
764 return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
767 // static
768 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
770 DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
772 return nChar < 0x80 ? 1 :
773 nChar < 0x800 ? 2 :
774 nChar <= 0x10000 ? 3 :
775 nChar <= 0x200000 ? 4 :
776 nChar <= 0x4000000 ? 5 : 6;
779 // static
780 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
781 const sal_Unicode * pEnd)
783 DBG_ASSERT(rBegin && rBegin < pEnd,
784 "INetMIME::getUTF32Character(): Bad sequence");
785 if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
786 && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
788 sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
789 return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
791 else
792 return *rBegin++;
795 // static
796 inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
797 sal_uInt32 nUTF32)
799 DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
800 if (nUTF32 < 0x10000)
801 *pBuffer++ = sal_Unicode(nUTF32);
802 else
804 nUTF32 -= 0x10000;
805 *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
806 *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
808 return pBuffer;
// Abstract sink for octets. Subclasses implement the pure virtual
// writeSequence(const sal_Char *, const sal_Char *); the public write()
// functions and operator << overloads forward to writeSequence() and add
// the number of elements written to the current column counter.
811 class INetMIMEOutputSink
813 public:
// Value that can be passed as line length limit (largest sal_uInt32 value).
814 static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
816 private:
// Current column; advanced by the write functions below.
817 sal_uInt32 m_nColumn;
// Line length limit as set by the constructor or setLineLengthLimit().
818 sal_uInt32 m_nLineLengthLimit;
820 protected:
821 /** Write a sequence of octets.
823 @param pBegin Points to the start of the sequence, must not be null.
825 @param pEnd Points past the end of the sequence, must be >= pBegin.
827 virtual void writeSequence(const sal_Char * pBegin,
828 const sal_Char * pEnd) = 0;
830 /** Write a null terminated sequence of octets (without the terminating
831 null).
833 @param pSequence A null terminated sequence of octets, must not be
834 null.
836 @return The length of pSequence (without the terminating null).
838 virtual sal_Size writeSequence(const sal_Char * pSequence);
840 /** Write a sequence of octets.
842 @descr The supplied sequence of UCS-4 characters is interpreted as a
843 sequence of octets. It is an error if any of the elements of the
844 sequence has a numerical value greater than 255.
846 @param pBegin Points to the start of the sequence, must not be null.
848 @param pEnd Points past the end of the sequence, must be >= pBegin.
850 virtual void writeSequence(const sal_uInt32 * pBegin,
851 const sal_uInt32 * pEnd);
853 /** Write a sequence of octets.
855 @descr The supplied sequence of Unicode characters is interpreted as
856 a sequence of octets. It is an error if any of the elements of the
857 sequence has a numerical value greater than 255.
859 @param pBegin Points to the start of the sequence, must not be null.
861 @param pEnd Points past the end of the sequence, must be >= pBegin.
863 virtual void writeSequence(const sal_Unicode * pBegin,
864 const sal_Unicode * pEnd);
866 public:
// Construct a sink starting at column nTheColumn (default 0) with line
// length limit nTheLineLengthLimit (default INetMIME::SOFT_LINE_LENGTH_LIMIT).
867 INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
868 sal_uInt32 nTheLineLengthLimit
869 = INetMIME::SOFT_LINE_LENGTH_LIMIT):
870 m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
872 virtual ~INetMIMEOutputSink() {}
874 /** Get the current column.
876 @return The current column (starting from zero).
878 sal_uInt32 getColumn() const { return m_nColumn; }
// Get the line length limit currently stored in this sink.
880 sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
// Replace the line length limit stored in this sink.
882 void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
883 { m_nLineLengthLimit = nTheLineLengthLimit; }
885 virtual ErrCode getError() const;
887 /** Write a sequence of octets.
889 @param pBegin Points to the start of the sequence, must not be null.
891 @param pEnd Points past the end of the sequence, must be >= pBegin.
893 inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
895 /** Write a sequence of octets.
897 @param pBegin Points to the start of the sequence, must not be null.
899 @param nLength The length of the sequence.
901 void write(const sal_Char * pBegin, sal_Size nLength)
902 { write(pBegin, pBegin + nLength); }
904 /** Write a sequence of octets.
906 @descr The supplied sequence of UCS-4 characters is interpreted as a
907 sequence of octets. It is an error if any of the elements of the
908 sequence has a numerical value greater than 255.
910 @param pBegin Points to the start of the sequence, must not be null.
912 @param pEnd Points past the end of the sequence, must be >= pBegin.
914 inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
916 /** Write a sequence of octets.
918 @descr The supplied sequence of Unicode characters is interpreted as
919 a sequence of octets. It is an error if any of the elements of the
920 sequence has a numerical value greater than 255.
922 @param pBegin Points to the start of the sequence, must not be null.
924 @param pEnd Points past the end of the sequence, must be >= pBegin.
926 inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
928 /** Write a sequence of octets.
930 @param rOctets An OString, interpreted as a sequence of octets.
932 @param nBegin The offset of the first character to write.
934 @param nEnd The offset past the last character to write.
// NOTE(review): nothing here checks that nBegin <= nEnd or that nEnd lies
// within rOctets; presumably callers guarantee this -- confirm.
936 void write(const OString& rOctets, xub_StrLen nBegin,
937 xub_StrLen nEnd)
939 writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
940 m_nColumn += nEnd - nBegin;
943 /** Write a single octet.
945 @param nOctet Some octet.
947 @return This instance.
949 inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
951 /** Write a null terminated sequence of octets (without the terminating
952 null).
954 @param pOctets A null terminated sequence of octets, must not be
955 null.
957 @return This instance.
959 inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
961 /** Write a sequence of octets.
963 @param rOctets An OString, interpreted as a sequence of octets.
965 @return This instance.
967 INetMIMEOutputSink & operator <<(const OString& rOctets)
969 writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
970 m_nColumn += rOctets.getLength();
971 return *this;
974 /** Call a manipulator function.
976 @param pManipulator A manipulator function.
978 @return Whatever the manipulator function returns.
980 INetMIMEOutputSink &
981 operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
982 { return pManipulator(*this); }
984 /** Write a line end (CR LF).
986 void writeLineEnd();
988 /** A manipulator function that writes a line end (CR LF).
990 @param rSink Some sink.
992 @return The sink rSink.
994 static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
997 inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
998 const sal_Char * pEnd)
1000 writeSequence(pBegin, pEnd);
1001 m_nColumn += pEnd - pBegin;
1004 inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1005 const sal_uInt32 * pEnd)
1007 writeSequence(pBegin, pEnd);
1008 m_nColumn += pEnd - pBegin;
1011 inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1012 const sal_Unicode * pEnd)
1014 writeSequence(pBegin, pEnd);
1015 m_nColumn += pEnd - pBegin;
1018 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1020 writeSequence(&nOctet, &nOctet + 1);
1021 ++m_nColumn;
1022 return *this;
1025 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1026 pOctets)
1028 m_nColumn += writeSequence(pOctets);
1029 return *this;
1032 // static
1033 inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1034 rSink)
1036 rSink.writeLineEnd();
1037 return rSink;
1040 // static
1041 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1042 sal_uInt32 nChar)
1044 DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1045 rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1046 << sal_uInt8(getHexDigit(nChar & 15));
1049 class INetMIMEStringOutputSink: public INetMIMEOutputSink
1051 OStringBuffer m_aBuffer;
1053 using INetMIMEOutputSink::writeSequence;
1055 virtual void writeSequence(const sal_Char * pBegin,
1056 const sal_Char * pEnd);
1058 public:
1059 inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1060 sal_uInt32 nLineLengthLimit
1061 = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1062 INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
1064 virtual ErrCode getError() const;
1066 OString takeBuffer()
1068 return m_aBuffer.makeStringAndClear();
1072 class INetMIMEEncodedWordOutputSink
1074 public:
1075 enum Context { CONTEXT_TEXT = 1,
1076 CONTEXT_COMMENT = 2,
1077 CONTEXT_PHRASE = 4 };
1079 enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1081 private:
1082 enum { BUFFER_SIZE = 256 };
1084 enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1085 CODING_ENCODED_TERMINATED };
1087 enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1088 STATE_FIRST_QUESTION, STATE_CHARSET,
1089 STATE_SECOND_QUESTION, STATE_ENCODING,
1090 STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1091 STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1092 STATE_BAD };
1094 INetMIMEOutputSink & m_rSink;
1095 Context m_eContext;
1096 Space m_eInitialSpace;
1097 sal_uInt32 m_nExtraSpaces;
1098 INetMIMECharsetList_Impl * m_pEncodingList;
1099 sal_Unicode * m_pBuffer;
1100 sal_uInt32 m_nBufferSize;
1101 sal_Unicode * m_pBufferEnd;
1102 Coding m_ePrevCoding;
1103 rtl_TextEncoding m_ePrevMIMEEncoding;
1104 Coding m_eCoding;
1105 sal_uInt32 m_nQuotedEscaped;
1106 EncodedWordState m_eEncodedWordState;
1108 inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1110 void finish(bool bWriteTrailer);
1112 public:
1113 inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1114 Context eTheContext,
1115 Space eTheInitialSpace,
1116 rtl_TextEncoding ePreferredEncoding);
1118 ~INetMIMEEncodedWordOutputSink();
1120 INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1122 inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1124 inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1126 inline bool flush();
1129 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1130 INetMIMEOutputSink & rTheSink, Context eTheContext,
1131 Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1132 m_rSink(rTheSink),
1133 m_eContext(eTheContext),
1134 m_eInitialSpace(eTheInitialSpace),
1135 m_nExtraSpaces(0),
1136 m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1137 m_ePrevCoding(CODING_NONE),
1138 m_eCoding(CODING_NONE),
1139 m_nQuotedEscaped(0),
1140 m_eEncodedWordState(STATE_INITIAL)
1142 m_nBufferSize = BUFFER_SIZE;
1143 m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1144 m_nBufferSize
1145 * sizeof (sal_Unicode)));
1146 m_pBufferEnd = m_pBuffer;
1149 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1150 const sal_Char * pEnd)
1152 DBG_ASSERT(pBegin && pBegin <= pEnd,
1153 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1155 while (pBegin != pEnd)
1156 operator <<(*pBegin++);
1159 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1160 const sal_Unicode * pEnd)
1162 DBG_ASSERT(pBegin && pBegin <= pEnd,
1163 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1165 while (pBegin != pEnd)
1166 operator <<(*pBegin++);
1169 inline bool INetMIMEEncodedWordOutputSink::flush()
1171 finish(true);
1172 return m_ePrevCoding != CODING_NONE;
1175 struct INetContentTypeParameter
1177 /** The name of the attribute, in US-ASCII encoding and converted to lower
1178 case. If a parameter value is split as described in RFC 2231, there
1179 will only be one item for the complete parameter, with the attribute
1180 name lacking any section suffix.
1182 const OString m_sAttribute;
1184 /** The optional character set specification (see RFC 2231), in US-ASCII
1185 encoding and converted to lower case.
1187 const OString m_sCharset;
1189 /** The optional language specification (see RFC 2231), in US-ASCII
1190 encoding and converted to lower case.
1192 const OString m_sLanguage;
1194 /** The attribute value. If the value is a quoted-string, it is
1195 'unpacked.' If a character set is specified, and the value can be
1196 converted to Unicode, this is done. Also, if no character set is
1197 specified, it is first tried to convert the value from UTF-8 encoding
1198 to Unicode, and if that doesn't work (because the value is not in
1199 UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1200 (which will always work). But if a character set is specified and the
1201 value cannot be converted from that character set to Unicode, special
1202 action is taken to produce a value that can possibly be transformed
1203 back into its original form: Any 8-bit character from a non-encoded
1204 part of the original value is directly converted to Unicode
1205 (effectively handling it as if it was ISO-8859-1 encoded), and any
1206 8-bit character from an encoded part of the original value is mapped
1207 to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1208 within Unicode's Private Use Area (effectively adding 0xF800 to the
1209 character's numeric value).
1211 const OUString m_sValue;
1213 /** This is true if the value is successfully converted to Unicode, and
1214 false if the value is a special mixture of ISO-LATIN-1 characters and
1215 characters from Unicode's Private Use Area.
1217 const bool m_bConverted;
1219 INetContentTypeParameter(const OString& rTheAttribute,
1220 const OString& rTheCharset, const OString& rTheLanguage,
1221 const OUString& rTheValue, bool bTheConverted)
1222 : m_sAttribute(rTheAttribute)
1223 , m_sCharset(rTheCharset)
1224 , m_sLanguage(rTheLanguage)
1225 , m_sValue(rTheValue)
1226 , m_bConverted(bTheConverted)
1231 class TOOLS_DLLPUBLIC INetContentTypeParameterList
1233 public:
1235 void Clear();
1237 void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1239 maEntries.insert(maEntries.begin()+nIndex,pParameter);
1242 void Append(INetContentTypeParameter *pParameter)
1244 maEntries.push_back(pParameter);
1247 inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const
1249 return &(maEntries[nIndex]);
1252 const INetContentTypeParameter * find(const OString& rAttribute) const;
1254 private:
1256 boost::ptr_vector<INetContentTypeParameter> maEntries;
1259 #endif
1261 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */