bump product version to 4.1.6.2
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blob30bdf43f9a1eac26932d435b0a88510a21e5d4a2
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <cstddef>
21 #include <limits>
23 #include <rtl/strbuf.hxx>
24 #include <rtl/tencinfo.h>
25 #include <tools/inetmime.hxx>
27 namespace unnamed_tools_inetmime {} using namespace unnamed_tools_inetmime;
28 // unnamed namespaces don't work well yet
30 namespace unnamed_tools_inetmime {
32 class Charset
34 rtl_TextEncoding m_eEncoding;
35 const sal_uInt32 * m_pRanges;
37 public:
38 inline Charset(rtl_TextEncoding eTheEncoding,
39 const sal_uInt32 * pTheRanges);
41 rtl_TextEncoding getEncoding() const { return m_eEncoding; }
43 bool contains(sal_uInt32 nChar) const;
46 inline Charset::Charset(rtl_TextEncoding eTheEncoding,
47 const sal_uInt32 * pTheRanges):
48 m_eEncoding(eTheEncoding),
49 m_pRanges(pTheRanges)
51 DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges");
54 void appendISO88591(OUString & rText, sal_Char const * pBegin,
55 sal_Char const * pEnd);
59 class INetMIMECharsetList_Impl
61 struct Node
63 Charset m_aCharset;
64 bool m_bDisabled;
65 Node * m_pNext;
67 inline Node(const Charset & rTheCharset, bool bTheDisabled,
68 Node * pTheNext);
71 Node * m_pFirst;
73 public:
74 INetMIMECharsetList_Impl(): m_pFirst(0) {}
76 ~INetMIMECharsetList_Impl();
78 void prepend(const Charset & rCharset)
79 { m_pFirst = new Node(rCharset, false, m_pFirst); }
81 void includes(sal_uInt32 nChar);
83 rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault
84 = RTL_TEXTENCODING_DONTKNOW)
85 const;
87 void reset();
90 inline INetMIMECharsetList_Impl::Node::Node(const Charset & rTheCharset,
91 bool bTheDisabled,
92 Node * pTheNext):
93 m_aCharset(rTheCharset),
94 m_bDisabled(bTheDisabled),
95 m_pNext(pTheNext)
98 namespace unnamed_tools_inetmime {
100 struct Parameter
102 Parameter * m_pNext;
103 OString m_aAttribute;
104 OString m_aCharset;
105 OString m_aLanguage;
106 OString m_aValue;
107 sal_uInt32 m_nSection;
108 bool m_bExtended;
110 inline Parameter(Parameter * pTheNext, const OString& rTheAttribute,
111 const OString& rTheCharset,
112 const OString& rTheLanguage,
113 const OString& rTheValue, sal_uInt32 nTheSection,
114 bool bTheExtended);
117 inline Parameter::Parameter(Parameter * pTheNext,
118 const OString& rTheAttribute,
119 const OString& rTheCharset,
120 const OString& rTheLanguage,
121 const OString& rTheValue,
122 sal_uInt32 nTheSection, bool bTheExtended):
123 m_pNext(pTheNext),
124 m_aAttribute(rTheAttribute),
125 m_aCharset(rTheCharset),
126 m_aLanguage(rTheLanguage),
127 m_aValue(rTheValue),
128 m_nSection(nTheSection),
129 m_bExtended(bTheExtended)
132 struct ParameterList
134 Parameter * m_pList;
136 ParameterList(): m_pList(0) {}
138 inline ~ParameterList();
140 Parameter ** find(const OString& rAttribute, sal_uInt32 nSection,
141 bool & rPresent);
144 inline ParameterList::~ParameterList()
146 while (m_pList)
148 Parameter * pNext = m_pList->m_pNext;
149 delete m_pList;
150 m_pList = pNext;
154 bool parseParameters(ParameterList const & rInput,
155 INetContentTypeParameterList * pOutput);
159 // Charset
161 bool Charset::contains(sal_uInt32 nChar) const
163 for (const sal_uInt32 * p = m_pRanges;;)
165 if (nChar < *p++)
166 return false;
167 if (nChar <= *p++)
168 return true;
172 // appendISO88591
174 namespace unnamed_tools_inetmime {
176 void appendISO88591(OUString & rText, sal_Char const * pBegin,
177 sal_Char const * pEnd)
179 xub_StrLen nLength = static_cast< xub_StrLen >(pEnd - pBegin);
180 sal_Unicode * pBuffer = new sal_Unicode[nLength];
181 for (sal_Unicode * p = pBuffer; pBegin != pEnd;)
182 *p++ = sal_uChar(*pBegin++);
183 rText += OUString(pBuffer, nLength);
184 delete[] pBuffer;
189 // INetMIMECharsetList_Impl
191 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl()
193 while (m_pFirst)
195 Node * pRemove = m_pFirst;
196 m_pFirst = m_pFirst->m_pNext;
197 delete pRemove;
201 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar)
203 for (Node * p = m_pFirst; p; p = p->m_pNext)
204 if (!(p->m_bDisabled || p->m_aCharset.contains(nChar)))
205 p->m_bDisabled = true;
208 rtl_TextEncoding INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault)
209 const
211 for (Node * p = m_pFirst; p; p = p->m_pNext)
212 if (!p->m_bDisabled)
213 return p->m_aCharset.getEncoding();
214 return eDefault;
217 void INetMIMECharsetList_Impl::reset()
219 for (Node * p = m_pFirst; p; p = p->m_pNext)
220 p->m_bDisabled = false;
223 // ParameterList
225 Parameter ** ParameterList::find(const OString& rAttribute,
226 sal_uInt32 nSection, bool & rPresent)
228 Parameter ** p = &m_pList;
229 for (; *p; p = &(*p)->m_pNext)
231 sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute);
232 if (nCompare > 0)
233 break;
234 else if (nCompare == 0)
236 if (nSection > (*p)->m_nSection)
237 break;
238 else if (nSection == (*p)->m_nSection)
240 rPresent = true;
241 return p;
245 rPresent = false;
246 return p;
249 // parseParameters
251 namespace unnamed_tools_inetmime {
253 bool parseParameters(ParameterList const & rInput,
254 INetContentTypeParameterList * pOutput)
256 if (pOutput)
257 pOutput->Clear();
259 Parameter * pPrev = 0;
260 for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
262 if (p->m_nSection > 0
263 && (!pPrev
264 || pPrev->m_nSection != p->m_nSection - 1
265 || pPrev->m_aAttribute != p->m_aAttribute))
266 return false;
267 pPrev = p;
270 if (pOutput)
271 for (Parameter * p = rInput.m_pList; p;)
273 bool bCharset = !p->m_aCharset.isEmpty();
274 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
275 if (bCharset)
276 eEncoding
277 = INetMIME::getCharsetEncoding(p->m_aCharset.getStr(),
278 p->m_aCharset.getStr()
279 + rInput.m_pList->
280 m_aCharset.
281 getLength());
282 OUString aValue;
283 bool bBadEncoding = false;
284 Parameter * pNext = p;
287 sal_Size nSize;
288 sal_Unicode * pUnicode
289 = INetMIME::convertToUnicode(pNext->m_aValue.getStr(),
290 pNext->m_aValue.getStr()
291 + pNext->m_aValue.getLength(),
292 bCharset && p->m_bExtended ?
293 eEncoding :
294 RTL_TEXTENCODING_UTF8,
295 nSize);
296 if (!pUnicode && !(bCharset && p->m_bExtended))
297 pUnicode = INetMIME::convertToUnicode(
298 pNext->m_aValue.getStr(),
299 pNext->m_aValue.getStr()
300 + pNext->m_aValue.getLength(),
301 RTL_TEXTENCODING_ISO_8859_1, nSize);
302 if (!pUnicode)
304 bBadEncoding = true;
305 break;
307 aValue += OUString(pUnicode, static_cast<sal_Int32>(nSize));
308 delete[] pUnicode;
309 pNext = pNext->m_pNext;
311 while (pNext && pNext->m_nSection > 0);
312 if (bBadEncoding)
314 aValue = OUString();
315 for (pNext = p;;)
317 if (pNext->m_bExtended)
319 for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
320 aValue += OUString(sal_Unicode(
321 sal_Unicode(
322 sal_uChar(pNext->m_aValue[i]))
323 | 0xF800));
325 else
327 for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
328 aValue += OUString( sal_Unicode(sal_uChar(pNext->m_aValue[i])) );
330 pNext = pNext->m_pNext;
331 if (!pNext || pNext->m_nSection == 0)
332 break;
335 pOutput->Append(new INetContentTypeParameter(p->m_aAttribute,
336 p->m_aCharset,
337 p->m_aLanguage,
338 aValue,
339 !bBadEncoding));
340 p = pNext;
342 return true;
347 // INetMIME
349 // static
350 bool INetMIME::isAtomChar(sal_uInt32 nChar)
352 static const bool aMap[128]
353 = { false, false, false, false, false, false, false, false,
354 false, false, false, false, false, false, false, false,
355 false, false, false, false, false, false, false, false,
356 false, false, false, false, false, false, false, false,
357 false, true, false, true, true, true, true, true, // !"#$%&'
358 false, false, true, true, false, true, false, true, //()*+,-./
359 true, true, true, true, true, true, true, true, //01234567
360 true, true, false, false, false, true, false, true, //89:;<=>?
361 false, true, true, true, true, true, true, true, //@ABCDEFG
362 true, true, true, true, true, true, true, true, //HIJKLMNO
363 true, true, true, true, true, true, true, true, //PQRSTUVW
364 true, true, true, false, false, false, true, true, //XYZ[\]^_
365 true, true, true, true, true, true, true, true, //`abcdefg
366 true, true, true, true, true, true, true, true, //hijklmno
367 true, true, true, true, true, true, true, true, //pqrstuvw
368 true, true, true, true, true, true, true, false //xyz{|}~
370 return isUSASCII(nChar) && aMap[nChar];
373 // static
374 bool INetMIME::isTokenChar(sal_uInt32 nChar)
376 static const sal_Char aMap[128]
377 = { false, false, false, false, false, false, false, false,
378 false, false, false, false, false, false, false, false,
379 false, false, false, false, false, false, false, false,
380 false, false, false, false, false, false, false, false,
381 false, true, false, true, true, true, true, true, // !"#$%&'
382 false, false, true, true, false, true, true, false, //()*+,-./
383 true, true, true, true, true, true, true, true, //01234567
384 true, true, false, false, false, false, false, false, //89:;<=>?
385 false, true, true, true, true, true, true, true, //@ABCDEFG
386 true, true, true, true, true, true, true, true, //HIJKLMNO
387 true, true, true, true, true, true, true, true, //PQRSTUVW
388 true, true, true, false, false, false, true, true, //XYZ[\]^_
389 true, true, true, true, true, true, true, true, //`abcdefg
390 true, true, true, true, true, true, true, true, //hijklmno
391 true, true, true, true, true, true, true, true, //pqrstuvw
392 true, true, true, true, true, true, true, false //xyz{|}~
394 return isUSASCII(nChar) && aMap[nChar];
397 // static
398 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar)
400 static const sal_Char aMap[128]
401 = { false, false, false, false, false, false, false, false,
402 false, false, false, false, false, false, false, false,
403 false, false, false, false, false, false, false, false,
404 false, false, false, false, false, false, false, false,
405 false, true, false, true, true, true, true, true, // !"#$%&'
406 false, false, true, true, false, true, false, false, //()*+,-./
407 true, true, true, true, true, true, true, true, //01234567
408 true, true, false, false, false, false, false, false, //89:;<=>?
409 false, true, true, true, true, true, true, true, //@ABCDEFG
410 true, true, true, true, true, true, true, true, //HIJKLMNO
411 true, true, true, true, true, true, true, true, //PQRSTUVW
412 true, true, true, false, false, false, true, true, //XYZ[\]^_
413 true, true, true, true, true, true, true, true, //`abcdefg
414 true, true, true, true, true, true, true, true, //hijklmno
415 true, true, true, true, true, true, true, true, //pqrstuvw
416 true, true, true, true, true, true, true, false //xyz{|}~
418 return isUSASCII(nChar) && aMap[nChar];
421 // static
422 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
424 static const sal_Char aMap[128]
425 = { false, false, false, false, false, false, false, false,
426 false, false, false, false, false, false, false, false,
427 false, false, false, false, false, false, false, false,
428 false, false, false, false, false, false, false, false,
429 false, true, false, true, true, false, true, true, // !"#$%&'
430 false, false, false, true, true, true, true, true, //()*+,-./
431 true, true, true, true, true, true, true, true, //01234567
432 true, true, true, true, true, true, true, true, //89:;<=>?
433 true, true, true, true, true, true, true, true, //@ABCDEFG
434 true, true, true, true, true, true, true, true, //HIJKLMNO
435 true, true, true, true, true, true, true, true, //PQRSTUVW
436 true, true, true, true, false, true, true, true, //XYZ[\]^_
437 true, true, true, true, true, true, true, true, //`abcdefg
438 true, true, true, true, true, true, true, true, //hijklmno
439 true, true, true, true, true, true, true, true, //pqrstuvw
440 true, true, true, false, true, true, true, false //xyz{|}~
442 return isUSASCII(nChar) && aMap[nChar];
445 // static
446 sal_uInt32 INetMIME::getHexDigit(int nWeight)
448 DBG_ASSERT(nWeight >= 0 && nWeight < 16,
449 "INetMIME::getHexDigit(): Bad weight");
451 static const sal_Char aDigits[16]
452 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
453 'D', 'E', 'F' };
454 return aDigits[nWeight];
457 // static
458 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
459 const sal_Char * pEnd1,
460 const sal_Char * pString2)
462 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
463 "INetMIME::equalIgnoreCase(): Bad sequences");
465 while (*pString2 != 0)
466 if (pBegin1 == pEnd1
467 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
468 return false;
469 return pBegin1 == pEnd1;
472 // static
473 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
474 const sal_Unicode * pEnd1,
475 const sal_Char * pString2)
477 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
478 "INetMIME::equalIgnoreCase(): Bad sequences");
480 while (*pString2 != 0)
481 if (pBegin1 == pEnd1
482 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
483 return false;
484 return pBegin1 == pEnd1;
487 // static
488 const sal_Unicode * INetMIME::skipLinearWhiteSpace(const sal_Unicode * pBegin,
489 const sal_Unicode * pEnd)
491 DBG_ASSERT(pBegin && pBegin <= pEnd,
492 "INetMIME::skipLinearWhiteSpace(): Bad sequence");
494 while (pBegin != pEnd)
495 switch (*pBegin)
497 case '\t':
498 case ' ':
499 ++pBegin;
500 break;
502 case 0x0D: // CR
503 if (startsWithLineFolding(pBegin, pEnd))
504 pBegin += 3;
505 else
506 return pBegin;
507 break;
509 default:
510 return pBegin;
512 return pBegin;
515 // static
516 const sal_Unicode * INetMIME::skipComment(const sal_Unicode * pBegin,
517 const sal_Unicode * pEnd)
519 DBG_ASSERT(pBegin && pBegin <= pEnd,
520 "INetMIME::skipComment(): Bad sequence");
522 if (pBegin != pEnd && *pBegin == '(')
524 sal_uInt32 nLevel = 0;
525 for (const sal_Unicode * p = pBegin; p != pEnd;)
526 switch (*p++)
528 case '(':
529 ++nLevel;
530 break;
532 case ')':
533 if (--nLevel == 0)
534 return p;
535 break;
537 case '\\':
538 if (p != pEnd)
539 ++p;
540 break;
543 return pBegin;
546 // static
547 const sal_Unicode * INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode *
548 pBegin,
549 const sal_Unicode *
550 pEnd)
552 DBG_ASSERT(pBegin && pBegin <= pEnd,
553 "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");
555 while (pBegin != pEnd)
556 switch (*pBegin)
558 case '\t':
559 case ' ':
560 ++pBegin;
561 break;
563 case 0x0D: // CR
564 if (startsWithLineFolding(pBegin, pEnd))
565 pBegin += 3;
566 else
567 return pBegin;
568 break;
570 case '(':
572 const sal_Unicode * p = skipComment(pBegin, pEnd);
573 if (p == pBegin)
574 return pBegin;
575 pBegin = p;
576 break;
579 default:
580 return pBegin;
582 return pBegin;
585 // static
586 const sal_Char * INetMIME::skipQuotedString(const sal_Char * pBegin,
587 const sal_Char * pEnd)
589 DBG_ASSERT(pBegin && pBegin <= pEnd,
590 "INetMIME::skipQuotedString(): Bad sequence");
592 if (pBegin != pEnd && *pBegin == '"')
593 for (const sal_Char * p = pBegin + 1; p != pEnd;)
594 switch (*p++)
596 case 0x0D: // CR
597 if (pEnd - p < 2 || *p++ != 0x0A // LF
598 || !isWhiteSpace(*p++))
599 return pBegin;
600 break;
602 case '"':
603 return p;
605 case '\\':
606 if (p != pEnd)
607 ++p;
608 break;
610 return pBegin;
613 // static
614 const sal_Unicode * INetMIME::skipQuotedString(const sal_Unicode * pBegin,
615 const sal_Unicode * pEnd)
617 DBG_ASSERT(pBegin && pBegin <= pEnd,
618 "INetMIME::skipQuotedString(): Bad sequence");
620 if (pBegin != pEnd && *pBegin == '"')
621 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
622 switch (*p++)
624 case 0x0D: // CR
625 if (pEnd - p < 2 || *p++ != 0x0A // LF
626 || !isWhiteSpace(*p++))
627 return pBegin;
628 break;
630 case '"':
631 return p;
633 case '\\':
634 if (p != pEnd)
635 ++p;
636 break;
638 return pBegin;
641 // static
642 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
643 const sal_Unicode * pEnd, bool bLeadingZeroes,
644 sal_uInt32 & rValue)
646 sal_uInt64 nTheValue = 0;
647 const sal_Unicode * p = rBegin;
648 for ( ; p != pEnd; ++p)
650 int nWeight = getWeight(*p);
651 if (nWeight < 0)
652 break;
653 nTheValue = 10 * nTheValue + nWeight;
654 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
655 return false;
657 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
658 return false;
659 rBegin = p;
660 rValue = sal_uInt32(nTheValue);
661 return true;
664 // static
665 const sal_Unicode * INetMIME::scanQuotedBlock(const sal_Unicode * pBegin,
666 const sal_Unicode * pEnd,
667 sal_uInt32 nOpening,
668 sal_uInt32 nClosing,
669 sal_Size & rLength,
670 bool & rModify)
672 DBG_ASSERT(pBegin && pBegin <= pEnd,
673 "INetMIME::scanQuotedBlock(): Bad sequence");
675 if (pBegin != pEnd && *pBegin == nOpening)
677 ++rLength;
678 ++pBegin;
679 while (pBegin != pEnd)
680 if (*pBegin == nClosing)
682 ++rLength;
683 return ++pBegin;
685 else
687 sal_uInt32 c = *pBegin++;
688 switch (c)
690 case 0x0D: // CR
691 if (pBegin != pEnd && *pBegin == 0x0A) // LF
692 if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
694 ++rLength;
695 rModify = true;
696 pBegin += 2;
698 else
700 rLength += 3;
701 rModify = true;
702 ++pBegin;
704 else
705 ++rLength;
706 break;
708 case '\\':
709 ++rLength;
710 if (pBegin != pEnd)
712 if (startsWithLineBreak(pBegin, pEnd)
713 && (pEnd - pBegin < 3
714 || !isWhiteSpace(pBegin[2])))
716 rLength += 3;
717 rModify = true;
718 pBegin += 2;
720 else
721 ++pBegin;
723 break;
725 default:
726 ++rLength;
727 if (!isUSASCII(c))
728 rModify = true;
729 break;
733 return pBegin;
736 // static
737 sal_Unicode const * INetMIME::scanParameters(sal_Unicode const * pBegin,
738 sal_Unicode const * pEnd,
739 INetContentTypeParameterList *
740 pParameters)
742 ParameterList aList;
743 sal_Unicode const * pParameterBegin = pBegin;
744 for (sal_Unicode const * p = pParameterBegin;; pParameterBegin = p)
746 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
747 if (pParameterBegin == pEnd || *pParameterBegin != ';')
748 break;
749 p = pParameterBegin + 1;
751 sal_Unicode const * pAttributeBegin
752 = skipLinearWhiteSpaceComment(p, pEnd);
753 p = pAttributeBegin;
754 bool bDowncaseAttribute = false;
755 while (p != pEnd && isTokenChar(*p) && *p != '*')
757 bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
758 ++p;
760 if (p == pAttributeBegin)
761 break;
762 OString aAttribute = OString(
763 pAttributeBegin, p - pAttributeBegin,
764 RTL_TEXTENCODING_ASCII_US);
765 if (bDowncaseAttribute)
766 aAttribute = aAttribute.toAsciiLowerCase();
768 sal_uInt32 nSection = 0;
769 if (p != pEnd && *p == '*')
771 ++p;
772 if (p != pEnd && isDigit(*p)
773 && !scanUnsigned(p, pEnd, false, nSection))
774 break;
777 bool bPresent;
778 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
779 if (bPresent)
780 break;
782 bool bExtended = false;
783 if (p != pEnd && *p == '*')
785 ++p;
786 bExtended = true;
789 p = skipLinearWhiteSpaceComment(p, pEnd);
791 if (p == pEnd || *p != '=')
792 break;
794 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
796 OString aCharset;
797 OString aLanguage;
798 OString aValue;
799 if (bExtended)
801 if (nSection == 0)
803 sal_Unicode const * pCharsetBegin = p;
804 bool bDowncaseCharset = false;
805 while (p != pEnd && isTokenChar(*p) && *p != '\'')
807 bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
808 ++p;
810 if (p == pCharsetBegin)
811 break;
812 if (pParameters)
814 aCharset = OString(
815 pCharsetBegin,
816 p - pCharsetBegin,
817 RTL_TEXTENCODING_ASCII_US);
818 if (bDowncaseCharset)
819 aCharset = aCharset.toAsciiLowerCase();
822 if (p == pEnd || *p != '\'')
823 break;
824 ++p;
826 sal_Unicode const * pLanguageBegin = p;
827 bool bDowncaseLanguage = false;
828 int nLetters = 0;
829 for (; p != pEnd; ++p)
830 if (isAlpha(*p))
832 if (++nLetters > 8)
833 break;
834 bDowncaseLanguage = bDowncaseLanguage
835 || isUpperCase(*p);
837 else if (*p == '-')
839 if (nLetters == 0)
840 break;
841 nLetters = 0;
843 else
844 break;
845 if (nLetters == 0 || nLetters > 8)
846 break;
847 if (pParameters)
849 aLanguage = OString(
850 pLanguageBegin,
851 p - pLanguageBegin,
852 RTL_TEXTENCODING_ASCII_US);
853 if (bDowncaseLanguage)
854 aLanguage = aLanguage.toAsciiLowerCase();
857 if (p == pEnd || *p != '\'')
858 break;
859 ++p;
861 if (pParameters)
863 INetMIMEStringOutputSink
864 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
865 while (p != pEnd)
867 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
868 if (isUSASCII(nChar) && !isTokenChar(nChar))
869 break;
870 if (nChar == '%' && p + 1 < pEnd)
872 int nWeight1 = getHexWeight(p[0]);
873 int nWeight2 = getHexWeight(p[1]);
874 if (nWeight1 >= 0 && nWeight2 >= 0)
876 aSink << sal_Char(nWeight1 << 4 | nWeight2);
877 p += 2;
878 continue;
881 INetMIME::writeUTF8(aSink, nChar);
883 aValue = aSink.takeBuffer();
885 else
886 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
887 ++p;
889 else if (p != pEnd && *p == '"')
890 if (pParameters)
892 INetMIMEStringOutputSink
893 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
894 bool bInvalid = false;
895 for (++p;;)
897 if (p == pEnd)
899 bInvalid = true;
900 break;
902 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
903 if (nChar == '"')
904 break;
905 else if (nChar == 0x0D) // CR
907 if (pEnd - p < 2 || *p++ != 0x0A // LF
908 || !isWhiteSpace(*p))
910 bInvalid = true;
911 break;
913 nChar = sal_uChar(*p++);
915 else if (nChar == '\\')
917 if (p == pEnd)
919 bInvalid = true;
920 break;
922 nChar = INetMIME::getUTF32Character(p, pEnd);
924 INetMIME::writeUTF8(aSink, nChar);
926 if (bInvalid)
927 break;
928 aValue = aSink.takeBuffer();
930 else
932 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
933 if (p == pStringEnd)
934 break;
935 p = pStringEnd;
937 else
939 sal_Unicode const * pTokenBegin = p;
940 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
941 ++p;
942 if (p == pTokenBegin)
943 break;
944 if (pParameters)
945 aValue = OString(
946 pTokenBegin, p - pTokenBegin,
947 RTL_TEXTENCODING_UTF8);
950 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
951 nSection, bExtended);
953 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
956 // static
957 const sal_Char * INetMIME::getCharsetName(rtl_TextEncoding eEncoding)
959 if (rtl_isOctetTextEncoding(eEncoding))
961 char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding);
962 DBG_ASSERT(p, "INetMIME::getCharsetName(): Unsupported encoding");
963 return p;
965 else
966 switch (eEncoding)
968 case RTL_TEXTENCODING_UCS4:
969 return "ISO-10646-UCS-4";
971 case RTL_TEXTENCODING_UCS2:
972 return "ISO-10646-UCS-2";
974 default:
975 OSL_FAIL("INetMIME::getCharsetName(): Unsupported encoding");
976 return 0;
980 namespace unnamed_tools_inetmime {
982 struct EncodingEntry
984 sal_Char const * m_aName;
985 rtl_TextEncoding m_eEncoding;
988 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
989 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
990 // noted:
991 EncodingEntry const aEncodingMap[]
992 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
993 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
994 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
995 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
996 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
997 { "ASCII", RTL_TEXTENCODING_ASCII_US },
998 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
999 { "US", RTL_TEXTENCODING_ASCII_US },
1000 { "IBM367", RTL_TEXTENCODING_ASCII_US },
1001 { "CP367", RTL_TEXTENCODING_ASCII_US },
1002 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
1003 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1004 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
1005 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
1006 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1007 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1008 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
1009 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
1010 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
1011 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1012 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1013 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
1014 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
1015 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1016 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1017 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
1018 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1019 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1020 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
1021 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
1022 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1023 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1024 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
1025 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1026 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1027 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
1028 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
1029 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1030 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1031 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
1032 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1033 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1034 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
1035 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
1036 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1037 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1038 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1039 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1040 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
1041 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
1042 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1043 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
1044 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
1045 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1046 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1047 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1048 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
1049 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
1050 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1051 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
1052 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
1053 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
1054 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
1055 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
1056 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1057 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
1058 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
1059 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1060 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1061 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1062 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1063 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
1064 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
1065 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1066 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1067 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
1068 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1069 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
1070 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
1071 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
1072 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1073 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
1074 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1075 { "IBM437", RTL_TEXTENCODING_IBM_437 },
1076 { "CP437", RTL_TEXTENCODING_IBM_437 },
1077 { "437", RTL_TEXTENCODING_IBM_437 },
1078 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
1079 { "IBM850", RTL_TEXTENCODING_IBM_850 },
1080 { "CP850", RTL_TEXTENCODING_IBM_850 },
1081 { "850", RTL_TEXTENCODING_IBM_850 },
1082 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
1083 { "IBM860", RTL_TEXTENCODING_IBM_860 },
1084 { "CP860", RTL_TEXTENCODING_IBM_860 },
1085 { "860", RTL_TEXTENCODING_IBM_860 },
1086 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
1087 { "IBM861", RTL_TEXTENCODING_IBM_861 },
1088 { "CP861", RTL_TEXTENCODING_IBM_861 },
1089 { "861", RTL_TEXTENCODING_IBM_861 },
1090 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
1091 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
1092 { "IBM863", RTL_TEXTENCODING_IBM_863 },
1093 { "CP863", RTL_TEXTENCODING_IBM_863 },
1094 { "863", RTL_TEXTENCODING_IBM_863 },
1095 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
1096 { "IBM865", RTL_TEXTENCODING_IBM_865 },
1097 { "CP865", RTL_TEXTENCODING_IBM_865 },
1098 { "865", RTL_TEXTENCODING_IBM_865 },
1099 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
1100 { "IBM775", RTL_TEXTENCODING_IBM_775 },
1101 { "CP775", RTL_TEXTENCODING_IBM_775 },
1102 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
1103 { "IBM852", RTL_TEXTENCODING_IBM_852 },
1104 { "CP852", RTL_TEXTENCODING_IBM_852 },
1105 { "852", RTL_TEXTENCODING_IBM_852 },
1106 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
1107 { "IBM855", RTL_TEXTENCODING_IBM_855 },
1108 { "CP855", RTL_TEXTENCODING_IBM_855 },
1109 { "855", RTL_TEXTENCODING_IBM_855 },
1110 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
1111 { "IBM857", RTL_TEXTENCODING_IBM_857 },
1112 { "CP857", RTL_TEXTENCODING_IBM_857 },
1113 { "857", RTL_TEXTENCODING_IBM_857 },
1114 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
1115 { "IBM862", RTL_TEXTENCODING_IBM_862 },
1116 { "CP862", RTL_TEXTENCODING_IBM_862 },
1117 { "862", RTL_TEXTENCODING_IBM_862 },
1118 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
1119 { "IBM864", RTL_TEXTENCODING_IBM_864 },
1120 { "CP864", RTL_TEXTENCODING_IBM_864 },
1121 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
1122 { "IBM866", RTL_TEXTENCODING_IBM_866 },
1123 { "CP866", RTL_TEXTENCODING_IBM_866 },
1124 { "866", RTL_TEXTENCODING_IBM_866 },
1125 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
1126 { "IBM869", RTL_TEXTENCODING_IBM_869 },
1127 { "CP869", RTL_TEXTENCODING_IBM_869 },
1128 { "869", RTL_TEXTENCODING_IBM_869 },
1129 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
1130 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
1131 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
1132 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
1133 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
1134 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
1135 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
1136 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
1137 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
1138 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
1139 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
1140 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
1141 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
1142 { "GB2312", RTL_TEXTENCODING_GB_2312 },
1143 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
1144 { "BIG5", RTL_TEXTENCODING_BIG5 },
1145 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
1146 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
1147 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
1148 RTL_TEXTENCODING_EUC_JP },
1149 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
1150 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
1151 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
1152 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
1153 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
1154 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
1155 { "UTF-7", RTL_TEXTENCODING_UTF7 },
1156 { "UTF-8", RTL_TEXTENCODING_UTF8 },
1157 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
1158 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
1159 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
1160 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
1161 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
1162 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
1163 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
1164 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
1165 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
1166 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
1168 template< typename T >
1169 inline rtl_TextEncoding getCharsetEncoding_Impl(T const * pBegin,
1170 T const * pEnd)
1172 for (sal_Size i = 0; i < sizeof aEncodingMap / sizeof (EncodingEntry);
1173 ++i)
1174 if (INetMIME::equalIgnoreCase(pBegin, pEnd, aEncodingMap[i].m_aName))
1175 return aEncodingMap[i].m_eEncoding;
1176 return RTL_TEXTENCODING_DONTKNOW;
1181 // static
1182 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Char const * pBegin,
1183 sal_Char const * pEnd)
1185 return getCharsetEncoding_Impl(pBegin, pEnd);
1188 // static
1189 INetMIMECharsetList_Impl *
1190 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding)
1192 static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) };
1194 static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) };
1195 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version
1196 // 1.0 of 1999 July 27
1198 static const sal_uInt32 aISO88592Ranges[]
1199 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1200 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7,
1201 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7,
1202 0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4,
1203 0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4,
1204 0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111,
1205 0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144,
1206 0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B,
1207 0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7,
1208 0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) };
1209 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version
1210 // 1.0 of 1999 July 27
1212 static const sal_uInt32 aISO88593Ranges[]
1213 = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1214 0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4,
1215 0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2,
1216 0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC,
1217 0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131,
1218 0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C,
1219 0x2D8, 0x2D9, sal_uInt32(-1) };
1220 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version
1221 // 1.0 of 1999 July 27
1223 static const sal_uInt32 aISO88594Ranges[]
1224 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0,
1225 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB,
1226 0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6,
1227 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC,
1228 0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113,
1229 0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F,
1230 0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D,
1231 0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173,
1232 0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB,
1233 sal_uInt32(-1) };
1234 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version
1235 // 1.0 of 1999 July 27
1237 static const sal_uInt32 aISO88595Ranges[]
1238 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F,
1239 0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) };
1240 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version
1241 // 1.0 of 1999 July 27
1243 static const sal_uInt32 aISO88596Ranges[]
1244 = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B,
1245 0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) };
1246 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version
1247 // 1.0 of 1999 July 27
1249 static const sal_uInt32 aISO88597Ranges[]
1250 = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3,
1251 0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A,
1252 0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015,
1253 0x2018, 0x2019, sal_uInt32(-1) };
1254 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version
1255 // 1.0 of 1999 July 27
1257 static const sal_uInt32 aISO88598Ranges[]
1258 = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7,
1259 0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017,
1260 sal_uInt32(-1) };
1261 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version
1262 // 1.1 of 2000-Jan-03
1264 static const sal_uInt32 aISO88599Ranges[]
1265 = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF,
1266 0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) };
1267 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version
1268 // 1.0 of 1999 July 27
1270 static const sal_uInt32 aISO885910Ranges[]
1271 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7,
1272 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6,
1273 0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB,
1274 0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101,
1275 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119,
1276 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138,
1277 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161,
1278 0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015,
1279 sal_uInt32(-1) };
1280 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version
1281 // 1.1 of 1999 October 11
1283 static const sal_uInt32 aISO885913Ranges[]
1284 = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE,
1285 0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6,
1286 0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF,
1287 0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC,
1288 0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113,
1289 0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F,
1290 0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D,
1291 0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B,
1292 0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E,
1293 sal_uInt32(-1) };
1294 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version
1295 // 1.0 of 1999 July 27
1297 static const sal_uInt32 aISO885914Ranges[]
1298 = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE,
1299 0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF,
1300 0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121,
1301 0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F,
1302 0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B,
1303 0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) };
1304 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version
1305 // 1.0 of 1999 July 27
1307 static const sal_uInt32 aISO885915Ranges[]
1308 = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7,
1309 0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1310 0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) };
1311 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version
1312 // 1.0 of 1999 July 27
1314 static const sal_uInt32 aKOI8RRanges[]
1315 = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2,
1316 0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451,
1317 0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321,
1318 0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510,
1319 0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524,
1320 0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C,
1321 0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C,
1322 0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) };
1323 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT>
1324 // version 1.0 of 18 August 1999
1326 #if defined WNT
1327 static const sal_uInt32 aWindows1252Ranges[]
1328 = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1329 0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC,
1330 0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022,
1331 0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC,
1332 0x2122, 0x2122, sal_uInt32(-1) };
1333 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
1334 // CP1252.TXT> version 2.01 of 04/15/98
1335 #endif // WNT
1337 INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl;
1338 switch (eEncoding)
1340 case RTL_TEXTENCODING_MS_1252:
1341 #if defined WNT
1342 pList->prepend(Charset(RTL_TEXTENCODING_MS_1252,
1343 aWindows1252Ranges));
1344 #endif // WNT
1345 case RTL_TEXTENCODING_ISO_8859_1:
1346 case RTL_TEXTENCODING_UTF7:
1347 case RTL_TEXTENCODING_UTF8:
1348 break;
1350 case RTL_TEXTENCODING_ISO_8859_2:
1351 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1352 aISO88592Ranges));
1353 break;
1355 case RTL_TEXTENCODING_ISO_8859_3:
1356 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3,
1357 aISO88593Ranges));
1358 break;
1360 case RTL_TEXTENCODING_ISO_8859_4:
1361 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
1362 aISO88594Ranges));
1363 break;
1365 case RTL_TEXTENCODING_ISO_8859_5:
1366 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1367 aISO88595Ranges));
1368 break;
1370 case RTL_TEXTENCODING_ISO_8859_6:
1371 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
1372 aISO88596Ranges));
1373 break;
1375 case RTL_TEXTENCODING_ISO_8859_7:
1376 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
1377 aISO88597Ranges));
1378 break;
1380 case RTL_TEXTENCODING_ISO_8859_8:
1381 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
1382 aISO88598Ranges));
1383 break;
1385 case RTL_TEXTENCODING_ISO_8859_9:
1386 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
1387 aISO88599Ranges));
1388 break;
1390 case RTL_TEXTENCODING_ISO_8859_10:
1391 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10,
1392 aISO885910Ranges));
1393 break;
1395 case RTL_TEXTENCODING_ISO_8859_13:
1396 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13,
1397 aISO885913Ranges));
1398 break;
1400 case RTL_TEXTENCODING_ISO_8859_14:
1401 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14,
1402 aISO885914Ranges));
1403 break;
1405 case RTL_TEXTENCODING_ISO_8859_15:
1406 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15,
1407 aISO885915Ranges));
1408 break;
1410 case RTL_TEXTENCODING_MS_1250:
1411 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1412 aISO88592Ranges));
1413 break;
1415 case RTL_TEXTENCODING_MS_1251:
1416 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1417 aISO88595Ranges));
1418 break;
1420 case RTL_TEXTENCODING_MS_1253:
1421 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
1422 aISO88597Ranges));
1423 break;
1425 case RTL_TEXTENCODING_MS_1254:
1426 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
1427 aISO88599Ranges));
1428 break;
1430 case RTL_TEXTENCODING_MS_1255:
1431 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
1432 aISO88598Ranges));
1433 break;
1435 case RTL_TEXTENCODING_MS_1256:
1436 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
1437 aISO88596Ranges));
1438 break;
1440 case RTL_TEXTENCODING_MS_1257:
1441 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
1442 aISO88594Ranges));
1443 break;
1445 case RTL_TEXTENCODING_KOI8_R:
1446 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1447 aISO88595Ranges));
1448 pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges));
1449 break;
1451 default: //@@@ more cases are missing!
1452 OSL_FAIL("INetMIME::createPreferredCharsetList():"
1453 " Unsupported encoding");
1454 break;
1456 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges));
1457 pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges));
1458 return pList;
1461 // static
1462 sal_Unicode * INetMIME::convertToUnicode(const sal_Char * pBegin,
1463 const sal_Char * pEnd,
1464 rtl_TextEncoding eEncoding,
1465 sal_Size & rSize)
1467 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
1468 return 0;
1469 rtl_TextToUnicodeConverter hConverter
1470 = rtl_createTextToUnicodeConverter(eEncoding);
1471 rtl_TextToUnicodeContext hContext
1472 = rtl_createTextToUnicodeContext(hConverter);
1473 sal_Unicode * pBuffer;
1474 sal_uInt32 nInfo;
1475 for (sal_Size nBufferSize = pEnd - pBegin;;
1476 nBufferSize += nBufferSize / 3 + 1)
1478 pBuffer = new sal_Unicode[nBufferSize];
1479 sal_Size nSrcCvtBytes;
1480 rSize = rtl_convertTextToUnicode(
1481 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
1482 nBufferSize,
1483 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
1484 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
1485 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
1486 &nInfo, &nSrcCvtBytes);
1487 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
1488 break;
1489 delete[] pBuffer;
1490 rtl_resetTextToUnicodeContext(hConverter, hContext);
1492 rtl_destroyTextToUnicodeContext(hConverter, hContext);
1493 rtl_destroyTextToUnicodeConverter(hConverter);
1494 if (nInfo != 0)
1496 delete[] pBuffer;
1497 pBuffer = 0;
1499 return pBuffer;
1502 // static
1503 sal_Char * INetMIME::convertFromUnicode(const sal_Unicode * pBegin,
1504 const sal_Unicode * pEnd,
1505 rtl_TextEncoding eEncoding,
1506 sal_Size & rSize)
1508 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
1509 return 0;
1510 rtl_UnicodeToTextConverter hConverter
1511 = rtl_createUnicodeToTextConverter(eEncoding);
1512 rtl_UnicodeToTextContext hContext
1513 = rtl_createUnicodeToTextContext(hConverter);
1514 sal_Char * pBuffer;
1515 sal_uInt32 nInfo;
1516 for (sal_Size nBufferSize = pEnd - pBegin;;
1517 nBufferSize += nBufferSize / 3 + 1)
1519 pBuffer = new sal_Char[nBufferSize];
1520 sal_Size nSrcCvtBytes;
1521 rSize = rtl_convertUnicodeToText(
1522 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
1523 nBufferSize,
1524 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
1525 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
1526 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
1527 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
1528 &nInfo, &nSrcCvtBytes);
1529 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
1530 break;
1531 delete[] pBuffer;
1532 rtl_resetUnicodeToTextContext(hConverter, hContext);
1534 rtl_destroyUnicodeToTextContext(hConverter, hContext);
1535 rtl_destroyUnicodeToTextConverter(hConverter);
1536 if (nInfo != 0)
1538 delete[] pBuffer;
1539 pBuffer = 0;
1541 return pBuffer;
1544 // static
1545 void INetMIME::writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
1547 // See RFC 2279 for a discussion of UTF-8.
1548 DBG_ASSERT(nChar < 0x80000000, "INetMIME::writeUTF8(): Bad char");
1550 if (nChar < 0x80)
1551 rSink << sal_Char(nChar);
1552 else if (nChar < 0x800)
1553 rSink << sal_Char(nChar >> 6 | 0xC0)
1554 << sal_Char((nChar & 0x3F) | 0x80);
1555 else if (nChar < 0x10000)
1556 rSink << sal_Char(nChar >> 12 | 0xE0)
1557 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
1558 << sal_Char((nChar & 0x3F) | 0x80);
1559 else if (nChar < 0x200000)
1560 rSink << sal_Char(nChar >> 18 | 0xF0)
1561 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
1562 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
1563 << sal_Char((nChar & 0x3F) | 0x80);
1564 else if (nChar < 0x4000000)
1565 rSink << sal_Char(nChar >> 24 | 0xF8)
1566 << sal_Char((nChar >> 18 & 0x3F) | 0x80)
1567 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
1568 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
1569 << sal_Char((nChar & 0x3F) | 0x80);
1570 else
1571 rSink << sal_Char(nChar >> 30 | 0xFC)
1572 << sal_Char((nChar >> 24 & 0x3F) | 0x80)
1573 << sal_Char((nChar >> 18 & 0x3F) | 0x80)
1574 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
1575 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
1576 << sal_Char((nChar & 0x3F) | 0x80);
// Writes the header field body rBody to rSink.
//
// For HEADER_FIELD_TEXT the whole body is fed through an
// INetMIMEEncodedWordOutputSink in CONTEXT_TEXT and flushed.  For every
// other eType the body is scanned token by token: white space and line
// folding only set bInitialSpace (one pending space), comments are written
// through an encoded-word sink in CONTEXT_COMMENT, a '<' is kept pending
// (BRACKETS_OPENING) until a following token decides how it is written, and
// each remaining token is classified as 'junk', 'non-phrase' or 'phrase'
// and written accordingly.
//
// Before most writes the sink's current column (plus one if a space is
// pending, plus the token length for junk and non-phrase tokens) is compared
// with rSink.getLineLengthLimit(); when the limit is hit,
// INetMIMEOutputSink::endl followed by a single ' ' is written instead of
// the pending space.
//
// ePreferredEncoding is only handed on to the encoded-word sinks.
// bInitialSpace says whether a space is pending before the first output.
1579 // static
1580 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
1581 HeaderFieldType eType,
1582 const OUString& rBody,
1583 rtl_TextEncoding ePreferredEncoding,
1584 bool bInitialSpace)
1586 if (eType == HEADER_FIELD_TEXT)
1588 INetMIMEEncodedWordOutputSink
1589 aOutput(rSink, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT,
1590 bInitialSpace ?
1591 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
1592 INetMIMEEncodedWordOutputSink::SPACE_NO,
1593 ePreferredEncoding);
1594 aOutput.write(rBody.getStr(), rBody.getStr() + rBody.getLength());
1595 aOutput.flush();
// All other field types: tokenize the body by hand.
1597 else
1599 enum Brackets { BRACKETS_OUTSIDE, BRACKETS_OPENING, BRACKETS_INSIDE };
1600 Brackets eBrackets = BRACKETS_OUTSIDE;
1602 const sal_Unicode * pBodyPtr = rBody.getStr();
1603 const sal_Unicode * pBodyEnd = pBodyPtr + rBody.getLength();
1604 while (pBodyPtr != pBodyEnd)
1605 switch (*pBodyPtr)
1607 case '\t':
1608 case ' ':
1609 // A WSP adds to accumulated space:
1610 bInitialSpace = true;
1611 ++pBodyPtr;
1612 break;
1614 case '(':
1616 // Write a pending '<' if necessary:
1617 if (eBrackets == BRACKETS_OPENING)
1619 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1620 >= rSink.getLineLengthLimit())
1621 rSink << INetMIMEOutputSink::endl << ' ';
1622 else if (bInitialSpace)
1623 rSink << ' ';
1624 rSink << '<';
1625 bInitialSpace = false;
1626 eBrackets = BRACKETS_INSIDE;
1629 // Write the comment, introducing encoded-words where
1630 // necessary:
// nLevel counts the currently open parentheses; the comment ends when
// it drops back to zero or the body is exhausted.
1631 int nLevel = 0;
1632 INetMIMEEncodedWordOutputSink
1633 aOutput(
1634 rSink,
1635 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT,
1636 INetMIMEEncodedWordOutputSink::SPACE_NO,
1637 ePreferredEncoding);
1638 while (pBodyPtr != pBodyEnd)
1639 switch (*pBodyPtr)
1641 case '(':
1642 aOutput.flush();
1643 if (rSink.getColumn()
1644 + (bInitialSpace ? 1 : 0)
1645 >= rSink.getLineLengthLimit())
1646 rSink << INetMIMEOutputSink::endl << ' ';
1647 else if (bInitialSpace)
1648 rSink << ' ';
1649 rSink << '(';
1650 bInitialSpace = false;
1651 ++nLevel;
1652 ++pBodyPtr;
1653 break;
1655 case ')':
1656 aOutput.flush();
1657 if (rSink.getColumn()
1658 >= rSink.getLineLengthLimit())
1659 rSink << INetMIMEOutputSink::endl << ' ';
1660 rSink << ')';
1661 ++pBodyPtr;
1662 if (--nLevel == 0)
1663 goto comment_done;
1664 break;
// A backslash is dropped and the character after it (if any) is passed
// on to the encoded-word sink via the default case.
1666 case '\\':
1667 if (++pBodyPtr == pBodyEnd)
1668 break;
1669 default:
1670 aOutput << *pBodyPtr++;
1671 break;
1673 comment_done:
1674 break;
1677 case '<':
1678 // Write an already pending '<' if necessary:
1679 if (eBrackets == BRACKETS_OPENING)
1681 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1682 >= rSink.getLineLengthLimit())
1683 rSink << INetMIMEOutputSink::endl << ' ';
1684 else if (bInitialSpace)
1685 rSink << ' ';
1686 rSink << '<';
1687 bInitialSpace = false;
1690 // Remember this '<' as pending, and open a bracketed
1691 // block:
1692 eBrackets = BRACKETS_OPENING;
1693 ++pBodyPtr;
1694 break;
1696 case '>':
1697 // Write a pending '<' if necessary:
1698 if (eBrackets == BRACKETS_OPENING)
1700 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1701 >= rSink.getLineLengthLimit())
1702 rSink << INetMIMEOutputSink::endl << ' ';
1703 else if (bInitialSpace)
1704 rSink << ' ';
1705 rSink << '<';
1706 bInitialSpace = false;
1709 // Write this '>', and close any bracketed block:
1710 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1711 >= rSink.getLineLengthLimit())
1712 rSink << INetMIMEOutputSink::endl << ' ';
1713 else if (bInitialSpace)
1714 rSink << ' ';
1715 rSink << '>';
1716 bInitialSpace = false;
1717 eBrackets = BRACKETS_OUTSIDE;
1718 ++pBodyPtr;
1719 break;
1721 case ',':
1722 case ':':
1723 case ';':
1724 case '\\':
1725 case ']':
1726 // Write a pending '<' if necessary:
1727 if (eBrackets == BRACKETS_OPENING)
1729 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1730 >= rSink.getLineLengthLimit())
1731 rSink << INetMIMEOutputSink::endl << ' ';
1732 else if (bInitialSpace)
1733 rSink << ' ';
1734 rSink << '<';
1735 bInitialSpace = false;
1736 eBrackets = BRACKETS_INSIDE;
1739 // Write this specials:
1740 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1741 >= rSink.getLineLengthLimit())
1742 rSink << INetMIMEOutputSink::endl << ' ';
1743 else if (bInitialSpace)
1744 rSink << ' ';
1745 rSink << sal_Char(*pBodyPtr++);
1746 bInitialSpace = false;
1747 break;
1749 case '\x0D': // CR
1750 // A <CRLF WSP> adds to accumulated space, a <CR> not
1751 // followed by <LF WSP> starts 'junk':
1752 if (startsWithLineFolding(pBodyPtr, pBodyEnd))
1754 bInitialSpace = true;
1755 pBodyPtr += 3;
1756 break;
1758 default:
1760 // The next token is either one of <"." / "@" / atom /
1761 // quoted-string / domain-literal>, or it's 'junk'; if it
1762 // is not 'junk', it is either a 'phrase' (i.e., it may
1763 // contain encoded-words) or a 'non-phrase' (i.e., it may
1764 // not contain encoded-words):
1765 enum Entity { ENTITY_JUNK, ENTITY_NON_PHRASE,
1766 ENTITY_PHRASE };
1767 Entity eEntity = ENTITY_JUNK;
1768 switch (*pBodyPtr)
1770 case '.':
1771 case '@':
1772 case '[':
1773 // A token of <"." / "@" / domain-literal> always
1774 // starts a 'non-phrase':
1775 eEntity = ENTITY_NON_PHRASE;
1776 break;
1778 default:
1779 if (isUSASCII(*pBodyPtr)
1780 && !isAtomChar(*pBodyPtr))
1782 eEntity = ENTITY_JUNK;
1783 break;
1785 case '"':
1786 // A token of <atom / quoted-string> can either be
1787 // a 'phrase' or a 'non-phrase':
1788 switch (eType)
1790 case HEADER_FIELD_STRUCTURED:
1791 eEntity = ENTITY_NON_PHRASE;
1792 break;
1794 case HEADER_FIELD_PHRASE:
1795 eEntity = ENTITY_PHRASE;
1796 break;
1798 case HEADER_FIELD_MESSAGE_ID:
1799 // A 'phrase' if and only if outside any
1800 // bracketed block:
1801 eEntity
1802 = eBrackets == BRACKETS_OUTSIDE ?
1803 ENTITY_PHRASE :
1804 ENTITY_NON_PHRASE;
1805 break;
1807 case HEADER_FIELD_ADDRESS:
1809 // A 'non-phrase' if and only if, after
1810 // skipping this token and any following
1811 // <linear-white-space> and <comment>s,
1812 // there is no token left, or the next
1813 // token is any of <"." / "@" / ">" / ","
1814 // / ";">, or the next token is <":"> and
1815 // is within a bracketed block:
1816 const sal_Unicode * pLookAhead = pBodyPtr;
1817 if (*pLookAhead == '"')
1819 pLookAhead
1820 = skipQuotedString(pLookAhead,
1821 pBodyEnd);
1822 if (pLookAhead == pBodyPtr)
1823 pLookAhead = pBodyEnd;
1825 else
1826 while (pLookAhead != pBodyEnd
1827 && (isAtomChar(*pLookAhead)
1828 || !isUSASCII(
1829 *pLookAhead)))
1830 ++pLookAhead;
1831 while (pLookAhead != pBodyEnd)
1832 switch (*pLookAhead)
1834 case '\t':
1835 case ' ':
1836 ++pLookAhead;
1837 break;
1839 case '(':
1841 const sal_Unicode * pPast
1842 = skipComment(pLookAhead,
1843 pBodyEnd);
1844 pLookAhead
1845 = pPast == pLookAhead ?
1846 pBodyEnd : pPast;
1847 break;
1850 case ',':
1851 case '.':
1852 case ';':
1853 case '>':
1854 case '@':
1855 eEntity = ENTITY_NON_PHRASE;
1856 goto entity_determined;
1858 case ':':
1859 eEntity
1860 = eBrackets
1861 == BRACKETS_OUTSIDE ?
1862 ENTITY_PHRASE :
1863 ENTITY_NON_PHRASE;
1864 goto entity_determined;
1866 case '\x0D': // CR
1867 if (startsWithLineFolding(
1868 pLookAhead, pBodyEnd))
1870 pLookAhead += 3;
1871 break;
1873 default:
1874 eEntity = ENTITY_PHRASE;
1875 goto entity_determined;
1877 eEntity = ENTITY_NON_PHRASE;
1878 entity_determined:
1879 break;
1882 case HEADER_FIELD_TEXT:
1883 OSL_ASSERT(false);
1884 break;
1887 // In a 'non-phrase', a non-US-ASCII character
1888 // cannot be part of an <atom>, but instead the
1889 // whole entity is 'junk' rather than 'non-
1890 // phrase':
1891 if (eEntity == ENTITY_NON_PHRASE
1892 && !isUSASCII(*pBodyPtr))
1893 eEntity = ENTITY_JUNK;
1894 break;
// Write the token according to the classification made above.
1897 switch (eEntity)
1899 case ENTITY_JUNK:
1901 // Write a pending '<' if necessary:
1902 if (eBrackets == BRACKETS_OPENING)
1904 if (rSink.getColumn()
1905 + (bInitialSpace ? 1 : 0)
1906 >= rSink.getLineLengthLimit())
1907 rSink << INetMIMEOutputSink::endl << ' ';
1908 else if (bInitialSpace)
1909 rSink << ' ';
1910 rSink << '<';
1911 bInitialSpace = false;
1912 eBrackets = BRACKETS_INSIDE;
1915 // Calculate the length of in- and output:
// bModify records that the output differs from the input (a line break
// gets an inserted '\', or a non-US-ASCII character becomes UTF-8), so
// the run cannot be copied to the sink verbatim.
1916 const sal_Unicode * pStart = pBodyPtr;
1917 sal_Size nLength = 0;
1918 bool bModify = false;
1919 bool bEnd = false;
1920 while (pBodyPtr != pBodyEnd && !bEnd)
1921 switch (*pBodyPtr)
1923 case '\x0D': // CR
1924 if (startsWithLineFolding(pBodyPtr,
1925 pBodyEnd))
1926 bEnd = true;
1927 else if (startsWithLineBreak(
1928 pBodyPtr, pBodyEnd))
1930 nLength += 3;
1931 bModify = true;
1932 pBodyPtr += 2;
1934 else
1936 ++nLength;
1937 ++pBodyPtr;
1939 break;
1941 case '\t':
1942 case ' ':
1943 bEnd = true;
1944 break;
1946 default:
1947 if (isVisible(*pBodyPtr))
1948 bEnd = true;
1949 else if (isUSASCII(*pBodyPtr))
1951 ++nLength;
1952 ++pBodyPtr;
1954 else
1956 nLength += getUTF8OctetCount(
1957 *pBodyPtr++);
1958 bModify = true;
1960 break;
1963 // Write the output:
1964 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
1965 + nLength
1966 > rSink.getLineLengthLimit())
1967 rSink << INetMIMEOutputSink::endl << ' ';
1968 else if (bInitialSpace)
1969 rSink << ' ';
1970 bInitialSpace = false;
1971 if (bModify)
1972 while (pStart != pBodyPtr)
1973 if (startsWithLineBreak(pStart, pBodyPtr))
1975 rSink << "\x0D\\\x0A"; // CR, '\', LF
1976 pStart += 2;
1978 else
1979 writeUTF8(rSink, *pStart++);
1980 else
1981 rSink.write(pStart, pBodyPtr);
1982 break;
1985 case ENTITY_NON_PHRASE:
1987 // Calculate the length of in- and output:
// bSymbol is true while the next expected element is an atom, quoted
// string or domain literal, and false while it is a "." or "@"
// separator; it flips after each element consumed below.
1988 const sal_Unicode * pStart = pBodyPtr;
1989 sal_Size nLength = 0;
1990 bool bBracketedBlock = false;
1991 bool bSymbol = *pStart != '.' && *pStart != '@';
1992 bool bModify = false;
1993 bool bEnd = false;
1994 while (pBodyPtr != pBodyEnd && !bEnd)
1995 switch (*pBodyPtr)
1997 case '\t':
1998 case ' ':
1999 case '\x0D': // CR
2001 const sal_Unicode * pLookAhead
2002 = skipLinearWhiteSpace(pBodyPtr,
2003 pBodyEnd);
2004 if (pLookAhead < pBodyEnd
2005 && (bSymbol ?
2006 isAtomChar(*pLookAhead)
2007 || *pLookAhead == '"'
2008 || *pLookAhead == '[' :
2009 *pLookAhead == '.'
2010 || *pLookAhead == '@'
2011 || (*pLookAhead == '>'
2012 && eType
2013 >= HEADER_FIELD_MESSAGE_ID
2014 && eBrackets
2015 == BRACKETS_OPENING)))
2017 bModify = true;
2018 pBodyPtr = pLookAhead;
2020 else
2021 bEnd = true;
2022 break;
2025 case '"':
2026 if (bSymbol)
2028 pBodyPtr
2029 = scanQuotedBlock(pBodyPtr,
2030 pBodyEnd,
2031 '"', '"',
2032 nLength,
2033 bModify);
2034 bSymbol = false;
2036 else
2037 bEnd = true;
2038 break;
2040 case '[':
2041 if (bSymbol)
2043 pBodyPtr
2044 = scanQuotedBlock(pBodyPtr,
2045 pBodyEnd,
2046 '[', ']',
2047 nLength,
2048 bModify);
2049 bSymbol = false;
2051 else
2052 bEnd = true;
2053 break;
2055 case '.':
2056 case '@':
2057 if (bSymbol)
2058 bEnd = true;
2059 else
2061 ++nLength;
2062 bSymbol = true;
2063 ++pBodyPtr;
2065 break;
2067 case '>':
2068 if (eBrackets == BRACKETS_OPENING
2069 && eType
2070 >= HEADER_FIELD_MESSAGE_ID)
2072 ++nLength;
2073 bBracketedBlock = true;
2074 ++pBodyPtr;
2076 bEnd = true;
2077 break;
2079 default:
2080 if (isAtomChar(*pBodyPtr) && bSymbol)
2082 while (pBodyPtr != pBodyEnd
2083 && isAtomChar(*pBodyPtr))
2085 ++nLength;
2086 ++pBodyPtr;
2088 bSymbol = false;
2090 else
2092 if (!isUSASCII(*pBodyPtr))
2093 bModify = true;
2094 bEnd = true;
2096 break;
2099 // Write a pending '<' if necessary:
2100 if (eBrackets == BRACKETS_OPENING
2101 && !bBracketedBlock)
2103 if (rSink.getColumn()
2104 + (bInitialSpace ? 1 : 0)
2105 >= rSink.getLineLengthLimit())
2106 rSink << INetMIMEOutputSink::endl << ' ';
2107 else if (bInitialSpace)
2108 rSink << ' ';
2109 rSink << '<';
2110 bInitialSpace = false;
2111 eBrackets = BRACKETS_INSIDE;
2114 // Write the output:
2115 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2116 + nLength
2117 > rSink.getLineLengthLimit())
2118 rSink << INetMIMEOutputSink::endl << ' ';
2119 else if (bInitialSpace)
2120 rSink << ' ';
2121 bInitialSpace = false;
2122 if (bBracketedBlock)
2124 rSink << '<';
2125 eBrackets = BRACKETS_OUTSIDE;
2127 if (bModify)
// Rewrite pass: white space and line folding are dropped in plain mode
// and kept inside quoted strings and domain literals; everything else
// is written as UTF-8.
2129 enum Mode { MODE_PLAIN, MODE_QUOTED_STRING,
2130 MODE_DOMAIN_LITERAL };
2131 Mode eMode = MODE_PLAIN;
2132 while (pStart != pBodyPtr)
2133 switch (*pStart)
2135 case '\x0D': // CR
2136 if (startsWithLineFolding(
2137 pStart, pBodyPtr))
2139 if (eMode != MODE_PLAIN)
2140 rSink << sal_Char(
2141 pStart[2]);
2142 pStart += 3;
2144 else if (startsWithLineBreak(
2145 pStart, pBodyPtr))
2147 rSink << "\x0D\\\x0A";
2148 // CR, '\', LF
2149 pStart += 2;
2151 else
2153 rSink << '\x0D'; // CR
2154 ++pStart;
2156 break;
2158 case '\t':
2159 case ' ':
2160 if (eMode != MODE_PLAIN)
2161 rSink << sal_Char(*pStart);
2162 ++pStart;
2163 break;
2165 case '"':
2166 if (eMode == MODE_PLAIN)
2167 eMode = MODE_QUOTED_STRING;
2168 else if (eMode
2169 == MODE_QUOTED_STRING)
2170 eMode = MODE_PLAIN;
2171 rSink << '"';
2172 ++pStart;
2173 break;
2175 case '[':
2176 if (eMode == MODE_PLAIN)
2177 eMode = MODE_DOMAIN_LITERAL;
2178 rSink << '[';
2179 ++pStart;
2180 break;
2182 case ']':
2183 if (eMode == MODE_DOMAIN_LITERAL)
2184 eMode = MODE_PLAIN;
2185 rSink << ']';
2186 ++pStart;
2187 break;
2189 case '\\':
2190 rSink << '\\';
2191 if (++pStart < pBodyPtr)
2192 writeUTF8(rSink, *pStart++);
2193 break;
2195 default:
2196 writeUTF8(rSink, *pStart++);
2197 break;
2200 else
2201 rSink.write(pStart, pBodyPtr);
2202 break;
2205 case ENTITY_PHRASE:
2207 // Write a pending '<' if necessary:
2208 if (eBrackets == BRACKETS_OPENING)
2210 if (rSink.getColumn()
2211 + (bInitialSpace ? 1 : 0)
2212 >= rSink.getLineLengthLimit())
2213 rSink << INetMIMEOutputSink::endl << ' ';
2214 else if (bInitialSpace)
2215 rSink << ' ';
2216 rSink << '<';
2217 bInitialSpace = false;
2218 eBrackets = BRACKETS_INSIDE;
2221 // Calculate the length of in- and output:
2222 const sal_Unicode * pStart = pBodyPtr;
2223 bool bQuotedString = false;
2224 bool bEnd = false;
2225 while (pBodyPtr != pBodyEnd && !bEnd)
2226 switch (*pBodyPtr)
2228 case '\t':
2229 case ' ':
2230 case '\x0D': // CR
2231 if (bQuotedString)
2232 ++pBodyPtr;
2233 else
2235 const sal_Unicode * pLookAhead
2236 = skipLinearWhiteSpace(
2237 pBodyPtr, pBodyEnd);
2238 if (pLookAhead != pBodyEnd
2239 && (isAtomChar(*pLookAhead)
2240 || !isUSASCII(*pLookAhead)
2241 || *pLookAhead == '"'))
2242 pBodyPtr = pLookAhead;
2243 else
2244 bEnd = true;
2246 break;
2248 case '"':
2249 bQuotedString = !bQuotedString;
2250 ++pBodyPtr;
2251 break;
2253 case '\\':
2254 if (bQuotedString)
2256 if (++pBodyPtr != pBodyEnd)
2257 ++pBodyPtr;
2259 else
2260 bEnd = true;
2261 break;
2263 default:
2264 if (bQuotedString
2265 || isAtomChar(*pBodyPtr)
2266 || !isUSASCII(*pBodyPtr))
2267 ++pBodyPtr;
2268 else
2269 bEnd = true;
2270 break;
2273 // Write the phrase, introducing encoded-words
2274 // where necessary:
2275 INetMIMEEncodedWordOutputSink
2276 aOutput(
2277 rSink,
2278 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,
2279 bInitialSpace ?
2280 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2281 INetMIMEEncodedWordOutputSink::SPACE_ENCODED,
2282 ePreferredEncoding);
// Quotes are dropped, a backslash is dropped but the character after it
// is kept, and a CR is treated as the start of <CR LF WSP>, of which
// only the WSP is passed on.
2283 while (pStart != pBodyPtr)
2284 switch (*pStart)
2286 case '"':
2287 ++pStart;
2288 break;
2290 case '\\':
2291 if (++pStart != pBodyPtr)
2292 aOutput << *pStart++;
2293 break;
// NOTE(review): the scan loop above steps over a CR inside a quoted
// string one character at a time without checking that <LF WSP>
// follows, while this case unconditionally skips two characters and
// emits the third.  Confirm that a lone CR near the end of the scanned
// range cannot move pStart past pBodyPtr.
2295 case '\x0D': // CR
2296 pStart += 2;
2297 aOutput << *pStart++;
2298 break;
2300 default:
2301 aOutput << *pStart++;
2302 break;
2304 bInitialSpace = aOutput.flush();
2305 break;
2308 break;
2314 // static
2315 bool INetMIME::translateUTF8Char(const sal_Char *& rBegin,
2316 const sal_Char * pEnd,
2317 rtl_TextEncoding eEncoding,
2318 sal_uInt32 & rCharacter)
2320 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
2321 || static_cast< unsigned char >(*rBegin) >= 0xFE)
2322 return false;
2324 int nCount;
2325 sal_uInt32 nMin;
2326 sal_uInt32 nUCS4;
2327 const sal_Char * p = rBegin;
2328 if (static_cast< unsigned char >(*p) < 0xE0)
2330 nCount = 1;
2331 nMin = 0x80;
2332 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
2334 else if (static_cast< unsigned char >(*p) < 0xF0)
2336 nCount = 2;
2337 nMin = 0x800;
2338 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
2340 else if (static_cast< unsigned char >(*p) < 0xF8)
2342 nCount = 3;
2343 nMin = 0x10000;
2344 nUCS4 = static_cast< unsigned char >(*p) & 7;
2346 else if (static_cast< unsigned char >(*p) < 0xFC)
2348 nCount = 4;
2349 nMin = 0x200000;
2350 nUCS4 = static_cast< unsigned char >(*p) & 3;
2352 else
2354 nCount = 5;
2355 nMin = 0x4000000;
2356 nUCS4 = static_cast< unsigned char >(*p) & 1;
2358 ++p;
2360 for (; nCount-- > 0; ++p)
2361 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
2362 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
2363 else
2364 return false;
2366 if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
2367 return false;
2369 if (eEncoding >= RTL_TEXTENCODING_UCS4)
2370 rCharacter = nUCS4;
2371 else
2373 sal_Unicode aUTF16[2];
2374 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
2375 sal_Size nSize;
2376 sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
2377 nSize);
2378 if (!pBuffer)
2379 return false;
2380 DBG_ASSERT(nSize == 1,
2381 "INetMIME::translateUTF8Char(): Bad conversion");
2382 rCharacter = *pBuffer;
2383 delete[] pBuffer;
2385 rBegin = p;
2386 return true;
2389 // static
2390 OUString INetMIME::decodeHeaderFieldBody(HeaderFieldType eType,
2391 const OString& rBody)
2393 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
2394 // versions of StarOffice send mails with header fields where encoded
2395 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
2396 // '=', ',', '.', '"', ')', without any required white space in between.
2397 // And there appear to exist some broken mailers that only encode single
2398 // letters within words, like "Appel
2399 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
2400 // detect encoded words even when not propperly surrounded by white space.
2402 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
2404 // encoded-word = "=?"
2405 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
2406 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
2407 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
2408 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
2409 // "?="
2411 // base64 = ALPHA / DIGIT / "+" / "/"
2413 const sal_Char * pBegin = rBody.getStr();
2414 const sal_Char * pEnd = pBegin + rBody.getLength();
2416 OUString sDecoded;
2417 const sal_Char * pCopyBegin = pBegin;
2419 /* bool bStartEncodedWord = true; */
2420 const sal_Char * pWSPBegin = pBegin;
2421 bool bQuotedEncodedText = false;
2422 sal_uInt32 nCommentLevel = 0;
2424 for (const sal_Char * p = pBegin; p != pEnd;)
2426 OUString sEncodedText;
2427 if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
2429 const sal_Char * q = p + 1;
2430 bool bEncodedWord = q != pEnd && *q++ == '?';
2432 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
2433 if (bEncodedWord)
2435 const sal_Char * pCharsetBegin = q;
2436 const sal_Char * pLanguageBegin = 0;
2437 int nAlphaCount = 0;
2438 for (bool bDone = false; !bDone;)
2439 if (q == pEnd)
2441 bEncodedWord = false;
2442 bDone = true;
2444 else
2446 sal_Char cChar = *q++;
2447 switch (cChar)
2449 case '*':
2450 pLanguageBegin = q - 1;
2451 nAlphaCount = 0;
2452 break;
2454 case '-':
2455 if (pLanguageBegin != 0)
2457 if (nAlphaCount == 0)
2458 pLanguageBegin = 0;
2459 else
2460 nAlphaCount = 0;
2462 break;
2464 case '?':
2465 if (pCharsetBegin == q - 1)
2466 bEncodedWord = false;
2467 else
2469 eCharsetEncoding
2470 = getCharsetEncoding(
2471 pCharsetBegin,
2472 pLanguageBegin == 0
2473 || nAlphaCount == 0 ?
2474 q - 1 : pLanguageBegin);
2475 bEncodedWord = isMIMECharsetEncoding(
2476 eCharsetEncoding);
2477 eCharsetEncoding
2478 = translateFromMIME(eCharsetEncoding);
2480 bDone = true;
2481 break;
2483 default:
2484 if (pLanguageBegin != 0
2485 && (!isAlpha(cChar) || ++nAlphaCount > 8))
2486 pLanguageBegin = 0;
2487 break;
2492 bool bEncodingB = false;
2493 if (bEncodedWord)
2495 if (q == pEnd)
2496 bEncodedWord = false;
2497 else
2499 switch (*q++)
2501 case 'B':
2502 case 'b':
2503 bEncodingB = true;
2504 break;
2506 case 'Q':
2507 case 'q':
2508 bEncodingB = false;
2509 break;
2511 default:
2512 bEncodedWord = false;
2513 break;
2518 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
2520 OStringBuffer sText;
2521 if (bEncodedWord)
2523 if (bEncodingB)
2525 for (bool bDone = false; !bDone;)
2527 if (pEnd - q < 4)
2529 bEncodedWord = false;
2530 bDone = true;
2532 else
2534 bool bFinal = false;
2535 int nCount = 3;
2536 sal_uInt32 nValue = 0;
2537 for (int nShift = 18; nShift >= 0; nShift -= 6)
2539 int nWeight = getBase64Weight(*q++);
2540 if (nWeight == -2)
2542 bEncodedWord = false;
2543 bDone = true;
2544 break;
2546 if (nWeight == -1)
2548 if (!bFinal)
2550 if (nShift >= 12)
2552 bEncodedWord = false;
2553 bDone = true;
2554 break;
2556 bFinal = true;
2557 nCount = nShift == 6 ? 1 : 2;
2560 else
2561 nValue |= nWeight << nShift;
2563 if (bEncodedWord)
2565 for (int nShift = 16; nCount-- > 0; nShift -= 8)
2566 sText.append(sal_Char(nValue >> nShift & 0xFF));
2567 if (*q == '?')
2569 ++q;
2570 bDone = true;
2572 if (bFinal && !bDone)
2574 bEncodedWord = false;
2575 bDone = true;
2581 else
2583 const sal_Char * pEncodedTextBegin = q;
2584 const sal_Char * pEncodedTextCopyBegin = q;
2585 for (bool bDone = false; !bDone;)
2586 if (q == pEnd)
2588 bEncodedWord = false;
2589 bDone = true;
2591 else
2593 sal_uInt32 nChar = *q++;
2594 switch (nChar)
2596 case '=':
2598 if (pEnd - q < 2)
2600 bEncodedWord = false;
2601 bDone = true;
2602 break;
2604 int nDigit1 = getHexWeight(q[0]);
2605 int nDigit2 = getHexWeight(q[1]);
2606 if (nDigit1 < 0 || nDigit2 < 0)
2608 bEncodedWord = false;
2609 bDone = true;
2610 break;
2612 sText.append(rBody.copy(
2613 (pEncodedTextCopyBegin - pBegin),
2614 (q - 1 - pEncodedTextCopyBegin)));
2615 sText.append(sal_Char(nDigit1 << 4 | nDigit2));
2616 q += 2;
2617 pEncodedTextCopyBegin = q;
2618 break;
2621 case '?':
2622 if (q - pEncodedTextBegin > 1)
2623 sText.append(rBody.copy(
2624 (pEncodedTextCopyBegin - pBegin),
2625 (q - 1 - pEncodedTextCopyBegin)));
2626 else
2627 bEncodedWord = false;
2628 bDone = true;
2629 break;
2631 case '_':
2632 sText.append(rBody.copy(
2633 (pEncodedTextCopyBegin - pBegin),
2634 (q - 1 - pEncodedTextCopyBegin)));
2635 sText.append(' ');
2636 pEncodedTextCopyBegin = q;
2637 break;
2639 default:
2640 if (!isVisible(nChar))
2642 bEncodedWord = false;
2643 bDone = true;
2645 break;
2651 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
2653 sal_Unicode * pUnicodeBuffer = 0;
2654 sal_Size nUnicodeSize = 0;
2655 if (bEncodedWord)
2657 pUnicodeBuffer
2658 = convertToUnicode(sText.getStr(),
2659 sText.getStr() + sText.getLength(),
2660 eCharsetEncoding, nUnicodeSize);
2661 if (pUnicodeBuffer == 0)
2662 bEncodedWord = false;
2665 if (bEncodedWord)
2667 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
2668 if (eType == HEADER_FIELD_TEXT)
2669 sDecoded += OUString(
2670 pUnicodeBuffer,
2671 static_cast< xub_StrLen >(nUnicodeSize));
2672 else if (nCommentLevel == 0)
2674 sEncodedText = OUString(pUnicodeBuffer, nUnicodeSize);
2675 if (!bQuotedEncodedText)
2677 const sal_Unicode * pTextPtr = pUnicodeBuffer;
2678 const sal_Unicode * pTextEnd = pTextPtr
2679 + nUnicodeSize;
2680 for (; pTextPtr != pTextEnd; ++pTextPtr)
2681 if (!isEncodedWordTokenChar(*pTextPtr))
2683 bQuotedEncodedText = true;
2684 break;
2688 else
2690 const sal_Unicode * pTextPtr = pUnicodeBuffer;
2691 const sal_Unicode * pTextEnd = pTextPtr + nUnicodeSize;
2692 for (; pTextPtr != pTextEnd; ++pTextPtr)
2694 switch (*pTextPtr)
2696 case '(':
2697 case ')':
2698 case '\\':
2699 case '\x0D':
2700 case '=':
2701 sDecoded += "\\";
2702 break;
2704 sDecoded += OUString(*pTextPtr);
2707 delete[] pUnicodeBuffer;
2708 p = q;
2709 pCopyBegin = p;
2711 pWSPBegin = p;
2712 while (p != pEnd && isWhiteSpace(*p))
2713 ++p;
2714 /* bStartEncodedWord = p != pWSPBegin; */
2715 continue;
2719 if (!sEncodedText.isEmpty())
2721 if (bQuotedEncodedText)
2723 sDecoded += "\"";
2724 const sal_Unicode * pTextPtr = sEncodedText.getStr();
2725 const sal_Unicode * pTextEnd = pTextPtr + sEncodedText.getLength();
2726 for (;pTextPtr != pTextEnd; ++pTextPtr)
2728 switch (*pTextPtr)
2730 case '"':
2731 case '\\':
2732 case '\x0D':
2733 sDecoded += "\\";
2734 break;
2736 sDecoded += OUString(*pTextPtr);
2738 sDecoded += "\"";
2740 else
2741 sDecoded += sEncodedText;
2742 bQuotedEncodedText = false;
2745 if (p == pEnd)
2746 break;
2748 switch (*p++)
2750 case '"':
2751 if (eType != HEADER_FIELD_TEXT && nCommentLevel == 0)
2753 const sal_Char * pQuotedStringEnd
2754 = skipQuotedString(p - 1, pEnd);
2755 p = pQuotedStringEnd == p - 1 ? pEnd : pQuotedStringEnd;
2757 /* bStartEncodedWord = true; */
2758 break;
2760 case '(':
2761 if (eType != HEADER_FIELD_TEXT)
2762 ++nCommentLevel;
2763 /* bStartEncodedWord = true; */
2764 break;
2766 case ')':
2767 if (nCommentLevel > 0)
2768 --nCommentLevel;
2769 /* bStartEncodedWord = false; */
2770 break;
2772 default:
2774 const sal_Char * pUTF8Begin = p - 1;
2775 const sal_Char * pUTF8End = pUTF8Begin;
2776 sal_uInt32 nCharacter = 0;
2777 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
2778 nCharacter))
2780 appendISO88591(sDecoded, pCopyBegin, p - 1);
2781 sal_Unicode aUTF16Buf[2];
2782 xub_StrLen nUTF16Len = static_cast< xub_StrLen >(
2783 putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf);
2784 sDecoded += OUString(aUTF16Buf, nUTF16Len);
2785 p = pUTF8End;
2786 pCopyBegin = p;
2788 /* bStartEncodedWord = false; */
2789 break;
2792 pWSPBegin = p;
2795 appendISO88591(sDecoded, pCopyBegin, pEnd);
2796 return sDecoded;
2799 // INetMIMEOutputSink
2801 // virtual
2802 sal_Size INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
2804 sal_Size nLength = rtl_str_getLength(pSequence);
2805 writeSequence(pSequence, pSequence + nLength);
2806 return nLength;
2809 // virtual
2810 void INetMIMEOutputSink::writeSequence(const sal_uInt32 * pBegin,
2811 const sal_uInt32 * pEnd)
2813 DBG_ASSERT(pBegin && pBegin <= pEnd,
2814 "INetMIMEOutputSink::writeSequence(): Bad sequence");
2816 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
2817 sal_Char * pBufferEnd = pBufferBegin;
2818 while (pBegin != pEnd)
2820 DBG_ASSERT(*pBegin < 256,
2821 "INetMIMEOutputSink::writeSequence(): Bad octet");
2822 *pBufferEnd++ = sal_Char(*pBegin++);
2824 writeSequence(pBufferBegin, pBufferEnd);
2825 delete[] pBufferBegin;
2828 // virtual
2829 void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
2830 const sal_Unicode * pEnd)
2832 DBG_ASSERT(pBegin && pBegin <= pEnd,
2833 "INetMIMEOutputSink::writeSequence(): Bad sequence");
2835 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
2836 sal_Char * pBufferEnd = pBufferBegin;
2837 while (pBegin != pEnd)
2839 DBG_ASSERT(*pBegin < 256,
2840 "INetMIMEOutputSink::writeSequence(): Bad octet");
2841 *pBufferEnd++ = sal_Char(*pBegin++);
2843 writeSequence(pBufferBegin, pBufferEnd);
2844 delete[] pBufferBegin;
2847 // virtual
2848 ErrCode INetMIMEOutputSink::getError() const
2850 return ERRCODE_NONE;
2853 void INetMIMEOutputSink::writeLineEnd()
2855 static const sal_Char aCRLF[2] = { 0x0D, 0x0A };
2856 writeSequence(aCRLF, aCRLF + 2);
2857 m_nColumn = 0;
2860 // INetMIMEStringOutputSink
2862 // virtual
2863 void INetMIMEStringOutputSink::writeSequence(const sal_Char * pBegin,
2864 const sal_Char * pEnd)
2866 OSL_ENSURE(pBegin && pBegin <= pEnd,
2867 "INetMIMEStringOutputSink::writeSequence(): Bad sequence");
2869 m_aBuffer.append(pBegin, pEnd - pBegin);
2872 // virtual
2873 ErrCode INetMIMEStringOutputSink::getError() const
2875 return ERRCODE_NONE;
2878 // INetMIMEEncodedWordOutputSink
2880 static const sal_Char aEscape[128]
2881 = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x00
2882 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x01
2883 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x02
2884 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x03
2885 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x04
2886 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x05
2887 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x06
2888 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x07
2889 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x08
2890 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x09
2891 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0A
2892 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0B
2893 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0C
2894 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0D
2895 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0E
2896 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0F
2897 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x10
2898 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x11
2899 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x12
2900 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x13
2901 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x14
2902 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x15
2903 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x16
2904 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x17
2905 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x18
2906 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x19
2907 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1A
2908 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1B
2909 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1C
2910 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1D
2911 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1E
2912 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1F
2913 0, // ' '
2914 0, // '!'
2915 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '"'
2916 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '#'
2917 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '$'
2918 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '%'
2919 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '&'
2920 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '''
2921 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '('
2922 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ')'
2923 0, // '*'
2924 0, // '+'
2925 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ','
2926 0, // '-'
2927 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '.'
2928 0, // '/'
2929 0, // '0'
2930 0, // '1'
2931 0, // '2'
2932 0, // '3'
2933 0, // '4'
2934 0, // '5'
2935 0, // '6'
2936 0, // '7'
2937 0, // '8'
2938 0, // '9'
2939 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ':'
2940 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ';'
2941 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '<'
2942 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '='
2943 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '>'
2944 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '?'
2945 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '@'
2946 0, // 'A'
2947 0, // 'B'
2948 0, // 'C'
2949 0, // 'D'
2950 0, // 'E'
2951 0, // 'F'
2952 0, // 'G'
2953 0, // 'H'
2954 0, // 'I'
2955 0, // 'J'
2956 0, // 'K'
2957 0, // 'L'
2958 0, // 'M'
2959 0, // 'N'
2960 0, // 'O'
2961 0, // 'P'
2962 0, // 'Q'
2963 0, // 'R'
2964 0, // 'S'
2965 0, // 'T'
2966 0, // 'U'
2967 0, // 'V'
2968 0, // 'W'
2969 0, // 'X'
2970 0, // 'Y'
2971 0, // 'Z'
2972 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '['
2973 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '\'
2974 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ']'
2975 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '^'
2976 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '_'
2977 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '`'
2978 0, // 'a'
2979 0, // 'b'
2980 0, // 'c'
2981 0, // 'd'
2982 0, // 'e'
2983 0, // 'f'
2984 0, // 'g'
2985 0, // 'h'
2986 0, // 'i'
2987 0, // 'j'
2988 0, // 'k'
2989 0, // 'l'
2990 0, // 'm'
2991 0, // 'n'
2992 0, // 'o'
2993 0, // 'p'
2994 0, // 'q'
2995 0, // 'r'
2996 0, // 's'
2997 0, // 't'
2998 0, // 'u'
2999 0, // 'v'
3000 0, // 'w'
3001 0, // 'x'
3002 0, // 'y'
3003 0, // 'z'
3004 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '{'
3005 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '|'
3006 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '}'
3007 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '~'
3008 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE }; // DEL
3010 inline bool
3011 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar) const
3013 return !INetMIME::isUSASCII(nChar) || aEscape[nChar] & m_eContext;
// Writes the word buffered in [m_pBuffer, m_pBufferEnd) to m_rSink and
// resets the per-word state.
//
// The word is written in one of three ways, selected through m_eCoding:
// unchanged (CODING_NONE), as a quoted string (CODING_QUOTED) or as one or
// more "=?charset?Q?...?=" encoded words (CODING_ENCODED).  A word that the
// encoded-word state machine recognised as already being an encoded word
// (STATE_SECOND_EQUALS) is copied verbatim.  How the preceding word was
// written (m_ePrevCoding) decides which closing '"' or "?=" and which
// separating spaces have to be written first.
//
// bWriteTrailer  if true, the closing '"' respectively "?=" of a quoted or
//                encoded word is written right away; if false it is left
//                pending and the coding is carried over into m_ePrevCoding.
3016 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
3018 if (m_eInitialSpace == SPACE_ALWAYS && m_nExtraSpaces == 0)
3019 m_nExtraSpaces = 1;
3021 if (m_eEncodedWordState == STATE_SECOND_EQUALS)
3023 // If the text is already an encoded word, copy it verbatim:
3024 sal_uInt32 nSize = m_pBufferEnd - m_pBuffer;
3025 switch (m_ePrevCoding)
3027 case CODING_QUOTED:
3028 m_rSink << '"';
// No break above: after closing the pending quoted string, CODING_QUOTED
// continues with the CODING_NONE handling.
3029 case CODING_NONE:
3030 if (m_eInitialSpace == SPACE_ENCODED && m_nExtraSpaces == 0)
3031 m_nExtraSpaces = 1;
// Write all but the last pending space, starting a new line whenever the
// line length limit has been reached:
3032 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3034 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3035 m_rSink << INetMIMEOutputSink::endl;
3036 m_rSink << ' ';
// The last space is preceded by a line break if the word would not fit on
// the current line:
3038 if (m_nExtraSpaces == 1)
3040 if (m_rSink.getColumn() + nSize
3041 >= m_rSink.getLineLengthLimit())
3042 m_rSink << INetMIMEOutputSink::endl;
3043 m_rSink << ' ';
3045 break;
3047 case CODING_ENCODED:
// The previous encoded word is still open: the pending spaces are written
// into it as '_' before it is closed with "?=".
3049 const sal_Char * pCharsetName
3050 = INetMIME::getCharsetName(m_ePrevMIMEEncoding);
3051 while (m_nExtraSpaces-- > 0)
3053 if (m_rSink.getColumn()
3054 > m_rSink.getLineLengthLimit() - 3)
3055 m_rSink << "?=" << INetMIMEOutputSink::endl << " =?"
3056 << pCharsetName << "?Q?";
3057 m_rSink << '_';
3059 m_rSink << "?=";
// No break above: CODING_ENCODED continues with the
// CODING_ENCODED_TERMINATED handling.
3061 case CODING_ENCODED_TERMINATED:
3062 if (m_rSink.getColumn() + nSize
3063 > m_rSink.getLineLengthLimit() - 1)
3064 m_rSink << INetMIMEOutputSink::endl;
3065 m_rSink << ' ';
3066 break;
3068 m_rSink.write(m_pBuffer, m_pBufferEnd);
3069 m_eCoding = CODING_ENCODED_TERMINATED;
3071 else
3073 // If the text itself is too long to fit into a single line, make it
3074 // into multiple encoded words:
3075 switch (m_eCoding)
3077 case CODING_NONE:
3078 if (m_nExtraSpaces == 0)
3080 DBG_ASSERT(m_ePrevCoding == CODING_NONE
3081 || m_pBuffer == m_pBufferEnd,
3082 "INetMIMEEncodedWordOutputSink::finish():"
3083 " Bad state");
3084 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3085 > m_rSink.getLineLengthLimit())
3086 m_eCoding = CODING_ENCODED;
3088 else
3090 OSL_ASSERT(m_pBufferEnd >= m_pBuffer);
3091 if (static_cast< std::size_t >(m_pBufferEnd - m_pBuffer)
3092 > m_rSink.getLineLengthLimit() - 1)
3094 m_eCoding = CODING_ENCODED;
3097 break;
3099 case CODING_QUOTED:
3100 if (m_nExtraSpaces == 0)
3102 DBG_ASSERT(m_ePrevCoding == CODING_NONE,
3103 "INetMIMEEncodedWordOutputSink::finish():"
3104 " Bad state");
3105 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3106 + m_nQuotedEscaped
3107 > m_rSink.getLineLengthLimit() - 2)
3108 m_eCoding = CODING_ENCODED;
3110 else if ((m_pBufferEnd - m_pBuffer) + m_nQuotedEscaped
3111 > m_rSink.getLineLengthLimit() - 3)
3112 m_eCoding = CODING_ENCODED;
3113 break;
3115 default:
3116 break;
// Write the word according to the coding decided on above:
3119 switch (m_eCoding)
3121 case CODING_NONE:
3122 switch (m_ePrevCoding)
3124 case CODING_QUOTED:
// If the word still fits on the current line it is written inside the
// quoted string that is already open; otherwise that string is closed first.
3125 if (m_rSink.getColumn() + m_nExtraSpaces
3126 + (m_pBufferEnd - m_pBuffer)
3127 < m_rSink.getLineLengthLimit())
3128 m_eCoding = CODING_QUOTED;
3129 else
3130 m_rSink << '"';
3131 break;
3133 case CODING_ENCODED:
3134 m_rSink << "?=";
3135 break;
3137 default:
3138 break;
3140 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3142 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3143 m_rSink << INetMIMEOutputSink::endl;
3144 m_rSink << ' ';
3146 if (m_nExtraSpaces == 1)
3148 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3149 >= m_rSink.getLineLengthLimit())
3150 m_rSink << INetMIMEOutputSink::endl;
3151 m_rSink << ' ';
3153 m_rSink.write(m_pBuffer, m_pBufferEnd);
3154 if (m_eCoding == CODING_QUOTED && bWriteTrailer)
3156 m_rSink << '"';
3157 m_eCoding = CODING_NONE;
3159 break;
3161 case CODING_QUOTED:
3163 bool bInsertLeadingQuote = true;
// Buffered characters plus one backslash per escaped character plus the two
// enclosing quotes:
3164 sal_uInt32 nSize = (m_pBufferEnd - m_pBuffer)
3165 + m_nQuotedEscaped + 2;
3166 switch (m_ePrevCoding)
3168 case CODING_QUOTED:
// Continue the quoted string that is already open if the word fits on the
// current line (no new leading quote is needed then); otherwise close it.
3169 if (m_rSink.getColumn() + m_nExtraSpaces + nSize - 1
3170 < m_rSink.getLineLengthLimit())
3172 bInsertLeadingQuote = false;
3173 --nSize;
3175 else
3176 m_rSink << '"';
3177 break;
3179 case CODING_ENCODED:
3180 m_rSink << "?=";
3181 break;
3183 default:
3184 break;
3186 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3188 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3189 m_rSink << INetMIMEOutputSink::endl;
3190 m_rSink << ' ';
3192 if (m_nExtraSpaces == 1)
3194 if (m_rSink.getColumn() + nSize
3195 >= m_rSink.getLineLengthLimit())
3196 m_rSink << INetMIMEOutputSink::endl;
3197 m_rSink << ' ';
3199 if (bInsertLeadingQuote)
3200 m_rSink << '"';
3201 for (const sal_Unicode * p = m_pBuffer; p != m_pBufferEnd;
3202 ++p)
3204 if (INetMIME::needsQuotedStringEscape(*p))
3205 m_rSink << '\\';
3206 m_rSink << sal_Char(*p);
3208 if (bWriteTrailer)
3210 m_rSink << '"';
3211 m_eCoding = CODING_NONE;
3213 break;
3216 case CODING_ENCODED:
3218 rtl_TextEncoding eCharsetEncoding
3219 = m_pEncodingList->
3220 getPreferredEncoding(RTL_TEXTENCODING_UTF8);
3221 rtl_TextEncoding eMIMEEncoding
3222 = INetMIME::translateToMIME(eCharsetEncoding);
3224 // The non UTF-8 code will only work for stateless single byte
3225 // character encodings (see also below):
3226 sal_Char * pTargetBuffer = NULL;
3227 sal_Size nTargetSize = 0;
// nSize receives the number of columns the encoded text will occupy: an
// escaped byte is written as three characters.
3228 sal_uInt32 nSize;
3229 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
3231 nSize = 0;
3232 for (sal_Unicode const * p = m_pBuffer;
3233 p != m_pBufferEnd;)
3235 sal_uInt32 nUTF32
3236 = INetMIME::getUTF32Character(p, m_pBufferEnd);
// NOTE(review): the alternative after the ':' of this conditional is not
// visible in this listing; the matching width computation in the write loop
// further down uses 1 - confirm.
3237 nSize += needsEncodedWordEscape(nUTF32) ?
3238 3 * INetMIME::getUTF8OctetCount(nUTF32) :
3240 // only US-ASCII characters (that are converted to
3241 // a single byte by UTF-8) need no encoded word
3242 // escapes...
3245 else
3247 rtl_UnicodeToTextConverter hConverter
3248 = rtl_createUnicodeToTextConverter(eCharsetEncoding);
3249 rtl_UnicodeToTextContext hContext
3250 = rtl_createUnicodeToTextContext(hConverter);
// Convert into a buffer that is enlarged by about a third and retried as
// long as the converter reports that the destination was too small:
3251 for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;;
3252 nBufferSize += nBufferSize / 3 + 1)
3254 pTargetBuffer = new sal_Char[nBufferSize];
3255 sal_uInt32 nInfo;
3256 sal_Size nSrcCvtBytes;
3257 nTargetSize
3258 = rtl_convertUnicodeToText(
3259 hConverter, hContext, m_pBuffer,
3260 m_pBufferEnd - m_pBuffer, pTargetBuffer,
3261 nBufferSize,
3262 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE
3263 | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE,
3264 &nInfo, &nSrcCvtBytes);
3265 if (!(nInfo
3266 & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
3267 break;
3268 delete[] pTargetBuffer;
3269 pTargetBuffer = NULL;
3270 rtl_resetUnicodeToTextContext(hConverter, hContext);
3272 rtl_destroyUnicodeToTextContext(hConverter, hContext);
3273 rtl_destroyUnicodeToTextConverter(hConverter);
// One column per converted byte, plus two more for each byte that has to be
// written as an escape sequence:
3275 nSize = nTargetSize;
3276 for (sal_Size k = 0; k < nTargetSize; ++k)
3277 if (needsEncodedWordEscape(sal_uChar(
3278 pTargetBuffer[k])))
3279 nSize += 2;
3282 const sal_Char * pCharsetName
3283 = INetMIME::getCharsetName(eMIMEEncoding);
3284 sal_uInt32 nWrapperSize = rtl_str_getLength(pCharsetName) + 7;
3285 // '=?', '?Q?', '?='
// Finish whatever the previous word left open and write the "=?charset?Q?"
// introducer where a new encoded word has to be started:
3287 switch (m_ePrevCoding)
3289 case CODING_QUOTED:
3290 m_rSink << '"';
// No break above: CODING_QUOTED continues with the CODING_NONE handling.
3291 case CODING_NONE:
3292 if (m_eInitialSpace == SPACE_ENCODED
3293 && m_nExtraSpaces == 0)
3294 m_nExtraSpaces = 1;
3295 nSize += nWrapperSize;
3296 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3298 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3299 m_rSink << INetMIMEOutputSink::endl;
3300 m_rSink << ' ';
3302 if (m_nExtraSpaces == 1)
3304 if (m_rSink.getColumn() + nSize >= m_rSink.getLineLengthLimit())
3305 m_rSink << INetMIMEOutputSink::endl;
3306 m_rSink << ' ';
3308 m_rSink << "=?" << pCharsetName << "?Q?";
3309 break;
3311 case CODING_ENCODED:
// The open encoded word is continued unless the charset differs or the text
// does not fit on the line; in those cases it is closed and a new one begun.
3312 if (m_ePrevMIMEEncoding != eMIMEEncoding
3313 || m_rSink.getColumn() + m_nExtraSpaces + nSize
3314 > m_rSink.getLineLengthLimit() - 2)
3316 m_rSink << "?=";
3317 if (m_rSink.getColumn() + nWrapperSize
3318 + m_nExtraSpaces + nSize
3319 > m_rSink.getLineLengthLimit() - 1)
3320 m_rSink << INetMIMEOutputSink::endl;
3321 m_rSink << " =?" << pCharsetName << "?Q?";
3323 while (m_nExtraSpaces-- > 0)
3325 if (m_rSink.getColumn()
3326 > m_rSink.getLineLengthLimit() - 3)
3327 m_rSink << "?=" << INetMIMEOutputSink::endl
3328 << " =?" << pCharsetName << "?Q?";
3329 m_rSink << '_';
3331 break;
3333 case CODING_ENCODED_TERMINATED:
3334 if (m_rSink.getColumn() + nWrapperSize
3335 + m_nExtraSpaces + nSize
3336 > m_rSink.getLineLengthLimit() - 1)
3337 m_rSink << INetMIMEOutputSink::endl;
3338 m_rSink << " =?" << pCharsetName << "?Q?";
3339 while (m_nExtraSpaces-- > 0)
3341 if (m_rSink.getColumn()
3342 > m_rSink.getLineLengthLimit() - 3)
3343 m_rSink << "?=" << INetMIMEOutputSink::endl
3344 << " =?" << pCharsetName << "?Q?";
3345 m_rSink << '_';
3347 break;
3350 // The non UTF-8 code will only work for stateless single byte
3351 // character encodings (see also above):
3352 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
3354 bool bInitial = true;
3355 for (sal_Unicode const * p = m_pBuffer;
3356 p != m_pBufferEnd;)
3358 sal_uInt32 nUTF32
3359 = INetMIME::getUTF32Character(p, m_pBufferEnd);
3360 bool bEscape = needsEncodedWordEscape(nUTF32);
3361 sal_uInt32 nWidth
3362 = bEscape ?
3363 3 * INetMIME::getUTF8OctetCount(nUTF32) : 1;
3364 // only US-ASCII characters (that are converted to
3365 // a single byte by UTF-8) need no encoded word
3366 // escapes...
// Except before the very first character, start a new encoded word on a new
// line if the character plus the closing "?=" would exceed the line limit:
3367 if (!bInitial
3368 && m_rSink.getColumn() + nWidth + 2
3369 > m_rSink.getLineLengthLimit())
3370 m_rSink << "?=" << INetMIMEOutputSink::endl
3371 << " =?" << pCharsetName << "?Q?";
3372 if (bEscape)
// Write the UTF-8 encoding of nUTF32 (one to four bytes), each byte as an
// escape sequence:
3374 DBG_ASSERT(
3375 nUTF32 < 0x10FFFF,
3376 "INetMIMEEncodedWordOutputSink::finish():"
3377 " Bad char");
3378 if (nUTF32 < 0x80)
3379 INetMIME::writeEscapeSequence(m_rSink,
3380 nUTF32);
3381 else if (nUTF32 < 0x800)
3383 INetMIME::writeEscapeSequence(m_rSink,
3384 (nUTF32 >> 6)
3385 | 0xC0);
3386 INetMIME::writeEscapeSequence(m_rSink,
3387 (nUTF32 & 0x3F)
3388 | 0x80);
3390 else if (nUTF32 < 0x10000)
3392 INetMIME::writeEscapeSequence(m_rSink,
3393 (nUTF32 >> 12)
3394 | 0xE0);
3395 INetMIME::writeEscapeSequence(m_rSink,
3396 ((nUTF32 >> 6)
3397 & 0x3F)
3398 | 0x80);
3399 INetMIME::writeEscapeSequence(m_rSink,
3400 (nUTF32 & 0x3F)
3401 | 0x80);
3403 else
3405 INetMIME::writeEscapeSequence(m_rSink,
3406 (nUTF32 >> 18)
3407 | 0xF0);
3408 INetMIME::writeEscapeSequence(m_rSink,
3409 ((nUTF32 >> 12)
3410 & 0x3F)
3411 | 0x80);
3412 INetMIME::writeEscapeSequence(m_rSink,
3413 ((nUTF32 >> 6)
3414 & 0x3F)
3415 | 0x80);
3416 INetMIME::writeEscapeSequence(m_rSink,
3417 (nUTF32 & 0x3F)
3418 | 0x80);
3421 else
3422 m_rSink << sal_Char(nUTF32);
3423 bInitial = false;
3426 else
3428 for (sal_Size k = 0; k < nTargetSize; ++k)
3430 sal_uInt32 nUCS4 = sal_uChar(pTargetBuffer[k]);
3431 bool bEscape = needsEncodedWordEscape(nUCS4);
// An escaped byte takes three columns, a plain one takes one; both counts
// include two further columns for the closing "?=":
3432 if (k > 0
3433 && m_rSink.getColumn() + (bEscape ? 5 : 3)
3434 > m_rSink.getLineLengthLimit())
3435 m_rSink << "?=" << INetMIMEOutputSink::endl
3436 << " =?" << pCharsetName << "?Q?";
3437 if (bEscape)
3438 INetMIME::writeEscapeSequence(m_rSink, nUCS4);
3439 else
3440 m_rSink << sal_Char(nUCS4);
3442 delete[] pTargetBuffer;
3445 if (bWriteTrailer)
3447 m_rSink << "?=";
3448 m_eCoding = CODING_ENCODED_TERMINATED;
3451 m_ePrevMIMEEncoding = eMIMEEncoding;
3452 break;
3455 default:
3456 OSL_ASSERT(false);
3457 break;
// Reset the per-word state; the coding used for this word is remembered in
// m_ePrevCoding for the next call.
3461 m_eInitialSpace = SPACE_NO;
3462 m_nExtraSpaces = 0;
3463 m_pEncodingList->reset();
3464 m_pBufferEnd = m_pBuffer;
3465 m_ePrevCoding = m_eCoding;
3466 m_eCoding = CODING_NONE;
3467 m_nQuotedEscaped = 0;
3468 m_eEncodedWordState = STATE_INITIAL;
3471 INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink()
3473 rtl_freeMemory(m_pBuffer);
3474 delete m_pEncodingList;
3477 INetMIMEEncodedWordOutputSink &
3478 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar)
3480 if (nChar == ' ')
3482 if (m_pBufferEnd != m_pBuffer)
3483 finish(false);
3484 ++m_nExtraSpaces;
3486 else
3488 // Check for an already encoded word:
3489 switch (m_eEncodedWordState)
3491 case STATE_INITIAL:
3492 if (nChar == '=')
3493 m_eEncodedWordState = STATE_FIRST_EQUALS;
3494 else
3495 m_eEncodedWordState = STATE_BAD;
3496 break;
3498 case STATE_FIRST_EQUALS:
3499 if (nChar == '?')
3500 m_eEncodedWordState = STATE_FIRST_EQUALS;
3501 else
3502 m_eEncodedWordState = STATE_BAD;
3503 break;
3505 case STATE_FIRST_QUESTION:
3506 if (INetMIME::isEncodedWordTokenChar(nChar))
3507 m_eEncodedWordState = STATE_CHARSET;
3508 else
3509 m_eEncodedWordState = STATE_BAD;
3510 break;
3512 case STATE_CHARSET:
3513 if (nChar == '?')
3514 m_eEncodedWordState = STATE_SECOND_QUESTION;
3515 else if (!INetMIME::isEncodedWordTokenChar(nChar))
3516 m_eEncodedWordState = STATE_BAD;
3517 break;
3519 case STATE_SECOND_QUESTION:
3520 if (nChar == 'B' || nChar == 'Q'
3521 || nChar == 'b' || nChar == 'q')
3522 m_eEncodedWordState = STATE_ENCODING;
3523 else
3524 m_eEncodedWordState = STATE_BAD;
3525 break;
3527 case STATE_ENCODING:
3528 if (nChar == '?')
3529 m_eEncodedWordState = STATE_THIRD_QUESTION;
3530 else
3531 m_eEncodedWordState = STATE_BAD;
3532 break;
3534 case STATE_THIRD_QUESTION:
3535 if (INetMIME::isVisible(nChar) && nChar != '?')
3536 m_eEncodedWordState = STATE_ENCODED_TEXT;
3537 else
3538 m_eEncodedWordState = STATE_BAD;
3539 break;
3541 case STATE_ENCODED_TEXT:
3542 if (nChar == '?')
3543 m_eEncodedWordState = STATE_FOURTH_QUESTION;
3544 else if (!INetMIME::isVisible(nChar))
3545 m_eEncodedWordState = STATE_BAD;
3546 break;
3548 case STATE_FOURTH_QUESTION:
3549 if (nChar == '=')
3550 m_eEncodedWordState = STATE_SECOND_EQUALS;
3551 else
3552 m_eEncodedWordState = STATE_BAD;
3553 break;
3555 case STATE_SECOND_EQUALS:
3556 m_eEncodedWordState = STATE_BAD;
3557 break;
3559 case STATE_BAD:
3560 break;
3563 // Update encoding:
3564 m_pEncodingList->includes(nChar);
3566 // Update coding:
3567 enum { TENQ = 1, // CONTEXT_TEXT, CODING_ENCODED
3568 CENQ = 2, // CONTEXT_COMMENT, CODING_ENCODED
3569 PQTD = 4, // CONTEXT_PHRASE, CODING_QUOTED
3570 PENQ = 8 }; // CONTEXT_PHRASE, CODING_ENCODED
3571 static const sal_Char aMinimal[128]
3572 = { TENQ | CENQ | PENQ, // 0x00
3573 TENQ | CENQ | PENQ, // 0x01
3574 TENQ | CENQ | PENQ, // 0x02
3575 TENQ | CENQ | PENQ, // 0x03
3576 TENQ | CENQ | PENQ, // 0x04
3577 TENQ | CENQ | PENQ, // 0x05
3578 TENQ | CENQ | PENQ, // 0x06
3579 TENQ | CENQ | PENQ, // 0x07
3580 TENQ | CENQ | PENQ, // 0x08
3581 TENQ | CENQ | PENQ, // 0x09
3582 TENQ | CENQ | PENQ, // 0x0A
3583 TENQ | CENQ | PENQ, // 0x0B
3584 TENQ | CENQ | PENQ, // 0x0C
3585 TENQ | CENQ | PENQ, // 0x0D
3586 TENQ | CENQ | PENQ, // 0x0E
3587 TENQ | CENQ | PENQ, // 0x0F
3588 TENQ | CENQ | PENQ, // 0x10
3589 TENQ | CENQ | PENQ, // 0x11
3590 TENQ | CENQ | PENQ, // 0x12
3591 TENQ | CENQ | PENQ, // 0x13
3592 TENQ | CENQ | PENQ, // 0x14
3593 TENQ | CENQ | PENQ, // 0x15
3594 TENQ | CENQ | PENQ, // 0x16
3595 TENQ | CENQ | PENQ, // 0x17
3596 TENQ | CENQ | PENQ, // 0x18
3597 TENQ | CENQ | PENQ, // 0x19
3598 TENQ | CENQ | PENQ, // 0x1A
3599 TENQ | CENQ | PENQ, // 0x1B
3600 TENQ | CENQ | PENQ, // 0x1C
3601 TENQ | CENQ | PENQ, // 0x1D
3602 TENQ | CENQ | PENQ, // 0x1E
3603 TENQ | CENQ | PENQ, // 0x1F
3604 0, // ' '
3605 0, // '!'
3606 PQTD , // '"'
3607 0, // '#'
3608 0, // '$'
3609 0, // '%'
3610 0, // '&'
3611 0, // '''
3612 CENQ | PQTD , // '('
3613 CENQ | PQTD , // ')'
3614 0, // '*'
3615 0, // '+'
3616 PQTD , // ','
3617 0, // '-'
3618 PQTD , // '.'
3619 0, // '/'
3620 0, // '0'
3621 0, // '1'
3622 0, // '2'
3623 0, // '3'
3624 0, // '4'
3625 0, // '5'
3626 0, // '6'
3627 0, // '7'
3628 0, // '8'
3629 0, // '9'
3630 PQTD , // ':'
3631 PQTD , // ';'
3632 PQTD , // '<'
3633 0, // '='
3634 PQTD , // '>'
3635 0, // '?'
3636 PQTD , // '@'
3637 0, // 'A'
3638 0, // 'B'
3639 0, // 'C'
3640 0, // 'D'
3641 0, // 'E'
3642 0, // 'F'
3643 0, // 'G'
3644 0, // 'H'
3645 0, // 'I'
3646 0, // 'J'
3647 0, // 'K'
3648 0, // 'L'
3649 0, // 'M'
3650 0, // 'N'
3651 0, // 'O'
3652 0, // 'P'
3653 0, // 'Q'
3654 0, // 'R'
3655 0, // 'S'
3656 0, // 'T'
3657 0, // 'U'
3658 0, // 'V'
3659 0, // 'W'
3660 0, // 'X'
3661 0, // 'Y'
3662 0, // 'Z'
3663 PQTD , // '['
3664 CENQ | PQTD , // '\'
3665 PQTD , // ']'
3666 0, // '^'
3667 0, // '_'
3668 0, // '`'
3669 0, // 'a'
3670 0, // 'b'
3671 0, // 'c'
3672 0, // 'd'
3673 0, // 'e'
3674 0, // 'f'
3675 0, // 'g'
3676 0, // 'h'
3677 0, // 'i'
3678 0, // 'j'
3679 0, // 'k'
3680 0, // 'l'
3681 0, // 'm'
3682 0, // 'n'
3683 0, // 'o'
3684 0, // 'p'
3685 0, // 'q'
3686 0, // 'r'
3687 0, // 's'
3688 0, // 't'
3689 0, // 'u'
3690 0, // 'v'
3691 0, // 'w'
3692 0, // 'x'
3693 0, // 'y'
3694 0, // 'z'
3695 0, // '{'
3696 0, // '|'
3697 0, // '}'
3698 0, // '~'
3699 TENQ | CENQ | PENQ }; // DEL
3700 Coding eNewCoding = !INetMIME::isUSASCII(nChar) ? CODING_ENCODED :
3701 m_eContext == CONTEXT_PHRASE ?
3702 Coding(aMinimal[nChar] >> 2) :
3703 aMinimal[nChar] & m_eContext ? CODING_ENCODED :
3704 CODING_NONE;
3705 if (eNewCoding > m_eCoding)
3706 m_eCoding = eNewCoding;
3707 if (m_eCoding == CODING_QUOTED
3708 && INetMIME::needsQuotedStringEscape(nChar))
3709 ++m_nQuotedEscaped;
3711 // Append to buffer:
3712 if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize)
3714 m_pBuffer
3715 = static_cast< sal_Unicode * >(
3716 rtl_reallocateMemory(m_pBuffer,
3717 (m_nBufferSize + BUFFER_SIZE)
3718 * sizeof (sal_Unicode)));
3719 m_pBufferEnd = m_pBuffer + m_nBufferSize;
3720 m_nBufferSize += BUFFER_SIZE;
3722 *m_pBufferEnd++ = sal_Unicode(nChar);
3724 return *this;
3727 // INetContentTypeParameterList
3729 void INetContentTypeParameterList::Clear()
3731 maEntries.clear();
3734 const INetContentTypeParameter *
3735 INetContentTypeParameterList::find(const OString& rAttribute) const
3737 boost::ptr_vector<INetContentTypeParameter>::const_iterator iter;
3738 for (iter = maEntries.begin(); iter != maEntries.end(); ++iter)
3740 if (iter->m_sAttribute.equalsIgnoreAsciiCase(rAttribute))
3741 return &(*iter);
3744 return NULL;
3747 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */