bump product version to 7.6.3.2-android
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blobe6b725490e5887739d6fb95bc396ed43618cc7f5
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
33 namespace {
35 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
36 const char * pEnd);
38 /** Check for US-ASCII white space character.
40 @param nChar Some UCS-4 character.
42 @return True if nChar is a US-ASCII white space character (US-ASCII
43 0x09 or 0x20).
45 bool isWhiteSpace(sal_uInt32 nChar)
47 return nChar == '\t' || nChar == ' ';
50 /** Get the Base 64 digit weight of a US-ASCII character.
52 @param nChar Some UCS-4 character.
54 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
55 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57 character (US-ASCII '='), return -1; otherwise, return -2.
59 int getBase64Weight(sal_uInt32 nChar)
61 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64 nChar == '+' ? 62 :
65 nChar == '/' ? 63 :
66 nChar == '=' ? -1 : -2;
69 bool startsWithLineFolding(const sal_Unicode * pBegin,
70 const sal_Unicode * pEnd)
72 DBG_ASSERT(pBegin && pBegin <= pEnd,
73 "startsWithLineFolding(): Bad sequence");
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
82 #if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86 return eEncoding;
87 #endif
90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
96 const char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
111 pBuffer.reset(new sal_Unicode[nBufferSize]);
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 pBuffer.reset();
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
129 pBuffer.reset();
131 return pBuffer;
134 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
136 // See RFC 2279 for a discussion of UTF-8.
137 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
139 if (nChar < 0x80)
140 rSink.append(char(nChar));
141 else if (nChar < 0x800)
142 rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
143 + OStringChar(char((nChar & 0x3F) | 0x80)));
144 else if (nChar < 0x10000)
145 rSink.append(
146 OStringChar(char(nChar >> 12 | 0xE0))
147 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
148 + OStringChar(char((nChar & 0x3F) | 0x80)));
149 else if (nChar < 0x200000)
150 rSink.append(
151 OStringChar(char(nChar >> 18 | 0xF0))
152 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
153 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
154 + OStringChar(char((nChar & 0x3F) | 0x80)));
155 else if (nChar < 0x4000000)
156 rSink.append(
157 OStringChar(char(nChar >> 24 | 0xF8))
158 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
159 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
160 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
161 + OStringChar(char((nChar & 0x3F) | 0x80)));
162 else
163 rSink.append(
164 OStringChar(char(nChar >> 30 | 0xFC))
165 + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
166 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
167 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
168 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
169 + OStringChar(char((nChar & 0x3F) | 0x80)));
172 bool translateUTF8Char(const char *& rBegin,
173 const char * pEnd,
174 sal_uInt32 & rCharacter)
176 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
177 || static_cast< unsigned char >(*rBegin) >= 0xFE)
178 return false;
180 int nCount;
181 sal_uInt32 nMin;
182 sal_uInt32 nUCS4;
183 const char * p = rBegin;
184 if (static_cast< unsigned char >(*p) < 0xE0)
186 nCount = 1;
187 nMin = 0x80;
188 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
190 else if (static_cast< unsigned char >(*p) < 0xF0)
192 nCount = 2;
193 nMin = 0x800;
194 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
196 else if (static_cast< unsigned char >(*p) < 0xF8)
198 nCount = 3;
199 nMin = 0x10000;
200 nUCS4 = static_cast< unsigned char >(*p) & 7;
202 else if (static_cast< unsigned char >(*p) < 0xFC)
204 nCount = 4;
205 nMin = 0x200000;
206 nUCS4 = static_cast< unsigned char >(*p) & 3;
208 else
210 nCount = 5;
211 nMin = 0x4000000;
212 nUCS4 = static_cast< unsigned char >(*p) & 1;
214 ++p;
216 for (; nCount-- > 0; ++p)
217 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
218 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
219 else
220 return false;
222 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
223 return false;
225 rCharacter = nUCS4;
226 rBegin = p;
227 return true;
230 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
231 char const * pEnd);
233 struct Parameter
235 OString m_aAttribute;
236 OString m_aCharset;
237 OString m_aLanguage;
238 OString m_aValue;
239 sal_uInt32 m_nSection;
240 bool m_bExtended;
242 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
244 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
245 return nComp < 0 ||
246 (nComp == 0 && m_nSection < rhs.m_nSection);
248 struct IsSameSection // is used to check container for duplicates with std::any_of
250 const OString& rAttribute;
251 const sal_uInt32 nSection;
252 bool operator()(const Parameter& r) const
253 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
257 typedef std::forward_list<Parameter> ParameterList;
259 bool parseParameters(ParameterList const & rInput,
260 INetContentTypeParameterList * pOutput);
262 // appendISO88591
264 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
265 char const * pEnd)
267 sal_Int32 nLength = pEnd - pBegin;
268 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
269 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
270 *p++ = static_cast<unsigned char>(*pBegin++);
271 rText.append(pBuffer.get(), nLength);
274 // parseParameters
276 bool parseParameters(ParameterList const & rInput,
277 INetContentTypeParameterList * pOutput)
279 if (pOutput)
280 pOutput->clear();
282 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
284 if (it->m_nSection > 0
285 && (itPrev == rInput.end()
286 || itPrev->m_nSection != it->m_nSection - 1
287 || itPrev->m_aAttribute != it->m_aAttribute))
288 return false;
291 if (pOutput)
292 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
294 bool bCharset = !it->m_aCharset.isEmpty();
295 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
296 if (bCharset)
297 eEncoding
298 = getCharsetEncoding(it->m_aCharset.getStr(),
299 it->m_aCharset.getStr()
300 + it->m_aCharset.getLength());
301 OUStringBuffer aValue(64);
302 bool bBadEncoding = false;
303 itNext = it;
306 sal_Size nSize;
307 std::unique_ptr<sal_Unicode[]> pUnicode
308 = convertToUnicode(itNext->m_aValue.getStr(),
309 itNext->m_aValue.getStr()
310 + itNext->m_aValue.getLength(),
311 bCharset && it->m_bExtended ?
312 eEncoding :
313 RTL_TEXTENCODING_UTF8,
314 nSize);
315 if (!pUnicode && !(bCharset && it->m_bExtended))
316 pUnicode = convertToUnicode(
317 itNext->m_aValue.getStr(),
318 itNext->m_aValue.getStr()
319 + itNext->m_aValue.getLength(),
320 RTL_TEXTENCODING_ISO_8859_1, nSize);
321 if (!pUnicode)
323 bBadEncoding = true;
324 break;
326 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
327 ++itNext;
329 while (itNext != rInput.end() && itNext->m_nSection != 0);
331 if (bBadEncoding)
333 aValue.setLength(0);
334 itNext = it;
337 if (itNext->m_bExtended)
339 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
340 aValue.append(
341 static_cast<sal_Unicode>(
342 static_cast<unsigned char>(itNext->m_aValue[i])
343 | 0xF800)); // map to unicode corporate use sub area
345 else
347 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
348 aValue.append( itNext->m_aValue[i] );
350 ++itNext;
352 while (itNext != rInput.end() && itNext->m_nSection != 0);
354 auto const ret = pOutput->insert(
355 {it->m_aAttribute,
356 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
357 SAL_INFO_IF(!ret.second, "tools",
358 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
360 return true;
363 /** Check whether some character is valid within an RFC 2045 <token>.
365 @param nChar Some UCS-4 character.
367 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
368 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
369 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
371 bool isTokenChar(sal_uInt32 nChar)
373 static const bool aMap[128]
374 = { false, false, false, false, false, false, false, false,
375 false, false, false, false, false, false, false, false,
376 false, false, false, false, false, false, false, false,
377 false, false, false, false, false, false, false, false,
378 false, true, false, true, true, true, true, true, // !"#$%&'
379 false, false, true, true, false, true, true, false, //()*+,-./
380 true, true, true, true, true, true, true, true, //01234567
381 true, true, false, false, false, false, false, false, //89:;<=>?
382 false, true, true, true, true, true, true, true, //@ABCDEFG
383 true, true, true, true, true, true, true, true, //HIJKLMNO
384 true, true, true, true, true, true, true, true, //PQRSTUVW
385 true, true, true, false, false, false, true, true, //XYZ[\]^_
386 true, true, true, true, true, true, true, true, //`abcdefg
387 true, true, true, true, true, true, true, true, //hijklmno
388 true, true, true, true, true, true, true, true, //pqrstuvw
389 true, true, true, true, true, true, true, false //xyz{|}~
391 return rtl::isAscii(nChar) && aMap[nChar];
394 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
395 const sal_Unicode * pEnd)
397 DBG_ASSERT(pBegin && pBegin <= pEnd,
398 "skipComment(): Bad sequence");
400 if (pBegin != pEnd && *pBegin == '(')
402 sal_uInt32 nLevel = 0;
403 for (const sal_Unicode * p = pBegin; p != pEnd;)
404 switch (*p++)
406 case '(':
407 ++nLevel;
408 break;
410 case ')':
411 if (--nLevel == 0)
412 return p;
413 break;
415 case '\\':
416 if (p != pEnd)
417 ++p;
418 break;
421 return pBegin;
424 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
425 pBegin,
426 const sal_Unicode *
427 pEnd)
429 DBG_ASSERT(pBegin && pBegin <= pEnd,
430 "skipLinearWhiteSpaceComment(): Bad sequence");
432 while (pBegin != pEnd)
433 switch (*pBegin)
435 case '\t':
436 case ' ':
437 ++pBegin;
438 break;
440 case 0x0D: // CR
441 if (startsWithLineFolding(pBegin, pEnd))
442 pBegin += 3;
443 else
444 return pBegin;
445 break;
447 case '(':
449 const sal_Unicode * p = skipComment(pBegin, pEnd);
450 if (p == pBegin)
451 return pBegin;
452 pBegin = p;
453 break;
456 default:
457 return pBegin;
459 return pBegin;
462 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
463 const sal_Unicode * pEnd)
465 DBG_ASSERT(pBegin && pBegin <= pEnd,
466 "skipQuotedString(): Bad sequence");
468 if (pBegin != pEnd && *pBegin == '"')
469 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
470 switch (*p++)
472 case 0x0D: // CR
473 if (pEnd - p < 2 || *p++ != 0x0A // LF
474 || !isWhiteSpace(*p++))
475 return pBegin;
476 break;
478 case '"':
479 return p;
481 case '\\':
482 if (p != pEnd)
483 ++p;
484 break;
486 return pBegin;
489 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
490 sal_Unicode const * pEnd,
491 INetContentTypeParameterList *
492 pParameters)
494 ParameterList aList;
495 sal_Unicode const * pParameterBegin = pBegin;
496 for (sal_Unicode const * p = pParameterBegin;;)
498 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
499 if (pParameterBegin == pEnd || *pParameterBegin != ';')
500 break;
501 p = pParameterBegin + 1;
503 sal_Unicode const * pAttributeBegin
504 = skipLinearWhiteSpaceComment(p, pEnd);
505 p = pAttributeBegin;
506 bool bDowncaseAttribute = false;
507 while (p != pEnd && isTokenChar(*p) && *p != '*')
509 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
510 ++p;
512 if (p == pAttributeBegin)
513 break;
514 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
515 if (bDowncaseAttribute)
516 aAttribute = aAttribute.toAsciiLowerCase();
518 sal_uInt32 nSection = 0;
519 if (p != pEnd && *p == '*')
521 ++p;
522 if (p != pEnd && rtl::isAsciiDigit(*p)
523 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
524 break;
527 bool bPresent = std::any_of(aList.begin(), aList.end(),
528 Parameter::IsSameSection{aAttribute, nSection});
529 if (bPresent)
530 break;
532 bool bExtended = false;
533 if (p != pEnd && *p == '*')
535 ++p;
536 bExtended = true;
539 p = skipLinearWhiteSpaceComment(p, pEnd);
541 if (p == pEnd || *p != '=')
542 break;
544 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
546 OString aCharset;
547 OString aLanguage;
548 OString aValue;
549 if (bExtended)
551 if (nSection == 0)
553 sal_Unicode const * pCharsetBegin = p;
554 bool bDowncaseCharset = false;
555 while (p != pEnd && isTokenChar(*p) && *p != '\'')
557 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
558 ++p;
560 if (p == pCharsetBegin)
561 break;
562 if (pParameters)
564 aCharset = OString(
565 pCharsetBegin,
566 p - pCharsetBegin,
567 RTL_TEXTENCODING_ASCII_US);
568 if (bDowncaseCharset)
569 aCharset = aCharset.toAsciiLowerCase();
572 if (p == pEnd || *p != '\'')
573 break;
574 ++p;
576 sal_Unicode const * pLanguageBegin = p;
577 bool bDowncaseLanguage = false;
578 int nLetters = 0;
579 for (; p != pEnd; ++p)
580 if (rtl::isAsciiAlpha(*p))
582 if (++nLetters > 8)
583 break;
584 bDowncaseLanguage = bDowncaseLanguage
585 || rtl::isAsciiUpperCase(*p);
587 else if (*p == '-')
589 if (nLetters == 0)
590 break;
591 nLetters = 0;
593 else
594 break;
595 if (nLetters == 0 || nLetters > 8)
596 break;
597 if (pParameters)
599 aLanguage = OString(
600 pLanguageBegin,
601 p - pLanguageBegin,
602 RTL_TEXTENCODING_ASCII_US);
603 if (bDowncaseLanguage)
604 aLanguage = aLanguage.toAsciiLowerCase();
607 if (p == pEnd || *p != '\'')
608 break;
609 ++p;
611 if (pParameters)
613 OStringBuffer aSink;
614 while (p != pEnd)
616 auto q = p;
617 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
618 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
619 break;
620 p = q;
621 if (nChar == '%' && p + 1 < pEnd)
623 int nWeight1 = INetMIME::getHexWeight(p[0]);
624 int nWeight2 = INetMIME::getHexWeight(p[1]);
625 if (nWeight1 >= 0 && nWeight2 >= 0)
627 aSink.append(char(nWeight1 << 4 | nWeight2));
628 p += 2;
629 continue;
632 writeUTF8(aSink, nChar);
634 aValue = aSink.makeStringAndClear();
636 else
637 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
638 ++p;
640 else if (p != pEnd && *p == '"')
641 if (pParameters)
643 OStringBuffer aSink(256);
644 bool bInvalid = false;
645 for (++p;;)
647 if (p == pEnd)
649 bInvalid = true;
650 break;
652 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
653 if (nChar == '"')
654 break;
655 else if (nChar == 0x0D) // CR
657 if (pEnd - p < 2 || *p++ != 0x0A // LF
658 || !isWhiteSpace(*p))
660 bInvalid = true;
661 break;
663 nChar = static_cast<unsigned char>(*p++);
665 else if (nChar == '\\')
667 if (p == pEnd)
669 bInvalid = true;
670 break;
672 nChar = INetMIME::getUTF32Character(p, pEnd);
674 writeUTF8(aSink, nChar);
676 if (bInvalid)
677 break;
678 aValue = aSink.makeStringAndClear();
680 else
682 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
683 if (p == pStringEnd)
684 break;
685 p = pStringEnd;
687 else
689 sal_Unicode const * pTokenBegin = p;
690 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
691 ++p;
692 if (p == pTokenBegin)
693 break;
694 if (pParameters)
695 aValue = OString(
696 pTokenBegin, p - pTokenBegin,
697 RTL_TEXTENCODING_UTF8);
699 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
701 aList.sort();
702 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
705 bool equalIgnoreCase(const char * pBegin1,
706 const char * pEnd1,
707 const char * pString2)
709 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
710 "equalIgnoreCase(): Bad sequences");
712 while (*pString2 != 0)
713 if (pBegin1 == pEnd1
714 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
715 != rtl::toAsciiUpperCase(
716 static_cast<unsigned char>(*pString2++))))
717 return false;
718 return pBegin1 == pEnd1;
721 struct EncodingEntry
723 char const * m_aName;
724 rtl_TextEncoding m_eEncoding;
727 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
728 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
729 // noted:
730 EncodingEntry const aEncodingMap[]
731 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
732 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
733 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
734 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
735 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
736 { "ASCII", RTL_TEXTENCODING_ASCII_US },
737 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
738 { "US", RTL_TEXTENCODING_ASCII_US },
739 { "IBM367", RTL_TEXTENCODING_ASCII_US },
740 { "CP367", RTL_TEXTENCODING_ASCII_US },
741 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
742 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
743 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
744 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
745 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
746 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
747 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
748 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
749 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
750 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
751 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
752 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
753 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
754 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
755 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
756 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
757 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
758 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
759 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
760 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
761 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
762 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
763 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
764 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
765 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
766 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
767 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
768 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
769 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
770 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
771 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
772 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
773 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
774 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
775 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
776 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
777 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
778 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
779 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
780 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
781 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
782 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
783 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
784 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
785 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
786 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
787 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
788 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
789 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
790 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
791 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
792 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
793 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
794 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
795 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
796 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
797 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
798 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
799 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
800 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
801 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
802 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
803 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
804 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
805 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
806 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
807 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
808 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
809 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
810 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
811 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
812 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
813 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
814 { "IBM437", RTL_TEXTENCODING_IBM_437 },
815 { "CP437", RTL_TEXTENCODING_IBM_437 },
816 { "437", RTL_TEXTENCODING_IBM_437 },
817 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
818 { "IBM850", RTL_TEXTENCODING_IBM_850 },
819 { "CP850", RTL_TEXTENCODING_IBM_850 },
820 { "850", RTL_TEXTENCODING_IBM_850 },
821 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
822 { "IBM860", RTL_TEXTENCODING_IBM_860 },
823 { "CP860", RTL_TEXTENCODING_IBM_860 },
824 { "860", RTL_TEXTENCODING_IBM_860 },
825 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
826 { "IBM861", RTL_TEXTENCODING_IBM_861 },
827 { "CP861", RTL_TEXTENCODING_IBM_861 },
828 { "861", RTL_TEXTENCODING_IBM_861 },
829 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
830 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
831 { "IBM863", RTL_TEXTENCODING_IBM_863 },
832 { "CP863", RTL_TEXTENCODING_IBM_863 },
833 { "863", RTL_TEXTENCODING_IBM_863 },
834 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
835 { "IBM865", RTL_TEXTENCODING_IBM_865 },
836 { "CP865", RTL_TEXTENCODING_IBM_865 },
837 { "865", RTL_TEXTENCODING_IBM_865 },
838 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
839 { "IBM775", RTL_TEXTENCODING_IBM_775 },
840 { "CP775", RTL_TEXTENCODING_IBM_775 },
841 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
842 { "IBM852", RTL_TEXTENCODING_IBM_852 },
843 { "CP852", RTL_TEXTENCODING_IBM_852 },
844 { "852", RTL_TEXTENCODING_IBM_852 },
845 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
846 { "IBM855", RTL_TEXTENCODING_IBM_855 },
847 { "CP855", RTL_TEXTENCODING_IBM_855 },
848 { "855", RTL_TEXTENCODING_IBM_855 },
849 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
850 { "IBM857", RTL_TEXTENCODING_IBM_857 },
851 { "CP857", RTL_TEXTENCODING_IBM_857 },
852 { "857", RTL_TEXTENCODING_IBM_857 },
853 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
854 { "IBM862", RTL_TEXTENCODING_IBM_862 },
855 { "CP862", RTL_TEXTENCODING_IBM_862 },
856 { "862", RTL_TEXTENCODING_IBM_862 },
857 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
858 { "IBM864", RTL_TEXTENCODING_IBM_864 },
859 { "CP864", RTL_TEXTENCODING_IBM_864 },
860 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
861 { "IBM866", RTL_TEXTENCODING_IBM_866 },
862 { "CP866", RTL_TEXTENCODING_IBM_866 },
863 { "866", RTL_TEXTENCODING_IBM_866 },
864 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
865 { "IBM869", RTL_TEXTENCODING_IBM_869 },
866 { "CP869", RTL_TEXTENCODING_IBM_869 },
867 { "869", RTL_TEXTENCODING_IBM_869 },
868 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
869 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
870 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
871 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
872 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
873 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
874 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
875 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
876 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
877 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
878 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
879 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
880 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
881 { "GB2312", RTL_TEXTENCODING_GB_2312 },
882 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
883 { "BIG5", RTL_TEXTENCODING_BIG5 },
884 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
885 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
886 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
887 RTL_TEXTENCODING_EUC_JP },
888 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
889 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
890 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
891 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
892 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
893 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
894 { "UTF-7", RTL_TEXTENCODING_UTF7 },
895 { "UTF-8", RTL_TEXTENCODING_UTF8 },
896 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
897 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
898 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
899 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
900 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
901 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
902 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
903 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
904 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
905 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
907 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
908 char const * pEnd)
910 for (const EncodingEntry& i : aEncodingMap)
911 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
912 return i.m_eEncoding;
913 return RTL_TEXTENCODING_DONTKNOW;
918 // INetMIME
920 // static
921 bool INetMIME::isAtomChar(sal_uInt32 nChar)
923 static const bool aMap[128]
924 = { false, false, false, false, false, false, false, false,
925 false, false, false, false, false, false, false, false,
926 false, false, false, false, false, false, false, false,
927 false, false, false, false, false, false, false, false,
928 false, true, false, true, true, true, true, true, // !"#$%&'
929 false, false, true, true, false, true, false, true, //()*+,-./
930 true, true, true, true, true, true, true, true, //01234567
931 true, true, false, false, false, true, false, true, //89:;<=>?
932 false, true, true, true, true, true, true, true, //@ABCDEFG
933 true, true, true, true, true, true, true, true, //HIJKLMNO
934 true, true, true, true, true, true, true, true, //PQRSTUVW
935 true, true, true, false, false, false, true, true, //XYZ[\]^_
936 true, true, true, true, true, true, true, true, //`abcdefg
937 true, true, true, true, true, true, true, true, //hijklmno
938 true, true, true, true, true, true, true, true, //pqrstuvw
939 true, true, true, true, true, true, true, false //xyz{|}~
941 return rtl::isAscii(nChar) && aMap[nChar];
944 // static
945 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
947 static const bool aMap[128]
948 = { false, false, false, false, false, false, false, false,
949 false, false, false, false, false, false, false, false,
950 false, false, false, false, false, false, false, false,
951 false, false, false, false, false, false, false, false,
952 false, true, false, true, true, false, true, true, // !"#$%&'
953 false, false, false, true, true, true, true, true, //()*+,-./
954 true, true, true, true, true, true, true, true, //01234567
955 true, true, true, true, true, true, true, true, //89:;<=>?
956 true, true, true, true, true, true, true, true, //@ABCDEFG
957 true, true, true, true, true, true, true, true, //HIJKLMNO
958 true, true, true, true, true, true, true, true, //PQRSTUVW
959 true, true, true, true, false, true, true, true, //XYZ[\]^_
960 true, true, true, true, true, true, true, true, //`abcdefg
961 true, true, true, true, true, true, true, true, //hijklmno
962 true, true, true, true, true, true, true, true, //pqrstuvw
963 true, true, true, false, true, true, true, false //xyz{|}~
965 return rtl::isAscii(nChar) && aMap[nChar];
968 // static
969 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
970 const sal_Unicode * pEnd1,
971 const char * pString2)
973 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
974 "INetMIME::equalIgnoreCase(): Bad sequences");
976 while (*pString2 != 0)
977 if (pBegin1 == pEnd1
978 || (rtl::toAsciiUpperCase(*pBegin1++)
979 != rtl::toAsciiUpperCase(
980 static_cast<unsigned char>(*pString2++))))
981 return false;
982 return pBegin1 == pEnd1;
985 // static
986 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
987 const sal_Unicode * pEnd, bool bLeadingZeroes,
988 sal_uInt32 & rValue)
990 sal_uInt64 nTheValue = 0;
991 const sal_Unicode * p = rBegin;
992 for ( ; p != pEnd; ++p)
994 int nWeight = getWeight(*p);
995 if (nWeight < 0)
996 break;
997 nTheValue = 10 * nTheValue + nWeight;
998 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
999 return false;
1001 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1002 return false;
1003 rBegin = p;
1004 rValue = sal_uInt32(nTheValue);
1005 return true;
1008 // static
1009 sal_Unicode const * INetMIME::scanContentType(
1010 std::u16string_view rStr, OUString * pType,
1011 OUString * pSubType, INetContentTypeParameterList * pParameters)
1013 sal_Unicode const * pBegin = rStr.data();
1014 sal_Unicode const * pEnd = pBegin + rStr.size();
1015 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1016 sal_Unicode const * pTypeBegin = p;
1017 while (p != pEnd && isTokenChar(*p))
1019 ++p;
1021 if (p == pTypeBegin)
1022 return nullptr;
1023 sal_Unicode const * pTypeEnd = p;
1025 p = skipLinearWhiteSpaceComment(p, pEnd);
1026 if (p == pEnd || *p++ != '/')
1027 return nullptr;
1029 p = skipLinearWhiteSpaceComment(p, pEnd);
1030 sal_Unicode const * pSubTypeBegin = p;
1031 while (p != pEnd && isTokenChar(*p))
1033 ++p;
1035 if (p == pSubTypeBegin)
1036 return nullptr;
1037 sal_Unicode const * pSubTypeEnd = p;
1039 if (pType != nullptr)
1041 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1043 if (pSubType != nullptr)
1045 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1046 .toAsciiLowerCase();
1049 return scanParameters(p, pEnd, pParameters);
1052 // static
1053 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1055 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1056 // versions of StarOffice send mails with header fields where encoded
1057 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1058 // '=', ',', '.', '"', ')', without any required white space in between.
1059 // And there appear to exist some broken mailers that only encode single
1060 // letters within words, like "Appel
1061 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1062 // detect encoded words even when not properly surrounded by white space.
1064 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1066 // encoded-word = "=?"
1067 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1068 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1069 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1070 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1071 // "?="
1073 // base64 = ALPHA / DIGIT / "+" / "/"
1075 const char * pBegin = rBody.getStr();
1076 const char * pEnd = pBegin + rBody.getLength();
1078 OUStringBuffer sDecoded;
1079 const char * pCopyBegin = pBegin;
1081 /* bool bStartEncodedWord = true; */
1082 const char * pWSPBegin = pBegin;
1084 for (const char * p = pBegin; p != pEnd;)
1086 if (*p == '=' /* && bStartEncodedWord */)
1088 const char * q = p + 1;
1089 bool bEncodedWord = q != pEnd && *q++ == '?';
1091 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1092 if (bEncodedWord)
1094 const char * pCharsetBegin = q;
1095 const char * pLanguageBegin = nullptr;
1096 int nAlphaCount = 0;
1097 for (bool bDone = false; !bDone;)
1098 if (q == pEnd)
1100 bEncodedWord = false;
1101 bDone = true;
1103 else
1105 char cChar = *q++;
1106 switch (cChar)
1108 case '*':
1109 pLanguageBegin = q - 1;
1110 nAlphaCount = 0;
1111 break;
1113 case '-':
1114 if (pLanguageBegin != nullptr)
1116 if (nAlphaCount == 0)
1117 pLanguageBegin = nullptr;
1118 else
1119 nAlphaCount = 0;
1121 break;
1123 case '?':
1124 if (pCharsetBegin == q - 1)
1125 bEncodedWord = false;
1126 else
1128 eCharsetEncoding
1129 = getCharsetEncoding(
1130 pCharsetBegin,
1131 pLanguageBegin == nullptr
1132 || nAlphaCount == 0 ?
1133 q - 1 : pLanguageBegin);
1134 bEncodedWord = isMIMECharsetEncoding(
1135 eCharsetEncoding);
1136 eCharsetEncoding
1137 = translateFromMIME(eCharsetEncoding);
1139 bDone = true;
1140 break;
1142 default:
1143 if (pLanguageBegin != nullptr
1144 && (!rtl::isAsciiAlpha(
1145 static_cast<unsigned char>(cChar))
1146 || ++nAlphaCount > 8))
1147 pLanguageBegin = nullptr;
1148 break;
1153 bool bEncodingB = false;
1154 if (bEncodedWord)
1156 if (q == pEnd)
1157 bEncodedWord = false;
1158 else
1160 switch (*q++)
1162 case 'B':
1163 case 'b':
1164 bEncodingB = true;
1165 break;
1167 case 'Q':
1168 case 'q':
1169 bEncodingB = false;
1170 break;
1172 default:
1173 bEncodedWord = false;
1174 break;
1179 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1181 OStringBuffer sText;
1182 if (bEncodedWord)
1184 if (bEncodingB)
1186 for (bool bDone = false; !bDone;)
1188 if (pEnd - q < 4)
1190 bEncodedWord = false;
1191 bDone = true;
1193 else
1195 bool bFinal = false;
1196 int nCount = 3;
1197 sal_uInt32 nValue = 0;
1198 for (int nShift = 18; nShift >= 0; nShift -= 6)
1200 int nWeight = getBase64Weight(*q++);
1201 if (nWeight == -2)
1203 bEncodedWord = false;
1204 bDone = true;
1205 break;
1207 if (nWeight == -1)
1209 if (!bFinal)
1211 if (nShift >= 12)
1213 bEncodedWord = false;
1214 bDone = true;
1215 break;
1217 bFinal = true;
1218 nCount = nShift == 6 ? 1 : 2;
1221 else
1222 nValue |= nWeight << nShift;
1224 if (bEncodedWord)
1226 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1227 sText.append(char(nValue >> nShift & 0xFF));
1228 if (*q == '?')
1230 ++q;
1231 bDone = true;
1233 if (bFinal && !bDone)
1235 bEncodedWord = false;
1236 bDone = true;
1242 else
1244 const char * pEncodedTextBegin = q;
1245 const char * pEncodedTextCopyBegin = q;
1246 for (bool bDone = false; !bDone;)
1247 if (q == pEnd)
1249 bEncodedWord = false;
1250 bDone = true;
1252 else
1254 sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1255 switch (nChar)
1257 case '=':
1259 if (pEnd - q < 2)
1261 bEncodedWord = false;
1262 bDone = true;
1263 break;
1265 int nDigit1 = getHexWeight(q[0]);
1266 int nDigit2 = getHexWeight(q[1]);
1267 if (nDigit1 < 0 || nDigit2 < 0)
1269 bEncodedWord = false;
1270 bDone = true;
1271 break;
1273 sText.append(
1274 rBody.subView(
1275 (pEncodedTextCopyBegin - pBegin),
1276 (q - 1 - pEncodedTextCopyBegin))
1277 + OStringChar(char(nDigit1 << 4 | nDigit2)));
1278 q += 2;
1279 pEncodedTextCopyBegin = q;
1280 break;
1283 case '?':
1284 if (q - pEncodedTextBegin > 1)
1285 sText.append(rBody.subView(
1286 (pEncodedTextCopyBegin - pBegin),
1287 (q - 1 - pEncodedTextCopyBegin)));
1288 else
1289 bEncodedWord = false;
1290 bDone = true;
1291 break;
1293 case '_':
1294 sText.append(
1295 rBody.subView(
1296 (pEncodedTextCopyBegin - pBegin),
1297 (q - 1 - pEncodedTextCopyBegin))
1298 + OString::Concat(" "));
1299 pEncodedTextCopyBegin = q;
1300 break;
1302 default:
1303 if (!isVisible(nChar))
1305 bEncodedWord = false;
1306 bDone = true;
1308 break;
1314 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1316 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1317 sal_Size nUnicodeSize = 0;
1318 if (bEncodedWord)
1320 pUnicodeBuffer
1321 = convertToUnicode(sText.getStr(),
1322 sText.getStr() + sText.getLength(),
1323 eCharsetEncoding, nUnicodeSize);
1324 if (!pUnicodeBuffer)
1325 bEncodedWord = false;
1328 if (bEncodedWord)
1330 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1331 sDecoded.append(
1332 pUnicodeBuffer.get(),
1333 static_cast< sal_Int32 >(nUnicodeSize));
1334 pUnicodeBuffer.reset();
1335 p = q;
1336 pCopyBegin = p;
1338 pWSPBegin = p;
1339 while (p != pEnd && isWhiteSpace(*p))
1340 ++p;
1341 /* bStartEncodedWord = p != pWSPBegin; */
1342 continue;
1346 if (p == pEnd)
1347 break;
1349 switch (*p++)
1351 case '"':
1352 /* bStartEncodedWord = true; */
1353 break;
1355 case '(':
1356 /* bStartEncodedWord = true; */
1357 break;
1359 case ')':
1360 /* bStartEncodedWord = false; */
1361 break;
1363 default:
1365 const char * pUTF8Begin = p - 1;
1366 const char * pUTF8End = pUTF8Begin;
1367 sal_uInt32 nCharacter = 0;
1368 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1370 appendISO88591(sDecoded, pCopyBegin, p - 1);
1371 sDecoded.appendUtf32(nCharacter);
1372 p = pUTF8End;
1373 pCopyBegin = p;
1375 /* bStartEncodedWord = false; */
1376 break;
1379 pWSPBegin = p;
1382 appendISO88591(sDecoded, pCopyBegin, pEnd);
1383 return sDecoded.makeStringAndClear();
1386 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */