tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blob28572483951f28112740ed2decaf8cf6abe180f3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/debug.hxx>
31 #include <tools/inetmime.hxx>
32 #include <rtl/character.hxx>
34 namespace {
// Forward declaration: maps a charset name given as the character range
// [pBegin, pEnd) to an rtl_TextEncoding.  The definition appears further
// down in this file, after the charset name table it searches.
36 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
37 const char * pEnd);
39 /** Check for US-ASCII white space character.
41 @param nChar Some UCS-4 character.
43 @return True if nChar is a US-ASCII white space character (US-ASCII
44 0x09 or 0x20).
46 bool isWhiteSpace(sal_uInt32 nChar)
48 return nChar == '\t' || nChar == ' ';
51 /** Get the Base 64 digit weight of a US-ASCII character.
53 @param nChar Some UCS-4 character.
55 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
56 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
57 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
58 character (US-ASCII '='), return -1; otherwise, return -2.
60 int getBase64Weight(sal_uInt32 nChar)
62 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
63 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
64 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
65 nChar == '+' ? 62 :
66 nChar == '/' ? 63 :
67 nChar == '=' ? -1 : -2;
70 bool startsWithLineFolding(const sal_Unicode * pBegin,
71 const sal_Unicode * pEnd)
73 assert(pBegin && pBegin <= pEnd && "startsWithLineFolding(): Bad sequence");
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
82 #if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86 return eEncoding;
87 #endif
90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
96 const char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
111 pBuffer.reset(new sal_Unicode[nBufferSize]);
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 pBuffer.reset();
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
129 pBuffer.reset();
131 return pBuffer;
134 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
136 // See RFC 2279 for a discussion of UTF-8.
137 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
139 if (nChar < 0x80)
140 rSink.append(char(nChar));
141 else if (nChar < 0x800)
142 rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
143 + OStringChar(char((nChar & 0x3F) | 0x80)));
144 else if (nChar < 0x10000)
145 rSink.append(
146 OStringChar(char(nChar >> 12 | 0xE0))
147 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
148 + OStringChar(char((nChar & 0x3F) | 0x80)));
149 else if (nChar < 0x200000)
150 rSink.append(
151 OStringChar(char(nChar >> 18 | 0xF0))
152 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
153 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
154 + OStringChar(char((nChar & 0x3F) | 0x80)));
155 else if (nChar < 0x4000000)
156 rSink.append(
157 OStringChar(char(nChar >> 24 | 0xF8))
158 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
159 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
160 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
161 + OStringChar(char((nChar & 0x3F) | 0x80)));
162 else
163 rSink.append(
164 OStringChar(char(nChar >> 30 | 0xFC))
165 + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
166 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
167 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
168 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
169 + OStringChar(char((nChar & 0x3F) | 0x80)));
172 bool translateUTF8Char(const char *& rBegin,
173 const char * pEnd,
174 sal_uInt32 & rCharacter)
176 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
177 || static_cast< unsigned char >(*rBegin) >= 0xFE)
178 return false;
180 int nCount;
181 sal_uInt32 nMin;
182 sal_uInt32 nUCS4;
183 const char * p = rBegin;
184 if (static_cast< unsigned char >(*p) < 0xE0)
186 nCount = 1;
187 nMin = 0x80;
188 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
190 else if (static_cast< unsigned char >(*p) < 0xF0)
192 nCount = 2;
193 nMin = 0x800;
194 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
196 else if (static_cast< unsigned char >(*p) < 0xF8)
198 nCount = 3;
199 nMin = 0x10000;
200 nUCS4 = static_cast< unsigned char >(*p) & 7;
202 else if (static_cast< unsigned char >(*p) < 0xFC)
204 nCount = 4;
205 nMin = 0x200000;
206 nUCS4 = static_cast< unsigned char >(*p) & 3;
208 else
210 nCount = 5;
211 nMin = 0x4000000;
212 nUCS4 = static_cast< unsigned char >(*p) & 1;
214 ++p;
216 for (; nCount-- > 0; ++p)
217 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
218 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
219 else
220 return false;
222 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
223 return false;
225 rCharacter = nUCS4;
226 rBegin = p;
227 return true;
// Forward declaration: appends the bytes in [pBegin, pEnd) to rText, one
// UTF-16 code unit per byte.  The definition follows further down in this
// file.
230 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
231 char const * pEnd);
233 struct Parameter
235 OString m_aAttribute;
236 OString m_aCharset;
237 OString m_aLanguage;
238 OString m_aValue;
239 sal_uInt32 m_nSection;
240 bool m_bExtended;
242 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
244 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
245 return nComp < 0 ||
246 (nComp == 0 && m_nSection < rhs.m_nSection);
248 struct IsSameSection // is used to check container for duplicates with std::any_of
250 const OString& rAttribute;
251 const sal_uInt32 nSection;
252 bool operator()(const Parameter& r) const
253 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
// Singly linked list of scanned parameter sections.
257 typedef std::forward_list<Parameter> ParameterList;
// Forward declaration; the definition follows further down in this file.
259 bool parseParameters(ParameterList const & rInput,
260 INetContentTypeParameterList * pOutput);
262 // appendISO88591
264 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
265 char const * pEnd)
267 sal_Int32 nLength = pEnd - pBegin;
268 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
269 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
270 *p++ = static_cast<unsigned char>(*pBegin++);
271 rText.append(pBuffer.get(), nLength);
274 // parseParameters
// Validates a list of scanned parameter sections and, if pOutput is
// non-null, merges the sections of each attribute into one decoded value
// that is inserted into *pOutput.
//
// The validation expects the sections of an attribute to be adjacent and in
// ascending order: every section with a non-zero number must directly follow
// the section numbered one lower of the same attribute.
//
// Returns false if that validation fails, true otherwise.  *pOutput (if any)
// is cleared before the validation, so it is left empty on failure.
276 bool parseParameters(ParameterList const & rInput,
277 INetContentTypeParameterList * pOutput)
279 if (pOutput)
280 pOutput->clear();
// Validation pass: itPrev trails it by one element and is end() for the
// first element.
282 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
284 if (it->m_nSection > 0
285 && (itPrev == rInput.end()
286 || itPrev->m_nSection != it->m_nSection - 1
287 || itPrev->m_aAttribute != it->m_aAttribute))
288 return false;
// Conversion pass: it addresses the first section of an attribute, itNext
// walks over that section and its continuations (those with a non-zero
// section number).
291 if (pOutput)
292 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
// Only the first section's charset name is looked up.
294 bool bCharset = !it->m_aCharset.isEmpty();
295 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
296 if (bCharset)
297 eEncoding
298 = getCharsetEncoding(it->m_aCharset.getStr(),
299 it->m_aCharset.getStr()
300 + it->m_aCharset.getLength());
301 OUStringBuffer aValue(64);
302 bool bBadEncoding = false;
303 itNext = it;
// Convert each section: with the declared charset if the first section is
// extended and names one, otherwise as UTF-8 with ISO-8859-1 as fallback.
306 sal_Size nSize;
307 std::unique_ptr<sal_Unicode[]> pUnicode
308 = convertToUnicode(itNext->m_aValue.getStr(),
309 itNext->m_aValue.getStr()
310 + itNext->m_aValue.getLength(),
311 bCharset && it->m_bExtended ?
312 eEncoding :
313 RTL_TEXTENCODING_UTF8,
314 nSize);
315 if (!pUnicode && !(bCharset && it->m_bExtended))
316 pUnicode = convertToUnicode(
317 itNext->m_aValue.getStr(),
318 itNext->m_aValue.getStr()
319 + itNext->m_aValue.getLength(),
320 RTL_TEXTENCODING_ISO_8859_1, nSize);
321 if (!pUnicode)
323 bBadEncoding = true;
324 break;
326 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
327 ++itNext;
329 while (itNext != rInput.end() && itNext->m_nSection != 0);
// If any section could not be converted, discard what was collected and
// rebuild the value from the raw bytes of all sections instead.
331 if (bBadEncoding)
333 aValue.setLength(0);
334 itNext = it;
337 if (itNext->m_bExtended)
339 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
340 aValue.append(
341 static_cast<sal_Unicode>(
342 static_cast<unsigned char>(itNext->m_aValue[i])
343 | 0xF800)); // map to unicode corporate use sub area
345 else
347 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
348 aValue.append( itNext->m_aValue[i] );
350 ++itNext;
352 while (itNext != rInput.end() && itNext->m_nSection != 0);
// Store the merged value under the attribute name; the last member passed is
// true only if the conversion succeeded.  A failed insert (attribute already
// present) is only logged.
354 auto const ret = pOutput->insert(
355 {it->m_aAttribute,
356 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
357 SAL_INFO_IF(!ret.second, "tools",
358 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
360 return true;
363 /** Check whether some character is valid within an RFC 2045 <token>.
365 @param nChar Some UCS-4 character.
367 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
368 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
369 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
371 bool isTokenChar(sal_uInt32 nChar)
373 static const bool aMap[128]
374 = { false, false, false, false, false, false, false, false,
375 false, false, false, false, false, false, false, false,
376 false, false, false, false, false, false, false, false,
377 false, false, false, false, false, false, false, false,
378 false, true, false, true, true, true, true, true, // !"#$%&'
379 false, false, true, true, false, true, true, false, //()*+,-./
380 true, true, true, true, true, true, true, true, //01234567
381 true, true, false, false, false, false, false, false, //89:;<=>?
382 false, true, true, true, true, true, true, true, //@ABCDEFG
383 true, true, true, true, true, true, true, true, //HIJKLMNO
384 true, true, true, true, true, true, true, true, //PQRSTUVW
385 true, true, true, false, false, false, true, true, //XYZ[\]^_
386 true, true, true, true, true, true, true, true, //`abcdefg
387 true, true, true, true, true, true, true, true, //hijklmno
388 true, true, true, true, true, true, true, true, //pqrstuvw
389 true, true, true, true, true, true, true, false //xyz{|}~
391 return rtl::isAscii(nChar) && aMap[nChar];
394 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
395 const sal_Unicode * pEnd)
397 assert(pBegin && pBegin <= pEnd && "skipComment(): Bad sequence");
399 if (pBegin != pEnd && *pBegin == '(')
401 sal_uInt32 nLevel = 0;
402 for (const sal_Unicode * p = pBegin; p != pEnd;)
403 switch (*p++)
405 case '(':
406 ++nLevel;
407 break;
409 case ')':
410 if (--nLevel == 0)
411 return p;
412 break;
414 case '\\':
415 if (p != pEnd)
416 ++p;
417 break;
420 return pBegin;
423 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
424 pBegin,
425 const sal_Unicode *
426 pEnd)
428 assert(pBegin && pBegin <= pEnd && "skipLinearWhiteSpaceComment(): Bad sequence");
430 while (pBegin != pEnd)
431 switch (*pBegin)
433 case '\t':
434 case ' ':
435 ++pBegin;
436 break;
438 case 0x0D: // CR
439 if (startsWithLineFolding(pBegin, pEnd))
440 pBegin += 3;
441 else
442 return pBegin;
443 break;
445 case '(':
447 const sal_Unicode * p = skipComment(pBegin, pEnd);
448 if (p == pBegin)
449 return pBegin;
450 pBegin = p;
451 break;
454 default:
455 return pBegin;
457 return pBegin;
460 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
461 const sal_Unicode * pEnd)
463 assert(pBegin && pBegin <= pEnd && "skipQuotedString(): Bad sequence");
465 if (pBegin != pEnd && *pBegin == '"')
466 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
467 switch (*p++)
469 case 0x0D: // CR
470 if (pEnd - p < 2 || *p++ != 0x0A // LF
471 || !isWhiteSpace(*p++))
472 return pBegin;
473 break;
475 case '"':
476 return p;
478 case '\\':
479 if (p != pEnd)
480 ++p;
481 break;
483 return pBegin;
// Scans a sequence of ';'-introduced parameters in [pBegin, pEnd).
//
// Each parameter is an attribute name, optionally followed by '*' and a
// section number and/or a further '*' marking an extended value, then '='
// and a value.  The value is a charset'language'percent-encoded text (first
// section of an extended parameter), a percent-encoded text (later extended
// sections), a quoted string, or a token.
//
// Scanning stops at the first position that does not start a well-formed
// parameter, or when an attribute/section pair repeats.  The sections found
// so far are sorted and passed to parseParameters() together with
// pParameters.
//
// If pParameters is null, values are only skipped, not collected.
//
// Returns the position where scanning stopped (the end of the input, a
// character other than ';', or the ';' of the parameter that could not be
// scanned), or pBegin if parseParameters() reports failure.
486 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
487 sal_Unicode const * pEnd,
488 INetContentTypeParameterList *
489 pParameters)
491 ParameterList aList;
492 sal_Unicode const * pParameterBegin = pBegin;
493 for (sal_Unicode const * p = pParameterBegin;;)
// Every parameter must be introduced by ';'.
495 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
496 if (pParameterBegin == pEnd || *pParameterBegin != ';')
497 break;
498 p = pParameterBegin + 1;
// Attribute name: token characters up to (not including) a '*'; stored in
// lower case.
500 sal_Unicode const * pAttributeBegin
501 = skipLinearWhiteSpaceComment(p, pEnd);
502 p = pAttributeBegin;
503 bool bDowncaseAttribute = false;
504 while (p != pEnd && isTokenChar(*p) && *p != '*')
506 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
507 ++p;
509 if (p == pAttributeBegin)
510 break;
511 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
512 if (bDowncaseAttribute)
513 aAttribute = aAttribute.toAsciiLowerCase();
// Optional '*' followed by an optional decimal section number.
515 sal_uInt32 nSection = 0;
516 if (p != pEnd && *p == '*')
518 ++p;
519 if (p != pEnd && rtl::isAsciiDigit(*p)
520 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
521 break;
// The same attribute/section pair must not occur twice.
524 bool bPresent = std::any_of(aList.begin(), aList.end(),
525 Parameter::IsSameSection{aAttribute, nSection});
526 if (bPresent)
527 break;
// A '*' at this point marks the value as extended.
529 bool bExtended = false;
530 if (p != pEnd && *p == '*')
532 ++p;
533 bExtended = true;
536 p = skipLinearWhiteSpaceComment(p, pEnd);
538 if (p == pEnd || *p != '=')
539 break;
541 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
543 OString aCharset;
544 OString aLanguage;
545 OString aValue;
546 if (bExtended)
// Only section 0 of an extended parameter carries the charset'language'
// prefix.
548 if (nSection == 0)
// Charset name: token characters up to the first single quote; stored in
// lower case.
550 sal_Unicode const * pCharsetBegin = p;
551 bool bDowncaseCharset = false;
552 while (p != pEnd && isTokenChar(*p) && *p != '\'')
554 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
555 ++p;
557 if (p == pCharsetBegin)
558 break;
559 if (pParameters)
561 aCharset = OString(
562 pCharsetBegin,
563 p - pCharsetBegin,
564 RTL_TEXTENCODING_ASCII_US);
565 if (bDowncaseCharset)
566 aCharset = aCharset.toAsciiLowerCase();
569 if (p == pEnd || *p != '\'')
570 break;
571 ++p;
// Language tag: '-'-separated groups of one to eight US-ASCII letters;
// stored in lower case.
573 sal_Unicode const * pLanguageBegin = p;
574 bool bDowncaseLanguage = false;
575 int nLetters = 0;
576 for (; p != pEnd; ++p)
577 if (rtl::isAsciiAlpha(*p))
579 if (++nLetters > 8)
580 break;
581 bDowncaseLanguage = bDowncaseLanguage
582 || rtl::isAsciiUpperCase(*p);
584 else if (*p == '-')
586 if (nLetters == 0)
587 break;
588 nLetters = 0;
590 else
591 break;
592 if (nLetters == 0 || nLetters > 8)
593 break;
594 if (pParameters)
596 aLanguage = OString(
597 pLanguageBegin,
598 p - pLanguageBegin,
599 RTL_TEXTENCODING_ASCII_US);
600 if (bDowncaseLanguage)
601 aLanguage = aLanguage.toAsciiLowerCase();
604 if (p == pEnd || *p != '\'')
605 break;
606 ++p;
// Extended value: token and non-US-ASCII characters.  "%XX" with two hex
// digits yields the byte XX; every other character is written as UTF-8.
608 if (pParameters)
610 OStringBuffer aSink;
611 while (p != pEnd)
613 auto q = p;
614 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
615 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
616 break;
617 p = q;
618 if (nChar == '%' && p + 1 < pEnd)
620 int nWeight1 = INetMIME::getHexWeight(p[0]);
621 int nWeight2 = INetMIME::getHexWeight(p[1]);
622 if (nWeight1 >= 0 && nWeight2 >= 0)
624 aSink.append(char(nWeight1 << 4 | nWeight2));
625 p += 2;
626 continue;
629 writeUTF8(aSink, nChar);
631 aValue = aSink.makeStringAndClear();
633 else
634 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
635 ++p;
// Quoted-string value: collected as UTF-8 with backslash escapes resolved
// and each CR LF white-space fold reduced to its white space character.
637 else if (p != pEnd && *p == '"')
638 if (pParameters)
640 OStringBuffer aSink(256);
641 bool bInvalid = false;
642 for (++p;;)
644 if (p == pEnd)
646 bInvalid = true;
647 break;
649 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
650 if (nChar == '"')
651 break;
652 else if (nChar == 0x0D) // CR
654 if (pEnd - p < 2 || *p++ != 0x0A // LF
655 || !isWhiteSpace(*p))
657 bInvalid = true;
658 break;
660 nChar = static_cast<unsigned char>(*p++);
662 else if (nChar == '\\')
664 if (p == pEnd)
666 bInvalid = true;
667 break;
669 nChar = INetMIME::getUTF32Character(p, pEnd);
671 writeUTF8(aSink, nChar);
673 if (bInvalid)
674 break;
675 aValue = aSink.makeStringAndClear();
677 else
679 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
680 if (p == pStringEnd)
681 break;
682 p = pStringEnd;
// Plain token value (non-US-ASCII characters are accepted too), stored as
// UTF-8.
684 else
686 sal_Unicode const * pTokenBegin = p;
687 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
688 ++p;
689 if (p == pTokenBegin)
690 break;
691 if (pParameters)
692 aValue = OString(
693 pTokenBegin, p - pTokenBegin,
694 RTL_TEXTENCODING_UTF8);
// Sections are collected in reverse order of appearance; the sort below
// orders them by attribute name and section number.
696 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
698 aList.sort();
699 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
702 bool equalIgnoreCase(const char * pBegin1,
703 const char * pEnd1,
704 const char * pString2)
706 assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
707 "equalIgnoreCase(): Bad sequences");
709 while (*pString2 != 0)
710 if (pBegin1 == pEnd1
711 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
712 != rtl::toAsciiUpperCase(
713 static_cast<unsigned char>(*pString2++))))
714 return false;
715 return pBegin1 == pEnd1;
// One row of the charset name table: a charset name and the text encoding
// it stands for.
718 struct EncodingEntry
// Charset name; compared case-insensitively by getCharsetEncoding().
720 char const * m_aName;
// The encoding returned for that name.
721 rtl_TextEncoding m_eEncoding;
724 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
725 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
726 // noted:
727 EncodingEntry const aEncodingMap[]
728 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
729 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
730 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
731 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
732 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
733 { "ASCII", RTL_TEXTENCODING_ASCII_US },
734 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
735 { "US", RTL_TEXTENCODING_ASCII_US },
736 { "IBM367", RTL_TEXTENCODING_ASCII_US },
737 { "CP367", RTL_TEXTENCODING_ASCII_US },
738 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
739 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
740 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
741 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
742 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
743 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
744 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
745 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
746 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
747 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
748 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
749 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
750 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
751 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
752 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
753 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
754 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
755 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
756 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
757 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
758 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
759 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
760 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
761 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
762 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
763 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
764 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
765 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
766 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
767 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
768 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
769 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
770 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
771 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
772 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
773 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
774 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
775 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
776 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
777 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
778 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
779 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
780 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
781 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
782 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
783 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
784 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
785 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
786 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
787 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
788 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
789 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
790 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
791 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
792 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
793 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
794 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
795 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
796 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
797 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
798 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
799 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
800 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
801 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
802 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
803 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
804 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
805 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
806 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
807 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
808 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
809 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
810 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
811 { "IBM437", RTL_TEXTENCODING_IBM_437 },
812 { "CP437", RTL_TEXTENCODING_IBM_437 },
813 { "437", RTL_TEXTENCODING_IBM_437 },
814 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
815 { "IBM850", RTL_TEXTENCODING_IBM_850 },
816 { "CP850", RTL_TEXTENCODING_IBM_850 },
817 { "850", RTL_TEXTENCODING_IBM_850 },
818 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
819 { "IBM860", RTL_TEXTENCODING_IBM_860 },
820 { "CP860", RTL_TEXTENCODING_IBM_860 },
821 { "860", RTL_TEXTENCODING_IBM_860 },
822 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
823 { "IBM861", RTL_TEXTENCODING_IBM_861 },
824 { "CP861", RTL_TEXTENCODING_IBM_861 },
825 { "861", RTL_TEXTENCODING_IBM_861 },
826 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
827 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
828 { "IBM863", RTL_TEXTENCODING_IBM_863 },
829 { "CP863", RTL_TEXTENCODING_IBM_863 },
830 { "863", RTL_TEXTENCODING_IBM_863 },
831 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
832 { "IBM865", RTL_TEXTENCODING_IBM_865 },
833 { "CP865", RTL_TEXTENCODING_IBM_865 },
834 { "865", RTL_TEXTENCODING_IBM_865 },
835 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
836 { "IBM775", RTL_TEXTENCODING_IBM_775 },
837 { "CP775", RTL_TEXTENCODING_IBM_775 },
838 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
839 { "IBM852", RTL_TEXTENCODING_IBM_852 },
840 { "CP852", RTL_TEXTENCODING_IBM_852 },
841 { "852", RTL_TEXTENCODING_IBM_852 },
842 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
843 { "IBM855", RTL_TEXTENCODING_IBM_855 },
844 { "CP855", RTL_TEXTENCODING_IBM_855 },
845 { "855", RTL_TEXTENCODING_IBM_855 },
846 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
847 { "IBM857", RTL_TEXTENCODING_IBM_857 },
848 { "CP857", RTL_TEXTENCODING_IBM_857 },
849 { "857", RTL_TEXTENCODING_IBM_857 },
850 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
851 { "IBM862", RTL_TEXTENCODING_IBM_862 },
852 { "CP862", RTL_TEXTENCODING_IBM_862 },
853 { "862", RTL_TEXTENCODING_IBM_862 },
854 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
855 { "IBM864", RTL_TEXTENCODING_IBM_864 },
856 { "CP864", RTL_TEXTENCODING_IBM_864 },
857 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
858 { "IBM866", RTL_TEXTENCODING_IBM_866 },
859 { "CP866", RTL_TEXTENCODING_IBM_866 },
860 { "866", RTL_TEXTENCODING_IBM_866 },
861 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
862 { "IBM869", RTL_TEXTENCODING_IBM_869 },
863 { "CP869", RTL_TEXTENCODING_IBM_869 },
864 { "869", RTL_TEXTENCODING_IBM_869 },
865 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
866 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
867 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
868 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
869 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
870 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
871 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
872 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
873 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
874 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
875 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
876 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
877 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
878 { "GB2312", RTL_TEXTENCODING_GB_2312 },
879 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
880 { "BIG5", RTL_TEXTENCODING_BIG5 },
881 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
882 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
883 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
884 RTL_TEXTENCODING_EUC_JP },
885 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
886 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
887 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
888 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
889 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
890 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
891 { "UTF-7", RTL_TEXTENCODING_UTF7 },
892 { "UTF-8", RTL_TEXTENCODING_UTF8 },
893 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
894 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
895 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
896 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
897 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
898 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
899 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
900 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
901 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
902 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
904 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
905 char const * pEnd)
907 for (const EncodingEntry& i : aEncodingMap)
908 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
909 return i.m_eEncoding;
910 return RTL_TEXTENCODING_DONTKNOW;
915 // INetMIME
917 // static
918 bool INetMIME::isAtomChar(sal_uInt32 nChar)
920 static const bool aMap[128]
921 = { false, false, false, false, false, false, false, false,
922 false, false, false, false, false, false, false, false,
923 false, false, false, false, false, false, false, false,
924 false, false, false, false, false, false, false, false,
925 false, true, false, true, true, true, true, true, // !"#$%&'
926 false, false, true, true, false, true, false, true, //()*+,-./
927 true, true, true, true, true, true, true, true, //01234567
928 true, true, false, false, false, true, false, true, //89:;<=>?
929 false, true, true, true, true, true, true, true, //@ABCDEFG
930 true, true, true, true, true, true, true, true, //HIJKLMNO
931 true, true, true, true, true, true, true, true, //PQRSTUVW
932 true, true, true, false, false, false, true, true, //XYZ[\]^_
933 true, true, true, true, true, true, true, true, //`abcdefg
934 true, true, true, true, true, true, true, true, //hijklmno
935 true, true, true, true, true, true, true, true, //pqrstuvw
936 true, true, true, true, true, true, true, false //xyz{|}~
938 return rtl::isAscii(nChar) && aMap[nChar];
941 // static
942 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
944 static const bool aMap[128]
945 = { false, false, false, false, false, false, false, false,
946 false, false, false, false, false, false, false, false,
947 false, false, false, false, false, false, false, false,
948 false, false, false, false, false, false, false, false,
949 false, true, false, true, true, false, true, true, // !"#$%&'
950 false, false, false, true, true, true, true, true, //()*+,-./
951 true, true, true, true, true, true, true, true, //01234567
952 true, true, true, true, true, true, true, true, //89:;<=>?
953 true, true, true, true, true, true, true, true, //@ABCDEFG
954 true, true, true, true, true, true, true, true, //HIJKLMNO
955 true, true, true, true, true, true, true, true, //PQRSTUVW
956 true, true, true, true, false, true, true, true, //XYZ[\]^_
957 true, true, true, true, true, true, true, true, //`abcdefg
958 true, true, true, true, true, true, true, true, //hijklmno
959 true, true, true, true, true, true, true, true, //pqrstuvw
960 true, true, true, false, true, true, true, false //xyz{|}~
962 return rtl::isAscii(nChar) && aMap[nChar];
965 // static
966 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
967 const sal_Unicode * pEnd1,
968 const char * pString2)
970 assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
971 "INetMIME::equalIgnoreCase(): Bad sequences");
973 while (*pString2 != 0)
974 if (pBegin1 == pEnd1
975 || (rtl::toAsciiUpperCase(*pBegin1++)
976 != rtl::toAsciiUpperCase(
977 static_cast<unsigned char>(*pString2++))))
978 return false;
979 return pBegin1 == pEnd1;
982 // static
983 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
984 const sal_Unicode * pEnd, bool bLeadingZeroes,
985 sal_uInt32 & rValue)
987 sal_uInt64 nTheValue = 0;
988 const sal_Unicode * p = rBegin;
989 for ( ; p != pEnd; ++p)
991 int nWeight = getWeight(*p);
992 if (nWeight < 0)
993 break;
994 nTheValue = 10 * nTheValue + nWeight;
995 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
996 return false;
998 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
999 return false;
1000 rBegin = p;
1001 rValue = sal_uInt32(nTheValue);
1002 return true;
1005 // static
1006 sal_Unicode const * INetMIME::scanContentType(
1007 std::u16string_view rStr, OUString * pType,
1008 OUString * pSubType, INetContentTypeParameterList * pParameters)
1010 sal_Unicode const * pBegin = rStr.data();
1011 sal_Unicode const * pEnd = pBegin + rStr.size();
1012 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1013 sal_Unicode const * pTypeBegin = p;
1014 while (p != pEnd && isTokenChar(*p))
1016 ++p;
1018 if (p == pTypeBegin)
1019 return nullptr;
1020 sal_Unicode const * pTypeEnd = p;
1022 p = skipLinearWhiteSpaceComment(p, pEnd);
1023 if (p == pEnd || *p++ != '/')
1024 return nullptr;
1026 p = skipLinearWhiteSpaceComment(p, pEnd);
1027 sal_Unicode const * pSubTypeBegin = p;
1028 while (p != pEnd && isTokenChar(*p))
1030 ++p;
1032 if (p == pSubTypeBegin)
1033 return nullptr;
1034 sal_Unicode const * pSubTypeEnd = p;
1036 if (pType != nullptr)
1038 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1040 if (pSubType != nullptr)
1042 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1043 .toAsciiLowerCase();
1046 return scanParameters(p, pEnd, pParameters);
1049 // static
// Decode the MIME encoded words ("=?charset[*language]?B|Q?text?=", see the
// grammar below) contained in rBody and return the result as a UTF-16
// string.
//
// Everything that is not part of a successfully decoded encoded word is
// passed through appendISO88591(), with one exception: byte sequences that
// translateUTF8Char() accepts are appended as the decoded code point.
// A candidate that fails any check is not an error; its '=' is then
// handled like any other byte and scanning resumes right after it.
1050 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1052 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1053 // versions of StarOffice send mails with header fields where encoded
1054 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1055 // '=', ',', '.', '"', ')', without any required white space in between.
1056 // And there appear to exist some broken mailers that only encode single
1057 // letters within words, like "Appel
1058 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1059 // detect encoded words even when not properly surrounded by white space.
1061 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1063 // encoded-word = "=?"
1064 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
1065 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1066 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1067 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1068 // "?="
1070 // base64 = ALPHA / DIGIT / "+" / "/"
1072 const char * pBegin = rBody.getStr();
1073 const char * pEnd = pBegin + rBody.getLength();
// sDecoded collects the output.  pCopyBegin marks the start of the input
// that has been scanned but not yet appended to sDecoded.
1075 OUStringBuffer sDecoded;
1076 const char * pCopyBegin = pBegin;
// pWSPBegin is the position up to which pending input is flushed when an
// encoded word is accepted.  After an accepted encoded word it stays at
// the start of the following white space run, so white space between two
// adjacent encoded words is dropped from the output.
1078 /* bool bStartEncodedWord = true; */
1079 const char * pWSPBegin = pBegin;
// p is the main scan position; q is a look-ahead cursor used while testing
// whether an encoded word starts at p.
1081 for (const char * p = pBegin; p != pEnd;)
1083 if (*p == '=' /* && bStartEncodedWord */)
1085 const char * q = p + 1;
1086 bool bEncodedWord = q != pEnd && *q++ == '?';
// Step 1: scan the charset up to the next '?'.  An optional "*language"
// suffix is tracked so that it can be excluded from the charset lookup.
1088 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1089 if (bEncodedWord)
1091 const char * pCharsetBegin = q;
1092 const char * pLanguageBegin = nullptr;
1093 int nAlphaCount = 0;
1094 for (bool bDone = false; !bDone;)
1095 if (q == pEnd)
1097 bEncodedWord = false;
1098 bDone = true;
1100 else
1102 char cChar = *q++;
1103 switch (cChar)
// '*' (re)starts a candidate language suffix at this position.
1105 case '*':
1106 pLanguageBegin = q - 1;
1107 nAlphaCount = 0;
1108 break;
// Inside a language suffix '-' separates subtags; an empty subtag
// invalidates the suffix, otherwise the letter count restarts.
1110 case '-':
1111 if (pLanguageBegin != nullptr)
1113 if (nAlphaCount == 0)
1114 pLanguageBegin = nullptr;
1115 else
1116 nAlphaCount = 0;
1118 break;
// '?' ends the charset.  An empty charset is rejected.  The charset name
// ends at the language suffix if a valid one (non-empty last subtag) was
// seen, otherwise at the '?'.
1120 case '?':
1121 if (pCharsetBegin == q - 1)
1122 bEncodedWord = false;
1123 else
1125 eCharsetEncoding
1126 = getCharsetEncoding(
1127 pCharsetBegin,
1128 pLanguageBegin == nullptr
1129 || nAlphaCount == 0 ?
1130 q - 1 : pLanguageBegin);
1131 bEncodedWord = isMIMECharsetEncoding(
1132 eCharsetEncoding);
1133 eCharsetEncoding
1134 = translateFromMIME(eCharsetEncoding);
1136 bDone = true;
1137 break;
// Any other character: within a language suffix only ASCII letters are
// allowed, at most 8 per subtag; otherwise the suffix is discarded and the
// characters count as part of the charset name.
1139 default:
1140 if (pLanguageBegin != nullptr
1141 && (!rtl::isAsciiAlpha(
1142 static_cast<unsigned char>(cChar))
1143 || ++nAlphaCount > 8))
1144 pLanguageBegin = nullptr;
1145 break;
// Step 2: the encoding letter, 'B' or 'Q' in either case, followed by '?'.
1150 bool bEncodingB = false;
1151 if (bEncodedWord)
1153 if (q == pEnd)
1154 bEncodedWord = false;
1155 else
1157 switch (*q++)
1159 case 'B':
1160 case 'b':
1161 bEncodingB = true;
1162 break;
1164 case 'Q':
1165 case 'q':
1166 bEncodingB = false;
1167 break;
1169 default:
1170 bEncodedWord = false;
1171 break;
1176 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
// Step 3: decode the encoded text into the byte buffer sText.
1178 OStringBuffer sText;
1179 if (bEncodedWord)
1181 if (bEncodingB)
// "B" encoding: consume groups of four characters.  Each group yields
// nCount bytes (3 unless padding was seen).
1183 for (bool bDone = false; !bDone;)
1185 if (pEnd - q < 4)
1187 bEncodedWord = false;
1188 bDone = true;
1190 else
1192 bool bFinal = false;
1193 int nCount = 3;
1194 sal_uInt32 nValue = 0;
1195 for (int nShift = 18; nShift >= 0; nShift -= 6)
// As used here, a weight of -2 rejects the candidate and -1 is treated as
// padding; any other weight is merged into nValue as a 6-bit digit.
1197 int nWeight = getBase64Weight(*q++);
1198 if (nWeight == -2)
1200 bEncodedWord = false;
1201 bDone = true;
1202 break;
1204 if (nWeight == -1)
1206 if (!bFinal)
// Padding is not allowed in the first two positions of a group.
1208 if (nShift >= 12)
1210 bEncodedWord = false;
1211 bDone = true;
1212 break;
// First padding in the third position leaves 1 byte, in the fourth
// position 2 bytes.
1214 bFinal = true;
1215 nCount = nShift == 6 ? 1 : 2;
1218 else
1219 nValue |= nWeight << nShift;
1221 if (bEncodedWord)
1223 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1224 sText.append(char(nValue >> nShift & 0xFF));
// NOTE(review): q may equal pEnd here, so this reads one position past the
// counted length; presumably safe because OString data is NUL-terminated --
// confirm.
1225 if (*q == '?')
1227 ++q;
1228 bDone = true;
// A padded group must be the last one, i.e. be followed by '?'.
1230 if (bFinal && !bDone)
1232 bEncodedWord = false;
1233 bDone = true;
1239 else
// "Q" encoding: literal runs are copied lazily.  pEncodedTextCopyBegin
// marks the start of the run not yet appended to sText.
1241 const char * pEncodedTextBegin = q;
1242 const char * pEncodedTextCopyBegin = q;
1243 for (bool bDone = false; !bDone;)
1244 if (q == pEnd)
1246 bEncodedWord = false;
1247 bDone = true;
1249 else
1251 sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1252 switch (nChar)
// "=XY": flush the pending literal run, then append the byte given by the
// two hex digits.
1254 case '=':
1256 if (pEnd - q < 2)
1258 bEncodedWord = false;
1259 bDone = true;
1260 break;
1262 int nDigit1 = getHexWeight(q[0]);
1263 int nDigit2 = getHexWeight(q[1]);
1264 if (nDigit1 < 0 || nDigit2 < 0)
1266 bEncodedWord = false;
1267 bDone = true;
1268 break;
1270 sText.append(
1271 rBody.subView(
1272 (pEncodedTextCopyBegin - pBegin),
1273 (q - 1 - pEncodedTextCopyBegin))
1274 + OStringChar(char(nDigit1 << 4 | nDigit2)));
1275 q += 2;
1276 pEncodedTextCopyBegin = q;
1277 break;
// '?' ends the encoded text, which must not be empty; flush the remaining
// literal run.
1280 case '?':
1281 if (q - pEncodedTextBegin > 1)
1282 sText.append(rBody.subView(
1283 (pEncodedTextCopyBegin - pBegin),
1284 (q - 1 - pEncodedTextCopyBegin)));
1285 else
1286 bEncodedWord = false;
1287 bDone = true;
1288 break;
// '_' stands for a space.
1290 case '_':
1291 sText.append(
1292 rBody.subView(
1293 (pEncodedTextCopyBegin - pBegin),
1294 (q - 1 - pEncodedTextCopyBegin))
1295 + OString::Concat(" "));
1296 pEncodedTextCopyBegin = q;
1297 break;
// Any other character stays in the pending literal run, provided
// isVisible() accepts it.
1299 default:
1300 if (!isVisible(nChar))
1302 bEncodedWord = false;
1303 bDone = true;
1305 break;
// Step 4: the '=' of the closing "?=" (the '?' was consumed above).
1311 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
// Step 5: convert the decoded bytes from the charset to UTF-16.  A failed
// conversion rejects the candidate as well.
1313 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1314 sal_Size nUnicodeSize = 0;
1315 if (bEncodedWord)
1317 pUnicodeBuffer
1318 = convertToUnicode(sText.getStr(),
1319 sText.getStr() + sText.getLength(),
1320 eCharsetEncoding, nUnicodeSize);
1321 if (!pUnicodeBuffer)
1322 bEncodedWord = false;
// Accepted: flush pending input up to pWSPBegin, append the decoded text,
// continue after the encoded word and skip (without copying yet) any
// white space that follows it.
1325 if (bEncodedWord)
1327 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1328 sDecoded.append(
1329 pUnicodeBuffer.get(),
1330 static_cast< sal_Int32 >(nUnicodeSize));
1331 pUnicodeBuffer.reset();
1332 p = q;
1333 pCopyBegin = p;
1335 pWSPBegin = p;
1336 while (p != pEnd && isWhiteSpace(*p))
1337 ++p;
1338 /* bStartEncodedWord = p != pWSPBegin; */
1339 continue;
// Not an encoded word (or a rejected candidate): handle the byte at p as
// ordinary input.  It stays in the pending span unless it starts a UTF-8
// sequence.
1343 if (p == pEnd)
1344 break;
1346 switch (*p++)
1348 case '"':
1349 /* bStartEncodedWord = true; */
1350 break;
1352 case '(':
1353 /* bStartEncodedWord = true; */
1354 break;
1356 case ')':
1357 /* bStartEncodedWord = false; */
1358 break;
1360 default:
// If translateUTF8Char() recognises a sequence starting at this byte,
// flush the pending input before it, append the code point and continue
// at pUTF8End as left by translateUTF8Char().
1362 const char * pUTF8Begin = p - 1;
1363 const char * pUTF8End = pUTF8Begin;
1364 sal_uInt32 nCharacter = 0;
1365 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1367 appendISO88591(sDecoded, pCopyBegin, p - 1);
1368 sDecoded.appendUtf32(nCharacter);
1369 p = pUTF8End;
1370 pCopyBegin = p;
1372 /* bStartEncodedWord = false; */
1373 break;
1376 pWSPBegin = p;
// Flush whatever input is still pending.
1379 appendISO88591(sDecoded, pCopyBegin, pEnd);
1380 return sDecoded.makeStringAndClear();
1383 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */