Avoid potential negative array index access to cached text.
[LibreOffice.git] / tools / source / inet / inetmime.cxx
blob6694dc39866937917ce54e4cd3d305bf1899ee5f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/debug.hxx>
31 #include <tools/inetmime.hxx>
32 #include <rtl/character.hxx>
34 namespace {
36 rtl_TextEncoding getCharsetEncoding(const char * pBegin,
37 const char * pEnd);
39 /** Check for US-ASCII white space character.
41 @param nChar Some UCS-4 character.
43 @return True if nChar is a US-ASCII white space character (US-ASCII
44 0x09 or 0x20).
46 bool isWhiteSpace(sal_uInt32 nChar)
48 return nChar == '\t' || nChar == ' ';
51 /** Get the Base 64 digit weight of a US-ASCII character.
53 @param nChar Some UCS-4 character.
55 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
56 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
57 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
58 character (US-ASCII '='), return -1; otherwise, return -2.
60 int getBase64Weight(sal_uInt32 nChar)
62 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
63 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
64 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
65 nChar == '+' ? 62 :
66 nChar == '/' ? 63 :
67 nChar == '=' ? -1 : -2;
70 bool startsWithLineFolding(const sal_Unicode * pBegin,
71 const sal_Unicode * pEnd)
73 DBG_ASSERT(pBegin && pBegin <= pEnd,
74 "startsWithLineFolding(): Bad sequence");
76 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
77 && isWhiteSpace(pBegin[2]); // CR, LF
80 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
81 eEncoding)
83 #if defined(_WIN32)
84 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
85 RTL_TEXTENCODING_MS_1252 : eEncoding;
86 #else
87 return eEncoding;
88 #endif
91 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
93 return rtl_isOctetTextEncoding(eEncoding);
96 std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
97 const char * pEnd,
98 rtl_TextEncoding eEncoding,
99 sal_Size & rSize)
101 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
102 return nullptr;
103 rtl_TextToUnicodeConverter hConverter
104 = rtl_createTextToUnicodeConverter(eEncoding);
105 rtl_TextToUnicodeContext hContext
106 = rtl_createTextToUnicodeContext(hConverter);
107 std::unique_ptr<sal_Unicode[]> pBuffer;
108 sal_uInt32 nInfo;
109 for (sal_Size nBufferSize = pEnd - pBegin;;
110 nBufferSize += nBufferSize / 3 + 1)
112 pBuffer.reset(new sal_Unicode[nBufferSize]);
113 sal_Size nSrcCvtBytes;
114 rSize = rtl_convertTextToUnicode(
115 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
116 nBufferSize,
117 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
119 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
120 &nInfo, &nSrcCvtBytes);
121 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
122 break;
123 pBuffer.reset();
124 rtl_resetTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeContext(hConverter, hContext);
127 rtl_destroyTextToUnicodeConverter(hConverter);
128 if (nInfo != 0)
130 pBuffer.reset();
132 return pBuffer;
135 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
137 // See RFC 2279 for a discussion of UTF-8.
138 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
140 if (nChar < 0x80)
141 rSink.append(char(nChar));
142 else if (nChar < 0x800)
143 rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
144 + OStringChar(char((nChar & 0x3F) | 0x80)));
145 else if (nChar < 0x10000)
146 rSink.append(
147 OStringChar(char(nChar >> 12 | 0xE0))
148 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
149 + OStringChar(char((nChar & 0x3F) | 0x80)));
150 else if (nChar < 0x200000)
151 rSink.append(
152 OStringChar(char(nChar >> 18 | 0xF0))
153 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
154 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
155 + OStringChar(char((nChar & 0x3F) | 0x80)));
156 else if (nChar < 0x4000000)
157 rSink.append(
158 OStringChar(char(nChar >> 24 | 0xF8))
159 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
160 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
161 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
162 + OStringChar(char((nChar & 0x3F) | 0x80)));
163 else
164 rSink.append(
165 OStringChar(char(nChar >> 30 | 0xFC))
166 + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
167 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
168 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
169 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
170 + OStringChar(char((nChar & 0x3F) | 0x80)));
173 bool translateUTF8Char(const char *& rBegin,
174 const char * pEnd,
175 sal_uInt32 & rCharacter)
177 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
178 || static_cast< unsigned char >(*rBegin) >= 0xFE)
179 return false;
181 int nCount;
182 sal_uInt32 nMin;
183 sal_uInt32 nUCS4;
184 const char * p = rBegin;
185 if (static_cast< unsigned char >(*p) < 0xE0)
187 nCount = 1;
188 nMin = 0x80;
189 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
191 else if (static_cast< unsigned char >(*p) < 0xF0)
193 nCount = 2;
194 nMin = 0x800;
195 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
197 else if (static_cast< unsigned char >(*p) < 0xF8)
199 nCount = 3;
200 nMin = 0x10000;
201 nUCS4 = static_cast< unsigned char >(*p) & 7;
203 else if (static_cast< unsigned char >(*p) < 0xFC)
205 nCount = 4;
206 nMin = 0x200000;
207 nUCS4 = static_cast< unsigned char >(*p) & 3;
209 else
211 nCount = 5;
212 nMin = 0x4000000;
213 nUCS4 = static_cast< unsigned char >(*p) & 1;
215 ++p;
217 for (; nCount-- > 0; ++p)
218 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
219 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
220 else
221 return false;
223 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
224 return false;
226 rCharacter = nUCS4;
227 rBegin = p;
228 return true;
231 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
232 char const * pEnd);
234 struct Parameter
236 OString m_aAttribute;
237 OString m_aCharset;
238 OString m_aLanguage;
239 OString m_aValue;
240 sal_uInt32 m_nSection;
241 bool m_bExtended;
243 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
245 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
246 return nComp < 0 ||
247 (nComp == 0 && m_nSection < rhs.m_nSection);
249 struct IsSameSection // is used to check container for duplicates with std::any_of
251 const OString& rAttribute;
252 const sal_uInt32 nSection;
253 bool operator()(const Parameter& r) const
254 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
258 typedef std::forward_list<Parameter> ParameterList;
260 bool parseParameters(ParameterList const & rInput,
261 INetContentTypeParameterList * pOutput);
263 // appendISO88591
265 void appendISO88591(OUStringBuffer & rText, char const * pBegin,
266 char const * pEnd)
268 sal_Int32 nLength = pEnd - pBegin;
269 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
270 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
271 *p++ = static_cast<unsigned char>(*pBegin++);
272 rText.append(pBuffer.get(), nLength);
275 // parseParameters
277 bool parseParameters(ParameterList const & rInput,
278 INetContentTypeParameterList * pOutput)
280 if (pOutput)
281 pOutput->clear();
283 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
285 if (it->m_nSection > 0
286 && (itPrev == rInput.end()
287 || itPrev->m_nSection != it->m_nSection - 1
288 || itPrev->m_aAttribute != it->m_aAttribute))
289 return false;
292 if (pOutput)
293 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
295 bool bCharset = !it->m_aCharset.isEmpty();
296 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
297 if (bCharset)
298 eEncoding
299 = getCharsetEncoding(it->m_aCharset.getStr(),
300 it->m_aCharset.getStr()
301 + it->m_aCharset.getLength());
302 OUStringBuffer aValue(64);
303 bool bBadEncoding = false;
304 itNext = it;
307 sal_Size nSize;
308 std::unique_ptr<sal_Unicode[]> pUnicode
309 = convertToUnicode(itNext->m_aValue.getStr(),
310 itNext->m_aValue.getStr()
311 + itNext->m_aValue.getLength(),
312 bCharset && it->m_bExtended ?
313 eEncoding :
314 RTL_TEXTENCODING_UTF8,
315 nSize);
316 if (!pUnicode && !(bCharset && it->m_bExtended))
317 pUnicode = convertToUnicode(
318 itNext->m_aValue.getStr(),
319 itNext->m_aValue.getStr()
320 + itNext->m_aValue.getLength(),
321 RTL_TEXTENCODING_ISO_8859_1, nSize);
322 if (!pUnicode)
324 bBadEncoding = true;
325 break;
327 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
328 ++itNext;
330 while (itNext != rInput.end() && itNext->m_nSection != 0);
332 if (bBadEncoding)
334 aValue.setLength(0);
335 itNext = it;
338 if (itNext->m_bExtended)
340 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
341 aValue.append(
342 static_cast<sal_Unicode>(
343 static_cast<unsigned char>(itNext->m_aValue[i])
344 | 0xF800)); // map to unicode corporate use sub area
346 else
348 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
349 aValue.append( itNext->m_aValue[i] );
351 ++itNext;
353 while (itNext != rInput.end() && itNext->m_nSection != 0);
355 auto const ret = pOutput->insert(
356 {it->m_aAttribute,
357 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
358 SAL_INFO_IF(!ret.second, "tools",
359 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
361 return true;
364 /** Check whether some character is valid within an RFC 2045 <token>.
366 @param nChar Some UCS-4 character.
368 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
369 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
370 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
372 bool isTokenChar(sal_uInt32 nChar)
374 static const bool aMap[128]
375 = { false, false, false, false, false, false, false, false,
376 false, false, false, false, false, false, false, false,
377 false, false, false, false, false, false, false, false,
378 false, false, false, false, false, false, false, false,
379 false, true, false, true, true, true, true, true, // !"#$%&'
380 false, false, true, true, false, true, true, false, //()*+,-./
381 true, true, true, true, true, true, true, true, //01234567
382 true, true, false, false, false, false, false, false, //89:;<=>?
383 false, true, true, true, true, true, true, true, //@ABCDEFG
384 true, true, true, true, true, true, true, true, //HIJKLMNO
385 true, true, true, true, true, true, true, true, //PQRSTUVW
386 true, true, true, false, false, false, true, true, //XYZ[\]^_
387 true, true, true, true, true, true, true, true, //`abcdefg
388 true, true, true, true, true, true, true, true, //hijklmno
389 true, true, true, true, true, true, true, true, //pqrstuvw
390 true, true, true, true, true, true, true, false //xyz{|}~
392 return rtl::isAscii(nChar) && aMap[nChar];
395 const sal_Unicode * skipComment(const sal_Unicode * pBegin,
396 const sal_Unicode * pEnd)
398 DBG_ASSERT(pBegin && pBegin <= pEnd,
399 "skipComment(): Bad sequence");
401 if (pBegin != pEnd && *pBegin == '(')
403 sal_uInt32 nLevel = 0;
404 for (const sal_Unicode * p = pBegin; p != pEnd;)
405 switch (*p++)
407 case '(':
408 ++nLevel;
409 break;
411 case ')':
412 if (--nLevel == 0)
413 return p;
414 break;
416 case '\\':
417 if (p != pEnd)
418 ++p;
419 break;
422 return pBegin;
425 const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
426 pBegin,
427 const sal_Unicode *
428 pEnd)
430 DBG_ASSERT(pBegin && pBegin <= pEnd,
431 "skipLinearWhiteSpaceComment(): Bad sequence");
433 while (pBegin != pEnd)
434 switch (*pBegin)
436 case '\t':
437 case ' ':
438 ++pBegin;
439 break;
441 case 0x0D: // CR
442 if (startsWithLineFolding(pBegin, pEnd))
443 pBegin += 3;
444 else
445 return pBegin;
446 break;
448 case '(':
450 const sal_Unicode * p = skipComment(pBegin, pEnd);
451 if (p == pBegin)
452 return pBegin;
453 pBegin = p;
454 break;
457 default:
458 return pBegin;
460 return pBegin;
463 const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
464 const sal_Unicode * pEnd)
466 DBG_ASSERT(pBegin && pBegin <= pEnd,
467 "skipQuotedString(): Bad sequence");
469 if (pBegin != pEnd && *pBegin == '"')
470 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
471 switch (*p++)
473 case 0x0D: // CR
474 if (pEnd - p < 2 || *p++ != 0x0A // LF
475 || !isWhiteSpace(*p++))
476 return pBegin;
477 break;
479 case '"':
480 return p;
482 case '\\':
483 if (p != pEnd)
484 ++p;
485 break;
487 return pBegin;
490 sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
491 sal_Unicode const * pEnd,
492 INetContentTypeParameterList *
493 pParameters)
495 ParameterList aList;
496 sal_Unicode const * pParameterBegin = pBegin;
497 for (sal_Unicode const * p = pParameterBegin;;)
499 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
500 if (pParameterBegin == pEnd || *pParameterBegin != ';')
501 break;
502 p = pParameterBegin + 1;
504 sal_Unicode const * pAttributeBegin
505 = skipLinearWhiteSpaceComment(p, pEnd);
506 p = pAttributeBegin;
507 bool bDowncaseAttribute = false;
508 while (p != pEnd && isTokenChar(*p) && *p != '*')
510 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
511 ++p;
513 if (p == pAttributeBegin)
514 break;
515 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
516 if (bDowncaseAttribute)
517 aAttribute = aAttribute.toAsciiLowerCase();
519 sal_uInt32 nSection = 0;
520 if (p != pEnd && *p == '*')
522 ++p;
523 if (p != pEnd && rtl::isAsciiDigit(*p)
524 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
525 break;
528 bool bPresent = std::any_of(aList.begin(), aList.end(),
529 Parameter::IsSameSection{aAttribute, nSection});
530 if (bPresent)
531 break;
533 bool bExtended = false;
534 if (p != pEnd && *p == '*')
536 ++p;
537 bExtended = true;
540 p = skipLinearWhiteSpaceComment(p, pEnd);
542 if (p == pEnd || *p != '=')
543 break;
545 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
547 OString aCharset;
548 OString aLanguage;
549 OString aValue;
550 if (bExtended)
552 if (nSection == 0)
554 sal_Unicode const * pCharsetBegin = p;
555 bool bDowncaseCharset = false;
556 while (p != pEnd && isTokenChar(*p) && *p != '\'')
558 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
559 ++p;
561 if (p == pCharsetBegin)
562 break;
563 if (pParameters)
565 aCharset = OString(
566 pCharsetBegin,
567 p - pCharsetBegin,
568 RTL_TEXTENCODING_ASCII_US);
569 if (bDowncaseCharset)
570 aCharset = aCharset.toAsciiLowerCase();
573 if (p == pEnd || *p != '\'')
574 break;
575 ++p;
577 sal_Unicode const * pLanguageBegin = p;
578 bool bDowncaseLanguage = false;
579 int nLetters = 0;
580 for (; p != pEnd; ++p)
581 if (rtl::isAsciiAlpha(*p))
583 if (++nLetters > 8)
584 break;
585 bDowncaseLanguage = bDowncaseLanguage
586 || rtl::isAsciiUpperCase(*p);
588 else if (*p == '-')
590 if (nLetters == 0)
591 break;
592 nLetters = 0;
594 else
595 break;
596 if (nLetters == 0 || nLetters > 8)
597 break;
598 if (pParameters)
600 aLanguage = OString(
601 pLanguageBegin,
602 p - pLanguageBegin,
603 RTL_TEXTENCODING_ASCII_US);
604 if (bDowncaseLanguage)
605 aLanguage = aLanguage.toAsciiLowerCase();
608 if (p == pEnd || *p != '\'')
609 break;
610 ++p;
612 if (pParameters)
614 OStringBuffer aSink;
615 while (p != pEnd)
617 auto q = p;
618 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
619 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
620 break;
621 p = q;
622 if (nChar == '%' && p + 1 < pEnd)
624 int nWeight1 = INetMIME::getHexWeight(p[0]);
625 int nWeight2 = INetMIME::getHexWeight(p[1]);
626 if (nWeight1 >= 0 && nWeight2 >= 0)
628 aSink.append(char(nWeight1 << 4 | nWeight2));
629 p += 2;
630 continue;
633 writeUTF8(aSink, nChar);
635 aValue = aSink.makeStringAndClear();
637 else
638 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
639 ++p;
641 else if (p != pEnd && *p == '"')
642 if (pParameters)
644 OStringBuffer aSink(256);
645 bool bInvalid = false;
646 for (++p;;)
648 if (p == pEnd)
650 bInvalid = true;
651 break;
653 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
654 if (nChar == '"')
655 break;
656 else if (nChar == 0x0D) // CR
658 if (pEnd - p < 2 || *p++ != 0x0A // LF
659 || !isWhiteSpace(*p))
661 bInvalid = true;
662 break;
664 nChar = static_cast<unsigned char>(*p++);
666 else if (nChar == '\\')
668 if (p == pEnd)
670 bInvalid = true;
671 break;
673 nChar = INetMIME::getUTF32Character(p, pEnd);
675 writeUTF8(aSink, nChar);
677 if (bInvalid)
678 break;
679 aValue = aSink.makeStringAndClear();
681 else
683 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
684 if (p == pStringEnd)
685 break;
686 p = pStringEnd;
688 else
690 sal_Unicode const * pTokenBegin = p;
691 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
692 ++p;
693 if (p == pTokenBegin)
694 break;
695 if (pParameters)
696 aValue = OString(
697 pTokenBegin, p - pTokenBegin,
698 RTL_TEXTENCODING_UTF8);
700 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
702 aList.sort();
703 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
706 bool equalIgnoreCase(const char * pBegin1,
707 const char * pEnd1,
708 const char * pString2)
710 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
711 "equalIgnoreCase(): Bad sequences");
713 while (*pString2 != 0)
714 if (pBegin1 == pEnd1
715 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
716 != rtl::toAsciiUpperCase(
717 static_cast<unsigned char>(*pString2++))))
718 return false;
719 return pBegin1 == pEnd1;
722 struct EncodingEntry
724 char const * m_aName;
725 rtl_TextEncoding m_eEncoding;
728 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
729 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
730 // noted:
731 EncodingEntry const aEncodingMap[]
732 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
733 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
734 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
735 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
736 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
737 { "ASCII", RTL_TEXTENCODING_ASCII_US },
738 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
739 { "US", RTL_TEXTENCODING_ASCII_US },
740 { "IBM367", RTL_TEXTENCODING_ASCII_US },
741 { "CP367", RTL_TEXTENCODING_ASCII_US },
742 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
743 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
744 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
745 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
746 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
747 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
748 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
749 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
750 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
751 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
752 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
753 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
754 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
755 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
756 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
757 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
758 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
759 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
760 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
761 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
762 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
763 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
764 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
765 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
766 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
767 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
768 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
769 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
770 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
771 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
772 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
773 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
774 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
775 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
776 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
777 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
778 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
779 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
780 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
781 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
782 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
783 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
784 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
785 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
786 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
787 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
788 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
789 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
790 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
791 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
792 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
793 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
794 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
795 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
796 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
797 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
798 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
799 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
800 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
801 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
802 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
803 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
804 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
805 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
806 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
807 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
808 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
809 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
810 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
811 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
812 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
813 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
814 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
815 { "IBM437", RTL_TEXTENCODING_IBM_437 },
816 { "CP437", RTL_TEXTENCODING_IBM_437 },
817 { "437", RTL_TEXTENCODING_IBM_437 },
818 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
819 { "IBM850", RTL_TEXTENCODING_IBM_850 },
820 { "CP850", RTL_TEXTENCODING_IBM_850 },
821 { "850", RTL_TEXTENCODING_IBM_850 },
822 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
823 { "IBM860", RTL_TEXTENCODING_IBM_860 },
824 { "CP860", RTL_TEXTENCODING_IBM_860 },
825 { "860", RTL_TEXTENCODING_IBM_860 },
826 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
827 { "IBM861", RTL_TEXTENCODING_IBM_861 },
828 { "CP861", RTL_TEXTENCODING_IBM_861 },
829 { "861", RTL_TEXTENCODING_IBM_861 },
830 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
831 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
832 { "IBM863", RTL_TEXTENCODING_IBM_863 },
833 { "CP863", RTL_TEXTENCODING_IBM_863 },
834 { "863", RTL_TEXTENCODING_IBM_863 },
835 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
836 { "IBM865", RTL_TEXTENCODING_IBM_865 },
837 { "CP865", RTL_TEXTENCODING_IBM_865 },
838 { "865", RTL_TEXTENCODING_IBM_865 },
839 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
840 { "IBM775", RTL_TEXTENCODING_IBM_775 },
841 { "CP775", RTL_TEXTENCODING_IBM_775 },
842 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
843 { "IBM852", RTL_TEXTENCODING_IBM_852 },
844 { "CP852", RTL_TEXTENCODING_IBM_852 },
845 { "852", RTL_TEXTENCODING_IBM_852 },
846 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
847 { "IBM855", RTL_TEXTENCODING_IBM_855 },
848 { "CP855", RTL_TEXTENCODING_IBM_855 },
849 { "855", RTL_TEXTENCODING_IBM_855 },
850 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
851 { "IBM857", RTL_TEXTENCODING_IBM_857 },
852 { "CP857", RTL_TEXTENCODING_IBM_857 },
853 { "857", RTL_TEXTENCODING_IBM_857 },
854 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
855 { "IBM862", RTL_TEXTENCODING_IBM_862 },
856 { "CP862", RTL_TEXTENCODING_IBM_862 },
857 { "862", RTL_TEXTENCODING_IBM_862 },
858 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
859 { "IBM864", RTL_TEXTENCODING_IBM_864 },
860 { "CP864", RTL_TEXTENCODING_IBM_864 },
861 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
862 { "IBM866", RTL_TEXTENCODING_IBM_866 },
863 { "CP866", RTL_TEXTENCODING_IBM_866 },
864 { "866", RTL_TEXTENCODING_IBM_866 },
865 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
866 { "IBM869", RTL_TEXTENCODING_IBM_869 },
867 { "CP869", RTL_TEXTENCODING_IBM_869 },
868 { "869", RTL_TEXTENCODING_IBM_869 },
869 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
870 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
871 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
872 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
873 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
874 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
875 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
876 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
877 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
878 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
879 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
880 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
881 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
882 { "GB2312", RTL_TEXTENCODING_GB_2312 },
883 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
884 { "BIG5", RTL_TEXTENCODING_BIG5 },
885 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
886 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
887 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
888 RTL_TEXTENCODING_EUC_JP },
889 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
890 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
891 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
892 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
893 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
894 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
895 { "UTF-7", RTL_TEXTENCODING_UTF7 },
896 { "UTF-8", RTL_TEXTENCODING_UTF8 },
897 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
898 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
899 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
900 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
901 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
902 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
903 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
904 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
905 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
906 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
908 rtl_TextEncoding getCharsetEncoding(char const * pBegin,
909 char const * pEnd)
911 for (const EncodingEntry& i : aEncodingMap)
912 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
913 return i.m_eEncoding;
914 return RTL_TEXTENCODING_DONTKNOW;
919 // INetMIME
921 // static
922 bool INetMIME::isAtomChar(sal_uInt32 nChar)
924 static const bool aMap[128]
925 = { false, false, false, false, false, false, false, false,
926 false, false, false, false, false, false, false, false,
927 false, false, false, false, false, false, false, false,
928 false, false, false, false, false, false, false, false,
929 false, true, false, true, true, true, true, true, // !"#$%&'
930 false, false, true, true, false, true, false, true, //()*+,-./
931 true, true, true, true, true, true, true, true, //01234567
932 true, true, false, false, false, true, false, true, //89:;<=>?
933 false, true, true, true, true, true, true, true, //@ABCDEFG
934 true, true, true, true, true, true, true, true, //HIJKLMNO
935 true, true, true, true, true, true, true, true, //PQRSTUVW
936 true, true, true, false, false, false, true, true, //XYZ[\]^_
937 true, true, true, true, true, true, true, true, //`abcdefg
938 true, true, true, true, true, true, true, true, //hijklmno
939 true, true, true, true, true, true, true, true, //pqrstuvw
940 true, true, true, true, true, true, true, false //xyz{|}~
942 return rtl::isAscii(nChar) && aMap[nChar];
945 // static
946 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
948 static const bool aMap[128]
949 = { false, false, false, false, false, false, false, false,
950 false, false, false, false, false, false, false, false,
951 false, false, false, false, false, false, false, false,
952 false, false, false, false, false, false, false, false,
953 false, true, false, true, true, false, true, true, // !"#$%&'
954 false, false, false, true, true, true, true, true, //()*+,-./
955 true, true, true, true, true, true, true, true, //01234567
956 true, true, true, true, true, true, true, true, //89:;<=>?
957 true, true, true, true, true, true, true, true, //@ABCDEFG
958 true, true, true, true, true, true, true, true, //HIJKLMNO
959 true, true, true, true, true, true, true, true, //PQRSTUVW
960 true, true, true, true, false, true, true, true, //XYZ[\]^_
961 true, true, true, true, true, true, true, true, //`abcdefg
962 true, true, true, true, true, true, true, true, //hijklmno
963 true, true, true, true, true, true, true, true, //pqrstuvw
964 true, true, true, false, true, true, true, false //xyz{|}~
966 return rtl::isAscii(nChar) && aMap[nChar];
969 // static
970 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
971 const sal_Unicode * pEnd1,
972 const char * pString2)
974 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
975 "INetMIME::equalIgnoreCase(): Bad sequences");
977 while (*pString2 != 0)
978 if (pBegin1 == pEnd1
979 || (rtl::toAsciiUpperCase(*pBegin1++)
980 != rtl::toAsciiUpperCase(
981 static_cast<unsigned char>(*pString2++))))
982 return false;
983 return pBegin1 == pEnd1;
986 // static
987 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
988 const sal_Unicode * pEnd, bool bLeadingZeroes,
989 sal_uInt32 & rValue)
991 sal_uInt64 nTheValue = 0;
992 const sal_Unicode * p = rBegin;
993 for ( ; p != pEnd; ++p)
995 int nWeight = getWeight(*p);
996 if (nWeight < 0)
997 break;
998 nTheValue = 10 * nTheValue + nWeight;
999 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1000 return false;
1002 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1003 return false;
1004 rBegin = p;
1005 rValue = sal_uInt32(nTheValue);
1006 return true;
1009 // static
1010 sal_Unicode const * INetMIME::scanContentType(
1011 std::u16string_view rStr, OUString * pType,
1012 OUString * pSubType, INetContentTypeParameterList * pParameters)
1014 sal_Unicode const * pBegin = rStr.data();
1015 sal_Unicode const * pEnd = pBegin + rStr.size();
1016 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1017 sal_Unicode const * pTypeBegin = p;
1018 while (p != pEnd && isTokenChar(*p))
1020 ++p;
1022 if (p == pTypeBegin)
1023 return nullptr;
1024 sal_Unicode const * pTypeEnd = p;
1026 p = skipLinearWhiteSpaceComment(p, pEnd);
1027 if (p == pEnd || *p++ != '/')
1028 return nullptr;
1030 p = skipLinearWhiteSpaceComment(p, pEnd);
1031 sal_Unicode const * pSubTypeBegin = p;
1032 while (p != pEnd && isTokenChar(*p))
1034 ++p;
1036 if (p == pSubTypeBegin)
1037 return nullptr;
1038 sal_Unicode const * pSubTypeEnd = p;
1040 if (pType != nullptr)
1042 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1044 if (pSubType != nullptr)
1046 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1047 .toAsciiLowerCase();
1050 return scanParameters(p, pEnd, pParameters);
1053 // static
1054 OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1056 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1057 // versions of StarOffice send mails with header fields where encoded
1058 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1059 // '=', ',', '.', '"', ')', without any required white space in between.
1060 // And there appear to exist some broken mailers that only encode single
1061 // letters within words, like "Appel
1062 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1063 // detect encoded words even when not properly surrounded by white space.
1065 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1067 // encoded-word = "=?"
1068 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1069 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1070 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1071 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1072 // "?="
1074 // base64 = ALPHA / DIGIT / "+" / "/"
1076 const char * pBegin = rBody.getStr();
1077 const char * pEnd = pBegin + rBody.getLength();
1079 OUStringBuffer sDecoded;
1080 const char * pCopyBegin = pBegin;
1082 /* bool bStartEncodedWord = true; */
1083 const char * pWSPBegin = pBegin;
1085 for (const char * p = pBegin; p != pEnd;)
1087 if (*p == '=' /* && bStartEncodedWord */)
1089 const char * q = p + 1;
1090 bool bEncodedWord = q != pEnd && *q++ == '?';
1092 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1093 if (bEncodedWord)
1095 const char * pCharsetBegin = q;
1096 const char * pLanguageBegin = nullptr;
1097 int nAlphaCount = 0;
1098 for (bool bDone = false; !bDone;)
1099 if (q == pEnd)
1101 bEncodedWord = false;
1102 bDone = true;
1104 else
1106 char cChar = *q++;
1107 switch (cChar)
1109 case '*':
1110 pLanguageBegin = q - 1;
1111 nAlphaCount = 0;
1112 break;
1114 case '-':
1115 if (pLanguageBegin != nullptr)
1117 if (nAlphaCount == 0)
1118 pLanguageBegin = nullptr;
1119 else
1120 nAlphaCount = 0;
1122 break;
1124 case '?':
1125 if (pCharsetBegin == q - 1)
1126 bEncodedWord = false;
1127 else
1129 eCharsetEncoding
1130 = getCharsetEncoding(
1131 pCharsetBegin,
1132 pLanguageBegin == nullptr
1133 || nAlphaCount == 0 ?
1134 q - 1 : pLanguageBegin);
1135 bEncodedWord = isMIMECharsetEncoding(
1136 eCharsetEncoding);
1137 eCharsetEncoding
1138 = translateFromMIME(eCharsetEncoding);
1140 bDone = true;
1141 break;
1143 default:
1144 if (pLanguageBegin != nullptr
1145 && (!rtl::isAsciiAlpha(
1146 static_cast<unsigned char>(cChar))
1147 || ++nAlphaCount > 8))
1148 pLanguageBegin = nullptr;
1149 break;
1154 bool bEncodingB = false;
1155 if (bEncodedWord)
1157 if (q == pEnd)
1158 bEncodedWord = false;
1159 else
1161 switch (*q++)
1163 case 'B':
1164 case 'b':
1165 bEncodingB = true;
1166 break;
1168 case 'Q':
1169 case 'q':
1170 bEncodingB = false;
1171 break;
1173 default:
1174 bEncodedWord = false;
1175 break;
1180 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1182 OStringBuffer sText;
1183 if (bEncodedWord)
1185 if (bEncodingB)
1187 for (bool bDone = false; !bDone;)
1189 if (pEnd - q < 4)
1191 bEncodedWord = false;
1192 bDone = true;
1194 else
1196 bool bFinal = false;
1197 int nCount = 3;
1198 sal_uInt32 nValue = 0;
1199 for (int nShift = 18; nShift >= 0; nShift -= 6)
1201 int nWeight = getBase64Weight(*q++);
1202 if (nWeight == -2)
1204 bEncodedWord = false;
1205 bDone = true;
1206 break;
1208 if (nWeight == -1)
1210 if (!bFinal)
1212 if (nShift >= 12)
1214 bEncodedWord = false;
1215 bDone = true;
1216 break;
1218 bFinal = true;
1219 nCount = nShift == 6 ? 1 : 2;
1222 else
1223 nValue |= nWeight << nShift;
1225 if (bEncodedWord)
1227 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1228 sText.append(char(nValue >> nShift & 0xFF));
1229 if (*q == '?')
1231 ++q;
1232 bDone = true;
1234 if (bFinal && !bDone)
1236 bEncodedWord = false;
1237 bDone = true;
1243 else
1245 const char * pEncodedTextBegin = q;
1246 const char * pEncodedTextCopyBegin = q;
1247 for (bool bDone = false; !bDone;)
1248 if (q == pEnd)
1250 bEncodedWord = false;
1251 bDone = true;
1253 else
1255 sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1256 switch (nChar)
1258 case '=':
1260 if (pEnd - q < 2)
1262 bEncodedWord = false;
1263 bDone = true;
1264 break;
1266 int nDigit1 = getHexWeight(q[0]);
1267 int nDigit2 = getHexWeight(q[1]);
1268 if (nDigit1 < 0 || nDigit2 < 0)
1270 bEncodedWord = false;
1271 bDone = true;
1272 break;
1274 sText.append(
1275 rBody.subView(
1276 (pEncodedTextCopyBegin - pBegin),
1277 (q - 1 - pEncodedTextCopyBegin))
1278 + OStringChar(char(nDigit1 << 4 | nDigit2)));
1279 q += 2;
1280 pEncodedTextCopyBegin = q;
1281 break;
1284 case '?':
1285 if (q - pEncodedTextBegin > 1)
1286 sText.append(rBody.subView(
1287 (pEncodedTextCopyBegin - pBegin),
1288 (q - 1 - pEncodedTextCopyBegin)));
1289 else
1290 bEncodedWord = false;
1291 bDone = true;
1292 break;
1294 case '_':
1295 sText.append(
1296 rBody.subView(
1297 (pEncodedTextCopyBegin - pBegin),
1298 (q - 1 - pEncodedTextCopyBegin))
1299 + OString::Concat(" "));
1300 pEncodedTextCopyBegin = q;
1301 break;
1303 default:
1304 if (!isVisible(nChar))
1306 bEncodedWord = false;
1307 bDone = true;
1309 break;
1315 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1317 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1318 sal_Size nUnicodeSize = 0;
1319 if (bEncodedWord)
1321 pUnicodeBuffer
1322 = convertToUnicode(sText.getStr(),
1323 sText.getStr() + sText.getLength(),
1324 eCharsetEncoding, nUnicodeSize);
1325 if (!pUnicodeBuffer)
1326 bEncodedWord = false;
1329 if (bEncodedWord)
1331 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1332 sDecoded.append(
1333 pUnicodeBuffer.get(),
1334 static_cast< sal_Int32 >(nUnicodeSize));
1335 pUnicodeBuffer.reset();
1336 p = q;
1337 pCopyBegin = p;
1339 pWSPBegin = p;
1340 while (p != pEnd && isWhiteSpace(*p))
1341 ++p;
1342 /* bStartEncodedWord = p != pWSPBegin; */
1343 continue;
1347 if (p == pEnd)
1348 break;
1350 switch (*p++)
1352 case '"':
1353 /* bStartEncodedWord = true; */
1354 break;
1356 case '(':
1357 /* bStartEncodedWord = true; */
1358 break;
1360 case ')':
1361 /* bStartEncodedWord = false; */
1362 break;
1364 default:
1366 const char * pUTF8Begin = p - 1;
1367 const char * pUTF8End = pUTF8Begin;
1368 sal_uInt32 nCharacter = 0;
1369 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1371 appendISO88591(sDecoded, pCopyBegin, p - 1);
1372 sDecoded.appendUtf32(nCharacter);
1373 p = pUTF8End;
1374 pCopyBegin = p;
1376 /* bStartEncodedWord = false; */
1377 break;
1380 pWSPBegin = p;
1383 appendISO88591(sDecoded, pCopyBegin, pEnd);
1384 return sDecoded.makeStringAndClear();
1387 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */