netwerk/mime/src/nsMIMEHeaderParamImpl.cpp

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim:expandtab:shiftwidth=2:tabstop=4:
   3  */
   4 /* ***** BEGIN LICENSE BLOCK *****
   5  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version
   8  * 1.1 (the "License"); you may not use this file except in compliance with
   9  * the License. You may obtain a copy of the License at
  10  * http://www.mozilla.org/MPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS IS" basis,
  13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14  * for the specific language governing rights and limitations under the
  15  * License.
  16  *
  17  * The Original Code is mozilla.org code.
  18  *
  19  * The Initial Developer of the Original Code is
  20  * Netscape Communications Corporation.
  21  * Portions created by the Initial Developer are Copyright (C) 1998
  22  * the Initial Developer. All Rights Reserved.
  23  *
  24  * Contributor(s):
  25  *   rhp@netscape.com
  26  *   Jungshik Shin <jshin@mailaps.org>
  27  *   John G Myers   <jgmyers@netscape.com>
  28  *   Takayuki Tei   <taka@netscape.com>
  29  *
  30  * Alternatively, the contents of this file may be used under the terms of
  31  * either the GNU General Public License Version 2 or later (the "GPL"), or
  32  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  33  * in which case the provisions of the GPL or the LGPL are applicable instead
  34  * of those above. If you wish to allow use of your version of this file only
  35  * under the terms of either the GPL or the LGPL, and not to allow others to
  36  * use your version of this file under the terms of the MPL, indicate your
  37  * decision by deleting the provisions above and replace them with the notice
  38  * and other provisions required by the GPL or the LGPL. If you do not delete
  39  * the provisions above, a recipient may use your version of this file under
  40  * the terms of any one of the MPL, the GPL or the LGPL.
  41  *
  42  * ***** END LICENSE BLOCK ***** */
  43
  44 #include <string.h>
  45 #include "prtypes.h"
  46 #include "prmem.h"
  47 #include "prprf.h"
  48 #include "plstr.h"
  49 #include "plbase64.h"
  50 #include "nsCRT.h"
  51 #include "nsMemory.h"
  52 #include "nsCOMPtr.h"
  53 #include "nsEscape.h"
  54 #include "nsIUTF8ConverterService.h"
  55 #include "nsUConvCID.h"
  56 #include "nsIServiceManager.h"
  57 #include "nsMIMEHeaderParamImpl.h"
  58 #include "nsReadableUtils.h"
  59 #include "nsNativeCharsetUtils.h"
  60
  61 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
  62
  63 static char *DecodeQ(const char *, PRUint32);
  64 static PRBool Is7bitNonAsciiString(const char *, PRUint32);
  65 static void CopyRawHeader(const char *, PRUint32, const char *, nsACString &);
  66 static nsresult DecodeRFC2047Str(const char *, const char *, PRBool, nsACString&);
  67
  68 // XXX The chance of UTF-7 being used in the message header is really
  69 // low, but in theory it's possible.
  70 #define IS_7BIT_NON_ASCII_CHARSET(cset)            \
  71     (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
  72      !nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
  73      !nsCRT::strncasecmp((cset), "UTF-7", 5))
  74
  75 NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
  76
  77 // XXX : aTryLocaleCharset is not yet effective.
  78 NS_IMETHODIMP
  79 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
  80                                     const char *aParamName,
  81                                     const nsACString& aFallbackCharset,
  82                                     PRBool aTryLocaleCharset,
  83                                     char **aLang, nsAString& aResult)
  84 {
  85     aResult.Truncate();
  86     nsresult rv;
  87
  88     // get parameter (decode RFC 2231 if it's RFC 2231-encoded and
  89     // return charset.)
  90     nsXPIDLCString med;
  91     nsXPIDLCString charset;
  92     rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
  93                               getter_Copies(charset), aLang, getter_Copies(med));
  94     if (NS_FAILED(rv))
  95         return rv;
  96
  97     // convert to UTF-8 after charset conversion and RFC 2047 decoding
  98     // if necessary.
  99
 100     nsCAutoString str1;
 101     rv = DecodeParameter(med, charset.get(), nsnull, PR_FALSE, str1);
 102     NS_ENSURE_SUCCESS(rv, rv);
 103
 104     if (!aFallbackCharset.IsEmpty())
 105     {
 106         nsCAutoString str2;
 107         nsCOMPtr<nsIUTF8ConverterService>
 108           cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
 109         if (cvtUTF8 &&
 110             NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
 111                 PromiseFlatCString(aFallbackCharset).get(), PR_FALSE, str2))) {
 112           CopyUTF8toUTF16(str2, aResult);
 113           return NS_OK;
 114         }
 115     }
 116
 117     if (IsUTF8(str1)) {
 118       CopyUTF8toUTF16(str1, aResult);
 119       return NS_OK;
 120     }
 121
 122     if (aTryLocaleCharset && !NS_IsNativeUTF8())
 123       return NS_CopyNativeToUnicode(str1, aResult);
 124
 125     CopyASCIItoUTF16(str1, aResult);
 126     return NS_OK;
 127 }
 128
 129 // moved almost verbatim from mimehdrs.cpp
 130 // char *
 131 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
 132 //                            char **charset, char **language)
 133 //
 134 // The format of these header lines  is
 135 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
 136 NS_IMETHODIMP
 137 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
 138                                             const char *aParamName,
 139                                             char **aCharset,
 140                                             char **aLang,
 141                                             char **aResult)
 142 {
 143   if (!aHeaderValue ||  !*aHeaderValue || !aResult)
 144     return NS_ERROR_INVALID_ARG;
 145
 146   *aResult = nsnull;
 147
 148   if (aCharset) *aCharset = nsnull;
 149   if (aLang) *aLang = nsnull;
 150
 151   const char *str = aHeaderValue;
 152
 153   // skip leading white space.
 154   for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
 155     ;
 156   const char *start = str;
 157
 158   // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
 159   // For instance, return 'inline' in the following case:
 160   // Content-Disposition: inline; filename=.....
 161   if (!aParamName || !*aParamName)
 162     {
 163       for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
 164         ;
 165       if (str == start)
 166         return NS_ERROR_UNEXPECTED;
 167       *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
 168       (*aResult)[str - start] = '\0';  // null-terminate
 169       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
 170       return NS_OK;
 171     }
 172
 173   /* Skip forward to first ';' */
 174   for (; *str && *str != ';' && *str != ','; ++str)
 175     ;
 176   if (*str)
 177     str++;
 178   /* Skip over following whitespace */
 179   for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
 180     ;
 181
 182   // Some broken http servers just specify parameters
 183   // like 'filename' without sepcifying disposition
 184   // method. Rewind to the first non-white-space
 185   // character.
 186
 187   if (!*str)
 188     str = start;
 189
 190   // RFC2231 - The legitimate parm format can be:
 191   // A. title=ThisIsTitle
 192   // B. title*=us-ascii'en-us'This%20is%20wierd.
 193   // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
 194   //    title*1*=have%20to%20support%20this.
 195   //    title*2="Else..."
 196   // D. title*0="Hey, what you think you are doing?"
 197   //    title*1="There is no charset and lang info."
 198
 199   PRInt32 paramLen = strlen(aParamName);
 200
 201   while (*str) {
 202     const char *tokenStart = str;
 203     const char *tokenEnd = 0;
 204     const char *valueStart = str;
 205     const char *valueEnd = 0;
 206
 207     NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
 208
 209     // Skip forward to the end of this token.
 210     for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
 211       ;
 212     tokenEnd = str;
 213
 214     // Skip over whitespace, '=', and whitespace
 215     while (nsCRT::IsAsciiSpace(*str)) ++str;
 216     if (*str == '=') ++str;
 217     while (nsCRT::IsAsciiSpace(*str)) ++str;
 218
 219     if (*str != '"')
 220     {
 221       // The value is a token, not a quoted string.
 222       valueStart = str;
 223       for (valueEnd = str;
 224            *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
 225            valueEnd++)
 226         ;
 227       str = valueEnd;
 228     }
 229     else
 230     {
 231       // The value is a quoted string.
 232       ++str;
 233       valueStart = str;
 234       for (valueEnd = str; *valueEnd; ++valueEnd)
 235       {
 236         if (*valueEnd == '\\')
 237           ++valueEnd;
 238         else if (*valueEnd == '"')
 239           break;
 240       }
 241       str = valueEnd + 1;
 242     }
 243
 244     // See if this is the simplest case (case A above),
 245     // a 'single' line value with no charset and lang.
 246     // If so, copy it and return.
 247     if (tokenEnd - tokenStart == paramLen &&
 248         !nsCRT::strncasecmp(tokenStart, aParamName, paramLen))
 249     {
 250       // if the parameter spans across multiple lines we have to strip out the
 251       //     line continuation -- jht 4/29/98
 252       nsCAutoString tempStr(valueStart, valueEnd - valueStart);
 253       tempStr.StripChars("\r\n");
 254       *aResult = ToNewCString(tempStr);
 255       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
 256       return NS_OK;
 257     }
 258     // case B, C, and D
 259     else if (tokenEnd - tokenStart > paramLen &&
 260              !nsCRT::strncasecmp(tokenStart, aParamName, paramLen) &&
 261              *(tokenStart + paramLen) == '*')
 262     {
 263       const char *cp = tokenStart + paramLen + 1; // 1st char pass '*'
 264       PRBool needUnescape = *(tokenEnd - 1) == '*';
 265       // the 1st line of a multi-line parameter or a single line  that needs
 266       // unescaping. ( title*0*=  or  title*= )
 267       if ((*cp == '0' && needUnescape) || (tokenEnd - tokenStart == paramLen + 1))
 268       {
 269         // look for single quotation mark(')
 270         const char *sQuote1 = PL_strchr(valueStart, 0x27);
 271         const char *sQuote2 = (char *) (sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nsnull);
 272
 273         // Two single quotation marks must be present even in
 274         // absence of charset and lang.
 275         if (!sQuote1 || !sQuote2)
 276           NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
 277         if (aCharset && sQuote1 > valueStart && sQuote1 < valueEnd)
 278         {
 279           *aCharset = (char *) nsMemory::Clone(valueStart, sQuote1 - valueStart + 1);
 280           if (*aCharset)
 281             *(*aCharset + (sQuote1 - valueStart)) = 0;
 282         }
 283         if (aLang && sQuote1 && sQuote2 && sQuote2 > sQuote1 + 1 &&
 284             sQuote2 < valueEnd)
 285         {
 286           *aLang = (char *) nsMemory::Clone(sQuote1 + 1, sQuote2 - (sQuote1 + 1) + 1);
 287           if (*aLang)
 288             *(*aLang + (sQuote2 - (sQuote1 + 1))) = 0;
 289         }
 290
 291         // Be generous and handle gracefully when required
 292         // single quotes are absent.
 293         if (sQuote1)
 294         {
 295           if(!sQuote2)
 296             sQuote2 = sQuote1;
 297         }
 298         else
 299           sQuote2 = valueStart - 1;
 300
 301         if (sQuote2 && sQuote2 + 1 < valueEnd)
 302         {
 303           NS_ASSERTION(!*aResult, "This is the 1st line. result buffer should be null.");
 304           *aResult = (char *) nsMemory::Alloc(valueEnd - (sQuote2 + 1) + 1);
 305           if (*aResult)
 306           {
 307             memcpy(*aResult, sQuote2 + 1, valueEnd - (sQuote2 + 1));
 308             *(*aResult + (valueEnd - (sQuote2 + 1))) = 0;
 309             if (needUnescape)
 310             {
 311               nsUnescape(*aResult);
 312               if (tokenEnd - tokenStart == paramLen + 1)
 313                 // we're done; this is case B
 314                 return NS_OK;
 315             }
 316           }
 317         }
 318       }  // end of if-block :  title*0*=  or  title*=
 319       // a line of multiline param with no need for unescaping : title*[0-9]=
 320       // or 2nd or later lines of a multiline param : title*[1-9]*=
 321       else if (nsCRT::IsAsciiDigit(PRUnichar(*cp)))
 322       {
 323         PRInt32 len = 0;
 324         if (*aResult) // 2nd or later lines of multiline parameter
 325         {
 326           len = strlen(*aResult);
 327           char *ns = (char *) nsMemory::Realloc(*aResult, len + (valueEnd - valueStart) + 1);
 328           if (!ns)
 329           {
 330             nsMemory::Free(*aResult);
 331           }
 332           *aResult = ns;
 333         }
 334         else if (*cp == '0') // must be; 1st line :  title*0=
 335         {
 336           *aResult = (char *) nsMemory::Alloc(valueEnd - valueStart + 1);
 337         }
 338         // else {} something is really wrong; out of memory
 339         if (*aResult)
 340         {
 341           // append a partial value
 342           memcpy(*aResult + len, valueStart, valueEnd - valueStart);
 343           *(*aResult + len + (valueEnd - valueStart)) = 0;
 344           if (needUnescape)
 345             nsUnescape(*aResult + len);
 346         }
 347         else
 348           return NS_ERROR_OUT_OF_MEMORY;
 349       } // end of if-block :  title*[0-9]= or title*[1-9]*=
 350     }
 351
 352     // str now points after the end of the value.
 353     //   skip over whitespace, ';', whitespace.
 354
 355     while (nsCRT::IsAsciiSpace(*str)) ++str;
 356     if (*str == ';') ++str;
 357     while (nsCRT::IsAsciiSpace(*str)) ++str;
 358   }
 359
 360   if (*aResult)
 361     return NS_OK;
 362   else
 363     return NS_ERROR_INVALID_ARG; // aParameter not found !!
 364 }
 365
 366
 367 NS_IMETHODIMP
 368 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
 369                                            const char* aDefaultCharset,
 370                                            PRBool aOverrideCharset,
 371                                            PRBool aEatContinuations,
 372                                            nsACString& aResult)
 373 {
 374   aResult.Truncate();
 375   if (!aHeaderVal)
 376     return NS_ERROR_INVALID_ARG;
 377   if (!*aHeaderVal)
 378     return NS_OK;
 379
 380
 381   // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string  but
 382   // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
 383   // to UTF-8. Otherwise, just strips away CRLF.
 384   if (PL_strstr(aHeaderVal, "=?") ||
 385       aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
 386       Is7bitNonAsciiString(aHeaderVal, PL_strlen(aHeaderVal)))) {
 387     DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
 388   } else if (aEatContinuations &&
 389              (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
 390     aResult = aHeaderVal;
 391   } else {
 392     aEatContinuations = PR_FALSE;
 393     aResult = aHeaderVal;
 394   }
 395
 396   if (aEatContinuations) {
 397     nsCAutoString temp(aResult);
 398     temp.StripChars("\r\n");
 399     aResult = temp;
 400   }
 401
 402   return NS_OK;
 403 }
 404
 405 NS_IMETHODIMP
 406 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
 407                                        const char* aCharset,
 408                                        const char* aDefaultCharset,
 409                                        PRBool aOverrideCharset,
 410                                        nsACString& aResult)
 411 {
 412   aResult.Truncate();
 413   // If aCharset is given, aParamValue was obtained from RFC2231
 414   // encoding and we're pretty sure that it's in aCharset.
 415   if (aCharset && *aCharset)
 416   {
 417     nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
 418     if (cvtUTF8)
 419       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
 420       return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
 421           IS_7BIT_NON_ASCII_CHARSET(aCharset), aResult);
 422   }
 423
 424   const nsAFlatCString& param = PromiseFlatCString(aParamValue);
 425   nsCAutoString unQuoted;
 426   nsACString::const_iterator s, e;
 427   param.BeginReading(s);
 428   param.EndReading(e);
 429
 430   // strip '\' when used to quote CR, LF, '"' and '\'
 431   for ( ; s != e; ++s) {
 432     if ((*s == '\\')) {
 433       if (++s == e) {
 434         --s; // '\' is at the end. move back and append '\'.
 435       }
 436       else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
 437         --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
 438       }
 439       // else : skip '\' and append the quoted character.
 440     }
 441     unQuoted.Append(*s);
 442   }
 443
 444   aResult = unQuoted;
 445
 446   nsCAutoString decoded;
 447
 448   // Try RFC 2047 encoding, instead.
 449   nsresult rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
 450                                     aOverrideCharset, PR_TRUE, decoded);
 451
 452   if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
 453     aResult = decoded;
 454
 455   return rv;
 456 }
 457
 458 #define ISHEXCHAR(c) \
 459         (0x30 <= PRUint8(c) && PRUint8(c) <= 0x39  ||  \
 460          0x41 <= PRUint8(c) && PRUint8(c) <= 0x46  ||  \
 461          0x61 <= PRUint8(c) && PRUint8(c) <= 0x66)
 462
 463 // Decode Q encoding (RFC 2047).
 464 // static
 465 char *DecodeQ(const char *in, PRUint32 length)
 466 {
 467   char *out, *dest = 0;
 468
 469   out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
 470   if (dest == nsnull)
 471     return nsnull;
 472   while (length > 0) {
 473     PRUintn c = 0;
 474     switch (*in) {
 475     case '=':
 476       // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
 477       if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
 478         goto badsyntax;
 479       PR_sscanf(in + 1, "%2X", &c);
 480       *out++ = (char) c;
 481       in += 3;
 482       length -= 3;
 483       break;
 484
 485     case '_':
 486       *out++ = ' ';
 487       in++;
 488       length--;
 489       break;
 490
 491     default:
 492       if (*in & 0x80) goto badsyntax;
 493       *out++ = *in++;
 494       length--;
 495     }
 496   }
 497   *out++ = '\0';
 498
 499   for (out = dest; *out ; ++out) {
 500     if (*out == '\t')
 501       *out = ' ';
 502   }
 503
 504   return dest;
 505
 506  badsyntax:
 507   PR_Free(dest);
 508   return nsnull;
 509 }
 510
 511 // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
 512 // or has  ESC which may be an  indication that  it's in one of many ISO
 513 // 2022 7bit  encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
 514 // static
 515 PRBool Is7bitNonAsciiString(const char *input, PRUint32 len)
 516 {
 517   PRInt32 c;
 518
 519   enum { hz_initial, // No HZ seen yet
 520          hz_escaped, // Inside an HZ ~{ escape sequence
 521          hz_seen, // Have seen at least one complete HZ sequence
 522          hz_notpresent // Have seen something that is not legal HZ
 523   } hz_state;
 524
 525   hz_state = hz_initial;
 526   while (len) {
 527     c = PRUint8(*input++);
 528     len--;
 529     if (c & 0x80) return PR_FALSE;
 530     if (c == 0x1B) return PR_TRUE;
 531     if (c == '~') {
 532       switch (hz_state) {
 533       case hz_initial:
 534       case hz_seen:
 535         if (*input == '{') {
 536           hz_state = hz_escaped;
 537         } else if (*input == '~') {
 538           // ~~ is the HZ encoding of ~.  Skip over second ~ as well
 539           hz_state = hz_seen;
 540           input++;
 541           len--;
 542         } else {
 543           hz_state = hz_notpresent;
 544         }
 545         break;
 546
 547       case hz_escaped:
 548         if (*input == '}') hz_state = hz_seen;
 549         break;
 550       default:
 551         break;
 552       }
 553     }
 554   }
 555   return hz_state == hz_seen;
 556 }
 557
 558 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
 559
 560 // copy 'raw' sequences of octets in aInput to aOutput.
 561 // If aDefaultCharset is specified, the input is assumed to be in the
 562 // charset and converted to UTF-8. Otherwise, a blind copy is made.
 563 // If aDefaultCharset is specified, but the conversion to UTF-8
 564 // is not successful, each octet is replaced by Unicode replacement
 565 // chars. *aOutput is advanced by the number of output octets.
 566 // static
 567 void CopyRawHeader(const char *aInput, PRUint32 aLen,
 568                    const char *aDefaultCharset, nsACString &aOutput)
 569 {
 570   PRInt32 c;
 571
 572   // If aDefaultCharset is not specified, make a blind copy.
 573   if (!aDefaultCharset || !*aDefaultCharset) {
 574     aOutput.Append(aInput, aLen);
 575     return;
 576   }
 577
 578   // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
 579   // A ~ may indicate it is HZ
 580   while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
 581     aOutput.Append(char(c));
 582     aLen--;
 583   }
 584   if (!aLen) {
 585     return;
 586   }
 587   aInput--;
 588
 589   // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
 590   // string and aDefaultCharset is a 7bit non-ascii charset.
 591   PRBool skipCheck = (c == 0x1B || c == '~') &&
 592                      IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
 593
 594   // If not UTF-8, treat as default charset
 595   nsCOMPtr<nsIUTF8ConverterService>
 596     cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
 597   nsCAutoString utf8Text;
 598   if (cvtUTF8 &&
 599       NS_SUCCEEDED(
 600       cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
 601       aDefaultCharset, skipCheck, utf8Text))) {
 602     aOutput.Append(utf8Text);
 603   } else { // replace each octet with Unicode replacement char in UTF-8.
 604     for (PRUint32 i = 0; i < aLen; i++) {
 605       aOutput.Append(REPLACEMENT_CHAR);
 606     }
 607   }
 608 }
 609
 610 static const char especials[] = "()<>@,;:\\\"/[]?.=";
 611
 612 // |decode_mime_part2_str| taken from comi18n.c
 613 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
 614 // If aOverrideCharset is true, charset in RFC2047-encoded words is
 615 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
 616 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
 617 //static
 618 nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
 619                           PRBool aOverrideCharset, nsACString &aResult)
 620 {
 621   const char *p, *q, *r;
 622   char *decodedText;
 623   const char *begin; // tracking pointer for where we are in the input buffer
 624   PRInt32 isLastEncodedWord = 0;
 625   const char *charsetStart, *charsetEnd;
 626   char charset[80];
 627
 628   // initialize charset name to an empty string
 629   charset[0] = '\0';
 630
 631   begin = aHeader;
 632
 633   // To avoid buffer realloc, if possible, set capacity in advance. No
 634   // matter what,  more than 3x expansion can never happen for all charsets
 635   // supported by Mozilla. SCSU/BCSU with the sliding window set to a
 636   // non-BMP block may be exceptions, but Mozilla does not support them.
 637   // Neither any known mail/news program use them. Even if there's, we're
 638   // safe because we don't use a raw *char any more.
 639   aResult.SetCapacity(3 * strlen(aHeader));
 640
 641   while ((p = PL_strstr(begin, "=?")) != 0) {
 642     if (isLastEncodedWord) {
 643       // See if it's all whitespace.
 644       for (q = begin; q < p; ++q) {
 645         if (!PL_strchr(" \t\r\n", *q)) break;
 646       }
 647     }
 648
 649     if (!isLastEncodedWord || q < p) {
 650       // copy the part before the encoded-word
 651       CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
 652       begin = p;
 653     }
 654
 655     p += 2;
 656
 657     // Get charset info
 658     charsetStart = p;
 659     charsetEnd = 0;
 660     for (q = p; *q != '?'; q++) {
 661       if (*q <= ' ' || PL_strchr(especials, *q)) {
 662         goto badsyntax;
 663       }
 664
 665       // RFC 2231 section 5
 666       if (!charsetEnd && *q == '*') {
 667         charsetEnd = q;
 668       }
 669     }
 670     if (!charsetEnd) {
 671       charsetEnd = q;
 672     }
 673
 674     // Check for too-long charset name
 675     if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset))
 676       goto badsyntax;
 677
 678     memcpy(charset, charsetStart, charsetEnd - charsetStart);
 679     charset[charsetEnd - charsetStart] = 0;
 680
 681     q++;
 682     if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
 683       goto badsyntax;
 684
 685     if (q[1] != '?')
 686       goto badsyntax;
 687
 688     r = q;
 689     for (r = q + 2; *r != '?'; r++) {
 690       if (*r < ' ') goto badsyntax;
 691     }
 692     if (r[1] != '=')
 693         goto badsyntax;
 694     else if (r == q + 2) {
 695         // it's empty, skip
 696         begin = r + 2;
 697         isLastEncodedWord = 1;
 698         continue;
 699     }
 700
 701     if(*q == 'Q' || *q == 'q')
 702       decodedText = DecodeQ(q + 2, r - (q + 2));
 703     else {
 704       // bug 227290. ignore an extraneous '=' at the end.
 705       // (# of characters in B-encoded part has to be a multiple of 4)
 706       PRInt32 n = r - (q + 2);
 707       n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
 708       decodedText = PL_Base64Decode(q + 2, n, nsnull);
 709     }
 710
 711     if (decodedText == nsnull)
 712       goto badsyntax;
 713
 714     // Override charset if requested.  Never override labeled UTF-8.
 715     // Use default charset instead of UNKNOWN-8BIT
 716     if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) ||
 717         (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) {
 718       PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1);
 719       charset[sizeof(charset) - 1] = '\0';
 720     }
 721
 722     {
 723       nsCOMPtr<nsIUTF8ConverterService>
 724         cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
 725       nsCAutoString utf8Text;
 726       // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
 727       if (cvtUTF8 &&
 728           NS_SUCCEEDED(
 729             cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
 730             charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) {
 731         aResult.Append(utf8Text);
 732       } else {
 733         aResult.Append(REPLACEMENT_CHAR);
 734       }
 735     }
 736     PR_Free(decodedText);
 737     begin = r + 2;
 738     isLastEncodedWord = 1;
 739     continue;
 740
 741   badsyntax:
 742     // copy the part before the encoded-word
 743     aResult.Append(begin, p - begin);
 744     begin = p;
 745     isLastEncodedWord = 0;
 746   }
 747
 748   // put the tail back
 749   CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
 750
 751   nsCAutoString tempStr(aResult);
 752   tempStr.ReplaceChar('\t', ' ');
 753   aResult = tempStr;
 754
 755   return NS_OK;
 756 }
 757