netwerk/base/src/nsURLHelper.cpp

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /* vim:set ts=4 sw=4 sts=4 et cindent: */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is mozilla.org code.
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Andreas Otte.
  20  * Portions created by the Initial Developer are Copyright (C) 2000
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Darin Fisher <darin@netscape.com>
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #include "nsURLHelper.h"
  41 #include "nsReadableUtils.h"
  42 #include "nsIServiceManager.h"
  43 #include "nsIIOService.h"
  44 #include "nsIURLParser.h"
  45 #include "nsIURI.h"
  46 #include "nsMemory.h"
  47 #include "nsEscape.h"
  48 #include "nsCOMPtr.h"
  49 #include "nsCRT.h"
  50 #include "nsNetCID.h"
  51 #include "netCore.h"
  52 #include "prprf.h"
  53 #include "prnetdb.h"
  54
  55 //----------------------------------------------------------------------------
  56 // Init/Shutdown
  57 //----------------------------------------------------------------------------
  58
  59 static PRBool gInitialized = PR_FALSE;
  60 static nsIURLParser *gNoAuthURLParser = nsnull;
  61 static nsIURLParser *gAuthURLParser = nsnull;
  62 static nsIURLParser *gStdURLParser = nsnull;
  63
  64 static void
  65 InitGlobals()
  66 {
  67     nsCOMPtr<nsIURLParser> parser;
  68
  69     parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
  70     NS_ASSERTION(parser, "failed getting 'noauth' url parser");
  71     if (parser) {
  72         gNoAuthURLParser = parser.get();
  73         NS_ADDREF(gNoAuthURLParser);
  74     }
  75
  76     parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
  77     NS_ASSERTION(parser, "failed getting 'auth' url parser");
  78     if (parser) {
  79         gAuthURLParser = parser.get();
  80         NS_ADDREF(gAuthURLParser);
  81     }
  82
  83     parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
  84     NS_ASSERTION(parser, "failed getting 'std' url parser");
  85     if (parser) {
  86         gStdURLParser = parser.get();
  87         NS_ADDREF(gStdURLParser);
  88     }
  89
  90     gInitialized = PR_TRUE;
  91 }
  92
  93 void
  94 net_ShutdownURLHelper()
  95 {
  96     if (gInitialized) {
  97         NS_IF_RELEASE(gNoAuthURLParser);
  98         NS_IF_RELEASE(gAuthURLParser);
  99         NS_IF_RELEASE(gStdURLParser);
 100         gInitialized = PR_FALSE;
 101     }
 102 }
 103
 104 //----------------------------------------------------------------------------
 105 // nsIURLParser getters
 106 //----------------------------------------------------------------------------
 107
 108 nsIURLParser *
 109 net_GetAuthURLParser()
 110 {
 111     if (!gInitialized)
 112         InitGlobals();
 113     return gAuthURLParser;
 114 }
 115
 116 nsIURLParser *
 117 net_GetNoAuthURLParser()
 118 {
 119     if (!gInitialized)
 120         InitGlobals();
 121     return gNoAuthURLParser;
 122 }
 123
 124 nsIURLParser *
 125 net_GetStdURLParser()
 126 {
 127     if (!gInitialized)
 128         InitGlobals();
 129     return gStdURLParser;
 130 }
 131
 132 //----------------------------------------------------------------------------
 133 // file:// URL parsing
 134 //----------------------------------------------------------------------------
 135
 136 nsresult
 137 net_ParseFileURL(const nsACString &inURL,
 138                  nsACString &outDirectory,
 139                  nsACString &outFileBaseName,
 140                  nsACString &outFileExtension)
 141 {
 142     nsresult rv;
 143
 144     outDirectory.Truncate();
 145     outFileBaseName.Truncate();
 146     outFileExtension.Truncate();
 147
 148     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
 149     const char *url = flatURL.get();
 150
 151     PRUint32 schemeBeg, schemeEnd;
 152     rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nsnull);
 153     if (NS_FAILED(rv)) return rv;
 154
 155     if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
 156         NS_ERROR("must be a file:// url");
 157         return NS_ERROR_UNEXPECTED;
 158     }
 159
 160     nsIURLParser *parser = net_GetNoAuthURLParser();
 161     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
 162
 163     PRUint32 pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
 164     PRInt32 pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
 165
 166     // invoke the parser to extract the URL path
 167     rv = parser->ParseURL(url, flatURL.Length(),
 168                           nsnull, nsnull, // don't care about scheme
 169                           nsnull, nsnull, // don't care about authority
 170                           &pathPos, &pathLen);
 171     if (NS_FAILED(rv)) return rv;
 172
 173     // invoke the parser to extract filepath from the path
 174     rv = parser->ParsePath(url + pathPos, pathLen,
 175                            &filepathPos, &filepathLen,
 176                            nsnull, nsnull,  // don't care about param
 177                            nsnull, nsnull,  // don't care about query
 178                            nsnull, nsnull); // don't care about ref
 179     if (NS_FAILED(rv)) return rv;
 180
 181     filepathPos += pathPos;
 182
 183     // invoke the parser to extract the directory and filename from filepath
 184     rv = parser->ParseFilePath(url + filepathPos, filepathLen,
 185                                &directoryPos, &directoryLen,
 186                                &basenamePos, &basenameLen,
 187                                &extensionPos, &extensionLen);
 188     if (NS_FAILED(rv)) return rv;
 189
 190     if (directoryLen > 0)
 191         outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
 192     if (basenameLen > 0)
 193         outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
 194     if (extensionLen > 0)
 195         outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
 196     // since we are using a no-auth url parser, there will never be a host
 197     // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
 198
 199     return NS_OK;
 200 }
 201
 202 //----------------------------------------------------------------------------
 203 // path manipulation functions
 204 //----------------------------------------------------------------------------
 205
 206 // Replace all /./ with a / while resolving URLs
 207 // But only till #?
 208 void
 209 net_CoalesceDirs(netCoalesceFlags flags, char* path)
 210 {
 211     /* Stolen from the old netlib's mkparse.c.
 212      *
 213      * modifies a url of the form   /foo/../foo1  ->  /foo1
 214      *                       and    /foo/./foo1   ->  /foo/foo1
 215      *                       and    /foo/foo1/..  ->  /foo/
 216      */
 217     char *fwdPtr = path;
 218     char *urlPtr = path;
 219     char *lastslash = path;
 220     PRUint32 traversal = 0;
 221     PRUint32 special_ftp_len = 0;
 222
 223     /* Remember if this url is a special ftp one: */
 224     if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
 225     {
 226        /* some schemes (for example ftp) have the speciality that
 227           the path can begin // or /%2F to mark the root of the
 228           servers filesystem, a simple / only marks the root relative
 229           to the user loging in. We remember the length of the marker */
 230         if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
 231             special_ftp_len = 4;
 232         else if (nsCRT::strncmp(path,"//",2) == 0 )
 233             special_ftp_len = 2;
 234     }
 235
 236     /* find the last slash before # or ? */
 237     for(; (*fwdPtr != '\0') &&
 238             (*fwdPtr != '?') &&
 239             (*fwdPtr != '#'); ++fwdPtr)
 240     {
 241     }
 242
 243     /* found nothing, but go back one only */
 244     /* if there is something to go back to */
 245     if (fwdPtr != path && *fwdPtr == '\0')
 246     {
 247         --fwdPtr;
 248     }
 249
 250     /* search the slash */
 251     for(; (fwdPtr != path) &&
 252             (*fwdPtr != '/'); --fwdPtr)
 253     {
 254     }
 255     lastslash = fwdPtr;
 256     fwdPtr = path;
 257
 258     /* replace all %2E or %2e with . in the path */
 259     /* but stop at lastchar if non null */
 260     for(; (*fwdPtr != '\0') &&
 261             (*fwdPtr != '?') &&
 262             (*fwdPtr != '#') &&
 263             (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
 264     {
 265         if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
 266             (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
 267         {
 268             *urlPtr++ = '.';
 269             ++fwdPtr;
 270             ++fwdPtr;
 271         }
 272         else
 273         {
 274             *urlPtr++ = *fwdPtr;
 275         }
 276     }
 277     // Copy remaining stuff past the #?;
 278     for (; *fwdPtr != '\0'; ++fwdPtr)
 279     {
 280         *urlPtr++ = *fwdPtr;
 281     }
 282     *urlPtr = '\0';  // terminate the url
 283
 284     // start again, this time for real
 285     fwdPtr = path;
 286     urlPtr = path;
 287
 288     for(; (*fwdPtr != '\0') &&
 289             (*fwdPtr != '?') &&
 290             (*fwdPtr != '#'); ++fwdPtr)
 291     {
 292         if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
 293         {
 294             // remove . followed by slash
 295             ++fwdPtr;
 296         }
 297         else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
 298                 (*(fwdPtr+3) == '/' ||
 299                     *(fwdPtr+3) == '\0' || // This will take care of
 300                     *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag
 301                     *(fwdPtr+3) == '#'))
 302         {
 303             // remove foo/..
 304             // reverse the urlPtr to the previous slash if possible
 305             // if url does not allow relative root then drop .. above root
 306             // otherwise retain them in the path
 307             if(traversal > 0 || !(flags &
 308                                   NET_COALESCE_ALLOW_RELATIVE_ROOT))
 309             {
 310                 if (urlPtr != path)
 311                     urlPtr--; // we must be going back at least by one
 312                 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
 313                     ;  // null body
 314                 --traversal; // count back
 315                 // forward the fwdPtr past the ../
 316                 fwdPtr += 2;
 317                 // if we have reached the beginning of the path
 318                 // while searching for the previous / and we remember
 319                 // that it is an url that begins with /%2F then
 320                 // advance urlPtr again by 3 chars because /%2F already
 321                 // marks the root of the path
 322                 if (urlPtr == path && special_ftp_len > 3)
 323                 {
 324                     ++urlPtr;
 325                     ++urlPtr;
 326                     ++urlPtr;
 327                 }
 328                 // special case if we have reached the end
 329                 // to preserve the last /
 330                 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
 331                     ++urlPtr;
 332             }
 333             else
 334             {
 335                 // there are to much /.. in this path, just copy them instead.
 336                 // forward the urlPtr past the /.. and copying it
 337
 338                 // However if we remember it is an url that starts with
 339                 // /%2F and urlPtr just points at the "F" of "/%2F" then do
 340                 // not overwrite it with the /, just copy .. and move forward
 341                 // urlPtr.
 342                 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
 343                     ++urlPtr;
 344                 else
 345                     *urlPtr++ = *fwdPtr;
 346                 ++fwdPtr;
 347                 *urlPtr++ = *fwdPtr;
 348                 ++fwdPtr;
 349                 *urlPtr++ = *fwdPtr;
 350             }
 351         }
 352         else
 353         {
 354             // count the hierachie, but only if we do not have reached
 355             // the root of some special urls with a special root marker
 356             if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&
 357                (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
 358                 traversal++;
 359             // copy the url incrementaly
 360             *urlPtr++ = *fwdPtr;
 361         }
 362     }
 363
 364     /*
 365      *  Now lets remove trailing . case
 366      *     /foo/foo1/.   ->  /foo/foo1/
 367      */
 368
 369     if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
 370         urlPtr--;
 371
 372     // Copy remaining stuff past the #?;
 373     for (; *fwdPtr != '\0'; ++fwdPtr)
 374     {
 375         *urlPtr++ = *fwdPtr;
 376     }
 377     *urlPtr = '\0';  // terminate the url
 378 }
 379
 380 nsresult
 381 net_ResolveRelativePath(const nsACString &relativePath,
 382                         const nsACString &basePath,
 383                         nsACString &result)
 384 {
 385     nsCAutoString name;
 386     nsCAutoString path(basePath);
 387     PRBool needsDelim = PR_FALSE;
 388
 389     if ( !path.IsEmpty() ) {
 390         PRUnichar last = path.Last();
 391         needsDelim = !(last == '/');
 392     }
 393
 394     nsACString::const_iterator beg, end;
 395     relativePath.BeginReading(beg);
 396     relativePath.EndReading(end);
 397
 398     PRBool stop = PR_FALSE;
 399     char c;
 400     for (; !stop; ++beg) {
 401         c = (beg == end) ? '\0' : *beg;
 402         //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
 403         switch (c) {
 404           case '\0':
 405           case '#':
 406           case ';':
 407           case '?':
 408             stop = PR_TRUE;
 409             // fall through...
 410           case '/':
 411             // delimiter found
 412             if (name.EqualsLiteral("..")) {
 413                 // pop path
 414                 // If we already have the delim at end, then
 415                 //  skip over that when searching for next one to the left
 416                 PRInt32 offset = path.Length() - (needsDelim ? 1 : 2);
 417                 // First check for errors
 418                 if (offset < 0 )
 419                     return NS_ERROR_MALFORMED_URI;
 420                 PRInt32 pos = path.RFind("/", PR_FALSE, offset);
 421                 if (pos >= 0)
 422                     path.Truncate(pos + 1);
 423                 else
 424                     path.Truncate();
 425             }
 426             else if (name.IsEmpty() || name.EqualsLiteral(".")) {
 427                 // do nothing
 428             }
 429             else {
 430                 // append name to path
 431                 if (needsDelim)
 432                     path += '/';
 433                 path += name;
 434                 needsDelim = PR_TRUE;
 435             }
 436             name.Truncate();
 437             break;
 438
 439           default:
 440             // append char to name
 441             name += c;
 442         }
 443     }
 444     // append anything left on relativePath (e.g. #..., ;..., ?...)
 445     if (c != '\0')
 446         path += Substring(--beg, end);
 447
 448     result = path;
 449     return NS_OK;
 450 }
 451
 452 //----------------------------------------------------------------------------
 453 // scheme fu
 454 //----------------------------------------------------------------------------
 455
 456 /* Extract URI-Scheme if possible */
 457 nsresult
 458 net_ExtractURLScheme(const nsACString &inURI,
 459                      PRUint32 *startPos,
 460                      PRUint32 *endPos,
 461                      nsACString *scheme)
 462 {
 463     // search for something up to a colon, and call it the scheme
 464     const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
 465     const char* uri_start = flatURI.get();
 466     const char* uri = uri_start;
 467
 468     if (!uri)
 469         return NS_ERROR_MALFORMED_URI;
 470
 471     // skip leading white space
 472     while (nsCRT::IsAsciiSpace(*uri))
 473         uri++;
 474
 475     PRUint32 start = uri - uri_start;
 476     if (startPos) {
 477         *startPos = start;
 478     }
 479
 480     PRUint32 length = 0;
 481     char c;
 482     while ((c = *uri++) != '\0') {
 483         // First char must be Alpha
 484         if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
 485             length++;
 486         }
 487         // Next chars can be alpha + digit + some special chars
 488         else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
 489                  nsCRT::IsAsciiDigit(c) || c == '+' ||
 490                  c == '.' || c == '-')) {
 491             length++;
 492         }
 493         // stop if colon reached but not as first char
 494         else if (c == ':' && length > 0) {
 495             if (endPos) {
 496                 *endPos = start + length;
 497             }
 498
 499             if (scheme)
 500                 scheme->Assign(Substring(inURI, start, length));
 501             return NS_OK;
 502         }
 503         else
 504             break;
 505     }
 506     return NS_ERROR_MALFORMED_URI;
 507 }
 508
 509 PRBool
 510 net_IsValidScheme(const char *scheme, PRUint32 schemeLen)
 511 {
 512     // first char must be alpha
 513     if (!nsCRT::IsAsciiAlpha(*scheme))
 514         return PR_FALSE;
 515
 516     // nsCStrings may have embedded nulls -- reject those too
 517     for (; schemeLen; ++scheme, --schemeLen) {
 518         if (!(nsCRT::IsAsciiAlpha(*scheme) ||
 519               nsCRT::IsAsciiDigit(*scheme) ||
 520               *scheme == '+' ||
 521               *scheme == '.' ||
 522               *scheme == '-'))
 523             return PR_FALSE;
 524     }
 525
 526     return PR_TRUE;
 527 }
 528
 529 PRBool
 530 net_FilterURIString(const char *str, nsACString& result)
 531 {
 532     NS_PRECONDITION(str, "Must have a non-null string!");
 533     PRBool writing = PR_FALSE;
 534     result.Truncate();
 535     const char *p = str;
 536
 537     // Remove leading spaces, tabs, CR, LF if any.
 538     while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
 539         writing = PR_TRUE;
 540         str = p + 1;
 541         p++;
 542     }
 543
 544     while (*p) {
 545         if (*p == '\t' || *p == '\r' || *p == '\n') {
 546             writing = PR_TRUE;
 547             // append chars up to but not including *p
 548             if (p > str)
 549                 result.Append(str, p - str);
 550             str = p + 1;
 551         }
 552         p++;
 553     }
 554
 555     // Remove trailing spaces if any
 556     while (((p-1) >= str) && (*(p-1) == ' ')) {
 557         writing = PR_TRUE;
 558         p--;
 559     }
 560
 561     if (writing && p > str)
 562         result.Append(str, p - str);
 563
 564     return writing;
 565 }
 566
 567 #if defined(XP_WIN) || defined(XP_OS2)
 568 PRBool
 569 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
 570 {
 571     PRBool writing = PR_FALSE;
 572
 573     nsACString::const_iterator beginIter, endIter;
 574     aURL.BeginReading(beginIter);
 575     aURL.EndReading(endIter);
 576
 577     const char *s, *begin = beginIter.get();
 578
 579     for (s = begin; s != endIter.get(); ++s)
 580     {
 581         if (*s == '\\')
 582         {
 583             writing = PR_TRUE;
 584             if (s > begin)
 585                 aResultBuf.Append(begin, s - begin);
 586             aResultBuf += '/';
 587             begin = s + 1;
 588         }
 589     }
 590     if (writing && s > begin)
 591         aResultBuf.Append(begin, s - begin);
 592
 593     return writing;
 594 }
 595 #endif
 596
 597 //----------------------------------------------------------------------------
 598 // miscellaneous (i.e., stuff that should really be elsewhere)
 599 //----------------------------------------------------------------------------
 600
 601 static inline
 602 void ToLower(char &c)
 603 {
 604     if ((unsigned)(c - 'A') <= (unsigned)('Z' - 'A'))
 605         c += 'a' - 'A';
 606 }
 607
 608 void
 609 net_ToLowerCase(char *str, PRUint32 length)
 610 {
 611     for (char *end = str + length; str < end; ++str)
 612         ToLower(*str);
 613 }
 614
 615 void
 616 net_ToLowerCase(char *str)
 617 {
 618     for (; *str; ++str)
 619         ToLower(*str);
 620 }
 621
 622 char *
 623 net_FindCharInSet(const char *iter, const char *stop, const char *set)
 624 {
 625     for (; iter != stop && *iter; ++iter) {
 626         for (const char *s = set; *s; ++s) {
 627             if (*iter == *s)
 628                 return (char *) iter;
 629         }
 630     }
 631     return (char *) iter;
 632 }
 633
 634 char *
 635 net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
 636 {
 637 repeat:
 638     for (const char *s = set; *s; ++s) {
 639         if (*iter == *s) {
 640             if (++iter == stop)
 641                 break;
 642             goto repeat;
 643         }
 644     }
 645     return (char *) iter;
 646 }
 647
 648 char *
 649 net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
 650 {
 651     --iter;
 652     --stop;
 653
 654     if (iter == stop)
 655         return (char *) iter;
 656
 657 repeat:
 658     for (const char *s = set; *s; ++s) {
 659         if (*iter == *s) {
 660             if (--iter == stop)
 661                 break;
 662             goto repeat;
 663         }
 664     }
 665     return (char *) iter;
 666 }
 667
 668 #define HTTP_LWS " \t"
 669
 670 // Return the index of the closing quote of the string, if any
 671 static PRUint32
 672 net_FindStringEnd(const nsCString& flatStr,
 673                   PRUint32 stringStart,
 674                   char stringDelim)
 675 {
 676     NS_ASSERTION(stringStart < flatStr.Length() &&
 677                  flatStr.CharAt(stringStart) == stringDelim &&
 678                  (stringDelim == '"' || stringDelim == '\''),
 679                  "Invalid stringStart");
 680
 681     const char set[] = { stringDelim, '\\', '\0' };
 682     do {
 683         // stringStart points to either the start quote or the last
 684         // escaped char (the char following a '\\')
 685
 686         // Write to searchStart here, so that when we get back to the
 687         // top of the loop right outside this one we search from the
 688         // right place.
 689         PRUint32 stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
 690         if (stringEnd == PRUint32(kNotFound))
 691             return flatStr.Length();
 692
 693         if (flatStr.CharAt(stringEnd) == '\\') {
 694             // Hit a backslash-escaped char.  Need to skip over it.
 695             stringStart = stringEnd + 1;
 696             if (stringStart == flatStr.Length())
 697                 return stringStart;
 698
 699             // Go back to looking for the next escape or the string end
 700             continue;
 701         }
 702
 703         return stringEnd;
 704
 705     } while (PR_TRUE);
 706
 707     NS_NOTREACHED("How did we get here?");
 708     return flatStr.Length();
 709 }
 710
 711
 712 static PRUint32
 713 net_FindMediaDelimiter(const nsCString& flatStr,
 714                        PRUint32 searchStart,
 715                        char delimiter)
 716 {
 717     do {
 718         // searchStart points to the spot from which we should start looking
 719         // for the delimiter.
 720         const char delimStr[] = { delimiter, '"', '\'', '\0' };
 721         PRUint32 curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
 722         if (curDelimPos == PRUint32(kNotFound))
 723             return flatStr.Length();
 724
 725         char ch = flatStr.CharAt(curDelimPos);
 726         if (ch == delimiter) {
 727             // Found delimiter
 728             return curDelimPos;
 729         }
 730
 731         // We hit the start of a quoted string.  Look for its end.
 732         searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
 733         if (searchStart == flatStr.Length())
 734             return searchStart;
 735
 736         ++searchStart;
 737
 738         // searchStart now points to the first char after the end of the
 739         // string, so just go back to the top of the loop and look for
 740         // |delimiter| again.
 741     } while (PR_TRUE);
 742
 743     NS_NOTREACHED("How did we get here?");
 744     return flatStr.Length();
 745 }
 746
 747 // aOffset should be added to aCharsetStart and aCharsetEnd if this
 748 // function sets them.
 749 static void
 750 net_ParseMediaType(const nsACString &aMediaTypeStr,
 751                    nsACString       &aContentType,
 752                    nsACString       &aContentCharset,
 753                    PRInt32          aOffset,
 754                    PRBool           *aHadCharset,
 755                    PRInt32          *aCharsetStart,
 756                    PRInt32          *aCharsetEnd)
 757 {
 758     const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
 759     const char* start = flatStr.get();
 760     const char* end = start + flatStr.Length();
 761
 762     // Trim LWS leading and trailing whitespace from type.  We include '(' in
 763     // the trailing trim set to catch media-type comments, which are not at all
 764     // standard, but may occur in rare cases.
 765     const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
 766     const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
 767
 768     const char* charset = "";
 769     const char* charsetEnd = charset;
 770     PRInt32 charsetParamStart;
 771     PRInt32 charsetParamEnd;
 772
 773     // Iterate over parameters
 774     PRBool typeHasCharset = PR_FALSE;
 775     PRUint32 paramStart = flatStr.FindChar(';', typeEnd - start);
 776     if (paramStart != PRUint32(kNotFound)) {
 777         // We have parameters.  Iterate over them.
 778         PRUint32 curParamStart = paramStart + 1;
 779         do {
 780             PRUint32 curParamEnd =
 781                 net_FindMediaDelimiter(flatStr, curParamStart, ';');
 782
 783             const char* paramName = net_FindCharNotInSet(start + curParamStart,
 784                                                          start + curParamEnd,
 785                                                          HTTP_LWS);
 786             static const char charsetStr[] = "charset=";
 787             if (PL_strncasecmp(paramName, charsetStr,
 788                                sizeof(charsetStr) - 1) == 0) {
 789                 charset = paramName + sizeof(charsetStr) - 1;
 790                 charsetEnd = start + curParamEnd;
 791                 typeHasCharset = PR_TRUE;
 792                 charsetParamStart = curParamStart - 1;
 793                 charsetParamEnd = curParamEnd;
 794             }
 795
 796             curParamStart = curParamEnd + 1;
 797         } while (curParamStart < flatStr.Length());
 798     }
 799
 800     if (typeHasCharset) {
 801         // Trim LWS leading and trailing whitespace from charset.  We include
 802         // '(' in the trailing trim set to catch media-type comments, which are
 803         // not at all standard, but may occur in rare cases.
 804         charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
 805         if (*charset == '"' || *charset == '\'') {
 806             charsetEnd =
 807                 start + net_FindStringEnd(flatStr, charset - start, *charset);
 808             charset++;
 809             NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
 810         } else {
 811             charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
 812         }
 813     }
 814
 815     // if the server sent "*/*", it is meaningless, so do not store it.
 816     // also, if type is the same as aContentType, then just update the
 817     // charset.  however, if charset is empty and aContentType hasn't
 818     // changed, then don't wipe-out an existing aContentCharset.  We
 819     // also want to reject a mime-type if it does not include a slash.
 820     // some servers give junk after the charset parameter, which may
 821     // include a comma, so this check makes us a bit more tolerant.
 822
 823     if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
 824         memchr(type, '/', typeEnd - type) != NULL) {
 825         // Common case here is that aContentType is empty
 826         PRBool eq = !aContentType.IsEmpty() &&
 827             aContentType.Equals(Substring(type, typeEnd),
 828                                 nsCaseInsensitiveCStringComparator());
 829         if (!eq) {
 830             aContentType.Assign(type, typeEnd - type);
 831             ToLowerCase(aContentType);
 832         }
 833
 834         if ((!eq && *aHadCharset) || typeHasCharset) {
 835             *aHadCharset = PR_TRUE;
 836             aContentCharset.Assign(charset, charsetEnd - charset);
 837             if (typeHasCharset) {
 838                 *aCharsetStart = charsetParamStart + aOffset;
 839                 *aCharsetEnd = charsetParamEnd + aOffset;
 840             }
 841         }
 842         // Only set a new charset position if this is a different type
 843         // from the last one we had and it doesn't already have a
 844         // charset param.  If this is the same type, we probably want
 845         // to leave the charset position on its first occurrence.
 846         if (!eq && !typeHasCharset) {
 847             PRInt32 charsetStart = PRInt32(paramStart);
 848             if (charsetStart == kNotFound)
 849                 charsetStart =  flatStr.Length();
 850
 851             *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
 852         }
 853     }
 854 }
 855
 856 #undef HTTP_LWS
 857
 858 void
 859 net_ParseContentType(const nsACString &aHeaderStr,
 860                      nsACString       &aContentType,
 861                      nsACString       &aContentCharset,
 862                      PRBool           *aHadCharset)
 863 {
 864     PRInt32 dummy1, dummy2;
 865     net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
 866                          aHadCharset, &dummy1, &dummy2);
 867 }
 868
 869 void
 870 net_ParseContentType(const nsACString &aHeaderStr,
 871                      nsACString       &aContentType,
 872                      nsACString       &aContentCharset,
 873                      PRBool           *aHadCharset,
 874                      PRInt32          *aCharsetStart,
 875                      PRInt32          *aCharsetEnd)
 876 {
 877     //
 878     // Augmented BNF (from RFC 2616 section 3.7):
 879     //
 880     //   header-value = media-type *( LWS "," LWS media-type )
 881     //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
 882     //   type         = token
 883     //   subtype      = token
 884     //   parameter    = attribute "=" value
 885     //   attribute    = token
 886     //   value        = token | quoted-string
 887     //
 888     //
 889     // Examples:
 890     //
 891     //   text/html
 892     //   text/html, text/html
 893     //   text/html,text/html; charset=ISO-8859-1
 894     //   text/html,text/html; charset="ISO-8859-1"
 895     //   text/html;charset=ISO-8859-1, text/html
 896     //   text/html;charset='ISO-8859-1', text/html
 897     //   application/octet-stream
 898     //
 899
 900     *aHadCharset = PR_FALSE;
 901     const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
 902
 903     // iterate over media-types.  Note that ',' characters can happen
 904     // inside quoted strings, so we need to watch out for that.
 905     PRUint32 curTypeStart = 0;
 906     do {
 907         // curTypeStart points to the start of the current media-type.  We want
 908         // to look for its end.
 909         PRUint32 curTypeEnd =
 910             net_FindMediaDelimiter(flatStr, curTypeStart, ',');
 911
 912         // At this point curTypeEnd points to the spot where the media-type
 913         // starting at curTypeEnd ends.  Time to parse that!
 914         net_ParseMediaType(Substring(flatStr, curTypeStart,
 915                                      curTypeEnd - curTypeStart),
 916                            aContentType, aContentCharset, curTypeStart,
 917                            aHadCharset, aCharsetStart, aCharsetEnd);
 918
 919         // And let's move on to the next media-type
 920         curTypeStart = curTypeEnd + 1;
 921     } while (curTypeStart < flatStr.Length());
 922 }
 923
 924 PRBool
 925 net_IsValidHostName(const nsCSubstring &host)
 926 {
 927     const char *end = host.EndReading();
 928     // Use explicit whitelists to select which characters we are
 929     // willing to send to lower-level DNS logic. This is more
 930     // self-documenting, and can also be slightly faster than the
 931     // blacklist approach, since DNS names are the common case, and
 932     // the commonest characters will tend to be near the start of
 933     // the list.
 934
 935     // Whitelist for DNS names (RFC 1035) with extra characters added
 936     // for pragmatic reasons "$+_"
 937     // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
 938     if (net_FindCharNotInSet(host.BeginReading(), end,
 939                              "abcdefghijklmnopqrstuvwxyz"
 940                              ".-0123456789"
 941                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
 942         return PR_TRUE;
 943
 944     // Might be a valid IPv6 link-local address containing a percent sign
 945     nsCAutoString strhost(host);
 946     PRNetAddr addr;
 947     return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
 948 }