Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / netwerk / base / src / nsURLHelper.cpp
blobf4de015f3d0896556bd85f8c5a2d8af14b74c183
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* vim:set ts=4 sw=4 sts=4 et cindent: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
19 * Andreas Otte.
20 * Portions created by the Initial Developer are Copyright (C) 2000
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Darin Fisher <darin@netscape.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nsURLHelper.h"
41 #include "nsReadableUtils.h"
42 #include "nsIServiceManager.h"
43 #include "nsIIOService.h"
44 #include "nsIURLParser.h"
45 #include "nsIURI.h"
46 #include "nsMemory.h"
47 #include "nsEscape.h"
48 #include "nsCOMPtr.h"
49 #include "nsCRT.h"
50 #include "nsNetCID.h"
51 #include "netCore.h"
52 #include "prprf.h"
53 #include "prnetdb.h"
55 //----------------------------------------------------------------------------
56 // Init/Shutdown
57 //----------------------------------------------------------------------------
59 static PRBool gInitialized = PR_FALSE;
60 static nsIURLParser *gNoAuthURLParser = nsnull;
61 static nsIURLParser *gAuthURLParser = nsnull;
62 static nsIURLParser *gStdURLParser = nsnull;
64 static void
65 InitGlobals()
67 nsCOMPtr<nsIURLParser> parser;
69 parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
70 NS_ASSERTION(parser, "failed getting 'noauth' url parser");
71 if (parser) {
72 gNoAuthURLParser = parser.get();
73 NS_ADDREF(gNoAuthURLParser);
76 parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
77 NS_ASSERTION(parser, "failed getting 'auth' url parser");
78 if (parser) {
79 gAuthURLParser = parser.get();
80 NS_ADDREF(gAuthURLParser);
83 parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
84 NS_ASSERTION(parser, "failed getting 'std' url parser");
85 if (parser) {
86 gStdURLParser = parser.get();
87 NS_ADDREF(gStdURLParser);
90 gInitialized = PR_TRUE;
93 void
94 net_ShutdownURLHelper()
96 if (gInitialized) {
97 NS_IF_RELEASE(gNoAuthURLParser);
98 NS_IF_RELEASE(gAuthURLParser);
99 NS_IF_RELEASE(gStdURLParser);
100 gInitialized = PR_FALSE;
104 //----------------------------------------------------------------------------
105 // nsIURLParser getters
106 //----------------------------------------------------------------------------
108 nsIURLParser *
109 net_GetAuthURLParser()
111 if (!gInitialized)
112 InitGlobals();
113 return gAuthURLParser;
116 nsIURLParser *
117 net_GetNoAuthURLParser()
119 if (!gInitialized)
120 InitGlobals();
121 return gNoAuthURLParser;
124 nsIURLParser *
125 net_GetStdURLParser()
127 if (!gInitialized)
128 InitGlobals();
129 return gStdURLParser;
132 //----------------------------------------------------------------------------
133 // file:// URL parsing
134 //----------------------------------------------------------------------------
136 nsresult
137 net_ParseFileURL(const nsACString &inURL,
138 nsACString &outDirectory,
139 nsACString &outFileBaseName,
140 nsACString &outFileExtension)
142 nsresult rv;
144 outDirectory.Truncate();
145 outFileBaseName.Truncate();
146 outFileExtension.Truncate();
148 const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
149 const char *url = flatURL.get();
151 PRUint32 schemeBeg, schemeEnd;
152 rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nsnull);
153 if (NS_FAILED(rv)) return rv;
155 if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
156 NS_ERROR("must be a file:// url");
157 return NS_ERROR_UNEXPECTED;
160 nsIURLParser *parser = net_GetNoAuthURLParser();
161 NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
163 PRUint32 pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
164 PRInt32 pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
166 // invoke the parser to extract the URL path
167 rv = parser->ParseURL(url, flatURL.Length(),
168 nsnull, nsnull, // don't care about scheme
169 nsnull, nsnull, // don't care about authority
170 &pathPos, &pathLen);
171 if (NS_FAILED(rv)) return rv;
173 // invoke the parser to extract filepath from the path
174 rv = parser->ParsePath(url + pathPos, pathLen,
175 &filepathPos, &filepathLen,
176 nsnull, nsnull, // don't care about param
177 nsnull, nsnull, // don't care about query
178 nsnull, nsnull); // don't care about ref
179 if (NS_FAILED(rv)) return rv;
181 filepathPos += pathPos;
183 // invoke the parser to extract the directory and filename from filepath
184 rv = parser->ParseFilePath(url + filepathPos, filepathLen,
185 &directoryPos, &directoryLen,
186 &basenamePos, &basenameLen,
187 &extensionPos, &extensionLen);
188 if (NS_FAILED(rv)) return rv;
190 if (directoryLen > 0)
191 outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
192 if (basenameLen > 0)
193 outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
194 if (extensionLen > 0)
195 outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
196 // since we are using a no-auth url parser, there will never be a host
197 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
199 return NS_OK;
202 //----------------------------------------------------------------------------
203 // path manipulation functions
204 //----------------------------------------------------------------------------
206 // Replace all /./ with a / while resolving URLs
207 // But only till #?
208 void
209 net_CoalesceDirs(netCoalesceFlags flags, char* path)
211 /* Stolen from the old netlib's mkparse.c.
213 * modifies a url of the form /foo/../foo1 -> /foo1
214 * and /foo/./foo1 -> /foo/foo1
215 * and /foo/foo1/.. -> /foo/
217 char *fwdPtr = path;
218 char *urlPtr = path;
219 char *lastslash = path;
220 PRUint32 traversal = 0;
221 PRUint32 special_ftp_len = 0;
223 /* Remember if this url is a special ftp one: */
224 if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
226 /* some schemes (for example ftp) have the speciality that
227 the path can begin // or /%2F to mark the root of the
228 servers filesystem, a simple / only marks the root relative
229 to the user loging in. We remember the length of the marker */
230 if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
231 special_ftp_len = 4;
232 else if (nsCRT::strncmp(path,"//",2) == 0 )
233 special_ftp_len = 2;
236 /* find the last slash before # or ? */
237 for(; (*fwdPtr != '\0') &&
238 (*fwdPtr != '?') &&
239 (*fwdPtr != '#'); ++fwdPtr)
243 /* found nothing, but go back one only */
244 /* if there is something to go back to */
245 if (fwdPtr != path && *fwdPtr == '\0')
247 --fwdPtr;
250 /* search the slash */
251 for(; (fwdPtr != path) &&
252 (*fwdPtr != '/'); --fwdPtr)
255 lastslash = fwdPtr;
256 fwdPtr = path;
258 /* replace all %2E or %2e with . in the path */
259 /* but stop at lastchar if non null */
260 for(; (*fwdPtr != '\0') &&
261 (*fwdPtr != '?') &&
262 (*fwdPtr != '#') &&
263 (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
265 if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
266 (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
268 *urlPtr++ = '.';
269 ++fwdPtr;
270 ++fwdPtr;
272 else
274 *urlPtr++ = *fwdPtr;
277 // Copy remaining stuff past the #?;
278 for (; *fwdPtr != '\0'; ++fwdPtr)
280 *urlPtr++ = *fwdPtr;
282 *urlPtr = '\0'; // terminate the url
284 // start again, this time for real
285 fwdPtr = path;
286 urlPtr = path;
288 for(; (*fwdPtr != '\0') &&
289 (*fwdPtr != '?') &&
290 (*fwdPtr != '#'); ++fwdPtr)
292 if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
294 // remove . followed by slash
295 ++fwdPtr;
297 else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
298 (*(fwdPtr+3) == '/' ||
299 *(fwdPtr+3) == '\0' || // This will take care of
300 *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag
301 *(fwdPtr+3) == '#'))
303 // remove foo/..
304 // reverse the urlPtr to the previous slash if possible
305 // if url does not allow relative root then drop .. above root
306 // otherwise retain them in the path
307 if(traversal > 0 || !(flags &
308 NET_COALESCE_ALLOW_RELATIVE_ROOT))
310 if (urlPtr != path)
311 urlPtr--; // we must be going back at least by one
312 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
313 ; // null body
314 --traversal; // count back
315 // forward the fwdPtr past the ../
316 fwdPtr += 2;
317 // if we have reached the beginning of the path
318 // while searching for the previous / and we remember
319 // that it is an url that begins with /%2F then
320 // advance urlPtr again by 3 chars because /%2F already
321 // marks the root of the path
322 if (urlPtr == path && special_ftp_len > 3)
324 ++urlPtr;
325 ++urlPtr;
326 ++urlPtr;
328 // special case if we have reached the end
329 // to preserve the last /
330 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
331 ++urlPtr;
333 else
335 // there are to much /.. in this path, just copy them instead.
336 // forward the urlPtr past the /.. and copying it
338 // However if we remember it is an url that starts with
339 // /%2F and urlPtr just points at the "F" of "/%2F" then do
340 // not overwrite it with the /, just copy .. and move forward
341 // urlPtr.
342 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
343 ++urlPtr;
344 else
345 *urlPtr++ = *fwdPtr;
346 ++fwdPtr;
347 *urlPtr++ = *fwdPtr;
348 ++fwdPtr;
349 *urlPtr++ = *fwdPtr;
352 else
354 // count the hierachie, but only if we do not have reached
355 // the root of some special urls with a special root marker
356 if (*fwdPtr == '/' && *(fwdPtr+1) != '.' &&
357 (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
358 traversal++;
359 // copy the url incrementaly
360 *urlPtr++ = *fwdPtr;
365 * Now lets remove trailing . case
366 * /foo/foo1/. -> /foo/foo1/
369 if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
370 urlPtr--;
372 // Copy remaining stuff past the #?;
373 for (; *fwdPtr != '\0'; ++fwdPtr)
375 *urlPtr++ = *fwdPtr;
377 *urlPtr = '\0'; // terminate the url
380 nsresult
381 net_ResolveRelativePath(const nsACString &relativePath,
382 const nsACString &basePath,
383 nsACString &result)
385 nsCAutoString name;
386 nsCAutoString path(basePath);
387 PRBool needsDelim = PR_FALSE;
389 if ( !path.IsEmpty() ) {
390 PRUnichar last = path.Last();
391 needsDelim = !(last == '/');
394 nsACString::const_iterator beg, end;
395 relativePath.BeginReading(beg);
396 relativePath.EndReading(end);
398 PRBool stop = PR_FALSE;
399 char c;
400 for (; !stop; ++beg) {
401 c = (beg == end) ? '\0' : *beg;
402 //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
403 switch (c) {
404 case '\0':
405 case '#':
406 case ';':
407 case '?':
408 stop = PR_TRUE;
409 // fall through...
410 case '/':
411 // delimiter found
412 if (name.EqualsLiteral("..")) {
413 // pop path
414 // If we already have the delim at end, then
415 // skip over that when searching for next one to the left
416 PRInt32 offset = path.Length() - (needsDelim ? 1 : 2);
417 // First check for errors
418 if (offset < 0 )
419 return NS_ERROR_MALFORMED_URI;
420 PRInt32 pos = path.RFind("/", PR_FALSE, offset);
421 if (pos >= 0)
422 path.Truncate(pos + 1);
423 else
424 path.Truncate();
426 else if (name.IsEmpty() || name.EqualsLiteral(".")) {
427 // do nothing
429 else {
430 // append name to path
431 if (needsDelim)
432 path += '/';
433 path += name;
434 needsDelim = PR_TRUE;
436 name.Truncate();
437 break;
439 default:
440 // append char to name
441 name += c;
444 // append anything left on relativePath (e.g. #..., ;..., ?...)
445 if (c != '\0')
446 path += Substring(--beg, end);
448 result = path;
449 return NS_OK;
452 //----------------------------------------------------------------------------
453 // scheme fu
454 //----------------------------------------------------------------------------
456 /* Extract URI-Scheme if possible */
457 nsresult
458 net_ExtractURLScheme(const nsACString &inURI,
459 PRUint32 *startPos,
460 PRUint32 *endPos,
461 nsACString *scheme)
463 // search for something up to a colon, and call it the scheme
464 const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
465 const char* uri_start = flatURI.get();
466 const char* uri = uri_start;
468 if (!uri)
469 return NS_ERROR_MALFORMED_URI;
471 // skip leading white space
472 while (nsCRT::IsAsciiSpace(*uri))
473 uri++;
475 PRUint32 start = uri - uri_start;
476 if (startPos) {
477 *startPos = start;
480 PRUint32 length = 0;
481 char c;
482 while ((c = *uri++) != '\0') {
483 // First char must be Alpha
484 if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
485 length++;
487 // Next chars can be alpha + digit + some special chars
488 else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
489 nsCRT::IsAsciiDigit(c) || c == '+' ||
490 c == '.' || c == '-')) {
491 length++;
493 // stop if colon reached but not as first char
494 else if (c == ':' && length > 0) {
495 if (endPos) {
496 *endPos = start + length;
499 if (scheme)
500 scheme->Assign(Substring(inURI, start, length));
501 return NS_OK;
503 else
504 break;
506 return NS_ERROR_MALFORMED_URI;
509 PRBool
510 net_IsValidScheme(const char *scheme, PRUint32 schemeLen)
512 // first char must be alpha
513 if (!nsCRT::IsAsciiAlpha(*scheme))
514 return PR_FALSE;
516 // nsCStrings may have embedded nulls -- reject those too
517 for (; schemeLen; ++scheme, --schemeLen) {
518 if (!(nsCRT::IsAsciiAlpha(*scheme) ||
519 nsCRT::IsAsciiDigit(*scheme) ||
520 *scheme == '+' ||
521 *scheme == '.' ||
522 *scheme == '-'))
523 return PR_FALSE;
526 return PR_TRUE;
529 PRBool
530 net_FilterURIString(const char *str, nsACString& result)
532 NS_PRECONDITION(str, "Must have a non-null string!");
533 PRBool writing = PR_FALSE;
534 result.Truncate();
535 const char *p = str;
537 // Remove leading spaces, tabs, CR, LF if any.
538 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
539 writing = PR_TRUE;
540 str = p + 1;
541 p++;
544 while (*p) {
545 if (*p == '\t' || *p == '\r' || *p == '\n') {
546 writing = PR_TRUE;
547 // append chars up to but not including *p
548 if (p > str)
549 result.Append(str, p - str);
550 str = p + 1;
552 p++;
555 // Remove trailing spaces if any
556 while (((p-1) >= str) && (*(p-1) == ' ')) {
557 writing = PR_TRUE;
558 p--;
561 if (writing && p > str)
562 result.Append(str, p - str);
564 return writing;
567 #if defined(XP_WIN) || defined(XP_OS2)
568 PRBool
569 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
571 PRBool writing = PR_FALSE;
573 nsACString::const_iterator beginIter, endIter;
574 aURL.BeginReading(beginIter);
575 aURL.EndReading(endIter);
577 const char *s, *begin = beginIter.get();
579 for (s = begin; s != endIter.get(); ++s)
581 if (*s == '\\')
583 writing = PR_TRUE;
584 if (s > begin)
585 aResultBuf.Append(begin, s - begin);
586 aResultBuf += '/';
587 begin = s + 1;
590 if (writing && s > begin)
591 aResultBuf.Append(begin, s - begin);
593 return writing;
595 #endif
597 //----------------------------------------------------------------------------
598 // miscellaneous (i.e., stuff that should really be elsewhere)
599 //----------------------------------------------------------------------------
// Converts |c| to lower case in place if it is an ASCII upper-case letter;
// every other value is left untouched.
static inline
void ToLower(char &c)
{
    if (c >= 'A' && c <= 'Z')
        c += 'a' - 'A';
}
608 void
609 net_ToLowerCase(char *str, PRUint32 length)
611 for (char *end = str + length; str < end; ++str)
612 ToLower(*str);
615 void
616 net_ToLowerCase(char *str)
618 for (; *str; ++str)
619 ToLower(*str);
/**
 * Finds the first character in [iter, stop) that is a member of |set|.
 * The scan also ends at a NUL character.
 *
 * @param iter  start of the range to search
 * @param stop  end of the range (exclusive)
 * @param set   NUL-terminated list of characters to look for
 * @return pointer to the first match; otherwise |stop| or the position of
 *         the NUL that ended the scan, whichever comes first.
 */
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop && *iter != '\0') {
        // Walk the set until it is exhausted or a member equals *iter.
        const char *member = set;
        while (*member != '\0' && *member != *iter)
            ++member;
        if (*member != '\0')
            break;  // *iter is in the set
        ++iter;
    }
    return (char *) iter;
}
/**
 * Finds the first character in [iter, stop) that is NOT a member of |set|.
 * A NUL character is never a member, so it always ends the scan.
 *
 * @param iter  start of the range to search
 * @param stop  end of the range (exclusive)
 * @param set   NUL-terminated list of characters to skip over
 * @return pointer to the first character not in |set|, or |stop| if every
 *         character of the range is in |set| or the range is empty.
 */
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    // Testing iter against stop before every read keeps an empty range from
    // being dereferenced (and from running past |stop| when the byte at
    // |stop| happens to be in the set).
    while (iter != stop) {
        const char *member = set;
        while (*member != '\0' && *member != *iter)
            ++member;
        if (*member == '\0')
            break;  // *iter is not in the set
        ++iter;
    }
    return (char *) iter;
}
/**
 * Searches the range [stop, iter) backwards, starting at the character
 * before |iter|, for the last character that is NOT a member of |set|.
 *
 * @param stop  first character of the range
 * @param iter  one past the last character of the range
 * @param set   NUL-terminated list of characters to skip over
 * @return pointer to the last character not in |set|, or |stop - 1| if the
 *         range is empty or consists only of characters from |set|.
 */
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;    // last character of the range
    const char *limit = stop - 1;  // one before the first character

    if (cur == limit)
        return (char *) cur;

    for (;;) {
        const char *member = set;
        while (*member != '\0' && *member != *cur)
            ++member;
        if (*member == '\0')
            break;  // *cur is not in the set
        if (--cur == limit)
            break;  // ran out of characters
    }
    return (char *) cur;
}
668 #define HTTP_LWS " \t"
670 // Return the index of the closing quote of the string, if any
671 static PRUint32
672 net_FindStringEnd(const nsCString& flatStr,
673 PRUint32 stringStart,
674 char stringDelim)
676 NS_ASSERTION(stringStart < flatStr.Length() &&
677 flatStr.CharAt(stringStart) == stringDelim &&
678 (stringDelim == '"' || stringDelim == '\''),
679 "Invalid stringStart");
681 const char set[] = { stringDelim, '\\', '\0' };
682 do {
683 // stringStart points to either the start quote or the last
684 // escaped char (the char following a '\\')
686 // Write to searchStart here, so that when we get back to the
687 // top of the loop right outside this one we search from the
688 // right place.
689 PRUint32 stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
690 if (stringEnd == PRUint32(kNotFound))
691 return flatStr.Length();
693 if (flatStr.CharAt(stringEnd) == '\\') {
694 // Hit a backslash-escaped char. Need to skip over it.
695 stringStart = stringEnd + 1;
696 if (stringStart == flatStr.Length())
697 return stringStart;
699 // Go back to looking for the next escape or the string end
700 continue;
703 return stringEnd;
705 } while (PR_TRUE);
707 NS_NOTREACHED("How did we get here?");
708 return flatStr.Length();
712 static PRUint32
713 net_FindMediaDelimiter(const nsCString& flatStr,
714 PRUint32 searchStart,
715 char delimiter)
717 do {
718 // searchStart points to the spot from which we should start looking
719 // for the delimiter.
720 const char delimStr[] = { delimiter, '"', '\'', '\0' };
721 PRUint32 curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
722 if (curDelimPos == PRUint32(kNotFound))
723 return flatStr.Length();
725 char ch = flatStr.CharAt(curDelimPos);
726 if (ch == delimiter) {
727 // Found delimiter
728 return curDelimPos;
731 // We hit the start of a quoted string. Look for its end.
732 searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
733 if (searchStart == flatStr.Length())
734 return searchStart;
736 ++searchStart;
738 // searchStart now points to the first char after the end of the
739 // string, so just go back to the top of the loop and look for
740 // |delimiter| again.
741 } while (PR_TRUE);
743 NS_NOTREACHED("How did we get here?");
744 return flatStr.Length();
747 // aOffset should be added to aCharsetStart and aCharsetEnd if this
748 // function sets them.
749 static void
750 net_ParseMediaType(const nsACString &aMediaTypeStr,
751 nsACString &aContentType,
752 nsACString &aContentCharset,
753 PRInt32 aOffset,
754 PRBool *aHadCharset,
755 PRInt32 *aCharsetStart,
756 PRInt32 *aCharsetEnd)
758 const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
759 const char* start = flatStr.get();
760 const char* end = start + flatStr.Length();
762 // Trim LWS leading and trailing whitespace from type. We include '(' in
763 // the trailing trim set to catch media-type comments, which are not at all
764 // standard, but may occur in rare cases.
765 const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
766 const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
768 const char* charset = "";
769 const char* charsetEnd = charset;
770 PRInt32 charsetParamStart;
771 PRInt32 charsetParamEnd;
773 // Iterate over parameters
774 PRBool typeHasCharset = PR_FALSE;
775 PRUint32 paramStart = flatStr.FindChar(';', typeEnd - start);
776 if (paramStart != PRUint32(kNotFound)) {
777 // We have parameters. Iterate over them.
778 PRUint32 curParamStart = paramStart + 1;
779 do {
780 PRUint32 curParamEnd =
781 net_FindMediaDelimiter(flatStr, curParamStart, ';');
783 const char* paramName = net_FindCharNotInSet(start + curParamStart,
784 start + curParamEnd,
785 HTTP_LWS);
786 static const char charsetStr[] = "charset=";
787 if (PL_strncasecmp(paramName, charsetStr,
788 sizeof(charsetStr) - 1) == 0) {
789 charset = paramName + sizeof(charsetStr) - 1;
790 charsetEnd = start + curParamEnd;
791 typeHasCharset = PR_TRUE;
792 charsetParamStart = curParamStart - 1;
793 charsetParamEnd = curParamEnd;
796 curParamStart = curParamEnd + 1;
797 } while (curParamStart < flatStr.Length());
800 if (typeHasCharset) {
801 // Trim LWS leading and trailing whitespace from charset. We include
802 // '(' in the trailing trim set to catch media-type comments, which are
803 // not at all standard, but may occur in rare cases.
804 charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
805 if (*charset == '"' || *charset == '\'') {
806 charsetEnd =
807 start + net_FindStringEnd(flatStr, charset - start, *charset);
808 charset++;
809 NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
810 } else {
811 charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
815 // if the server sent "*/*", it is meaningless, so do not store it.
816 // also, if type is the same as aContentType, then just update the
817 // charset. however, if charset is empty and aContentType hasn't
818 // changed, then don't wipe-out an existing aContentCharset. We
819 // also want to reject a mime-type if it does not include a slash.
820 // some servers give junk after the charset parameter, which may
821 // include a comma, so this check makes us a bit more tolerant.
823 if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
824 memchr(type, '/', typeEnd - type) != NULL) {
825 // Common case here is that aContentType is empty
826 PRBool eq = !aContentType.IsEmpty() &&
827 aContentType.Equals(Substring(type, typeEnd),
828 nsCaseInsensitiveCStringComparator());
829 if (!eq) {
830 aContentType.Assign(type, typeEnd - type);
831 ToLowerCase(aContentType);
834 if ((!eq && *aHadCharset) || typeHasCharset) {
835 *aHadCharset = PR_TRUE;
836 aContentCharset.Assign(charset, charsetEnd - charset);
837 if (typeHasCharset) {
838 *aCharsetStart = charsetParamStart + aOffset;
839 *aCharsetEnd = charsetParamEnd + aOffset;
842 // Only set a new charset position if this is a different type
843 // from the last one we had and it doesn't already have a
844 // charset param. If this is the same type, we probably want
845 // to leave the charset position on its first occurrence.
846 if (!eq && !typeHasCharset) {
847 PRInt32 charsetStart = PRInt32(paramStart);
848 if (charsetStart == kNotFound)
849 charsetStart = flatStr.Length();
851 *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
856 #undef HTTP_LWS
858 void
859 net_ParseContentType(const nsACString &aHeaderStr,
860 nsACString &aContentType,
861 nsACString &aContentCharset,
862 PRBool *aHadCharset)
864 PRInt32 dummy1, dummy2;
865 net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
866 aHadCharset, &dummy1, &dummy2);
869 void
870 net_ParseContentType(const nsACString &aHeaderStr,
871 nsACString &aContentType,
872 nsACString &aContentCharset,
873 PRBool *aHadCharset,
874 PRInt32 *aCharsetStart,
875 PRInt32 *aCharsetEnd)
878 // Augmented BNF (from RFC 2616 section 3.7):
880 // header-value = media-type *( LWS "," LWS media-type )
881 // media-type = type "/" subtype *( LWS ";" LWS parameter )
882 // type = token
883 // subtype = token
884 // parameter = attribute "=" value
885 // attribute = token
886 // value = token | quoted-string
889 // Examples:
891 // text/html
892 // text/html, text/html
893 // text/html,text/html; charset=ISO-8859-1
894 // text/html,text/html; charset="ISO-8859-1"
895 // text/html;charset=ISO-8859-1, text/html
896 // text/html;charset='ISO-8859-1', text/html
897 // application/octet-stream
900 *aHadCharset = PR_FALSE;
901 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
903 // iterate over media-types. Note that ',' characters can happen
904 // inside quoted strings, so we need to watch out for that.
905 PRUint32 curTypeStart = 0;
906 do {
907 // curTypeStart points to the start of the current media-type. We want
908 // to look for its end.
909 PRUint32 curTypeEnd =
910 net_FindMediaDelimiter(flatStr, curTypeStart, ',');
912 // At this point curTypeEnd points to the spot where the media-type
913 // starting at curTypeEnd ends. Time to parse that!
914 net_ParseMediaType(Substring(flatStr, curTypeStart,
915 curTypeEnd - curTypeStart),
916 aContentType, aContentCharset, curTypeStart,
917 aHadCharset, aCharsetStart, aCharsetEnd);
919 // And let's move on to the next media-type
920 curTypeStart = curTypeEnd + 1;
921 } while (curTypeStart < flatStr.Length());
924 PRBool
925 net_IsValidHostName(const nsCSubstring &host)
927 const char *end = host.EndReading();
928 // Use explicit whitelists to select which characters we are
929 // willing to send to lower-level DNS logic. This is more
930 // self-documenting, and can also be slightly faster than the
931 // blacklist approach, since DNS names are the common case, and
932 // the commonest characters will tend to be near the start of
933 // the list.
935 // Whitelist for DNS names (RFC 1035) with extra characters added
936 // for pragmatic reasons "$+_"
937 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
938 if (net_FindCharNotInSet(host.BeginReading(), end,
939 "abcdefghijklmnopqrstuvwxyz"
940 ".-0123456789"
941 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
942 return PR_TRUE;
944 // Might be a valid IPv6 link-local address containing a percent sign
945 nsCAutoString strhost(host);
946 PRNetAddr addr;
947 return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;