Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / netwerk / base / src / nsURLParsers.cpp
blob565730126bb5cd844972b6a457673572657a474f
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Darin Fisher (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include <string.h>
40 #include "nsURLParsers.h"
41 #include "nsURLHelper.h"
42 #include "nsIURI.h"
43 #include "prtypes.h"
44 #include "nsString.h"
45 #include "nsCRT.h"
46 #include "netCore.h"
48 //----------------------------------------------------------------------------
50 static PRUint32
51 CountConsecutiveSlashes(const char *str, PRInt32 len)
53 PRUint32 count = 0;
54 while (len-- && *str++ == '/') ++count;
55 return count;
58 //----------------------------------------------------------------------------
59 // nsBaseURLParser implementation
60 //----------------------------------------------------------------------------
62 // The URL parser service does not have any internal state; however, it can
63 // be called from multiple threads, so we must use a threadsafe AddRef and
64 // Release implementation.
65 NS_IMPL_THREADSAFE_ISUPPORTS1(nsBaseURLParser, nsIURLParser)
67 #define SET_RESULT(component, pos, len) \
68 PR_BEGIN_MACRO \
69 if (component ## Pos) \
70 *component ## Pos = PRUint32(pos); \
71 if (component ## Len) \
72 *component ## Len = PRInt32(len); \
73 PR_END_MACRO
75 #define OFFSET_RESULT(component, offset) \
76 PR_BEGIN_MACRO \
77 if (component ## Pos) \
78 *component ## Pos += offset; \
79 PR_END_MACRO
81 NS_IMETHODIMP
82 nsBaseURLParser::ParseURL(const char *spec, PRInt32 specLen,
83 PRUint32 *schemePos, PRInt32 *schemeLen,
84 PRUint32 *authorityPos, PRInt32 *authorityLen,
85 PRUint32 *pathPos, PRInt32 *pathLen)
87 NS_PRECONDITION(spec, "null pointer");
89 if (specLen < 0)
90 specLen = strlen(spec);
92 const char *stop = nsnull;
93 const char *colon = nsnull;
94 const char *slash = nsnull;
95 const char *p;
96 PRInt32 len = specLen;
97 for (p = spec; len && *p && !colon && !slash; ++p, --len) {
98 // skip leading whitespace and control characters
99 if (*p > '\0' && *p <= ' ') {
100 spec++;
101 specLen--;
102 continue;
104 switch (*p) {
105 case ':':
106 if (!colon)
107 colon = p;
108 break;
109 case '/': // start of filepath
110 case '?': // start of query
111 case '#': // start of ref
112 case ';': // start of param
113 if (!slash)
114 slash = p;
115 break;
116 case '@': // username@hostname
117 case '[': // start of IPv6 address literal
118 if (!stop)
119 stop = p;
120 break;
123 // disregard the first colon if it follows an '@' or a '['
124 if (colon && stop && colon > stop)
125 colon = nsnull;
127 // if the spec only contained whitespace or control characters...
128 if (specLen == 0) {
129 SET_RESULT(scheme, 0, -1);
130 SET_RESULT(authority, 0, 0);
131 SET_RESULT(path, 0, 0);
132 return NS_OK;
135 // ignore trailing whitespace and control characters
136 for (p = spec + specLen - 1; ((unsigned char) *p <= ' ') && (p != spec); --p)
139 specLen = p - spec + 1;
141 if (colon && (colon < slash || !slash)) {
143 // spec = <scheme>:/<the-rest>
145 // or
147 // spec = <scheme>:<authority>
148 // spec = <scheme>:<path-no-slashes>
150 if (!net_IsValidScheme(spec, colon - spec) || (*(colon+1) == ':')) {
151 NS_WARNING("malformed uri");
152 return NS_ERROR_MALFORMED_URI;
154 SET_RESULT(scheme, 0, colon - spec);
155 if (authorityLen || pathLen) {
156 PRUint32 offset = colon + 1 - spec;
157 ParseAfterScheme(colon + 1, specLen - offset,
158 authorityPos, authorityLen,
159 pathPos, pathLen);
160 OFFSET_RESULT(authority, offset);
161 OFFSET_RESULT(path, offset);
164 else {
166 // spec = <authority-no-port-or-password>/<path>
167 // spec = <path>
169 // or
171 // spec = <authority-no-port-or-password>/<path-with-colon>
172 // spec = <path-with-colon>
174 // or
176 // spec = <authority-no-port-or-password>
177 // spec = <path-no-slashes-or-colon>
179 SET_RESULT(scheme, 0, -1);
180 if (authorityLen || pathLen)
181 ParseAfterScheme(spec, specLen,
182 authorityPos, authorityLen,
183 pathPos, pathLen);
185 return NS_OK;
188 NS_IMETHODIMP
189 nsBaseURLParser::ParseAuthority(const char *auth, PRInt32 authLen,
190 PRUint32 *usernamePos, PRInt32 *usernameLen,
191 PRUint32 *passwordPos, PRInt32 *passwordLen,
192 PRUint32 *hostnamePos, PRInt32 *hostnameLen,
193 PRInt32 *port)
195 NS_PRECONDITION(auth, "null pointer");
197 if (authLen < 0)
198 authLen = strlen(auth);
200 SET_RESULT(username, 0, -1);
201 SET_RESULT(password, 0, -1);
202 SET_RESULT(hostname, 0, authLen);
203 if (port)
204 *port = -1;
205 return NS_OK;
208 NS_IMETHODIMP
209 nsBaseURLParser::ParseUserInfo(const char *userinfo, PRInt32 userinfoLen,
210 PRUint32 *usernamePos, PRInt32 *usernameLen,
211 PRUint32 *passwordPos, PRInt32 *passwordLen)
213 SET_RESULT(username, 0, -1);
214 SET_RESULT(password, 0, -1);
215 return NS_OK;
218 NS_IMETHODIMP
219 nsBaseURLParser::ParseServerInfo(const char *serverinfo, PRInt32 serverinfoLen,
220 PRUint32 *hostnamePos, PRInt32 *hostnameLen,
221 PRInt32 *port)
223 SET_RESULT(hostname, 0, -1);
224 if (port)
225 *port = -1;
226 return NS_OK;
229 NS_IMETHODIMP
230 nsBaseURLParser::ParsePath(const char *path, PRInt32 pathLen,
231 PRUint32 *filepathPos, PRInt32 *filepathLen,
232 PRUint32 *paramPos, PRInt32 *paramLen,
233 PRUint32 *queryPos, PRInt32 *queryLen,
234 PRUint32 *refPos, PRInt32 *refLen)
236 NS_PRECONDITION(path, "null pointer");
238 if (pathLen < 0)
239 pathLen = strlen(path);
241 // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
243 // XXX PL_strnpbrk would be nice, but it's buggy
245 // search for first occurrence of either ? or #
246 const char *query_beg = 0, *query_end = 0;
247 const char *ref_beg = 0;
248 const char *p = 0;
249 for (p = path; p < path + pathLen; ++p) {
250 // only match the query string if it precedes the reference fragment
251 if (!ref_beg && !query_beg && *p == '?')
252 query_beg = p + 1;
253 else if (*p == '#') {
254 ref_beg = p + 1;
255 if (query_beg)
256 query_end = p;
257 break;
261 if (query_beg) {
262 if (query_end)
263 SET_RESULT(query, query_beg - path, query_end - query_beg);
264 else
265 SET_RESULT(query, query_beg - path, pathLen - (query_beg - path));
267 else
268 SET_RESULT(query, 0, -1);
270 if (ref_beg)
271 SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path));
272 else
273 SET_RESULT(ref, 0, -1);
275 // search backwards for param
276 const char *param_beg = 0;
277 const char *end;
278 if (query_beg)
279 end = query_beg - 1;
280 else if (ref_beg)
281 end = ref_beg - 1;
282 else
283 end = path + pathLen;
284 for (p = end - 1; p >= path && *p != '/'; --p) {
285 if (*p == ';') {
286 // found param
287 param_beg = p + 1;
291 if (param_beg) {
292 // found <filepath>;<param>
293 SET_RESULT(param, param_beg - path, end - param_beg);
294 end = param_beg - 1;
296 else
297 SET_RESULT(param, 0, -1);
299 // an empty file path is no file path
300 if (end != path)
301 SET_RESULT(filepath, 0, end - path);
302 else
303 SET_RESULT(filepath, 0, -1);
304 return NS_OK;
307 NS_IMETHODIMP
308 nsBaseURLParser::ParseFilePath(const char *filepath, PRInt32 filepathLen,
309 PRUint32 *directoryPos, PRInt32 *directoryLen,
310 PRUint32 *basenamePos, PRInt32 *basenameLen,
311 PRUint32 *extensionPos, PRInt32 *extensionLen)
313 NS_PRECONDITION(filepath, "null pointer");
315 if (filepathLen < 0)
316 filepathLen = strlen(filepath);
318 if (filepathLen == 0) {
319 SET_RESULT(directory, 0, -1);
320 SET_RESULT(basename, 0, 0); // assume a zero length file basename
321 SET_RESULT(extension, 0, -1);
322 return NS_OK;
325 const char *p;
326 const char *end = filepath + filepathLen;
328 // search backwards for filename
329 for (p = end - 1; *p != '/' && p > filepath; --p)
331 if (*p == '/') {
332 // catch /.. and /.
333 if ((p+1 < end && *(p+1) == '.') &&
334 (p+2 == end || (*(p+2) == '.' && p+3 == end)))
335 p = end - 1;
336 // filepath = <directory><filename>.<extension>
337 SET_RESULT(directory, 0, p - filepath + 1);
338 ParseFileName(p + 1, end - (p + 1),
339 basenamePos, basenameLen,
340 extensionPos, extensionLen);
341 OFFSET_RESULT(basename, p + 1 - filepath);
342 OFFSET_RESULT(extension, p + 1 - filepath);
344 else {
345 // filepath = <filename>.<extension>
346 SET_RESULT(directory, 0, -1);
347 ParseFileName(filepath, filepathLen,
348 basenamePos, basenameLen,
349 extensionPos, extensionLen);
351 return NS_OK;
354 nsresult
355 nsBaseURLParser::ParseFileName(const char *filename, PRInt32 filenameLen,
356 PRUint32 *basenamePos, PRInt32 *basenameLen,
357 PRUint32 *extensionPos, PRInt32 *extensionLen)
359 NS_PRECONDITION(filename, "null pointer");
361 if (filenameLen < 0)
362 filenameLen = strlen(filename);
364 // no extension if filename ends with a '.'
365 if (filename[filenameLen-1] != '.') {
366 // ignore '.' at the beginning
367 for (const char *p = filename + filenameLen - 1; p > filename; --p) {
368 if (*p == '.') {
369 // filename = <basename.extension>
370 SET_RESULT(basename, 0, p - filename);
371 SET_RESULT(extension, p + 1 - filename, filenameLen - (p - filename + 1));
372 return NS_OK;
376 // filename = <basename>
377 SET_RESULT(basename, 0, filenameLen);
378 SET_RESULT(extension, 0, -1);
379 return NS_OK;
382 //----------------------------------------------------------------------------
383 // nsNoAuthURLParser implementation
384 //----------------------------------------------------------------------------
386 void
387 nsNoAuthURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
388 PRUint32 *authPos, PRInt32 *authLen,
389 PRUint32 *pathPos, PRInt32 *pathLen)
391 NS_PRECONDITION(specLen >= 0, "unexpected");
393 // everything is the path
394 PRUint32 pos = 0;
395 switch (CountConsecutiveSlashes(spec, specLen)) {
396 case 0:
397 case 1:
398 break;
399 case 2:
401 const char *p = nsnull;
402 if (specLen > 2) {
403 // looks like there is an authority section
404 #if defined(XP_WIN) || defined(XP_OS2)
405 // if the authority looks like a drive number then we
406 // really want to treat it as part of the path
407 if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
408 nsCRT::IsAsciiAlpha(spec[2]) &&
409 ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
410 pos = 1;
411 break;
413 #endif
414 p = (const char *) memchr(spec + 2, '/', specLen - 2);
416 if (p) {
417 SET_RESULT(auth, 2, p - (spec + 2));
418 SET_RESULT(path, p - spec, specLen - (p - spec));
420 else {
421 SET_RESULT(auth, 2, specLen - 2);
422 SET_RESULT(path, 0, -1);
424 return;
426 default:
427 pos = 2;
428 break;
430 SET_RESULT(auth, pos, 0);
431 SET_RESULT(path, pos, specLen - pos);
#if defined(XP_WIN) || defined(XP_OS2)
// XP_WIN/XP_OS2 override: a filepath made up of nothing but a drive
// designator ("c:" or "c|", with or without a leading '/') is reported as a
// directory with no basename and no extension.  Everything else is handed to
// nsBaseURLParser::ParseFilePath.
NS_IMETHODIMP
nsNoAuthURLParser::ParseFilePath(const char *filepath, PRInt32 filepathLen,
                                 PRUint32 *directoryPos, PRInt32 *directoryLen,
                                 PRUint32 *basenamePos, PRInt32 *basenameLen,
                                 PRUint32 *extensionPos, PRInt32 *extensionLen)
{
    NS_PRECONDITION(filepath, "null pointer");

    if (filepathLen < 0) {
        filepathLen = strlen(filepath);
    }

    // look for a filepath consisting of only a drive number, which may or
    // may not have a leading slash.
    if (filepathLen == 2 || filepathLen == 3) {
        const char *drive = (*filepath == '/') ? filepath + 1 : filepath;
        const char *stop = filepath + filepathLen;
        if ((stop - drive == 2) &&
            (drive[1] == ':' || drive[1] == '|') &&
            nsCRT::IsAsciiAlpha(*drive)) {
            // filepath = <drive-number>:
            SET_RESULT(directory, 0, filepathLen);
            SET_RESULT(basename, 0, -1);
            SET_RESULT(extension, 0, -1);
            return NS_OK;
        }
    }

    // otherwise fallback on common implementation
    return nsBaseURLParser::ParseFilePath(filepath, filepathLen,
                                          directoryPos, directoryLen,
                                          basenamePos, basenameLen,
                                          extensionPos, extensionLen);
}
#endif
470 //----------------------------------------------------------------------------
471 // nsAuthURLParser implementation
472 //----------------------------------------------------------------------------
474 NS_IMETHODIMP
475 nsAuthURLParser::ParseAuthority(const char *auth, PRInt32 authLen,
476 PRUint32 *usernamePos, PRInt32 *usernameLen,
477 PRUint32 *passwordPos, PRInt32 *passwordLen,
478 PRUint32 *hostnamePos, PRInt32 *hostnameLen,
479 PRInt32 *port)
481 nsresult rv;
483 NS_PRECONDITION(auth, "null pointer");
485 if (authLen < 0)
486 authLen = strlen(auth);
488 if (authLen == 0) {
489 SET_RESULT(username, 0, -1);
490 SET_RESULT(password, 0, -1);
491 SET_RESULT(hostname, 0, 0);
492 if (port)
493 *port = -1;
494 return NS_OK;
497 // search backwards for @
498 const char *p = auth + authLen - 1;
499 for (; (*p != '@') && (p > auth); --p);
500 if ( *p == '@' ) {
501 // auth = <user-info@server-info>
502 rv = ParseUserInfo(auth, p - auth,
503 usernamePos, usernameLen,
504 passwordPos, passwordLen);
505 if (NS_FAILED(rv)) return rv;
506 rv = ParseServerInfo(p + 1, authLen - (p - auth + 1),
507 hostnamePos, hostnameLen,
508 port);
509 if (NS_FAILED(rv)) return rv;
510 OFFSET_RESULT(hostname, p + 1 - auth);
512 else {
513 // auth = <server-info>
514 SET_RESULT(username, 0, -1);
515 SET_RESULT(password, 0, -1);
516 rv = ParseServerInfo(auth, authLen,
517 hostnamePos, hostnameLen,
518 port);
519 if (NS_FAILED(rv)) return rv;
521 return NS_OK;
524 NS_IMETHODIMP
525 nsAuthURLParser::ParseUserInfo(const char *userinfo, PRInt32 userinfoLen,
526 PRUint32 *usernamePos, PRInt32 *usernameLen,
527 PRUint32 *passwordPos, PRInt32 *passwordLen)
529 NS_PRECONDITION(userinfo, "null pointer");
531 if (userinfoLen < 0)
532 userinfoLen = strlen(userinfo);
534 if (userinfoLen == 0) {
535 SET_RESULT(username, 0, -1);
536 SET_RESULT(password, 0, -1);
537 return NS_OK;
540 const char *p = (const char *) memchr(userinfo, ':', userinfoLen);
541 if (p) {
542 // userinfo = <username:password>
543 if (p == userinfo) {
544 // must have a username!
545 return NS_ERROR_MALFORMED_URI;
547 SET_RESULT(username, 0, p - userinfo);
548 SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1));
550 else {
551 // userinfo = <username>
552 SET_RESULT(username, 0, userinfoLen);
553 SET_RESULT(password, 0, -1);
555 return NS_OK;
558 NS_IMETHODIMP
559 nsAuthURLParser::ParseServerInfo(const char *serverinfo, PRInt32 serverinfoLen,
560 PRUint32 *hostnamePos, PRInt32 *hostnameLen,
561 PRInt32 *port)
563 NS_PRECONDITION(serverinfo, "null pointer");
565 if (serverinfoLen < 0)
566 serverinfoLen = strlen(serverinfo);
568 if (serverinfoLen == 0) {
569 SET_RESULT(hostname, 0, 0);
570 if (port)
571 *port = -1;
572 return NS_OK;
575 // search backwards for a ':' but stop on ']' (IPv6 address literal
576 // delimiter). check for illegal characters in the hostname.
577 const char *p = serverinfo + serverinfoLen - 1;
578 const char *colon = nsnull, *bracket = nsnull;
579 for (; p > serverinfo; --p) {
580 switch (*p) {
581 case ']':
582 bracket = p;
583 break;
584 case ':':
585 if (bracket == nsnull)
586 colon = p;
587 break;
588 case ' ':
589 // hostname must not contain a space
590 NS_WARNING("malformed hostname");
591 return NS_ERROR_MALFORMED_URI;
595 if (colon) {
596 // serverinfo = <hostname:port>
597 SET_RESULT(hostname, 0, colon - serverinfo);
598 if (port) {
599 // XXX unfortunately ToInteger is not defined for substrings
600 nsCAutoString buf(colon+1, serverinfoLen - (colon + 1 - serverinfo));
601 PRInt32 err;
602 *port = buf.ToInteger(&err);
603 if (NS_FAILED(err))
604 *port = -1;
607 else {
608 // serverinfo = <hostname>
609 SET_RESULT(hostname, 0, serverinfoLen);
610 if (port)
611 *port = -1;
613 return NS_OK;
616 void
617 nsAuthURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
618 PRUint32 *authPos, PRInt32 *authLen,
619 PRUint32 *pathPos, PRInt32 *pathLen)
621 NS_PRECONDITION(specLen >= 0, "unexpected");
623 PRUint32 nslash = CountConsecutiveSlashes(spec, specLen);
625 // search for the end of the authority section
626 const char *end = spec + specLen;
627 const char *p;
628 for (p = spec + nslash; p < end; ++p) {
629 if (*p == '/' || *p == '?' || *p == '#' || *p == ';')
630 break;
632 if (p < end) {
633 // spec = [/]<auth><path>
634 SET_RESULT(auth, nslash, p - (spec + nslash));
635 SET_RESULT(path, p - spec, specLen - (p - spec));
637 else {
638 // spec = [/]<auth>
639 SET_RESULT(auth, nslash, specLen - nslash);
640 SET_RESULT(path, 0, -1);
644 //----------------------------------------------------------------------------
645 // nsStdURLParser implementation
646 //----------------------------------------------------------------------------
648 void
649 nsStdURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
650 PRUint32 *authPos, PRInt32 *authLen,
651 PRUint32 *pathPos, PRInt32 *pathLen)
653 NS_PRECONDITION(specLen >= 0, "unexpected");
655 PRUint32 nslash = CountConsecutiveSlashes(spec, specLen);
657 // search for the end of the authority section
658 const char *end = spec + specLen;
659 const char *p;
660 for (p = spec + nslash; p < end; ++p) {
661 if (strchr("/?#;", *p))
662 break;
664 switch (nslash) {
665 case 0:
666 case 2:
667 if (p < end) {
668 // spec = (//)<auth><path>
669 SET_RESULT(auth, nslash, p - (spec + nslash));
670 SET_RESULT(path, p - spec, specLen - (p - spec));
672 else {
673 // spec = (//)<auth>
674 SET_RESULT(auth, nslash, specLen - nslash);
675 SET_RESULT(path, 0, -1);
677 break;
678 case 1:
679 // spec = /<path>
680 SET_RESULT(auth, 0, -1);
681 SET_RESULT(path, 0, specLen);
682 break;
683 default:
684 // spec = ///[/]<path>
685 SET_RESULT(auth, 2, 0);
686 SET_RESULT(path, 2, specLen - 2);