1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * Darin Fisher (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
40 #include "nsURLParsers.h"
41 #include "nsURLHelper.h"
48 //----------------------------------------------------------------------------
// Counts the run of '/' characters at the start of |str|.
// The visible loop advances while |len| is non-zero and the current character
// is '/', so for a non-negative |len| it examines at most |len| characters and
// stops at the first character that is not a slash (including a NUL).
// NOTE(review): the return type, the declaration of |count| and the return
// statement are not visible in this listing -- confirm against the full file.
51 CountConsecutiveSlashes(const char *str
, PRInt32 len
)
54 while (len
-- && *str
++ == '/') ++count
;
58 //----------------------------------------------------------------------------
59 // nsBaseURLParser implementation
60 //----------------------------------------------------------------------------
62 // The URL parser service does not have any internal state; however, it can
63 // be called from multiple threads, so we must use a threadsafe AddRef and
64 // Release implementation.
// NOTE(review): the trailing "1" in the macro name presumably means exactly
// one interface (nsIURLParser) is exposed by nsBaseURLParser -- confirm
// against the macro's definition.
65 NS_IMPL_THREADSAFE_ISUPPORTS1(nsBaseURLParser
, nsIURLParser
)
// Helper macros for the position/length out-parameters used by every parser
// method in this file. |component| is pasted together with "Pos" and "Len" to
// form the names of the two out-pointers.
//
// SET_RESULT(component, pos, len)
//   Stores |pos| (converted to PRUint32) through the Pos pointer and |len|
//   (converted to PRInt32) through the Len pointer. Each store is skipped
//   when the corresponding pointer is null.
//
// OFFSET_RESULT(component, offset)
//   Adds |offset| to the value behind the Pos pointer when that pointer is
//   non-null. The Len pointer is not touched by the visible lines.
//
// NOTE(review): both definitions end in a line continuation in this listing;
// the closing lines of the macros are not visible here.
67 #define SET_RESULT(component, pos, len) \
69 if (component ## Pos) \
70 *component ## Pos = PRUint32(pos); \
71 if (component ## Len) \
72 *component ## Len = PRInt32(len); \
75 #define OFFSET_RESULT(component, offset) \
77 if (component ## Pos) \
78 *component ## Pos += offset; \
// Splits |spec| into its scheme, authority and path components. Results are
// written through the position/length out-pointers with SET_RESULT, which
// skips any pointer that is null. A length of -1 is what the code below
// stores for a component it considers absent.
//
// Steps visible in this listing:
//  - asserts that |spec| is non-null and takes strlen(spec) as specLen
//    (NOTE(review): the guard on that assignment is not visible here;
//    nsAuthURLParser::ParseServerInfo applies it only when the length is
//    negative -- confirm the same holds here);
//  - scans forward over at most specLen characters, skipping leading
//    whitespace/control characters, until a colon or a slash has been
//    recorded;
//  - a spec made only of whitespace/control characters yields scheme length
//    -1 and zero-length authority and path;
//  - trailing whitespace/control characters are trimmed from specLen;
//  - when a colon was found and it precedes the slash (or no slash was
//    found), the text before the colon is the scheme: it must pass
//    net_IsValidScheme and must not be followed directly by a second ':',
//    otherwise NS_ERROR_MALFORMED_URI is returned; the remainder goes to
//    ParseAfterScheme and the authority/path positions are then shifted by
//    colon + 1 - spec so they are relative to |spec|;
//  - otherwise the scheme is absent (-1) and the whole spec goes to
//    ParseAfterScheme.
// NOTE(review): the switch over the scanned characters, several branch
// bodies and the final return are not visible in this listing.
82 nsBaseURLParser::ParseURL(const char *spec
, PRInt32 specLen
,
83 PRUint32
*schemePos
, PRInt32
*schemeLen
,
84 PRUint32
*authorityPos
, PRInt32
*authorityLen
,
85 PRUint32
*pathPos
, PRInt32
*pathLen
)
87 NS_PRECONDITION(spec
, "null pointer");
90 specLen
= strlen(spec
);
92 const char *stop
= nsnull
;
93 const char *colon
= nsnull
;
94 const char *slash
= nsnull
;
96 PRInt32 len
= specLen
;
97 for (p
= spec
; len
&& *p
&& !colon
&& !slash
; ++p
, --len
) {
98 // skip leading whitespace and control characters
99 if (*p
> '\0' && *p
<= ' ') {
109 case '/': // start of filepath
110 case '?': // start of query
111 case '#': // start of ref
112 case ';': // start of param
116 case '@': // username@hostname
117 case '[': // start of IPv6 address literal
123 // disregard the first colon if it follows an '@' or a '['
124 if (colon
&& stop
&& colon
> stop
)
127 // if the spec only contained whitespace or control characters...
129 SET_RESULT(scheme
, 0, -1);
130 SET_RESULT(authority
, 0, 0);
131 SET_RESULT(path
, 0, 0);
135 // ignore trailing whitespace and control characters
136 for (p
= spec
+ specLen
- 1; ((unsigned char) *p
<= ' ') && (p
!= spec
); --p
)
139 specLen
= p
- spec
+ 1;
141 if (colon
&& (colon
< slash
|| !slash
)) {
143 // spec = <scheme>:/<the-rest>
147 // spec = <scheme>:<authority>
148 // spec = <scheme>:<path-no-slashes>
150 if (!net_IsValidScheme(spec
, colon
- spec
) || (*(colon
+1) == ':')) {
151 NS_WARNING("malformed uri");
152 return NS_ERROR_MALFORMED_URI
;
154 SET_RESULT(scheme
, 0, colon
- spec
);
// Parse the remainder only when the caller supplied authorityLen or pathLen.
155 if (authorityLen
|| pathLen
) {
156 PRUint32 offset
= colon
+ 1 - spec
;
157 ParseAfterScheme(colon
+ 1, specLen
- offset
,
158 authorityPos
, authorityLen
,
160 OFFSET_RESULT(authority
, offset
);
161 OFFSET_RESULT(path
, offset
);
166 // spec = <authority-no-port-or-password>/<path>
171 // spec = <authority-no-port-or-password>/<path-with-colon>
172 // spec = <path-with-colon>
176 // spec = <authority-no-port-or-password>
177 // spec = <path-no-slashes-or-colon>
179 SET_RESULT(scheme
, 0, -1);
180 if (authorityLen
|| pathLen
)
181 ParseAfterScheme(spec
, specLen
,
182 authorityPos
, authorityLen
,
// Base-class authority parser. The visible code asserts that |auth| is
// non-null, takes strlen(auth) as authLen, and then reports:
//   username: absent (length -1)
//   password: absent (length -1)
//   hostname: the whole authority, i.e. position 0 and length authLen
// NOTE(review): the guard on the strlen assignment, any parameters after
// hostnameLen and the return statement are not visible in this listing.
189 nsBaseURLParser::ParseAuthority(const char *auth
, PRInt32 authLen
,
190 PRUint32
*usernamePos
, PRInt32
*usernameLen
,
191 PRUint32
*passwordPos
, PRInt32
*passwordLen
,
192 PRUint32
*hostnamePos
, PRInt32
*hostnameLen
,
195 NS_PRECONDITION(auth
, "null pointer");
198 authLen
= strlen(auth
);
200 SET_RESULT(username
, 0, -1);
201 SET_RESULT(password
, 0, -1);
202 SET_RESULT(hostname
, 0, authLen
);
// Base-class user-info parser. The visible code does not inspect |userinfo|
// or |userinfoLen|; it reports both username and password as absent
// (position 0, length -1) through whichever out-pointers are non-null.
// NOTE(review): the return statement is not visible in this listing.
209 nsBaseURLParser::ParseUserInfo(const char *userinfo
, PRInt32 userinfoLen
,
210 PRUint32
*usernamePos
, PRInt32
*usernameLen
,
211 PRUint32
*passwordPos
, PRInt32
*passwordLen
)
213 SET_RESULT(username
, 0, -1);
214 SET_RESULT(password
, 0, -1);
// Base-class server-info parser. The visible code does not inspect
// |serverinfo| or |serverinfoLen|; it reports the hostname as absent
// (position 0, length -1) when the out-pointers are non-null.
// NOTE(review): any parameters after hostnameLen and the return statement
// are not visible in this listing.
219 nsBaseURLParser::ParseServerInfo(const char *serverinfo
, PRInt32 serverinfoLen
,
220 PRUint32
*hostnamePos
, PRInt32
*hostnameLen
,
223 SET_RESULT(hostname
, 0, -1);
// Splits |path| into filepath, param, query and ref, following the layout
// documented in the body: [/]<segments>;<param>?<query>#<ref>.
//
// What the visible code does:
//  - asserts that |path| is non-null and takes strlen(path) as pathLen
//    (NOTE(review): the guard on that assignment is not visible here);
//  - scans forward over [path, path + pathLen); a '?' is considered only if
//    neither a query nor a ref start has been recorded yet, and '#' is
//    handled in its own branch;
//  - query is reported either as [query_beg, query_end), or as everything
//    from query_beg to the end of the path, or as absent (length -1);
//  - ref is reported as everything from ref_beg to the end of the path, or as
//    absent (length -1);
//  - param is searched for backwards from |end|, stopping at a '/'; when
//    found it is reported as [param_beg, end), otherwise as absent (-1);
//  - filepath is reported as [path, end), or as absent (-1) when it would be
//    empty.
// NOTE(review): the conditions that choose between these SET_RESULT calls,
// the assignments to query_beg/query_end/ref_beg/param_beg, the other
// assignments to |end| and the return statement are not visible in this
// listing -- confirm against the full file.
230 nsBaseURLParser::ParsePath(const char *path
, PRInt32 pathLen
,
231 PRUint32
*filepathPos
, PRInt32
*filepathLen
,
232 PRUint32
*paramPos
, PRInt32
*paramLen
,
233 PRUint32
*queryPos
, PRInt32
*queryLen
,
234 PRUint32
*refPos
, PRInt32
*refLen
)
236 NS_PRECONDITION(path
, "null pointer");
239 pathLen
= strlen(path
);
241 // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
243 // XXX PL_strnpbrk would be nice, but it's buggy
245 // search for first occurrence of either ? or #
246 const char *query_beg
= 0, *query_end
= 0;
247 const char *ref_beg
= 0;
249 for (p
= path
; p
< path
+ pathLen
; ++p
) {
250 // only match the query string if it precedes the reference fragment
251 if (!ref_beg
&& !query_beg
&& *p
== '?')
253 else if (*p
== '#') {
263 SET_RESULT(query
, query_beg
- path
, query_end
- query_beg
);
265 SET_RESULT(query
, query_beg
- path
, pathLen
- (query_beg
- path
));
268 SET_RESULT(query
, 0, -1);
271 SET_RESULT(ref
, ref_beg
- path
, pathLen
- (ref_beg
- path
));
273 SET_RESULT(ref
, 0, -1);
275 // search backwards for param
276 const char *param_beg
= 0;
283 end
= path
+ pathLen
;
284 for (p
= end
- 1; p
>= path
&& *p
!= '/'; --p
) {
292 // found <filepath>;<param>
293 SET_RESULT(param
, param_beg
- path
, end
- param_beg
);
297 SET_RESULT(param
, 0, -1);
299 // an empty file path is no file path
301 SET_RESULT(filepath
, 0, end
- path
);
303 SET_RESULT(filepath
, 0, -1);
// Splits |filepath| into directory, basename and extension.
//  - Asserts that |filepath| is non-null and takes strlen(filepath) as
//    filepathLen (NOTE(review): the guard on that assignment is not visible
//    here).
//  - An empty filepath yields no directory (-1), a zero-length basename and
//    no extension (-1).
//  - Otherwise the code walks backwards from the last character until it
//    reaches a '/' or the first character of |filepath|.
//  - In the branch commented "<directory><filename>.<extension>" the
//    directory is reported as [0, p - filepath + 1), i.e. up to and including
//    the character at p, and the remainder is handed to ParseFileName. The
//    positions ParseFileName reports are relative to the file name, so
//    OFFSET_RESULT shifts them by p + 1 - filepath.
//  - In the branch commented "<filename>.<extension>" the directory is absent
//    (-1) and the whole filepath is parsed as a file name.
// The two-line condition that tests *(p+1) == '.' matches a final segment
// that is exactly "." or "..".
// NOTE(review): the statement guarded by that condition, the test that
// selects between the two branches, and the return statements are not
// visible in this listing.
308 nsBaseURLParser::ParseFilePath(const char *filepath
, PRInt32 filepathLen
,
309 PRUint32
*directoryPos
, PRInt32
*directoryLen
,
310 PRUint32
*basenamePos
, PRInt32
*basenameLen
,
311 PRUint32
*extensionPos
, PRInt32
*extensionLen
)
313 NS_PRECONDITION(filepath
, "null pointer");
316 filepathLen
= strlen(filepath
);
318 if (filepathLen
== 0) {
319 SET_RESULT(directory
, 0, -1);
320 SET_RESULT(basename
, 0, 0); // assume a zero length file basename
321 SET_RESULT(extension
, 0, -1);
326 const char *end
= filepath
+ filepathLen
;
328 // search backwards for filename
329 for (p
= end
- 1; *p
!= '/' && p
> filepath
; --p
)
333 if ((p
+1 < end
&& *(p
+1) == '.') &&
334 (p
+2 == end
|| (*(p
+2) == '.' && p
+3 == end
)))
336 // filepath = <directory><filename>.<extension>
337 SET_RESULT(directory
, 0, p
- filepath
+ 1);
338 ParseFileName(p
+ 1, end
- (p
+ 1),
339 basenamePos
, basenameLen
,
340 extensionPos
, extensionLen
);
341 OFFSET_RESULT(basename
, p
+ 1 - filepath
);
342 OFFSET_RESULT(extension
, p
+ 1 - filepath
);
345 // filepath = <filename>.<extension>
346 SET_RESULT(directory
, 0, -1);
347 ParseFileName(filepath
, filepathLen
,
348 basenamePos
, basenameLen
,
349 extensionPos
, extensionLen
);
// Splits |filename| into basename and extension.
//  - Asserts that |filename| is non-null and takes strlen(filename) as
//    filenameLen (NOTE(review): the guard on that assignment is not visible
//    here).
//  - If the last character is not '.', the name is scanned backwards from the
//    last character down to, but not including, the first one, so a '.' in
//    the first position is never treated as the extension separator. In the
//    branch commented "<basename.extension>" the basename is [0, p) and the
//    extension is everything after p.
//  - Otherwise the whole name is the basename and the extension is absent
//    (length -1).
// NOTE(review): filename[filenameLen-1] reads one byte before |filename| when
// filenameLen is 0, and no guard for that case is visible in this listing.
// Confirm that callers never pass an empty name, or that the preceding byte
// is always readable (as it is when called from ParseFilePath with p + 1).
// NOTE(review): the test inside the loop and the return statements are not
// visible in this listing.
355 nsBaseURLParser::ParseFileName(const char *filename
, PRInt32 filenameLen
,
356 PRUint32
*basenamePos
, PRInt32
*basenameLen
,
357 PRUint32
*extensionPos
, PRInt32
*extensionLen
)
359 NS_PRECONDITION(filename
, "null pointer");
362 filenameLen
= strlen(filename
);
364 // no extension if filename ends with a '.'
365 if (filename
[filenameLen
-1] != '.') {
366 // ignore '.' at the beginning
367 for (const char *p
= filename
+ filenameLen
- 1; p
> filename
; --p
) {
369 // filename = <basename.extension>
370 SET_RESULT(basename
, 0, p
- filename
);
371 SET_RESULT(extension
, p
+ 1 - filename
, filenameLen
- (p
- filename
+ 1));
376 // filename = <basename>
377 SET_RESULT(basename
, 0, filenameLen
);
378 SET_RESULT(extension
, 0, -1);
382 //----------------------------------------------------------------------------
383 // nsNoAuthURLParser implementation
384 //----------------------------------------------------------------------------
// nsNoAuthURLParser variant: splits the text that follows the scheme into
// authority and path, dispatching on the number of leading slashes reported
// by CountConsecutiveSlashes. |specLen| is asserted to be non-negative.
//  - In the branch commented "looks like there is an authority section", on
//    XP_WIN/XP_OS2 builds the code first checks whether the spec has the
//    form "//X:" or "//X|" (X an ASCII letter) followed by the end of the
//    spec, a '/' or a backslash; per the comment, such a drive specification
//    is meant to stay in the path.
//  - Otherwise memchr looks for the first '/' at or after spec + 2. If one is
//    found, the authority is [2, p) and the path is [p, specLen); if not,
//    the authority is everything after the first two characters and the
//    path is absent (length -1).
//  - The last pair of SET_RESULT calls reports a zero-length authority at
//    |pos| and the rest of the spec, starting at |pos|, as the path.
// NOTE(review): the case labels, the definition of |pos|, the body of the
// drive-letter branch and the return statement are not visible in this
// listing.
387 nsNoAuthURLParser::ParseAfterScheme(const char *spec
, PRInt32 specLen
,
388 PRUint32
*authPos
, PRInt32
*authLen
,
389 PRUint32
*pathPos
, PRInt32
*pathLen
)
391 NS_PRECONDITION(specLen
>= 0, "unexpected");
393 // everything is the path
395 switch (CountConsecutiveSlashes(spec
, specLen
)) {
401 const char *p
= nsnull
;
403 // looks like there is an authority section
404 #if defined(XP_WIN) || defined(XP_OS2)
405 // if the authority looks like a drive number then we
406 // really want to treat it as part of the path
407 if ((specLen
> 3) && (spec
[3] == ':' || spec
[3] == '|') &&
408 nsCRT::IsAsciiAlpha(spec
[2]) &&
409 ((specLen
== 4) || (spec
[4] == '/') || (spec
[4] == '\\'))) {
414 p
= (const char *) memchr(spec
+ 2, '/', specLen
- 2);
417 SET_RESULT(auth
, 2, p
- (spec
+ 2));
418 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
421 SET_RESULT(auth
, 2, specLen
- 2);
422 SET_RESULT(path
, 0, -1);
430 SET_RESULT(auth
, pos
, 0);
431 SET_RESULT(path
, pos
, specLen
- pos
);
434 #if defined(XP_WIN) || defined(XP_OS2)
// ParseFilePath override; it follows an "#if defined(XP_WIN) ||
// defined(XP_OS2)" line, so it is only compiled for those platforms.
//  - Asserts that |filepath| is non-null and takes strlen(filepath) as
//    filepathLen (NOTE(review): the guard on that assignment is not visible
//    here).
//  - For a filepath of 2 or 3 characters: if exactly two characters remain
//    from |p| to |end| and they are an ASCII letter followed by ':' or '|',
//    the path is treated as a bare drive specification. The whole filepath
//    is reported as the directory, and basename and extension are absent
//    (length -1).
//  - Everything else is delegated to nsBaseURLParser::ParseFilePath.
// NOTE(review): the code that lets |p| step over the optional leading slash
// mentioned in the comment below, and the return for the drive-only case,
// are not visible in this listing.
436 nsNoAuthURLParser::ParseFilePath(const char *filepath
, PRInt32 filepathLen
,
437 PRUint32
*directoryPos
, PRInt32
*directoryLen
,
438 PRUint32
*basenamePos
, PRInt32
*basenameLen
,
439 PRUint32
*extensionPos
, PRInt32
*extensionLen
)
441 NS_PRECONDITION(filepath
, "null pointer");
444 filepathLen
= strlen(filepath
);
446 // look for a filepath consisting of only a drive number, which may or
447 // may not have a leading slash.
448 if (filepathLen
> 1 && filepathLen
< 4) {
449 const char *end
= filepath
+ filepathLen
;
450 const char *p
= filepath
;
453 if ((end
-p
== 2) && (p
[1]==':' || p
[1]=='|') && nsCRT::IsAsciiAlpha(*p
)) {
454 // filepath = <drive-number>:
455 SET_RESULT(directory
, 0, filepathLen
);
456 SET_RESULT(basename
, 0, -1);
457 SET_RESULT(extension
, 0, -1);
462 // otherwise fallback on common implementation
463 return nsBaseURLParser::ParseFilePath(filepath
, filepathLen
,
464 directoryPos
, directoryLen
,
465 basenamePos
, basenameLen
,
466 extensionPos
, extensionLen
);
470 //----------------------------------------------------------------------------
471 // nsAuthURLParser implementation
472 //----------------------------------------------------------------------------
// Splits |auth| into user-info and server-info around an '@'.
//  - Asserts that |auth| is non-null and takes strlen(auth) as authLen
//    (NOTE(review): the guard on that assignment is not visible here).
//  - The first group of SET_RESULT calls reports no username, no password
//    and a zero-length hostname (NOTE(review): presumably the empty-authority
//    case; its guard is not visible in this listing).
//  - The backwards scan starts at the last character and stops on an '@' or
//    at the first character, so it finds the last '@' in the authority.
//  - In the branch commented "<user-info@server-info>", [auth, p) goes to
//    ParseUserInfo and the text after p goes to ParseServerInfo. A failure
//    from either call is returned immediately. The hostname position is then
//    shifted by p + 1 - auth so that it is relative to |auth|.
//  - In the branch commented "<server-info>", username and password are
//    absent (length -1) and the whole authority goes to ParseServerInfo.
// NOTE(review): the declaration of |rv|, the test that selects between the
// two branches, the arguments following hostnameLen in each ParseServerInfo
// call and the final return are not visible in this listing.
475 nsAuthURLParser::ParseAuthority(const char *auth
, PRInt32 authLen
,
476 PRUint32
*usernamePos
, PRInt32
*usernameLen
,
477 PRUint32
*passwordPos
, PRInt32
*passwordLen
,
478 PRUint32
*hostnamePos
, PRInt32
*hostnameLen
,
483 NS_PRECONDITION(auth
, "null pointer");
486 authLen
= strlen(auth
);
489 SET_RESULT(username
, 0, -1);
490 SET_RESULT(password
, 0, -1);
491 SET_RESULT(hostname
, 0, 0);
497 // search backwards for @
498 const char *p
= auth
+ authLen
- 1;
499 for (; (*p
!= '@') && (p
> auth
); --p
);
501 // auth = <user-info@server-info>
502 rv
= ParseUserInfo(auth
, p
- auth
,
503 usernamePos
, usernameLen
,
504 passwordPos
, passwordLen
);
505 if (NS_FAILED(rv
)) return rv
;
506 rv
= ParseServerInfo(p
+ 1, authLen
- (p
- auth
+ 1),
507 hostnamePos
, hostnameLen
,
509 if (NS_FAILED(rv
)) return rv
;
510 OFFSET_RESULT(hostname
, p
+ 1 - auth
);
513 // auth = <server-info>
514 SET_RESULT(username
, 0, -1);
515 SET_RESULT(password
, 0, -1);
516 rv
= ParseServerInfo(auth
, authLen
,
517 hostnamePos
, hostnameLen
,
519 if (NS_FAILED(rv
)) return rv
;
// Splits |userinfo| into username and password at the first ':'.
//  - Asserts that |userinfo| is non-null and takes strlen(userinfo) as
//    userinfoLen (NOTE(review): the guard on that assignment is not visible
//    here).
//  - Empty userinfo: username and password are both absent (length -1).
//  - memchr locates the first ':' within userinfoLen characters. In the
//    branch commented "<username:password>" the username is [0, p) and the
//    password is everything after the colon. The "must have a username!"
//    path returns NS_ERROR_MALFORMED_URI (NOTE(review): its condition is not
//    visible in this listing).
//  - In the branch commented "<username>" the whole userinfo is the username
//    and the password is absent (length -1).
// NOTE(review): the test on |p| that selects between the last two branches
// and the success return are not visible in this listing.
525 nsAuthURLParser::ParseUserInfo(const char *userinfo
, PRInt32 userinfoLen
,
526 PRUint32
*usernamePos
, PRInt32
*usernameLen
,
527 PRUint32
*passwordPos
, PRInt32
*passwordLen
)
529 NS_PRECONDITION(userinfo
, "null pointer");
532 userinfoLen
= strlen(userinfo
);
534 if (userinfoLen
== 0) {
535 SET_RESULT(username
, 0, -1);
536 SET_RESULT(password
, 0, -1);
540 const char *p
= (const char *) memchr(userinfo
, ':', userinfoLen
);
542 // userinfo = <username:password>
544 // must have a username!
545 return NS_ERROR_MALFORMED_URI
;
547 SET_RESULT(username
, 0, p
- userinfo
);
548 SET_RESULT(password
, p
- userinfo
+ 1, userinfoLen
- (p
- userinfo
+ 1));
551 // userinfo = <username>
552 SET_RESULT(username
, 0, userinfoLen
);
553 SET_RESULT(password
, 0, -1);
// Splits |serverinfo| into hostname and port.
//  - Asserts that |serverinfo| is non-null; a negative serverinfoLen is
//    replaced by strlen(serverinfo).
//  - Empty serverinfo yields a zero-length hostname at position 0.
//  - The scan runs backwards from the last character down to, but not
//    including, the first one. Per the comment it looks for a ':' and stops
//    on ']' so that colons inside an IPv6 literal are not taken as the port
//    separator. The "hostname must not contain a space" path returns
//    NS_ERROR_MALFORMED_URI.
//  - In the branch commented "<hostname:port>" the hostname is [0, colon).
//    The text after the colon is copied into an nsCAutoString and converted
//    with ToInteger, and the result is stored through |port|.
//  - In the branch commented "<hostname>" the whole serverinfo is the
//    hostname.
// NOTE(review): the |port| parameter declaration, the switch/if structure
// inside the scan loop, the handling of |err| and the return statements are
// not visible in this listing.
559 nsAuthURLParser::ParseServerInfo(const char *serverinfo
, PRInt32 serverinfoLen
,
560 PRUint32
*hostnamePos
, PRInt32
*hostnameLen
,
563 NS_PRECONDITION(serverinfo
, "null pointer");
565 if (serverinfoLen
< 0)
566 serverinfoLen
= strlen(serverinfo
);
568 if (serverinfoLen
== 0) {
569 SET_RESULT(hostname
, 0, 0);
575 // search backwards for a ':' but stop on ']' (IPv6 address literal
576 // delimiter). check for illegal characters in the hostname.
577 const char *p
= serverinfo
+ serverinfoLen
- 1;
578 const char *colon
= nsnull
, *bracket
= nsnull
;
579 for (; p
> serverinfo
; --p
) {
585 if (bracket
== nsnull
)
589 // hostname must not contain a space
590 NS_WARNING("malformed hostname");
591 return NS_ERROR_MALFORMED_URI
;
596 // serverinfo = <hostname:port>
597 SET_RESULT(hostname
, 0, colon
- serverinfo
);
599 // XXX unfortunately ToInteger is not defined for substrings
600 nsCAutoString
buf(colon
+1, serverinfoLen
- (colon
+ 1 - serverinfo
));
602 *port
= buf
.ToInteger(&err
);
608 // serverinfo = <hostname>
609 SET_RESULT(hostname
, 0, serverinfoLen
);
// nsAuthURLParser variant: splits the text that follows the scheme into
// authority and path. |specLen| is asserted to be non-negative.
//  - nslash is the number of leading '/' characters in |spec|.
//  - Starting just past those slashes, the loop looks for the first '/',
//    '?', '#' or ';', any of which ends the authority section.
//  - In the branch commented "[/]<auth><path>" the authority is
//    [nslash, p) and the path is [p, specLen).
//  - In the other branch the authority is everything after the leading
//    slashes and the path is absent (length -1).
// NOTE(review): the declaration of |p|, the statement inside the loop, the
// test that selects between the two branches and the return statement are
// not visible in this listing.
617 nsAuthURLParser::ParseAfterScheme(const char *spec
, PRInt32 specLen
,
618 PRUint32
*authPos
, PRInt32
*authLen
,
619 PRUint32
*pathPos
, PRInt32
*pathLen
)
621 NS_PRECONDITION(specLen
>= 0, "unexpected");
623 PRUint32 nslash
= CountConsecutiveSlashes(spec
, specLen
);
625 // search for the end of the authority section
626 const char *end
= spec
+ specLen
;
628 for (p
= spec
+ nslash
; p
< end
; ++p
) {
629 if (*p
== '/' || *p
== '?' || *p
== '#' || *p
== ';')
633 // spec = [/]<auth><path>
634 SET_RESULT(auth
, nslash
, p
- (spec
+ nslash
));
635 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
639 SET_RESULT(auth
, nslash
, specLen
- nslash
);
640 SET_RESULT(path
, 0, -1);
644 //----------------------------------------------------------------------------
645 // nsStdURLParser implementation
646 //----------------------------------------------------------------------------
// nsStdURLParser variant: splits the text that follows the scheme into
// authority and path. |specLen| is asserted to be non-negative.
//  - nslash is the number of leading '/' characters in |spec|.
//  - Starting just past those slashes, the loop looks for the first
//    character contained in "/?#;", which ends the authority section.
//  - In the branch commented "(//)<auth><path>" the authority is
//    [nslash, p) and the path is [p, specLen); in the sibling branch the
//    authority is everything after the leading slashes and the path is
//    absent (length -1).
//  - Another branch reports the authority as absent (length -1) and the
//    whole spec as the path.
//  - The branch commented "///[/]<path>" reports a zero-length authority at
//    position 2 and the path as everything from position 2 onwards.
// NOTE(review): strchr also matches the string's terminating NUL, so a NUL
// byte inside [spec + nslash, end) would end the authority scan here, unlike
// the explicit comparisons in nsAuthURLParser::ParseAfterScheme. Confirm
// whether that difference is intended.
// NOTE(review): the declaration of |p|, the statement inside the loop, the
// tests or case labels that select between these branches and the return
// statement are not visible in this listing.
649 nsStdURLParser::ParseAfterScheme(const char *spec
, PRInt32 specLen
,
650 PRUint32
*authPos
, PRInt32
*authLen
,
651 PRUint32
*pathPos
, PRInt32
*pathLen
)
653 NS_PRECONDITION(specLen
>= 0, "unexpected");
655 PRUint32 nslash
= CountConsecutiveSlashes(spec
, specLen
);
657 // search for the end of the authority section
658 const char *end
= spec
+ specLen
;
660 for (p
= spec
+ nslash
; p
< end
; ++p
) {
661 if (strchr("/?#;", *p
))
668 // spec = (//)<auth><path>
669 SET_RESULT(auth
, nslash
, p
- (spec
+ nslash
));
670 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
674 SET_RESULT(auth
, nslash
, specLen
- nslash
);
675 SET_RESULT(path
, 0, -1);
680 SET_RESULT(auth
, 0, -1);
681 SET_RESULT(path
, 0, specLen
);
684 // spec = ///[/]<path>
685 SET_RESULT(auth
, 2, 0);
686 SET_RESULT(path
, 2, specLen
- 2);