2 * uri.c: set of generic URI related routines
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
21 /************************************************************************
23 * Macros to differentiate various character type *
24 * directly extracted from RFC 2396 *
26 ************************************************************************/
29 * alpha = lowalpha | upalpha
/* alpha = lowalpha | upalpha: non-zero when x is in 'a'..'z' or 'A'..'Z'. */
#define IS_ALPHA(x) \
    (IS_LOWALPHA(x) || IS_UPALPHA(x))
35 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
36 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
37 * "u" | "v" | "w" | "x" | "y" | "z"
/* lowalpha: non-zero when x lies in the character range 'a'..'z'. */
#define IS_LOWALPHA(x) \
    (((x) >= 'a') && !((x) > 'z'))
43 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
44 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
45 * "U" | "V" | "W" | "X" | "Y" | "Z"
/* upalpha: non-zero when x lies in the character range 'A'..'Z'. */
#define IS_UPALPHA(x) \
    (((x) >= 'A') && !((x) > 'Z'))
50 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
/* digit: non-zero when x lies in the character range '0'..'9'. */
#define IS_DIGIT(x) \
    (((x) >= '0') && !((x) > '9'))
56 * alphanum = alpha | digit
/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) \
    (IS_ALPHA(x) || IS_DIGIT(x))
62 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
63 * "a" | "b" | "c" | "d" | "e" | "f"
/* hex = digit | "A".."F" | "a".."f": non-zero for one hexadecimal digit. */
#define IS_HEX(x)                          \
    (IS_DIGIT(x) ||                        \
     (((x) >= 'a') && ((x) <= 'f')) ||     \
     (((x) >= 'A') && ((x) <= 'F')))
70 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)      \
    (((x) == '-') ||    \
     ((x) == '_') ||    \
     ((x) == '.') ||    \
     ((x) == '!') ||    \
     ((x) == '~') ||    \
     ((x) == '*') ||    \
     ((x) == '\'') ||   \
     ((x) == '(') ||    \
     ((x) == ')'))
79 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x)  \
    (((x) == ';') ||    \
     ((x) == '/') ||    \
     ((x) == '?') ||    \
     ((x) == ':') ||    \
     ((x) == '@') ||    \
     ((x) == '&') ||    \
     ((x) == '=') ||    \
     ((x) == '+') ||    \
     ((x) == '$') ||    \
     ((x) == ','))
87 * unreserved = alphanum | mark
/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) \
    (IS_ALPHANUM(x) || IS_MARK(x))
93 * escaped = "%" hex hex
96 #define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
100 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
101 * "&" | "=" | "+" | "$" | ","
/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * Takes a pointer p and tests the character(s) it points at.
 */
#define IS_URIC_NO_SLASH(p)                         \
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||        \
     (*(p) == ';') || (*(p) == '?') ||              \
     (*(p) == ':') || (*(p) == '@') ||              \
     (*(p) == '&') || (*(p) == '=') ||              \
     (*(p) == '+') || (*(p) == '$') ||              \
     (*(p) == ','))
109 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
111 #define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
112 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
113 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
117 * rel_segment = 1*( unreserved | escaped |
118 * ";" | "@" | "&" | "=" | "+" | "$" | "," )
121 #define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
122 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
123 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
127 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
/*
 * Characters allowed inside a scheme name:
 * alpha | digit | "+" | "-" | "."
 */
#define IS_SCHEME(x)                    \
    (IS_ALPHA(x) || IS_DIGIT(x) ||      \
     ((x) == '+') ||                    \
     ((x) == '-') ||                    \
     ((x) == '.'))
134 * reg_name = 1*( unreserved | escaped | "$" | "," |
135 * ";" | ":" | "@" | "&" | "=" | "+" )
/*
 * One character of reg_name:
 * unreserved | escaped | "$" | "," | ";" | ":" | "@" | "&" | "=" | "+"
 * Takes a pointer p and tests the character(s) it points at.
 */
#define IS_REG_NAME(p)                              \
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||        \
     (*(p) == '$') || (*(p) == ',') ||              \
     (*(p) == ';') || (*(p) == ':') ||              \
     (*(p) == '@') || (*(p) == '&') ||              \
     (*(p) == '=') || (*(p) == '+'))
144 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
147 #define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
148 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
149 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
153 * uric = reserved | unreserved | escaped
156 #define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
160 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * Takes a pointer p and tests the character it points at.
 */
#define IS_UNWISE(p)        \
    ((*(p) == '{') ||       \
     (*(p) == '}') ||       \
     (*(p) == '|') ||       \
     (*(p) == '\\') ||      \
     (*(p) == '^') ||       \
     (*(p) == '[') ||       \
     (*(p) == ']') ||       \
     (*(p) == '`'))
169 * Skip to next pointer char, handle escaped sequences
/*
 * Advance the pointer lvalue p past one URI character: three bytes when it
 * points at '%' (the start of a "%XX" escape), one byte otherwise.
 *
 * The argument is parenthesized in the expansion so that lvalue expressions
 * such as *pp are advanced correctly (unparenthesized, "*pp++" would move pp
 * instead of *pp).  p is still evaluated more than once, so it must be free
 * of side effects.  The macro does not check that two bytes actually follow
 * the '%'.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
175 * Productions from the spec.
177 * authority = server | reg_name
178 * reg_name = 1*( unreserved | escaped | "$" | "," |
179 * ";" | ":" | "@" | "&" | "=" | "+" )
181 * path = [ abs_path | opaque_part ]
184 /************************************************************************
186 * Generic URI structure functions *
188 ************************************************************************/
193 * Simply creates an empty xmlURI
195 * Returns the new structure or NULL in case of error
201 ret
= (xmlURIPtr
) xmlMalloc(sizeof(xmlURI
));
203 xmlGenericError(xmlGenericErrorContext
,
204 "xmlCreateURI: out of memory\n");
207 memset(ret
, 0, sizeof(xmlURI
));
213 * @uri: pointer to an xmlURI
215 * Save the URI as an escaped string
217 * Returns a new string (to be deallocated by caller)
220 xmlSaveUri(xmlURIPtr uri
) {
226 if (uri
== NULL
) return(NULL
);
230 ret
= (xmlChar
*) xmlMalloc((max
+ 1) * sizeof(xmlChar
));
232 xmlGenericError(xmlGenericErrorContext
,
233 "xmlSaveUri: out of memory\n");
238 if (uri
->scheme
!= NULL
) {
243 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
245 xmlGenericError(xmlGenericErrorContext
,
246 "xmlSaveUri: out of memory\n");
254 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
256 xmlGenericError(xmlGenericErrorContext
,
257 "xmlSaveUri: out of memory\n");
263 if (uri
->opaque
!= NULL
) {
266 if (len
+ 3 >= max
) {
268 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
270 xmlGenericError(xmlGenericErrorContext
,
271 "xmlSaveUri: out of memory\n");
275 if ((IS_UNRESERVED(*(p
))) ||
276 ((*(p
) == ';')) || ((*(p
) == '?')) || ((*(p
) == ':')) ||
277 ((*(p
) == '@')) || ((*(p
) == '&')) || ((*(p
) == '=')) ||
278 ((*(p
) == '+')) || ((*(p
) == '$')) || ((*(p
) == ',')))
281 int val
= *(unsigned char *)p
++;
282 int hi
= val
/ 0x10, lo
= val
% 0x10;
284 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
285 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
290 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
292 xmlGenericError(xmlGenericErrorContext
,
293 "xmlSaveUri: out of memory\n");
299 if (uri
->server
!= NULL
) {
300 if (len
+ 3 >= max
) {
302 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
304 xmlGenericError(xmlGenericErrorContext
,
305 "xmlSaveUri: out of memory\n");
311 if (uri
->user
!= NULL
) {
314 if (len
+ 3 >= max
) {
316 ret
= (xmlChar
*) xmlRealloc(ret
,
317 (max
+ 1) * sizeof(xmlChar
));
319 xmlGenericError(xmlGenericErrorContext
,
320 "xmlSaveUri: out of memory\n");
324 if ((IS_UNRESERVED(*(p
))) ||
325 ((*(p
) == ';')) || ((*(p
) == ':')) ||
326 ((*(p
) == '&')) || ((*(p
) == '=')) ||
327 ((*(p
) == '+')) || ((*(p
) == '$')) ||
331 int val
= *(unsigned char *)p
++;
332 int hi
= val
/ 0x10, lo
= val
% 0x10;
334 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
335 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
338 if (len
+ 3 >= max
) {
340 ret
= (xmlChar
*) xmlRealloc(ret
,
341 (max
+ 1) * sizeof(xmlChar
));
343 xmlGenericError(xmlGenericErrorContext
,
344 "xmlSaveUri: out of memory\n");
354 ret
= (xmlChar
*) xmlRealloc(ret
,
355 (max
+ 1) * sizeof(xmlChar
));
357 xmlGenericError(xmlGenericErrorContext
,
358 "xmlSaveUri: out of memory\n");
365 if (len
+ 10 >= max
) {
367 ret
= (xmlChar
*) xmlRealloc(ret
,
368 (max
+ 1) * sizeof(xmlChar
));
370 xmlGenericError(xmlGenericErrorContext
,
371 "xmlSaveUri: out of memory\n");
375 len
+= snprintf((char *) &ret
[len
], max
- len
, ":%d", uri
->port
);
377 } else if (uri
->authority
!= NULL
) {
378 if (len
+ 3 >= max
) {
380 ret
= (xmlChar
*) xmlRealloc(ret
,
381 (max
+ 1) * sizeof(xmlChar
));
383 xmlGenericError(xmlGenericErrorContext
,
384 "xmlSaveUri: out of memory\n");
392 if (len
+ 3 >= max
) {
394 ret
= (xmlChar
*) xmlRealloc(ret
,
395 (max
+ 1) * sizeof(xmlChar
));
397 xmlGenericError(xmlGenericErrorContext
,
398 "xmlSaveUri: out of memory\n");
402 if ((IS_UNRESERVED(*(p
))) ||
403 ((*(p
) == '$')) || ((*(p
) == ',')) || ((*(p
) == ';')) ||
404 ((*(p
) == ':')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
405 ((*(p
) == '=')) || ((*(p
) == '+')))
408 int val
= *(unsigned char *)p
++;
409 int hi
= val
/ 0x10, lo
= val
% 0x10;
411 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
412 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
415 } else if (uri
->scheme
!= NULL
) {
416 if (len
+ 3 >= max
) {
418 ret
= (xmlChar
*) xmlRealloc(ret
,
419 (max
+ 1) * sizeof(xmlChar
));
421 xmlGenericError(xmlGenericErrorContext
,
422 "xmlSaveUri: out of memory\n");
429 if (uri
->path
!= NULL
) {
432 if (len
+ 3 >= max
) {
434 ret
= (xmlChar
*) xmlRealloc(ret
,
435 (max
+ 1) * sizeof(xmlChar
));
437 xmlGenericError(xmlGenericErrorContext
,
438 "xmlSaveUri: out of memory\n");
442 if ((IS_UNRESERVED(*(p
))) || ((*(p
) == '/')) ||
443 ((*(p
) == ';')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
444 ((*(p
) == '=')) || ((*(p
) == '+')) || ((*(p
) == '$')) ||
448 int val
= *(unsigned char *)p
++;
449 int hi
= val
/ 0x10, lo
= val
% 0x10;
451 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
452 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
456 if (uri
->query
!= NULL
) {
457 if (len
+ 3 >= max
) {
459 ret
= (xmlChar
*) xmlRealloc(ret
,
460 (max
+ 1) * sizeof(xmlChar
));
462 xmlGenericError(xmlGenericErrorContext
,
463 "xmlSaveUri: out of memory\n");
470 if (len
+ 3 >= max
) {
472 ret
= (xmlChar
*) xmlRealloc(ret
,
473 (max
+ 1) * sizeof(xmlChar
));
475 xmlGenericError(xmlGenericErrorContext
,
476 "xmlSaveUri: out of memory\n");
480 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
483 int val
= *(unsigned char *)p
++;
484 int hi
= val
/ 0x10, lo
= val
% 0x10;
486 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
487 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
491 if (uri
->fragment
!= NULL
) {
492 if (len
+ 3 >= max
) {
494 ret
= (xmlChar
*) xmlRealloc(ret
,
495 (max
+ 1) * sizeof(xmlChar
));
497 xmlGenericError(xmlGenericErrorContext
,
498 "xmlSaveUri: out of memory\n");
505 if (len
+ 3 >= max
) {
507 ret
= (xmlChar
*) xmlRealloc(ret
,
508 (max
+ 1) * sizeof(xmlChar
));
510 xmlGenericError(xmlGenericErrorContext
,
511 "xmlSaveUri: out of memory\n");
515 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
518 int val
= *(unsigned char *)p
++;
519 int hi
= val
/ 0x10, lo
= val
% 0x10;
521 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
522 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
528 ret
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
530 xmlGenericError(xmlGenericErrorContext
,
531 "xmlSaveUri: out of memory\n");
542 * @stream: a FILE* for the output
543 * @uri: pointer to an xmlURI
545 * Prints the URI in the stream @stream.
548 xmlPrintURI(FILE *stream
, xmlURIPtr uri
) {
551 out
= xmlSaveUri(uri
);
553 fprintf(stream
, "%s", out
);
560 * @uri: pointer to an xmlURI
562 * Make sure the xmlURI struct is free of content
565 xmlCleanURI(xmlURIPtr uri
) {
566 if (uri
== NULL
) return;
568 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
570 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
572 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
574 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
576 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
577 uri
->fragment
= NULL
;
578 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
580 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
581 uri
->authority
= NULL
;
582 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
588 * @uri: pointer to an xmlURI
590 * Free up the xmlURI struct
593 xmlFreeURI(xmlURIPtr uri
) {
594 if (uri
== NULL
) return;
596 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
597 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
598 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
599 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
600 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
601 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
602 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
603 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
607 /************************************************************************
611 ************************************************************************/
614 * xmlNormalizeURIPath:
615 * @path: pointer to the path string
617 * Applies the 5 normalization steps to a path string--that is, RFC 2396
618 * Section 5.2, steps 6.c through 6.g.
620 * Normalization occurs directly on the string, no new allocation is done
622 * Returns 0 or an error code
625 xmlNormalizeURIPath(char *path
) {
631 /* Skip all initial "/" chars. We want to get to the beginning of the
632 * first non-empty segment.
635 while (cur
[0] == '/')
640 /* Keep everything we've seen so far. */
644 * Analyze each segment in sequence for cases (c) and (d).
646 while (cur
[0] != '\0') {
648 * c) All occurrences of "./", where "." is a complete path segment,
649 * are removed from the buffer string.
651 if ((cur
[0] == '.') && (cur
[1] == '/')) {
653 /* '//' normalization should be done at this point too */
654 while (cur
[0] == '/')
660 * d) If the buffer string ends with "." as a complete path segment,
661 * that "." is removed.
663 if ((cur
[0] == '.') && (cur
[1] == '\0'))
666 /* Otherwise keep the segment. */
667 while (cur
[0] != '/') {
670 (out
++)[0] = (cur
++)[0];
673 while ((cur
[0] == '/') && (cur
[1] == '/'))
676 (out
++)[0] = (cur
++)[0];
681 /* Reset to the beginning of the first segment for the next sequence. */
683 while (cur
[0] == '/')
689 * Analyze each segment in sequence for cases (e) and (f).
691 * e) All occurrences of "<segment>/../", where <segment> is a
692 * complete path segment not equal to "..", are removed from the
693 * buffer string. Removal of these path segments is performed
694 * iteratively, removing the leftmost matching pattern on each
695 * iteration, until no matching pattern remains.
697 * f) If the buffer string ends with "<segment>/..", where <segment>
698 * is a complete path segment not equal to "..", that
699 * "<segment>/.." is removed.
701 * To satisfy the "iterative" clause in (e), we need to collapse the
702 * string every time we find something that needs to be removed. Thus,
703 * we don't need to keep two pointers into the string: we only need a
704 * "current position" pointer.
709 /* At the beginning of each iteration of this loop, "cur" points to
710 * the first character of the segment we want to examine.
713 /* Find the end of the current segment. */
715 while ((segp
[0] != '/') && (segp
[0] != '\0'))
718 /* If this is the last segment, we're done (we need at least two
719 * segments to meet the criteria for the (e) and (f) cases).
724 /* If the first segment is "..", or if the next segment _isn't_ "..",
725 * keep this segment and try the next one.
728 if (((cur
[0] == '.') && (cur
[1] == '.') && (segp
== cur
+3))
729 || ((segp
[0] != '.') || (segp
[1] != '.')
730 || ((segp
[2] != '/') && (segp
[2] != '\0')))) {
735 /* If we get here, remove this segment and the next one and back up
736 * to the previous segment (if there is one), to implement the
737 * "iteratively" clause. It's pretty much impossible to back up
738 * while maintaining two pointers into the buffer, so just compact
739 * the whole buffer now.
742 /* If this is the end of the buffer, we're done. */
743 if (segp
[2] == '\0') {
747 strcpy(cur
, segp
+ 3);
749 /* If there are no previous segments, then keep going from here. */
751 while ((segp
> path
) && ((--segp
)[0] == '/'))
756 /* "segp" is pointing to the end of a previous segment; find its
757 * start. We need to back up to the previous segment and start
758 * over with that to handle things like "foo/bar/../..". If we
759 * don't do this, then on the first pass we'll remove the "bar/..",
760 * but be pointing at the second ".." so we won't realize we can also
761 * remove the "foo/..".
764 while ((cur
> path
) && (cur
[-1] != '/'))
770 * g) If the resulting buffer string still begins with one or more
771 * complete path segments of "..", then the reference is
772 * considered to be in error. Implementations may handle this
773 * error by retaining these components in the resolved path (i.e.,
774 * treating them as part of the final URI), by removing them from
775 * the resolved path (i.e., discarding relative levels above the
776 * root), or by avoiding traversal of the reference.
778 * We discard them from the final path.
780 if (path
[0] == '/') {
782 while ((cur
[1] == '.') && (cur
[2] == '.')
783 && ((cur
[3] == '/') || (cur
[3] == '\0')))
788 while (cur
[0] != '\0')
789 (out
++)[0] = (cur
++)[0];
798 * xmlURIUnescapeString:
799 * @str: the string to unescape
800 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
801 * @target: optional destination buffer
803 * Unescaping routine, does not do validity checks !
804 * Output is direct unsigned char translation of %XX values (no encoding)
806 * Returns a copy of the string, but unescaped
809 xmlURIUnescapeString(const char *str
, int len
, char *target
) {
815 if (len
<= 0) len
= strlen(str
);
816 if (len
<= 0) return(NULL
);
818 if (target
== NULL
) {
819 ret
= (char *) xmlMalloc(len
+ 1);
821 xmlGenericError(xmlGenericErrorContext
,
822 "xmlURIUnescapeString: out of memory\n");
832 if ((*in
>= '0') && (*in
<= '9'))
834 else if ((*in
>= 'a') && (*in
<= 'f'))
835 *out
= (*in
- 'a') + 10;
836 else if ((*in
>= 'A') && (*in
<= 'F'))
837 *out
= (*in
- 'A') + 10;
839 if ((*in
>= '0') && (*in
<= '9'))
840 *out
= *out
* 16 + (*in
- '0');
841 else if ((*in
>= 'a') && (*in
<= 'f'))
842 *out
= *out
* 16 + (*in
- 'a') + 10;
843 else if ((*in
>= 'A') && (*in
<= 'F'))
844 *out
= *out
* 16 + (*in
- 'A') + 10;
859 * @str: string to escape
860 * @list: exception list string of chars not to escape
862 * This routine escapes a string to hex, ignoring unreserved characters, '@'
863 * and the characters in the exception list.
865 * Returns a new escaped string or NULL in case of error.
868 xmlURIEscapeStr(const xmlChar
*str
, const xmlChar
*list
) {
872 unsigned int len
, out
;
876 len
= xmlStrlen(str
);
877 if (!(len
> 0)) return(NULL
);
880 ret
= (xmlChar
*) xmlMalloc(len
);
882 xmlGenericError(xmlGenericErrorContext
,
883 "xmlURIEscapeStr: out of memory\n");
886 in
= (const xmlChar
*) str
;
889 if (len
- out
<= 3) {
891 ret
= (xmlChar
*) xmlRealloc(ret
, len
);
893 xmlGenericError(xmlGenericErrorContext
,
894 "xmlURIEscapeStr: out of memory\n");
901 if ((ch
!= '@') && (!IS_UNRESERVED(ch
)) && (!xmlStrchr(list
, ch
))) {
906 ret
[out
++] = '0' + val
;
908 ret
[out
++] = 'A' + val
- 0xA;
911 ret
[out
++] = '0' + val
;
913 ret
[out
++] = 'A' + val
- 0xA;
926 * @str: the string of the URI to escape
928 * Escaping routine, does not do validity checks !
929 * It will try to escape the chars needing this, but this is heuristic
930 * based; it's impossible to be sure.
932 * Returns a copy of the string, but escaped
935 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
936 * according to RFC2396.
940 xmlURIEscape(const xmlChar
* str
)
942 xmlChar
*ret
, *segment
= NULL
;
946 #define NULLCHK(p) if(!p) { \
947 xmlGenericError(xmlGenericErrorContext, \
948 "xmlURIEscape: out of memory\n"); \
954 uri
= xmlCreateURI();
957 * Allow escaping errors in the unescaped form
960 ret2
= xmlParseURIReference(uri
, (const char *)str
);
973 segment
= xmlURIEscapeStr(BAD_CAST uri
->scheme
, BAD_CAST
"+-.");
975 ret
= xmlStrcat(ret
, segment
);
976 ret
= xmlStrcat(ret
, BAD_CAST
":");
980 if (uri
->authority
) {
982 xmlURIEscapeStr(BAD_CAST uri
->authority
, BAD_CAST
"/?;:@");
984 ret
= xmlStrcat(ret
, BAD_CAST
"//");
985 ret
= xmlStrcat(ret
, segment
);
990 segment
= xmlURIEscapeStr(BAD_CAST uri
->user
, BAD_CAST
";:&=+$,");
992 ret
= xmlStrcat(ret
, segment
);
993 ret
= xmlStrcat(ret
, BAD_CAST
"@");
998 segment
= xmlURIEscapeStr(BAD_CAST uri
->server
, BAD_CAST
"/?;:@");
1000 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1001 ret
= xmlStrcat(ret
, segment
);
1008 snprintf((char *) port
, 10, "%d", uri
->port
);
1009 ret
= xmlStrcat(ret
, BAD_CAST
":");
1010 ret
= xmlStrcat(ret
, port
);
1015 xmlURIEscapeStr(BAD_CAST uri
->path
, BAD_CAST
":@&=+$,/?;");
1017 ret
= xmlStrcat(ret
, segment
);
1023 xmlURIEscapeStr(BAD_CAST uri
->query
, BAD_CAST
";/?:@&=+,$");
1025 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1026 ret
= xmlStrcat(ret
, segment
);
1031 segment
= xmlURIEscapeStr(BAD_CAST uri
->opaque
, BAD_CAST
"");
1033 ret
= xmlStrcat(ret
, segment
);
1037 if (uri
->fragment
) {
1038 segment
= xmlURIEscapeStr(BAD_CAST uri
->fragment
, BAD_CAST
"#");
1040 ret
= xmlStrcat(ret
, BAD_CAST
"#");
1041 ret
= xmlStrcat(ret
, segment
);
1051 /************************************************************************
1053 * Escaped URI parsing *
1055 ************************************************************************/
1058 * xmlParseURIFragment:
1059 * @uri: pointer to an URI structure
1060 * @str: pointer to the string to analyze
1062 * Parse an URI fragment string and fills in the appropriate fields
1063 * of the @uri structure.
1067 * Returns 0 or the error code
1070 xmlParseURIFragment(xmlURIPtr uri
, const char **str
)
1072 const char *cur
= *str
;
1077 while (IS_URIC(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1080 if (uri
->fragment
!= NULL
)
1081 xmlFree(uri
->fragment
);
1082 uri
->fragment
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1090 * @uri: pointer to an URI structure
1091 * @str: pointer to the string to analyze
1093 * Parse the query part of an URI
1097 * Returns 0 or the error code
1100 xmlParseURIQuery(xmlURIPtr uri
, const char **str
)
1102 const char *cur
= *str
;
1107 while (IS_URIC(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1110 if (uri
->query
!= NULL
)
1111 xmlFree(uri
->query
);
1112 uri
->query
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1119 * xmlParseURIScheme:
1120 * @uri: pointer to an URI structure
1121 * @str: pointer to the string to analyze
1123 * Parse an URI scheme
1125 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1127 * Returns 0 or the error code
1130 xmlParseURIScheme(xmlURIPtr uri
, const char **str
) {
1137 if (!IS_ALPHA(*cur
))
1140 while (IS_SCHEME(*cur
)) cur
++;
1142 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1144 uri
->scheme
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1151 * xmlParseURIOpaquePart:
1152 * @uri: pointer to an URI structure
1153 * @str: pointer to the string to analyze
1155 * Parse an URI opaque part
1157 * opaque_part = uric_no_slash *uric
1159 * Returns 0 or the error code
1162 xmlParseURIOpaquePart(xmlURIPtr uri
, const char **str
)
1170 if (!(IS_URIC_NO_SLASH(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))) {
1174 while (IS_URIC(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1177 if (uri
->opaque
!= NULL
)
1178 xmlFree(uri
->opaque
);
1179 uri
->opaque
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1186 * xmlParseURIServer:
1187 * @uri: pointer to an URI structure
1188 * @str: pointer to the string to analyze
1190 * Parse a server subpart of an URI, it's a finer grain analysis
1191 * of the authority part.
1193 * server = [ [ userinfo "@" ] hostport ]
1194 * userinfo = *( unreserved | escaped |
1195 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1196 * hostport = host [ ":" port ]
1197 * host = hostname | IPv4address
1198 * hostname = *( domainlabel "." ) toplabel [ "." ]
1199 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1200 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1201 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1204 * Returns 0 or the error code
1207 xmlParseURIServer(xmlURIPtr uri
, const char **str
) {
1209 const char *host
, *tmp
;
1217 * is there an userinfo ?
1219 while (IS_USERINFO(cur
)) NEXT(cur
);
1222 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1223 uri
->user
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1228 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1234 * This can be empty in the case where there is no server
1239 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1240 uri
->authority
= NULL
;
1241 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1248 * host part of hostport can derive either an IPV4 address
1249 * or an unresolved name. Check the IP first, it is easier to detect
1250 * errors if wrong one
1252 if (IS_DIGIT(*cur
)) {
1253 while(IS_DIGIT(*cur
)) cur
++;
1257 if (!IS_DIGIT(*cur
))
1259 while(IS_DIGIT(*cur
)) cur
++;
1263 if (!IS_DIGIT(*cur
))
1265 while(IS_DIGIT(*cur
)) cur
++;
1269 if (!IS_DIGIT(*cur
))
1271 while(IS_DIGIT(*cur
)) cur
++;
1273 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1274 uri
->authority
= NULL
;
1275 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1276 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
1282 * the hostname production as-is is a parser nightmare.
1284 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1285 * and just make sure the last label starts with a non numeric char.
1287 if (!IS_ALPHANUM(*cur
))
1289 while (IS_ALPHANUM(*cur
)) {
1290 while ((IS_ALPHANUM(*cur
)) || (*cur
== '-')) cur
++;
1296 while (IS_ALPHANUM(*tmp
) && (*tmp
!= '.') && (tmp
>= host
)) tmp
--;
1298 if (!IS_ALPHA(*tmp
))
1301 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1302 uri
->authority
= NULL
;
1303 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1304 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
1310 * finish by checking for a port presence.
1314 if (IS_DIGIT(*cur
)) {
1317 while (IS_DIGIT(*cur
)) {
1319 uri
->port
= uri
->port
* 10 + (*cur
- '0');
1329 * xmlParseURIRelSegment:
1330 * @uri: pointer to an URI structure
1331 * @str: pointer to the string to analyze
1333 * Parse an URI relative segment
1335 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1338 * Returns 0 or the error code
1341 xmlParseURIRelSegment(xmlURIPtr uri
, const char **str
)
1349 if (!(IS_SEGMENT(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))) {
1353 while (IS_SEGMENT(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1356 if (uri
->path
!= NULL
)
1358 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1365 * xmlParseURIPathSegments:
1366 * @uri: pointer to an URI structure
1367 * @str: pointer to the string to analyze
1368 * @slash: should we add a leading slash
1370 * Parse an URI set of path segments
1372 * path_segments = segment *( "/" segment )
1373 * segment = *pchar *( ";" param )
1376 * Returns 0 or the error code
1379 xmlParseURIPathSegments(xmlURIPtr uri
, const char **str
, int slash
)
1389 while (IS_PCHAR(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1391 while (*cur
== ';') {
1393 while (IS_PCHAR(cur
) || ((uri
->cleanup
) && (IS_UNWISE(cur
))))
1405 * Concat the set of path segments to the current path
1411 if (uri
->path
!= NULL
) {
1412 len2
= strlen(uri
->path
);
1415 path
= (char *) xmlMalloc(len
+ 1);
1417 xmlGenericError(xmlGenericErrorContext
,
1418 "xmlParseURIPathSegments: out of memory\n");
1422 if (uri
->path
!= NULL
)
1423 memcpy(path
, uri
->path
, len2
);
1430 xmlURIUnescapeString(*str
, cur
- *str
, &path
[len2
]);
1431 if (uri
->path
!= NULL
)
1440 * xmlParseURIAuthority:
1441 * @uri: pointer to an URI structure
1442 * @str: pointer to the string to analyze
1444 * Parse the authority part of an URI.
1446 * authority = server | reg_name
1447 * server = [ [ userinfo "@" ] hostport ]
1448 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1449 * "@" | "&" | "=" | "+" )
1451 * Note : this is completely ambiguous since reg_name is allowed to
1452 * use the full set of chars in use by server:
1454 * 3.2.1. Registry-based Naming Authority
1456 * The structure of a registry-based naming authority is specific
1457 * to the URI scheme, but constrained to the allowed characters
1458 * for an authority component.
1460 * Returns 0 or the error code
1463 xmlParseURIAuthority(xmlURIPtr uri
, const char **str
) {
1473 * try first to parse it as a server string.
1475 ret
= xmlParseURIServer(uri
, str
);
1480 * failed, fallback to reg_name
1482 if (!IS_REG_NAME(cur
)) {
1486 while (IS_REG_NAME(cur
)) NEXT(cur
);
1488 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1490 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1492 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1493 uri
->authority
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
1500 * xmlParseURIHierPart:
1501 * @uri: pointer to an URI structure
1502 * @str: pointer to the string to analyze
1504 * Parse an URI hierarchical part
1506 * hier_part = ( net_path | abs_path ) [ "?" query ]
1507 * abs_path = "/" path_segments
1508 * net_path = "//" authority [ abs_path ]
1510 * Returns 0 or the error code
1513 xmlParseURIHierPart(xmlURIPtr uri
, const char **str
) {
1522 if ((cur
[0] == '/') && (cur
[1] == '/')) {
1524 ret
= xmlParseURIAuthority(uri
, &cur
);
1527 if (cur
[0] == '/') {
1529 ret
= xmlParseURIPathSegments(uri
, &cur
, 1);
1531 } else if (cur
[0] == '/') {
1533 ret
= xmlParseURIPathSegments(uri
, &cur
, 1);
1541 ret
= xmlParseURIQuery(uri
, &cur
);
1550 * xmlParseAbsoluteURI:
1551 * @uri: pointer to an URI structure
1552 * @str: pointer to the string to analyze
1554 * Parse an URI reference string and fills in the appropriate fields
1555 * of the @uri structure
1557 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1559 * Returns 0 or the error code
1562 xmlParseAbsoluteURI(xmlURIPtr uri
, const char **str
) {
1571 ret
= xmlParseURIScheme(uri
, str
);
1572 if (ret
!= 0) return(ret
);
1579 return(xmlParseURIHierPart(uri
, str
));
1580 return(xmlParseURIOpaquePart(uri
, str
));
1584 * xmlParseRelativeURI:
1585 * @uri: pointer to an URI structure
1586 * @str: pointer to the string to analyze
1588 * Parse a relative URI string and fill in the appropriate fields
1589 * of the @uri structure
1591 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1592 * abs_path = "/" path_segments
1593 * net_path = "//" authority [ abs_path ]
1594 * rel_path = rel_segment [ abs_path ]
1596 * Returns 0 or the error code
1599 xmlParseRelativeURI(xmlURIPtr uri
, const char **str
) {
1607 if ((cur
[0] == '/') && (cur
[1] == '/')) {
1609 ret
= xmlParseURIAuthority(uri
, &cur
);
1612 if (cur
[0] == '/') {
1614 ret
= xmlParseURIPathSegments(uri
, &cur
, 1);
1616 } else if (cur
[0] == '/') {
1618 ret
= xmlParseURIPathSegments(uri
, &cur
, 1);
1619 } else if (cur
[0] != '#' && cur
[0] != '?') {
1620 ret
= xmlParseURIRelSegment(uri
, &cur
);
1623 if (cur
[0] == '/') {
1625 ret
= xmlParseURIPathSegments(uri
, &cur
, 1);
1632 ret
= xmlParseURIQuery(uri
, &cur
);
1641 * xmlParseURIReference:
1642 * @uri: pointer to an URI structure
1643 * @str: the string to analyze
1645 * Parse an URI reference string and fills in the appropriate fields
1646 * of the @uri structure
1648 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1650 * Returns 0 or the error code
1653 xmlParseURIReference(xmlURIPtr uri
, const char *str
) {
1655 const char *tmp
= str
;
1662 * Try first to parse absolute refs, then fallback to relative if
1665 ret
= xmlParseAbsoluteURI(uri
, &str
);
1669 ret
= xmlParseRelativeURI(uri
, &str
);
1678 ret
= xmlParseURIFragment(uri
, &str
);
1679 if (ret
!= 0) return(ret
);
1690 * @str: the URI string to analyze
1694 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1696 * Returns a newly built xmlURIPtr or NULL in case of error
1699 xmlParseURI(const char *str
) {
1705 uri
= xmlCreateURI();
1707 ret
= xmlParseURIReference(uri
, str
);
1716 /************************************************************************
1718 * Public functions *
1720 ************************************************************************/
1724 * @URI: the URI instance found in the document
1725 * @base: the base value
1727 * Computes the final URI of the reference done by checking that
1728 * the given URI is valid, and building the final URI using the
1729 * base URI. This is processed according to section 5.2 of the
1732 * 5.2. Resolving Relative References to Absolute Form
1734 * Returns a new URI string (to be freed by the caller) or NULL in case
1738 xmlBuildURI(const xmlChar
*URI
, const xmlChar
*base
) {
1739 xmlChar
*val
= NULL
;
1740 int ret
, len
, indx
, cur
, out
;
1741 xmlURIPtr ref
= NULL
;
1742 xmlURIPtr bas
= NULL
;
1743 xmlURIPtr res
= NULL
;
1746 * 1) The URI reference is parsed into the potential four components and
1747 * fragment identifier, as described in Section 4.3.
1749 * NOTE that a completely empty URI is treated by modern browsers
1750 * as a reference to "." rather than as a synonym for the current
1751 * URI. Should we do that here?
1757 ref
= xmlCreateURI();
1760 ret
= xmlParseURIReference(ref
, (const char *) URI
);
1770 bas
= xmlCreateURI();
1773 ret
= xmlParseURIReference(bas
, (const char *) base
);
1777 val
= xmlSaveUri(ref
);
1782 * the base fragment must be ignored
1784 if (bas
->fragment
!= NULL
) {
1785 xmlFree(bas
->fragment
);
1786 bas
->fragment
= NULL
;
1788 val
= xmlSaveUri(bas
);
1793 * 2) If the path component is empty and the scheme, authority, and
1794 * query components are undefined, then it is a reference to the
1795 * current document and we are done. Otherwise, the reference URI's
1796 * query and fragment components are defined as found (or not found)
1797 * within the URI reference and not inherited from the base URI.
1799 * NOTE that in modern browsers, the parsing differs from the above
1800 * in the following aspect: the query component is allowed to be
1801 * defined while still treating this as a reference to the current
1804 res
= xmlCreateURI();
1807 if ((ref
->scheme
== NULL
) && (ref
->path
== NULL
) &&
1808 ((ref
->authority
== NULL
) && (ref
->server
== NULL
))) {
1809 if (bas
->scheme
!= NULL
)
1810 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1811 if (bas
->authority
!= NULL
)
1812 res
->authority
= xmlMemStrdup(bas
->authority
);
1813 else if (bas
->server
!= NULL
) {
1814 res
->server
= xmlMemStrdup(bas
->server
);
1815 if (bas
->user
!= NULL
)
1816 res
->user
= xmlMemStrdup(bas
->user
);
1817 res
->port
= bas
->port
;
1819 if (bas
->path
!= NULL
)
1820 res
->path
= xmlMemStrdup(bas
->path
);
1821 if (ref
->query
!= NULL
)
1822 res
->query
= xmlMemStrdup(ref
->query
);
1823 else if (bas
->query
!= NULL
)
1824 res
->query
= xmlMemStrdup(bas
->query
);
1825 if (ref
->fragment
!= NULL
)
1826 res
->fragment
= xmlMemStrdup(ref
->fragment
);
1830 if (ref
->query
!= NULL
)
1831 res
->query
= xmlMemStrdup(ref
->query
);
1832 if (ref
->fragment
!= NULL
)
1833 res
->fragment
= xmlMemStrdup(ref
->fragment
);
1836 * 3) If the scheme component is defined, indicating that the reference
1837 * starts with a scheme name, then the reference is interpreted as an
1838 * absolute URI and we are done. Otherwise, the reference URI's
1839 * scheme is inherited from the base URI's scheme component.
1841 if (ref
->scheme
!= NULL
) {
1842 val
= xmlSaveUri(ref
);
1845 if (bas
->scheme
!= NULL
)
1846 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1849 * 4) If the authority component is defined, then the reference is a
1850 * network-path and we skip to step 7. Otherwise, the reference
1851 * URI's authority is inherited from the base URI's authority
1852 * component, which will also be undefined if the URI scheme does not
1853 * use an authority component.
1855 if ((ref
->authority
!= NULL
) || (ref
->server
!= NULL
)) {
1856 if (ref
->authority
!= NULL
)
1857 res
->authority
= xmlMemStrdup(ref
->authority
);
1859 res
->server
= xmlMemStrdup(ref
->server
);
1860 if (ref
->user
!= NULL
)
1861 res
->user
= xmlMemStrdup(ref
->user
);
1862 res
->port
= ref
->port
;
1864 if (ref
->path
!= NULL
)
1865 res
->path
= xmlMemStrdup(ref
->path
);
1868 if (bas
->authority
!= NULL
)
1869 res
->authority
= xmlMemStrdup(bas
->authority
);
1870 else if (bas
->server
!= NULL
) {
1871 res
->server
= xmlMemStrdup(bas
->server
);
1872 if (bas
->user
!= NULL
)
1873 res
->user
= xmlMemStrdup(bas
->user
);
1874 res
->port
= bas
->port
;
1878 * 5) If the path component begins with a slash character ("/"), then
1879 * the reference is an absolute-path and we skip to step 7.
1881 if ((ref
->path
!= NULL
) && (ref
->path
[0] == '/')) {
1882 res
->path
= xmlMemStrdup(ref
->path
);
1888 * 6) If this step is reached, then we are resolving a relative-path
1889 * reference. The relative path needs to be merged with the base
1890 * URI's path. Although there are many ways to do this, we will
1891 * describe a simple method using a separate string buffer.
1893 * Allocate a buffer large enough for the result string.
1895 len
= 2; /* extra / and 0 */
1896 if (ref
->path
!= NULL
)
1897 len
+= strlen(ref
->path
);
1898 if (bas
->path
!= NULL
)
1899 len
+= strlen(bas
->path
);
1900 res
->path
= (char *) xmlMalloc(len
);
1901 if (res
->path
== NULL
) {
1902 xmlGenericError(xmlGenericErrorContext
,
1903 "xmlBuildURI: out of memory\n");
1909 * a) All but the last segment of the base URI's path component is
1910 * copied to the buffer. In other words, any characters after the
1911 * last (right-most) slash character, if any, are excluded.
1915 if (bas
->path
!= NULL
) {
1916 while (bas
->path
[cur
] != 0) {
1917 while ((bas
->path
[cur
] != 0) && (bas
->path
[cur
] != '/'))
1919 if (bas
->path
[cur
] == 0)
1924 res
->path
[out
] = bas
->path
[out
];
1932 * b) The reference's path component is appended to the buffer
1935 if (ref
->path
!= NULL
&& ref
->path
[0] != 0) {
1938 * Ensure the path includes a '/'
1940 if ((out
== 0) && (bas
->server
!= NULL
))
1941 res
->path
[out
++] = '/';
1942 while (ref
->path
[indx
] != 0) {
1943 res
->path
[out
++] = ref
->path
[indx
++];
1949 * Steps c) to h) are really path normalization steps
1951 xmlNormalizeURIPath(res
->path
);
1956 * 7) The resulting URI components, including any inherited from the
1957 * base URI, are recombined to give the absolute form of the URI
1960 val
= xmlSaveUri(res
);