2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
21 static void xmlCleanURI(xmlURIPtr uri
);
24 * Old rule from 2396 used in legacy handling code
25 * alpha = lowalpha | upalpha
27 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
31 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
32 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
33 * "u" | "v" | "w" | "x" | "y" | "z"
36 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
39 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
40 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
41 * "U" | "V" | "W" | "X" | "Y" | "Z"
43 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
49 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
51 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
54 * alphanum = alpha | digit
57 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
60 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
63 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
64 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
65 ((x) == '(') || ((x) == ')'))
68 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
71 #define IS_UNWISE(p) \
72 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
73 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
74 ((*(p) == ']')) || ((*(p) == '`')))
76 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
80 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
81 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
82 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
86 * unreserved = alphanum | mark
89 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
/*
 * Skip to next pointer char, handle escaped sequences: when the current
 * character is '%' the pointer is advanced by 3 (the whole "%XX" escape is
 * assumed to be present), otherwise by 1.
 *
 * The argument must be a modifiable pointer lvalue without side effects,
 * since it is evaluated more than once.  It is fully parenthesized so that
 * expressions such as NEXT(*pp) advance the pointed-to pointer and not pp.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
98 * Productions from the spec.
100 * authority = server | reg_name
101 * reg_name = 1*( unreserved | escaped | "$" | "," |
102 * ";" | ":" | "@" | "&" | "=" | "+" )
104 * path = [ abs_path | opaque_part ]
107 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
109 /************************************************************************
113 ************************************************************************/
/*
 * RFC 3986 character classes.
 *
 * Every macro below takes a POINTER to the character to test (not the
 * character itself) and evaluates its argument several times, so the
 * argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p) \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p) \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p) \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p) \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The tests short-circuit left to right, so (p)[1] is only read once (p)[0]
 * is known to be '%' and (p)[2] only once (p)[1] is a hex digit; a
 * NUL-terminated string is therefore never read past its terminator.
 * The argument is parenthesized before the offset is added so that any
 * pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p) \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p) \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
      ((*(p) == ':')) || ((*(p) == '@')))
166 * xmlParse3986Scheme:
167 * @uri: pointer to an URI structure
168 * @str: pointer to the string to analyze
170 * Parse an URI scheme
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
174 * Returns 0 or the error code
177 xmlParse3986Scheme(xmlURIPtr uri
, const char **str
) {
187 while (ISA_ALPHA(cur
) || ISA_DIGIT(cur
) ||
188 (*cur
== '+') || (*cur
== '-') || (*cur
== '.')) cur
++;
190 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
191 uri
->scheme
= STRNDUP(*str
, cur
- *str
);
198 * xmlParse3986Fragment:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
202 * Parse the fragment part of an URI
204 * fragment = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 * in the fragment identifier but this is used very broadly for
207 * xpointer scheme selection, so we are allowing it here to not break
208 * for example all the DocBook processing chains.
210 * Returns 0 or the error code
213 xmlParse3986Fragment(xmlURIPtr uri
, const char **str
)
222 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
223 (*cur
== '[') || (*cur
== ']') ||
224 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
227 if (uri
->fragment
!= NULL
)
228 xmlFree(uri
->fragment
);
229 if (uri
->cleanup
& 2)
230 uri
->fragment
= STRNDUP(*str
, cur
- *str
);
232 uri
->fragment
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
240 * @uri: pointer to an URI structure
241 * @str: pointer to the string to analyze
243 * Parse the query part of an URI
247 * Returns 0 or the error code
250 xmlParse3986Query(xmlURIPtr uri
, const char **str
)
259 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
260 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
263 if (uri
->query
!= NULL
)
265 if (uri
->cleanup
& 2)
266 uri
->query
= STRNDUP(*str
, cur
- *str
);
268 uri
->query
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
270 /* Save the raw bytes of the query as well.
271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
273 if (uri
->query_raw
!= NULL
)
274 xmlFree (uri
->query_raw
);
275 uri
->query_raw
= STRNDUP (*str
, cur
- *str
);
283 * @uri: pointer to an URI structure
284 * @str: the string to analyze
286 * Parse a port part and fills in the appropriate fields
287 * of the @uri structure
291 * Returns 0 or the error code
294 xmlParse3986Port(xmlURIPtr uri
, const char **str
)
296 const char *cur
= *str
;
298 if (ISA_DIGIT(cur
)) {
301 while (ISA_DIGIT(cur
)) {
303 uri
->port
= uri
->port
* 10 + (*cur
- '0');
313 * xmlParse3986Userinfo:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
317 * Parse a user information part and fills in the appropriate fields
318 * of the @uri structure
320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
322 * Returns 0 or the error code
325 xmlParse3986Userinfo(xmlURIPtr uri
, const char **str
)
330 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) ||
331 ISA_SUB_DELIM(cur
) || (*cur
== ':'))
335 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
336 if (uri
->cleanup
& 2)
337 uri
->user
= STRNDUP(*str
, cur
- *str
);
339 uri
->user
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet at the start of *@str.  On success *@str is advanced
 * past the octet; on failure it is left untouched.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * The look-ahead is lazy: cur[1] is only read once cur[0] is a digit
     * and cur[2] only once cur[1] is a digit, so the terminating NUL of
     * the string is never stepped over.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')                      /* no leading zero */
            return(1);
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;                                /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* the upper bound applies to the THIRD digit: 256-259 are rejected */
        len = 3;                                /* 250-255 */
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
386 * @uri: pointer to an URI structure
387 * @str: the string to analyze
389 * Parse a host part and fills in the appropriate fields
390 * of the @uri structure
392 * host = IP-literal / IPv4address / reg-name
393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name = *( unreserved / pct-encoded / sub-delims )
397 * Returns 0 or the error code
400 xmlParse3986Host(xmlURIPtr uri
, const char **str
)
402 const char *cur
= *str
;
407 * IPv6 and future addressing scheme are enclosed between brackets
411 while ((*cur
!= ']') && (*cur
!= 0))
419 * try to parse an IPv4
421 if (ISA_DIGIT(cur
)) {
422 if (xmlParse3986DecOctet(&cur
) != 0)
427 if (xmlParse3986DecOctet(&cur
) != 0)
431 if (xmlParse3986DecOctet(&cur
) != 0)
435 if (xmlParse3986DecOctet(&cur
) != 0)
442 * then this should be a hostname which can be empty
444 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) || ISA_SUB_DELIM(cur
))
448 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
449 uri
->authority
= NULL
;
450 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
452 if (uri
->cleanup
& 2)
453 uri
->server
= STRNDUP(host
, cur
- host
);
455 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
464 * xmlParse3986Authority:
465 * @uri: pointer to an URI structure
466 * @str: the string to analyze
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
471 * authority = [ userinfo "@" ] host [ ":" port ]
473 * Returns 0 or the error code
476 xmlParse3986Authority(xmlURIPtr uri
, const char **str
)
483 * try to parse an userinfo and check for the trailing @
485 ret
= xmlParse3986Userinfo(uri
, &cur
);
486 if ((ret
!= 0) || (*cur
!= '@'))
490 ret
= xmlParse3986Host(uri
, &cur
);
491 if (ret
!= 0) return(ret
);
494 ret
= xmlParse3986Port(uri
, &cur
);
495 if (ret
!= 0) return(ret
);
502 * xmlParse3986Segment:
503 * @str: the string to analyze
504 * @forbid: an optional forbidden character
505 * @empty: allow an empty segment
507 * Parse a segment and fills in the appropriate fields
508 * of the @uri structure
511 * segment-nz = 1*pchar
512 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
513 * ; non-zero-length segment without any colon ":"
515 * Returns 0 or the error code
518 xmlParse3986Segment(const char **str
, char forbid
, int empty
)
523 if (!ISA_PCHAR(cur
)) {
528 while (ISA_PCHAR(cur
) && (*cur
!= forbid
))
535 * xmlParse3986PathAbEmpty:
536 * @uri: pointer to an URI structure
537 * @str: the string to analyze
539 * Parse a path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
542 * path-abempty = *( "/" segment )
544 * Returns 0 or the error code
547 xmlParse3986PathAbEmpty(xmlURIPtr uri
, const char **str
)
554 while (*cur
== '/') {
556 ret
= xmlParse3986Segment(&cur
, 0, 1);
557 if (ret
!= 0) return(ret
);
560 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
562 if (uri
->cleanup
& 2)
563 uri
->path
= STRNDUP(*str
, cur
- *str
);
565 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
575 * xmlParse3986PathAbsolute:
576 * @uri: pointer to an URI structure
577 * @str: the string to analyze
579 * Parse a path absolute and fills in the appropriate fields
580 * of the @uri structure
582 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
584 * Returns 0 or the error code
587 xmlParse3986PathAbsolute(xmlURIPtr uri
, const char **str
)
597 ret
= xmlParse3986Segment(&cur
, 0, 0);
599 while (*cur
== '/') {
601 ret
= xmlParse3986Segment(&cur
, 0, 1);
602 if (ret
!= 0) return(ret
);
606 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
608 if (uri
->cleanup
& 2)
609 uri
->path
= STRNDUP(*str
, cur
- *str
);
611 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
621 * xmlParse3986PathRootless:
622 * @uri: pointer to an URI structure
623 * @str: the string to analyze
625 * Parse a path without root and fills in the appropriate fields
626 * of the @uri structure
628 * path-rootless = segment-nz *( "/" segment )
630 * Returns 0 or the error code
633 xmlParse3986PathRootless(xmlURIPtr uri
, const char **str
)
640 ret
= xmlParse3986Segment(&cur
, 0, 0);
641 if (ret
!= 0) return(ret
);
642 while (*cur
== '/') {
644 ret
= xmlParse3986Segment(&cur
, 0, 1);
645 if (ret
!= 0) return(ret
);
648 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
650 if (uri
->cleanup
& 2)
651 uri
->path
= STRNDUP(*str
, cur
- *str
);
653 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
663 * xmlParse3986PathNoScheme:
664 * @uri: pointer to an URI structure
665 * @str: the string to analyze
667 * Parse a path which is not a scheme and fills in the appropriate fields
668 * of the @uri structure
670 * path-noscheme = segment-nz-nc *( "/" segment )
672 * Returns 0 or the error code
675 xmlParse3986PathNoScheme(xmlURIPtr uri
, const char **str
)
682 ret
= xmlParse3986Segment(&cur
, ':', 0);
683 if (ret
!= 0) return(ret
);
684 while (*cur
== '/') {
686 ret
= xmlParse3986Segment(&cur
, 0, 1);
687 if (ret
!= 0) return(ret
);
690 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
692 if (uri
->cleanup
& 2)
693 uri
->path
= STRNDUP(*str
, cur
- *str
);
695 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
705 * xmlParse3986HierPart:
706 * @uri: pointer to an URI structure
707 * @str: the string to analyze
709 * Parse a hierarchical part and fills in the appropriate fields
710 * of the @uri structure
712 * hier-part = "//" authority path-abempty
717 * Returns 0 or the error code
720 xmlParse3986HierPart(xmlURIPtr uri
, const char **str
)
727 if ((*cur
== '/') && (*(cur
+ 1) == '/')) {
729 ret
= xmlParse3986Authority(uri
, &cur
);
730 if (ret
!= 0) return(ret
);
731 ret
= xmlParse3986PathAbEmpty(uri
, &cur
);
732 if (ret
!= 0) return(ret
);
735 } else if (*cur
== '/') {
736 ret
= xmlParse3986PathAbsolute(uri
, &cur
);
737 if (ret
!= 0) return(ret
);
738 } else if (ISA_PCHAR(cur
)) {
739 ret
= xmlParse3986PathRootless(uri
, &cur
);
740 if (ret
!= 0) return(ret
);
742 /* path-empty is effectively empty */
744 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
753 * xmlParse3986RelativeRef:
754 * @uri: pointer to an URI structure
755 * @str: the string to analyze
757 * Parse an URI string and fills in the appropriate fields
758 * of the @uri structure
760 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
761 * relative-part = "//" authority path-abempty
766 * Returns 0 or the error code
769 xmlParse3986RelativeRef(xmlURIPtr uri
, const char *str
) {
772 if ((*str
== '/') && (*(str
+ 1) == '/')) {
774 ret
= xmlParse3986Authority(uri
, &str
);
775 if (ret
!= 0) return(ret
);
776 ret
= xmlParse3986PathAbEmpty(uri
, &str
);
777 if (ret
!= 0) return(ret
);
778 } else if (*str
== '/') {
779 ret
= xmlParse3986PathAbsolute(uri
, &str
);
780 if (ret
!= 0) return(ret
);
781 } else if (ISA_PCHAR(str
)) {
782 ret
= xmlParse3986PathNoScheme(uri
, &str
);
783 if (ret
!= 0) return(ret
);
785 /* path-empty is effectively empty */
787 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
794 ret
= xmlParse3986Query(uri
, &str
);
795 if (ret
!= 0) return(ret
);
799 ret
= xmlParse3986Fragment(uri
, &str
);
800 if (ret
!= 0) return(ret
);
812 * @uri: pointer to an URI structure
813 * @str: the string to analyze
815 * Parse an URI string and fills in the appropriate fields
816 * of the @uri structure
818 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
820 * Returns 0 or the error code
823 xmlParse3986URI(xmlURIPtr uri
, const char *str
) {
826 ret
= xmlParse3986Scheme(uri
, &str
);
827 if (ret
!= 0) return(ret
);
832 ret
= xmlParse3986HierPart(uri
, &str
);
833 if (ret
!= 0) return(ret
);
836 ret
= xmlParse3986Query(uri
, &str
);
837 if (ret
!= 0) return(ret
);
841 ret
= xmlParse3986Fragment(uri
, &str
);
842 if (ret
!= 0) return(ret
);
852 * xmlParse3986URIReference:
853 * @uri: pointer to an URI structure
854 * @str: the string to analyze
856 * Parse an URI reference string and fills in the appropriate fields
857 * of the @uri structure
859 * URI-reference = URI / relative-ref
861 * Returns 0 or the error code
864 xmlParse3986URIReference(xmlURIPtr uri
, const char *str
) {
872 * Try first to parse absolute refs, then fallback to relative if
875 ret
= xmlParse3986URI(uri
, str
);
878 ret
= xmlParse3986RelativeRef(uri
, str
);
889 * @str: the URI string to analyze
891 * Parse an URI based on RFC 3986
893 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
895 * Returns a newly built xmlURIPtr or NULL in case of error
898 xmlParseURI(const char *str
) {
904 uri
= xmlCreateURI();
906 ret
= xmlParse3986URIReference(uri
, str
);
916 * xmlParseURIReference:
917 * @uri: pointer to an URI structure
918 * @str: the string to analyze
920 * Parse an URI reference string based on RFC 3986 and fills in the
921 * appropriate fields of the @uri structure
923 * URI-reference = URI / relative-ref
925 * Returns 0 or the error code
928 xmlParseURIReference(xmlURIPtr uri
, const char *str
) {
929 return(xmlParse3986URIReference(uri
, str
));
934 * @str: the URI string to analyze
935 * @raw: if 1 unescaping of URI pieces are disabled
937 * Parse an URI but allows to keep intact the original fragments.
939 * URI-reference = URI / relative-ref
941 * Returns a newly built xmlURIPtr or NULL in case of error
944 xmlParseURIRaw(const char *str
, int raw
) {
950 uri
= xmlCreateURI();
955 ret
= xmlParseURIReference(uri
, str
);
964 /************************************************************************
966 * Generic URI structure functions *
968 ************************************************************************/
973 * Simply creates an empty xmlURI
975 * Returns the new structure or NULL in case of error
981 ret
= (xmlURIPtr
) xmlMalloc(sizeof(xmlURI
));
983 xmlGenericError(xmlGenericErrorContext
,
984 "xmlCreateURI: out of memory\n");
987 memset(ret
, 0, sizeof(xmlURI
));
993 * @uri: pointer to an xmlURI
995 * Save the URI as an escaped string
997 * Returns a new string (to be deallocated by caller)
1000 xmlSaveUri(xmlURIPtr uri
) {
1001 xmlChar
*ret
= NULL
;
1007 if (uri
== NULL
) return(NULL
);
1011 ret
= (xmlChar
*) xmlMallocAtomic((max
+ 1) * sizeof(xmlChar
));
1013 xmlGenericError(xmlGenericErrorContext
,
1014 "xmlSaveUri: out of memory\n");
1019 if (uri
->scheme
!= NULL
) {
1024 temp
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
1026 xmlGenericError(xmlGenericErrorContext
,
1027 "xmlSaveUri: out of memory\n");
1037 temp
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
1039 xmlGenericError(xmlGenericErrorContext
,
1040 "xmlSaveUri: out of memory\n");
1048 if (uri
->opaque
!= NULL
) {
1051 if (len
+ 3 >= max
) {
1053 temp
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
1055 xmlGenericError(xmlGenericErrorContext
,
1056 "xmlSaveUri: out of memory\n");
1062 if (IS_RESERVED(*(p
)) || IS_UNRESERVED(*(p
)))
1065 int val
= *(unsigned char *)p
++;
1066 int hi
= val
/ 0x10, lo
= val
% 0x10;
1068 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1069 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1073 if (uri
->server
!= NULL
) {
1074 if (len
+ 3 >= max
) {
1076 temp
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
1078 xmlGenericError(xmlGenericErrorContext
,
1079 "xmlSaveUri: out of memory\n");
1087 if (uri
->user
!= NULL
) {
1090 if (len
+ 3 >= max
) {
1092 temp
= (xmlChar
*) xmlRealloc(ret
,
1093 (max
+ 1) * sizeof(xmlChar
));
1095 xmlGenericError(xmlGenericErrorContext
,
1096 "xmlSaveUri: out of memory\n");
1102 if ((IS_UNRESERVED(*(p
))) ||
1103 ((*(p
) == ';')) || ((*(p
) == ':')) ||
1104 ((*(p
) == '&')) || ((*(p
) == '=')) ||
1105 ((*(p
) == '+')) || ((*(p
) == '$')) ||
1109 int val
= *(unsigned char *)p
++;
1110 int hi
= val
/ 0x10, lo
= val
% 0x10;
1112 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1113 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1116 if (len
+ 3 >= max
) {
1118 temp
= (xmlChar
*) xmlRealloc(ret
,
1119 (max
+ 1) * sizeof(xmlChar
));
1121 xmlGenericError(xmlGenericErrorContext
,
1122 "xmlSaveUri: out of memory\n");
1134 temp
= (xmlChar
*) xmlRealloc(ret
,
1135 (max
+ 1) * sizeof(xmlChar
));
1137 xmlGenericError(xmlGenericErrorContext
,
1138 "xmlSaveUri: out of memory\n");
1146 if (uri
->port
> 0) {
1147 if (len
+ 10 >= max
) {
1149 temp
= (xmlChar
*) xmlRealloc(ret
,
1150 (max
+ 1) * sizeof(xmlChar
));
1152 xmlGenericError(xmlGenericErrorContext
,
1153 "xmlSaveUri: out of memory\n");
1159 len
+= snprintf((char *) &ret
[len
], max
- len
, ":%d", uri
->port
);
1161 } else if (uri
->authority
!= NULL
) {
1162 if (len
+ 3 >= max
) {
1164 temp
= (xmlChar
*) xmlRealloc(ret
,
1165 (max
+ 1) * sizeof(xmlChar
));
1167 xmlGenericError(xmlGenericErrorContext
,
1168 "xmlSaveUri: out of memory\n");
1178 if (len
+ 3 >= max
) {
1180 temp
= (xmlChar
*) xmlRealloc(ret
,
1181 (max
+ 1) * sizeof(xmlChar
));
1183 xmlGenericError(xmlGenericErrorContext
,
1184 "xmlSaveUri: out of memory\n");
1190 if ((IS_UNRESERVED(*(p
))) ||
1191 ((*(p
) == '$')) || ((*(p
) == ',')) || ((*(p
) == ';')) ||
1192 ((*(p
) == ':')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1193 ((*(p
) == '=')) || ((*(p
) == '+')))
1196 int val
= *(unsigned char *)p
++;
1197 int hi
= val
/ 0x10, lo
= val
% 0x10;
1199 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1200 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1203 } else if (uri
->scheme
!= NULL
) {
1204 if (len
+ 3 >= max
) {
1206 temp
= (xmlChar
*) xmlRealloc(ret
,
1207 (max
+ 1) * sizeof(xmlChar
));
1209 xmlGenericError(xmlGenericErrorContext
,
1210 "xmlSaveUri: out of memory\n");
1219 if (uri
->path
!= NULL
) {
1222 * the colon in file:///d: should not be escaped or
1223 * Windows accesses fail later.
1225 if ((uri
->scheme
!= NULL
) &&
1227 (((p
[1] >= 'a') && (p
[1] <= 'z')) ||
1228 ((p
[1] >= 'A') && (p
[1] <= 'Z'))) &&
1230 (xmlStrEqual(BAD_CAST uri
->scheme
, BAD_CAST
"file"))) {
1231 if (len
+ 3 >= max
) {
1233 ret
= (xmlChar
*) xmlRealloc(ret
,
1234 (max
+ 1) * sizeof(xmlChar
));
1236 xmlGenericError(xmlGenericErrorContext
,
1237 "xmlSaveUri: out of memory\n");
1246 if (len
+ 3 >= max
) {
1248 temp
= (xmlChar
*) xmlRealloc(ret
,
1249 (max
+ 1) * sizeof(xmlChar
));
1251 xmlGenericError(xmlGenericErrorContext
,
1252 "xmlSaveUri: out of memory\n");
1258 if ((IS_UNRESERVED(*(p
))) || ((*(p
) == '/')) ||
1259 ((*(p
) == ';')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1260 ((*(p
) == '=')) || ((*(p
) == '+')) || ((*(p
) == '$')) ||
1264 int val
= *(unsigned char *)p
++;
1265 int hi
= val
/ 0x10, lo
= val
% 0x10;
1267 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1268 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1272 if (uri
->query_raw
!= NULL
) {
1273 if (len
+ 1 >= max
) {
1275 temp
= (xmlChar
*) xmlRealloc(ret
,
1276 (max
+ 1) * sizeof(xmlChar
));
1278 xmlGenericError(xmlGenericErrorContext
,
1279 "xmlSaveUri: out of memory\n");
1288 if (len
+ 1 >= max
) {
1290 temp
= (xmlChar
*) xmlRealloc(ret
,
1291 (max
+ 1) * sizeof(xmlChar
));
1293 xmlGenericError(xmlGenericErrorContext
,
1294 "xmlSaveUri: out of memory\n");
1302 } else if (uri
->query
!= NULL
) {
1303 if (len
+ 3 >= max
) {
1305 temp
= (xmlChar
*) xmlRealloc(ret
,
1306 (max
+ 1) * sizeof(xmlChar
));
1308 xmlGenericError(xmlGenericErrorContext
,
1309 "xmlSaveUri: out of memory\n");
1318 if (len
+ 3 >= max
) {
1320 temp
= (xmlChar
*) xmlRealloc(ret
,
1321 (max
+ 1) * sizeof(xmlChar
));
1323 xmlGenericError(xmlGenericErrorContext
,
1324 "xmlSaveUri: out of memory\n");
1330 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1333 int val
= *(unsigned char *)p
++;
1334 int hi
= val
/ 0x10, lo
= val
% 0x10;
1336 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1337 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1342 if (uri
->fragment
!= NULL
) {
1343 if (len
+ 3 >= max
) {
1345 temp
= (xmlChar
*) xmlRealloc(ret
,
1346 (max
+ 1) * sizeof(xmlChar
));
1348 xmlGenericError(xmlGenericErrorContext
,
1349 "xmlSaveUri: out of memory\n");
1358 if (len
+ 3 >= max
) {
1360 temp
= (xmlChar
*) xmlRealloc(ret
,
1361 (max
+ 1) * sizeof(xmlChar
));
1363 xmlGenericError(xmlGenericErrorContext
,
1364 "xmlSaveUri: out of memory\n");
1370 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1373 int val
= *(unsigned char *)p
++;
1374 int hi
= val
/ 0x10, lo
= val
% 0x10;
1376 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1377 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1383 temp
= (xmlChar
*) xmlRealloc(ret
, (max
+ 1) * sizeof(xmlChar
));
1385 xmlGenericError(xmlGenericErrorContext
,
1386 "xmlSaveUri: out of memory\n");
1398 * @stream: a FILE* for the output
1399 * @uri: pointer to an xmlURI
1401 * Prints the URI in the stream @stream.
1404 xmlPrintURI(FILE *stream
, xmlURIPtr uri
) {
1407 out
= xmlSaveUri(uri
);
1409 fprintf(stream
, "%s", (char *) out
);
1416 * @uri: pointer to an xmlURI
1418 * Make sure the xmlURI struct is free of content
1421 xmlCleanURI(xmlURIPtr uri
) {
1422 if (uri
== NULL
) return;
1424 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1426 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1428 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1430 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1432 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1433 uri
->fragment
= NULL
;
1434 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1436 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1437 uri
->authority
= NULL
;
1438 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1440 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1441 uri
->query_raw
= NULL
;
1446 * @uri: pointer to an xmlURI
1448 * Free up the xmlURI struct
1451 xmlFreeURI(xmlURIPtr uri
) {
1452 if (uri
== NULL
) return;
1454 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1455 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1456 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1457 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1458 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1459 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1460 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1461 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1462 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1466 /************************************************************************
1468 * Helper functions *
1470 ************************************************************************/
1473 * xmlNormalizeURIPath:
1474 * @path: pointer to the path string
1476 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1477 * Section 5.2, steps 6.c through 6.g.
1479 * Normalization occurs directly on the string, no new allocation is done
1481 * Returns 0 or an error code
1484 xmlNormalizeURIPath(char *path
) {
1490 /* Skip all initial "/" chars. We want to get to the beginning of the
1491 * first non-empty segment.
1494 while (cur
[0] == '/')
1499 /* Keep everything we've seen so far. */
1503 * Analyze each segment in sequence for cases (c) and (d).
1505 while (cur
[0] != '\0') {
1507 * c) All occurrences of "./", where "." is a complete path segment,
1508 * are removed from the buffer string.
1510 if ((cur
[0] == '.') && (cur
[1] == '/')) {
1512 /* '//' normalization should be done at this point too */
1513 while (cur
[0] == '/')
1519 * d) If the buffer string ends with "." as a complete path segment,
1520 * that "." is removed.
1522 if ((cur
[0] == '.') && (cur
[1] == '\0'))
1525 /* Otherwise keep the segment. */
1526 while (cur
[0] != '/') {
1529 (out
++)[0] = (cur
++)[0];
1532 while ((cur
[0] == '/') && (cur
[1] == '/'))
1535 (out
++)[0] = (cur
++)[0];
1540 /* Reset to the beginning of the first segment for the next sequence. */
1542 while (cur
[0] == '/')
1548 * Analyze each segment in sequence for cases (e) and (f).
1550 * e) All occurrences of "<segment>/../", where <segment> is a
1551 * complete path segment not equal to "..", are removed from the
1552 * buffer string. Removal of these path segments is performed
1553 * iteratively, removing the leftmost matching pattern on each
1554 * iteration, until no matching pattern remains.
1556 * f) If the buffer string ends with "<segment>/..", where <segment>
1557 * is a complete path segment not equal to "..", that
1558 * "<segment>/.." is removed.
1560 * To satisfy the "iterative" clause in (e), we need to collapse the
1561 * string every time we find something that needs to be removed. Thus,
1562 * we don't need to keep two pointers into the string: we only need a
1563 * "current position" pointer.
1568 /* At the beginning of each iteration of this loop, "cur" points to
1569 * the first character of the segment we want to examine.
1572 /* Find the end of the current segment. */
1574 while ((segp
[0] != '/') && (segp
[0] != '\0'))
1577 /* If this is the last segment, we're done (we need at least two
1578 * segments to meet the criteria for the (e) and (f) cases).
1580 if (segp
[0] == '\0')
1583 /* If the first segment is "..", or if the next segment _isn't_ "..",
1584 * keep this segment and try the next one.
1587 if (((cur
[0] == '.') && (cur
[1] == '.') && (segp
== cur
+3))
1588 || ((segp
[0] != '.') || (segp
[1] != '.')
1589 || ((segp
[2] != '/') && (segp
[2] != '\0')))) {
1594 /* If we get here, remove this segment and the next one and back up
1595 * to the previous segment (if there is one), to implement the
1596 * "iteratively" clause. It's pretty much impossible to back up
1597 * while maintaining two pointers into the buffer, so just compact
1598 * the whole buffer now.
1601 /* If this is the end of the buffer, we're done. */
1602 if (segp
[2] == '\0') {
1606 /* Valgrind complained, strcpy(cur, segp + 3); */
1607 /* string will overlap, do not use strcpy */
1610 while ((*tmp
++ = *segp
++) != 0);
1612 /* If there are no previous segments, then keep going from here. */
1614 while ((segp
> path
) && ((--segp
)[0] == '/'))
1619 /* "segp" is pointing to the end of a previous segment; find its
1620 * start. We need to back up to the previous segment and start
1621 * over with that to handle things like "foo/bar/../..". If we
1622 * don't do this, then on the first pass we'll remove the "bar/..",
1623 * but be pointing at the second ".." so we won't realize we can also
1624 * remove the "foo/..".
1627 while ((cur
> path
) && (cur
[-1] != '/'))
1633 * g) If the resulting buffer string still begins with one or more
1634 * complete path segments of "..", then the reference is
1635 * considered to be in error. Implementations may handle this
1636 * error by retaining these components in the resolved path (i.e.,
1637 * treating them as part of the final URI), by removing them from
1638 * the resolved path (i.e., discarding relative levels above the
1639 * root), or by avoiding traversal of the reference.
1641 * We discard them from the final path.
1643 if (path
[0] == '/') {
1645 while ((cur
[0] == '/') && (cur
[1] == '.') && (cur
[2] == '.')
1646 && ((cur
[3] == '/') || (cur
[3] == '\0')))
1651 while (cur
[0] != '\0')
1652 (out
++)[0] = (cur
++)[0];
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of the characters 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    const int is_digit = (c >= '0') && (c <= '9');
    const int is_lower = (c >= 'a') && (c <= 'f');
    const int is_upper = (c >= 'A') && (c <= 'F');

    return((is_digit || is_lower || is_upper) ? 1 : 0);
}
1669 * xmlURIUnescapeString:
1670 * @str: the string to unescape
1671 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1672 * @target: optional destination buffer
1674 * Unescaping routine, but does not check that the string is an URI. The
1675 * output is a direct unsigned char translation of %XX values (no encoding)
1676 * Note that the length of the result can only be smaller or same size as
1679 * Returns a copy of the string, but unescaped, will return NULL only in case
1683 xmlURIUnescapeString(const char *str
, int len
, char *target
) {
1689 if (len
<= 0) len
= strlen(str
);
1690 if (len
< 0) return(NULL
);
1692 if (target
== NULL
) {
1693 ret
= (char *) xmlMallocAtomic(len
+ 1);
1695 xmlGenericError(xmlGenericErrorContext
,
1696 "xmlURIUnescapeString: out of memory\n");
1704 if ((len
> 2) && (*in
== '%') && (is_hex(in
[1])) && (is_hex(in
[2]))) {
1706 if ((*in
>= '0') && (*in
<= '9'))
1708 else if ((*in
>= 'a') && (*in
<= 'f'))
1709 *out
= (*in
- 'a') + 10;
1710 else if ((*in
>= 'A') && (*in
<= 'F'))
1711 *out
= (*in
- 'A') + 10;
1713 if ((*in
>= '0') && (*in
<= '9'))
1714 *out
= *out
* 16 + (*in
- '0');
1715 else if ((*in
>= 'a') && (*in
<= 'f'))
1716 *out
= *out
* 16 + (*in
- 'a') + 10;
1717 else if ((*in
>= 'A') && (*in
<= 'F'))
1718 *out
= *out
* 16 + (*in
- 'A') + 10;
1733 * @str: string to escape
1734 * @list: exception list string of chars not to escape
1736 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1737 * and the characters in the exception list.
1739 * Returns a new escaped string or NULL in case of error.
1742 xmlURIEscapeStr(const xmlChar
*str
, const xmlChar
*list
) {
1747 unsigned int len
, out
;
1752 return(xmlStrdup(str
));
1753 len
= xmlStrlen(str
);
1754 if (!(len
> 0)) return(NULL
);
1757 ret
= (xmlChar
*) xmlMallocAtomic(len
);
1759 xmlGenericError(xmlGenericErrorContext
,
1760 "xmlURIEscapeStr: out of memory\n");
1763 in
= (const xmlChar
*) str
;
1766 if (len
- out
<= 3) {
1768 temp
= (xmlChar
*) xmlRealloc(ret
, len
);
1770 xmlGenericError(xmlGenericErrorContext
,
1771 "xmlURIEscapeStr: out of memory\n");
1780 if ((ch
!= '@') && (!IS_UNRESERVED(ch
)) && (!xmlStrchr(list
, ch
))) {
1785 ret
[out
++] = '0' + val
;
1787 ret
[out
++] = 'A' + val
- 0xA;
1790 ret
[out
++] = '0' + val
;
1792 ret
[out
++] = 'A' + val
- 0xA;
1805 * @str: the string of the URI to escape
1807 * Escaping routine, does not do validity checks !
1808 * It will try to escape the chars needing this, but this is heuristic
1809 * based it's impossible to be sure.
1811 * Returns an copy of the string, but escaped
1814 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1815 * according to RFC2396.
1819 xmlURIEscape(const xmlChar
* str
)
1821 xmlChar
*ret
, *segment
= NULL
;
1825 #define NULLCHK(p) if(!p) { \
1826 xmlGenericError(xmlGenericErrorContext, \
1827 "xmlURIEscape: out of memory\n"); \
1834 uri
= xmlCreateURI();
1837 * Allow escaping errors in the unescaped form
1840 ret2
= xmlParseURIReference(uri
, (const char *)str
);
1853 segment
= xmlURIEscapeStr(BAD_CAST uri
->scheme
, BAD_CAST
"+-.");
1855 ret
= xmlStrcat(ret
, segment
);
1856 ret
= xmlStrcat(ret
, BAD_CAST
":");
1860 if (uri
->authority
) {
1862 xmlURIEscapeStr(BAD_CAST uri
->authority
, BAD_CAST
"/?;:@");
1864 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1865 ret
= xmlStrcat(ret
, segment
);
1870 segment
= xmlURIEscapeStr(BAD_CAST uri
->user
, BAD_CAST
";:&=+$,");
1872 ret
= xmlStrcat(ret
,BAD_CAST
"//");
1873 ret
= xmlStrcat(ret
, segment
);
1874 ret
= xmlStrcat(ret
, BAD_CAST
"@");
1879 segment
= xmlURIEscapeStr(BAD_CAST uri
->server
, BAD_CAST
"/?;:@");
1881 if (uri
->user
== NULL
)
1882 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1883 ret
= xmlStrcat(ret
, segment
);
1890 snprintf((char *) port
, 10, "%d", uri
->port
);
1891 ret
= xmlStrcat(ret
, BAD_CAST
":");
1892 ret
= xmlStrcat(ret
, port
);
1897 xmlURIEscapeStr(BAD_CAST uri
->path
, BAD_CAST
":@&=+$,/?;");
1899 ret
= xmlStrcat(ret
, segment
);
1903 if (uri
->query_raw
) {
1904 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1905 ret
= xmlStrcat(ret
, BAD_CAST uri
->query_raw
);
1907 else if (uri
->query
) {
1909 xmlURIEscapeStr(BAD_CAST uri
->query
, BAD_CAST
";/?:@&=+,$");
1911 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1912 ret
= xmlStrcat(ret
, segment
);
1917 segment
= xmlURIEscapeStr(BAD_CAST uri
->opaque
, BAD_CAST
"");
1919 ret
= xmlStrcat(ret
, segment
);
1923 if (uri
->fragment
) {
1924 segment
= xmlURIEscapeStr(BAD_CAST uri
->fragment
, BAD_CAST
"#");
1926 ret
= xmlStrcat(ret
, BAD_CAST
"#");
1927 ret
= xmlStrcat(ret
, segment
);
1937 /************************************************************************
1939 * Public functions *
1941 ************************************************************************/
1945 * @URI: the URI instance found in the document
1946 * @base: the base value
1948 * Computes the final URI of the reference done by checking that
1949 * the given URI is valid, and building the final URI using the
1950 * base URI. This is processed according to section 5.2 of the
1953 * 5.2. Resolving Relative References to Absolute Form
1955 * Returns a new URI string (to be freed by the caller) or NULL in case
1959 xmlBuildURI(const xmlChar
*URI
, const xmlChar
*base
) {
1960 xmlChar
*val
= NULL
;
1961 int ret
, len
, indx
, cur
, out
;
1962 xmlURIPtr ref
= NULL
;
1963 xmlURIPtr bas
= NULL
;
1964 xmlURIPtr res
= NULL
;
1967 * 1) The URI reference is parsed into the potential four components and
1968 * fragment identifier, as described in Section 4.3.
1970 * NOTE that a completely empty URI is treated by modern browsers
1971 * as a reference to "." rather than as a synonym for the current
1972 * URI. Should we do that here?
1978 ref
= xmlCreateURI();
1981 ret
= xmlParseURIReference(ref
, (const char *) URI
);
1988 if ((ref
!= NULL
) && (ref
->scheme
!= NULL
)) {
1990 * The URI is absolute don't modify.
1992 val
= xmlStrdup(URI
);
1998 bas
= xmlCreateURI();
2001 ret
= xmlParseURIReference(bas
, (const char *) base
);
2005 val
= xmlSaveUri(ref
);
2010 * the base fragment must be ignored
2012 if (bas
->fragment
!= NULL
) {
2013 xmlFree(bas
->fragment
);
2014 bas
->fragment
= NULL
;
2016 val
= xmlSaveUri(bas
);
2021 * 2) If the path component is empty and the scheme, authority, and
2022 * query components are undefined, then it is a reference to the
2023 * current document and we are done. Otherwise, the reference URI's
2024 * query and fragment components are defined as found (or not found)
2025 * within the URI reference and not inherited from the base URI.
2027 * NOTE that in modern browsers, the parsing differs from the above
2028 * in the following aspect: the query component is allowed to be
2029 * defined while still treating this as a reference to the current
2032 res
= xmlCreateURI();
2035 if ((ref
->scheme
== NULL
) && (ref
->path
== NULL
) &&
2036 ((ref
->authority
== NULL
) && (ref
->server
== NULL
))) {
2037 if (bas
->scheme
!= NULL
)
2038 res
->scheme
= xmlMemStrdup(bas
->scheme
);
2039 if (bas
->authority
!= NULL
)
2040 res
->authority
= xmlMemStrdup(bas
->authority
);
2041 else if (bas
->server
!= NULL
) {
2042 res
->server
= xmlMemStrdup(bas
->server
);
2043 if (bas
->user
!= NULL
)
2044 res
->user
= xmlMemStrdup(bas
->user
);
2045 res
->port
= bas
->port
;
2047 if (bas
->path
!= NULL
)
2048 res
->path
= xmlMemStrdup(bas
->path
);
2049 if (ref
->query_raw
!= NULL
)
2050 res
->query_raw
= xmlMemStrdup (ref
->query_raw
);
2051 else if (ref
->query
!= NULL
)
2052 res
->query
= xmlMemStrdup(ref
->query
);
2053 else if (bas
->query_raw
!= NULL
)
2054 res
->query_raw
= xmlMemStrdup(bas
->query_raw
);
2055 else if (bas
->query
!= NULL
)
2056 res
->query
= xmlMemStrdup(bas
->query
);
2057 if (ref
->fragment
!= NULL
)
2058 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2063 * 3) If the scheme component is defined, indicating that the reference
2064 * starts with a scheme name, then the reference is interpreted as an
2065 * absolute URI and we are done. Otherwise, the reference URI's
2066 * scheme is inherited from the base URI's scheme component.
2068 if (ref
->scheme
!= NULL
) {
2069 val
= xmlSaveUri(ref
);
2072 if (bas
->scheme
!= NULL
)
2073 res
->scheme
= xmlMemStrdup(bas
->scheme
);
2075 if (ref
->query_raw
!= NULL
)
2076 res
->query_raw
= xmlMemStrdup(ref
->query_raw
);
2077 else if (ref
->query
!= NULL
)
2078 res
->query
= xmlMemStrdup(ref
->query
);
2079 if (ref
->fragment
!= NULL
)
2080 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2083 * 4) If the authority component is defined, then the reference is a
2084 * network-path and we skip to step 7. Otherwise, the reference
2085 * URI's authority is inherited from the base URI's authority
2086 * component, which will also be undefined if the URI scheme does not
2087 * use an authority component.
2089 if ((ref
->authority
!= NULL
) || (ref
->server
!= NULL
)) {
2090 if (ref
->authority
!= NULL
)
2091 res
->authority
= xmlMemStrdup(ref
->authority
);
2093 res
->server
= xmlMemStrdup(ref
->server
);
2094 if (ref
->user
!= NULL
)
2095 res
->user
= xmlMemStrdup(ref
->user
);
2096 res
->port
= ref
->port
;
2098 if (ref
->path
!= NULL
)
2099 res
->path
= xmlMemStrdup(ref
->path
);
2102 if (bas
->authority
!= NULL
)
2103 res
->authority
= xmlMemStrdup(bas
->authority
);
2104 else if (bas
->server
!= NULL
) {
2105 res
->server
= xmlMemStrdup(bas
->server
);
2106 if (bas
->user
!= NULL
)
2107 res
->user
= xmlMemStrdup(bas
->user
);
2108 res
->port
= bas
->port
;
2112 * 5) If the path component begins with a slash character ("/"), then
2113 * the reference is an absolute-path and we skip to step 7.
2115 if ((ref
->path
!= NULL
) && (ref
->path
[0] == '/')) {
2116 res
->path
= xmlMemStrdup(ref
->path
);
2122 * 6) If this step is reached, then we are resolving a relative-path
2123 * reference. The relative path needs to be merged with the base
2124 * URI's path. Although there are many ways to do this, we will
2125 * describe a simple method using a separate string buffer.
2127 * Allocate a buffer large enough for the result string.
2129 len
= 2; /* extra / and 0 */
2130 if (ref
->path
!= NULL
)
2131 len
+= strlen(ref
->path
);
2132 if (bas
->path
!= NULL
)
2133 len
+= strlen(bas
->path
);
2134 res
->path
= (char *) xmlMallocAtomic(len
);
2135 if (res
->path
== NULL
) {
2136 xmlGenericError(xmlGenericErrorContext
,
2137 "xmlBuildURI: out of memory\n");
2143 * a) All but the last segment of the base URI's path component is
2144 * copied to the buffer. In other words, any characters after the
2145 * last (right-most) slash character, if any, are excluded.
2149 if (bas
->path
!= NULL
) {
2150 while (bas
->path
[cur
] != 0) {
2151 while ((bas
->path
[cur
] != 0) && (bas
->path
[cur
] != '/'))
2153 if (bas
->path
[cur
] == 0)
2158 res
->path
[out
] = bas
->path
[out
];
2166 * b) The reference's path component is appended to the buffer
2169 if (ref
->path
!= NULL
&& ref
->path
[0] != 0) {
2172 * Ensure the path includes a '/'
2174 if ((out
== 0) && (bas
->server
!= NULL
))
2175 res
->path
[out
++] = '/';
2176 while (ref
->path
[indx
] != 0) {
2177 res
->path
[out
++] = ref
->path
[indx
++];
2183 * Steps c) to h) are really path normalization steps
2185 xmlNormalizeURIPath(res
->path
);
2190 * 7) The resulting URI components, including any inherited from the
2191 * base URI, are recombined to give the absolute form of the URI
2194 val
= xmlSaveUri(res
);
2207 * xmlBuildRelativeURI:
2208 * @URI: the URI reference under consideration
2209 * @base: the base value
2211 * Expresses the URI of the reference in terms relative to the
2212 * base. Some examples of this operation include:
2213 * base = "http://site1.com/docs/book1.html"
2214 * URI input URI returned
2215 * docs/pic1.gif pic1.gif
2216 * docs/img/pic1.gif img/pic1.gif
2217 * img/pic1.gif ../img/pic1.gif
2218 * http://site1.com/docs/pic1.gif pic1.gif
2219 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2221 * base = "docs/book1.html"
2222 * URI input URI returned
2223 * docs/pic1.gif pic1.gif
2224 * docs/img/pic1.gif img/pic1.gif
2225 * img/pic1.gif ../img/pic1.gif
2226 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2229 * Note: if the URI reference is really weird or complicated, it may be
2230 * worthwhile to first convert it into a "nice" one by calling
2231 * xmlBuildURI (using 'base') before calling this routine,
2232 * since this routine (for reasonable efficiency) assumes URI has
2233 * already been through some validation.
2235 * Returns a new URI string (to be freed by the caller) or NULL in case
2239 xmlBuildRelativeURI (const xmlChar
* URI
, const xmlChar
* base
)
2241 xmlChar
*val
= NULL
;
2247 xmlURIPtr ref
= NULL
;
2248 xmlURIPtr bas
= NULL
;
2249 xmlChar
*bptr
, *uptr
, *vptr
;
2250 int remove_path
= 0;
2252 if ((URI
== NULL
) || (*URI
== 0))
2256 * First parse URI into a standard form
2258 ref
= xmlCreateURI ();
2261 /* If URI not already in "relative" form */
2262 if (URI
[0] != '.') {
2263 ret
= xmlParseURIReference (ref
, (const char *) URI
);
2265 goto done
; /* Error in URI, return NULL */
2267 ref
->path
= (char *)xmlStrdup(URI
);
2270 * Next parse base into the same standard form
2272 if ((base
== NULL
) || (*base
== 0)) {
2273 val
= xmlStrdup (URI
);
2276 bas
= xmlCreateURI ();
2279 if (base
[0] != '.') {
2280 ret
= xmlParseURIReference (bas
, (const char *) base
);
2282 goto done
; /* Error in base, return NULL */
2284 bas
->path
= (char *)xmlStrdup(base
);
2287 * If the scheme / server on the URI differs from the base,
2288 * just return the URI
2290 if ((ref
->scheme
!= NULL
) &&
2291 ((bas
->scheme
== NULL
) ||
2292 (xmlStrcmp ((xmlChar
*)bas
->scheme
, (xmlChar
*)ref
->scheme
)) ||
2293 (xmlStrcmp ((xmlChar
*)bas
->server
, (xmlChar
*)ref
->server
)))) {
2294 val
= xmlStrdup (URI
);
2297 if (xmlStrEqual((xmlChar
*)bas
->path
, (xmlChar
*)ref
->path
)) {
2298 val
= xmlStrdup(BAD_CAST
"");
2301 if (bas
->path
== NULL
) {
2302 val
= xmlStrdup((xmlChar
*)ref
->path
);
2305 if (ref
->path
== NULL
) {
2306 ref
->path
= (char *) "/";
2311 * At this point (at last!) we can compare the two paths
2313 * First we take care of the special case where either of the
2314 * two path components may be missing (bug 316224)
2316 if (bas
->path
== NULL
) {
2317 if (ref
->path
!= NULL
) {
2318 uptr
= (xmlChar
*) ref
->path
;
2321 /* exception characters from xmlSaveUri */
2322 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2326 bptr
= (xmlChar
*)bas
->path
;
2327 if (ref
->path
== NULL
) {
2328 for (ix
= 0; bptr
[ix
] != 0; ix
++) {
2329 if (bptr
[ix
] == '/')
2333 len
= 1; /* this is for a string terminator only */
2336 * Next we compare the two strings and find where they first differ
2338 if ((ref
->path
[pos
] == '.') && (ref
->path
[pos
+1] == '/'))
2340 if ((*bptr
== '.') && (bptr
[1] == '/'))
2342 else if ((*bptr
== '/') && (ref
->path
[pos
] != '/'))
2344 while ((bptr
[pos
] == ref
->path
[pos
]) && (bptr
[pos
] != 0))
2347 if (bptr
[pos
] == ref
->path
[pos
]) {
2348 val
= xmlStrdup(BAD_CAST
"");
2349 goto done
; /* (I can't imagine why anyone would do this) */
2353 * In URI, "back up" to the last '/' encountered. This will be the
2354 * beginning of the "unique" suffix of URI
2357 if ((ref
->path
[ix
] == '/') && (ix
> 0))
2359 else if ((ref
->path
[ix
] == 0) && (ix
> 1) && (ref
->path
[ix
- 1] == '/'))
2361 for (; ix
> 0; ix
--) {
2362 if (ref
->path
[ix
] == '/')
2366 uptr
= (xmlChar
*)ref
->path
;
2369 uptr
= (xmlChar
*)&ref
->path
[ix
];
2373 * In base, count the number of '/' from the differing point
2375 if (bptr
[pos
] != ref
->path
[pos
]) {/* check for trivial URI == base */
2376 for (; bptr
[ix
] != 0; ix
++) {
2377 if (bptr
[ix
] == '/')
2381 len
= xmlStrlen (uptr
) + 1;
2386 /* exception characters from xmlSaveUri */
2387 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2392 * Allocate just enough space for the returned string -
2393 * length of the remainder of the URI, plus enough space
2394 * for the "../" groups, plus one for the terminator
2396 val
= (xmlChar
*) xmlMalloc (len
+ 3 * nbslash
);
2398 xmlGenericError(xmlGenericErrorContext
,
2399 "xmlBuildRelativeURI: out of memory\n");
2404 * Put in as many "../" as needed
2406 for (; nbslash
>0; nbslash
--) {
2412 * Finish up with the end of the URI
2415 if ((vptr
> val
) && (len
> 0) &&
2416 (uptr
[0] == '/') && (vptr
[-1] == '/')) {
2417 memcpy (vptr
, uptr
+ 1, len
- 1);
2420 memcpy (vptr
, uptr
, len
);
2427 /* escape the freshly-built path */
2429 /* exception characters from xmlSaveUri */
2430 val
= xmlURIEscapeStr(vptr
, BAD_CAST
"/;&=+$,");
2435 * Free the working variables
2437 if (remove_path
!= 0)
2449 * @path: the resource locator in a filesystem notation
2451 * Constructs a canonic path from the specified path.
2453 * Returns a new canonic path, or a duplicate of the path parameter if the
2454 * construction fails. The caller is responsible for freeing the memory occupied
2455 * by the returned string. If there is insufficient memory available, or the
2456 * argument is NULL, the function returns NULL.
2458 #define IS_WINDOWS_PATH(p) \
2460 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2461 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2462 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2464 xmlCanonicPath(const xmlChar
*path
)
2467 * For Windows implementations, additional work needs to be done to
2468 * replace backslashes in pathnames with "forward slashes"
2470 #if defined(_WIN32) && !defined(__CYGWIN__)
2477 const xmlChar
*absuri
;
2482 /* sanitize filename starting with // so it can be used as URI */
2483 if ((path
[0] == '/') && (path
[1] == '/') && (path
[2] != '/'))
2486 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2488 return xmlStrdup(path
);
2491 /* Check if this is an "absolute uri" */
2492 absuri
= xmlStrstr(path
, BAD_CAST
"://");
2493 if (absuri
!= NULL
) {
2499 * this looks like an URI where some parts have not been
2500 * escaped leading to a parsing problem. Check that the first
2501 * part matches a protocol.
2504 /* Bypass if first part (part before the '://') is > 20 chars */
2505 if ((l
<= 0) || (l
> 20))
2506 goto path_processing
;
2507 /* Bypass if any non-alpha characters are present in first part */
2508 for (j
= 0;j
< l
;j
++) {
2510 if (!(((c
>= 'a') && (c
<= 'z')) || ((c
>= 'A') && (c
<= 'Z'))))
2511 goto path_processing
;
2514 /* Escape all except the characters specified in the supplied path */
2515 escURI
= xmlURIEscapeStr(path
, BAD_CAST
":/?_.#&;=");
2516 if (escURI
!= NULL
) {
2517 /* Try parsing the escaped path */
2518 uri
= xmlParseURI((const char *) escURI
);
2519 /* If successful, return the escaped string */
2528 /* For Windows implementations, replace backslashes with 'forward slashes' */
2529 #if defined(_WIN32) && !defined(__CYGWIN__)
2531 * Create a URI structure
2533 uri
= xmlCreateURI();
2534 if (uri
== NULL
) { /* Guard against 'out of memory' */
2538 len
= xmlStrlen(path
);
2539 if ((len
> 2) && IS_WINDOWS_PATH(path
)) {
2540 /* make the scheme 'file' */
2541 uri
->scheme
= xmlStrdup(BAD_CAST
"file");
2542 /* allocate space for leading '/' + path + string terminator */
2543 uri
->path
= xmlMallocAtomic(len
+ 2);
2544 if (uri
->path
== NULL
) {
2545 xmlFreeURI(uri
); /* Guard agains 'out of memory' */
2548 /* Put in leading '/' plus path */
2551 strncpy(p
, path
, len
+ 1);
2553 uri
->path
= xmlStrdup(path
);
2554 if (uri
->path
== NULL
) {
2560 /* Now change all occurrences of '\' to '/' */
2561 while (*p
!= '\0') {
2567 if (uri
->scheme
== NULL
) {
2568 ret
= xmlStrdup((const xmlChar
*) uri
->path
);
2570 ret
= xmlSaveUri(uri
);
2575 ret
= xmlStrdup((const xmlChar
*) path
);
2582 * @path: the resource locator in a filesystem notation
2584 * Constructs an URI expressing the existing path
2586 * Returns a new URI, or a duplicate of the path parameter if the
2587 * construction fails. The caller is responsible for freeing the memory
2588 * occupied by the returned string. If there is insufficient memory available,
2589 * or the argument is NULL, the function returns NULL.
2592 xmlPathToURI(const xmlChar
*path
)
2601 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2603 return xmlStrdup(path
);
2605 cal
= xmlCanonicPath(path
);
2608 #if defined(_WIN32) && !defined(__CYGWIN__)
2609 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2610 If 'cal' is a valid URI already then we are done here, as continuing would make
2612 if ((uri
= xmlParseURI((const char *) cal
)) != NULL
) {
2616 /* 'cal' can contain a relative path with backslashes. If that is processed
2617 by xmlSaveURI, they will be escaped and the external entity loader machinery
2618 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2620 while (*ret
!= '\0') {
2626 memset(&temp
, 0, sizeof(temp
));
2627 temp
.path
= (char *) cal
;
2628 ret
= xmlSaveUri(&temp
);
2633 #include "elfgcchack.h"