2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
24 * The definition of the URI regexp in the above RFC has no size limit
25 * In practice they are usually relatively short except for the
26 * data URI scheme as defined in RFC 2397. Even for data URI the usual
27 * maximum size before hitting random practical limits is around 64 KB
28 * and 4KB is usually a maximum admitted limit for proper operations.
29 * The value below is more a security limit than anything else and
30 * really should never be hit by 'normal' operations
31 * Set to 1 MByte in 2012, this is only enforced on output
/*
 * Upper bound (1 MByte) on the size of a serialized URI.  The value is
 * parenthesized so the macro expands to a single operand in any
 * expression, e.g. as a divisor.
 */
#define MAX_URI_LENGTH (1024 * 1024)
36 xmlURIErrMemory(const char *extra
)
39 __xmlRaiseError(NULL
, NULL
, NULL
,
40 NULL
, NULL
, XML_FROM_URI
,
41 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
42 extra
, NULL
, NULL
, 0, 0,
43 "Memory allocation failed : %s\n", extra
);
45 __xmlRaiseError(NULL
, NULL
, NULL
,
46 NULL
, NULL
, XML_FROM_URI
,
47 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
48 NULL
, NULL
, NULL
, 0, 0,
49 "Memory allocation failed\n");
52 static void xmlCleanURI(xmlURIPtr uri
);
55 * Old rule from 2396 used in legacy handling code
56 * alpha = lowalpha | upalpha
58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
62 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
63 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
64 * "u" | "v" | "w" | "x" | "y" | "z"
67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
70 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
71 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
72 * "U" | "V" | "W" | "X" | "Y" | "Z"
74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
80 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
85 * alphanum = alpha | digit
88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
91 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
95 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
96 ((x) == '(') || ((x) == ')'))
99 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
102 #define IS_UNWISE(p) \
103 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
104 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
105 ((*(p) == ']')) || ((*(p) == '`')))
107 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
112 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
113 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
117 * unreserved = alphanum | mark
120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
123 * Skip to next pointer char, handle escaped sequences
/*
 * Advance p by one character, or by three when it points at a '%'
 * (the start of an escaped sequence).  The argument is parenthesized so
 * that non-trivial lvalues such as *pp are advanced correctly.  It is
 * still evaluated more than once, so it must be free of side effects.
 * NOTE(review): the two characters following '%' are not checked here;
 * callers must make sure they exist before using this macro.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
129 * Productions from the spec.
131 * authority = server | reg_name
132 * reg_name = 1*( unreserved | escaped | "$" | "," |
133 * ";" | ":" | "@" | "&" | "=" | "+" )
135 * path = [ abs_path | opaque_part ]
138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
140 /************************************************************************
144 ************************************************************************/
146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
148 ((*(p) >= 'A') && (*(p) <= 'Z')))
149 #define ISA_HEXDIG(p) \
150 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
151 ((*(p) >= 'A') && (*(p) <= 'F')))
154 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
155 * / "*" / "+" / "," / ";" / "="
157 #define ISA_SUB_DELIM(p) \
158 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
159 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
160 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
161 ((*(p) == '=')) || ((*(p) == '\'')))
164 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
166 #define ISA_GEN_DELIM(p) \
167 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
168 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
172 * reserved = gen-delims / sub-delims
174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
177 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
179 #define ISA_UNRESERVED(p) \
180 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
181 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
184 * pct-encoded = "%" HEXDIG HEXDIG
186 #define ISA_PCT_ENCODED(p) \
187 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
190 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
192 #define ISA_PCHAR(p) \
193 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
194 ((*(p) == ':')) || ((*(p) == '@')))
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
201 * Parse an URI scheme
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 * Returns 0 or the error code
208 xmlParse3986Scheme(xmlURIPtr uri
, const char **str
) {
218 while (ISA_ALPHA(cur
) || ISA_DIGIT(cur
) ||
219 (*cur
== '+') || (*cur
== '-') || (*cur
== '.')) cur
++;
221 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
222 uri
->scheme
= STRNDUP(*str
, cur
- *str
);
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
233 * Parse the fragment part of an URI
235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
241 * Returns 0 or the error code
244 xmlParse3986Fragment(xmlURIPtr uri
, const char **str
)
253 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
254 (*cur
== '[') || (*cur
== ']') ||
255 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
258 if (uri
->fragment
!= NULL
)
259 xmlFree(uri
->fragment
);
260 if (uri
->cleanup
& 2)
261 uri
->fragment
= STRNDUP(*str
, cur
- *str
);
263 uri
->fragment
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
274 * Parse the query part of an URI
278 * Returns 0 or the error code
281 xmlParse3986Query(xmlURIPtr uri
, const char **str
)
290 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
291 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
294 if (uri
->query
!= NULL
)
296 if (uri
->cleanup
& 2)
297 uri
->query
= STRNDUP(*str
, cur
- *str
);
299 uri
->query
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 if (uri
->query_raw
!= NULL
)
305 xmlFree (uri
->query_raw
);
306 uri
->query_raw
= STRNDUP (*str
, cur
- *str
);
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
322 * Returns 0 or the error code
325 xmlParse3986Port(xmlURIPtr uri
, const char **str
)
327 const char *cur
= *str
;
329 if (ISA_DIGIT(cur
)) {
332 while (ISA_DIGIT(cur
)) {
334 uri
->port
= uri
->port
* 10 + (*cur
- '0');
344 * xmlParse3986Userinfo:
345 * @uri: pointer to an URI structure
346 * @str: the string to analyze
348 * Parse a user information part and fill in the appropriate fields
349 * of the @uri structure
351 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353 * Returns 0 or the error code
356 xmlParse3986Userinfo(xmlURIPtr uri
, const char **str
)
361 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) ||
362 ISA_SUB_DELIM(cur
) || (*cur
== ':'))
366 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
367 if (uri
->cleanup
& 2)
368 uri
->user
= STRNDUP(*str
, cur
- *str
);
370 uri
->user
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  At most three leading digits are examined; the
 * octet is rejected when there is no digit at all, when a multi-digit
 * run starts with "0", or when the three-digit value exceeds 255.  The
 * last check is what keeps 256-259 out: the former range test compared
 * the second digit twice and never bounded the third one.
 * @str is only advanced on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* Collect up to three decimal digits; a NUL terminator stops the scan. */
    while ((ndigits < 3) && (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* Only the single-digit production may start with '0'. */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
417 * @uri: pointer to an URI structure
418 * @str: the string to analyze
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
423 * host = IP-literal / IPv4address / reg-name
424 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name = *( unreserved / pct-encoded / sub-delims )
428 * Returns 0 or the error code
431 xmlParse3986Host(xmlURIPtr uri
, const char **str
)
433 const char *cur
= *str
;
438 * IPv6 and future addressing schemes are enclosed between brackets
442 while ((*cur
!= ']') && (*cur
!= 0))
450 * try to parse an IPv4
452 if (ISA_DIGIT(cur
)) {
453 if (xmlParse3986DecOctet(&cur
) != 0)
458 if (xmlParse3986DecOctet(&cur
) != 0)
462 if (xmlParse3986DecOctet(&cur
) != 0)
466 if (xmlParse3986DecOctet(&cur
) != 0)
473 * then this should be a hostname which can be empty
475 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) || ISA_SUB_DELIM(cur
))
479 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
480 uri
->authority
= NULL
;
481 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
483 if (uri
->cleanup
& 2)
484 uri
->server
= STRNDUP(host
, cur
- host
);
486 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
495 * xmlParse3986Authority:
496 * @uri: pointer to an URI structure
497 * @str: the string to analyze
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
502 * authority = [ userinfo "@" ] host [ ":" port ]
504 * Returns 0 or the error code
507 xmlParse3986Authority(xmlURIPtr uri
, const char **str
)
514 * try to parse an userinfo and check for the trailing @
516 ret
= xmlParse3986Userinfo(uri
, &cur
);
517 if ((ret
!= 0) || (*cur
!= '@'))
521 ret
= xmlParse3986Host(uri
, &cur
);
522 if (ret
!= 0) return(ret
);
525 ret
= xmlParse3986Port(uri
, &cur
);
526 if (ret
!= 0) return(ret
);
533 * xmlParse3986Segment:
534 * @str: the string to analyze
535 * @forbid: an optional forbidden character
536 * @empty: allow an empty segment
538 * Parse a segment of a path; unlike the other parsers this helper takes
539 * no @uri argument, so no field of a URI structure is filled in
542 * segment-nz = 1*pchar
543 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
544 * ; non-zero-length segment without any colon ":"
546 * Returns 0 or the error code
549 xmlParse3986Segment(const char **str
, char forbid
, int empty
)
554 if (!ISA_PCHAR(cur
)) {
559 while (ISA_PCHAR(cur
) && (*cur
!= forbid
))
566 * xmlParse3986PathAbEmpty:
567 * @uri: pointer to an URI structure
568 * @str: the string to analyze
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
573 * path-abempty = *( "/" segment )
575 * Returns 0 or the error code
578 xmlParse3986PathAbEmpty(xmlURIPtr uri
, const char **str
)
585 while (*cur
== '/') {
587 ret
= xmlParse3986Segment(&cur
, 0, 1);
588 if (ret
!= 0) return(ret
);
591 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
593 if (uri
->cleanup
& 2)
594 uri
->path
= STRNDUP(*str
, cur
- *str
);
596 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
606 * xmlParse3986PathAbsolute:
607 * @uri: pointer to an URI structure
608 * @str: the string to analyze
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615 * Returns 0 or the error code
618 xmlParse3986PathAbsolute(xmlURIPtr uri
, const char **str
)
628 ret
= xmlParse3986Segment(&cur
, 0, 0);
630 while (*cur
== '/') {
632 ret
= xmlParse3986Segment(&cur
, 0, 1);
633 if (ret
!= 0) return(ret
);
637 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
639 if (uri
->cleanup
& 2)
640 uri
->path
= STRNDUP(*str
, cur
- *str
);
642 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
652 * xmlParse3986PathRootless:
653 * @uri: pointer to an URI structure
654 * @str: the string to analyze
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
659 * path-rootless = segment-nz *( "/" segment )
661 * Returns 0 or the error code
664 xmlParse3986PathRootless(xmlURIPtr uri
, const char **str
)
671 ret
= xmlParse3986Segment(&cur
, 0, 0);
672 if (ret
!= 0) return(ret
);
673 while (*cur
== '/') {
675 ret
= xmlParse3986Segment(&cur
, 0, 1);
676 if (ret
!= 0) return(ret
);
679 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
681 if (uri
->cleanup
& 2)
682 uri
->path
= STRNDUP(*str
, cur
- *str
);
684 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
694 * xmlParse3986PathNoScheme:
695 * @uri: pointer to an URI structure
696 * @str: the string to analyze
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
701 * path-noscheme = segment-nz-nc *( "/" segment )
703 * Returns 0 or the error code
706 xmlParse3986PathNoScheme(xmlURIPtr uri
, const char **str
)
713 ret
= xmlParse3986Segment(&cur
, ':', 0);
714 if (ret
!= 0) return(ret
);
715 while (*cur
== '/') {
717 ret
= xmlParse3986Segment(&cur
, 0, 1);
718 if (ret
!= 0) return(ret
);
721 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
723 if (uri
->cleanup
& 2)
724 uri
->path
= STRNDUP(*str
, cur
- *str
);
726 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
736 * xmlParse3986HierPart:
737 * @uri: pointer to an URI structure
738 * @str: the string to analyze
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
743 * hier-part = "//" authority path-abempty
748 * Returns 0 or the error code
751 xmlParse3986HierPart(xmlURIPtr uri
, const char **str
)
758 if ((*cur
== '/') && (*(cur
+ 1) == '/')) {
760 ret
= xmlParse3986Authority(uri
, &cur
);
761 if (ret
!= 0) return(ret
);
762 if (uri
->server
== NULL
)
764 ret
= xmlParse3986PathAbEmpty(uri
, &cur
);
765 if (ret
!= 0) return(ret
);
768 } else if (*cur
== '/') {
769 ret
= xmlParse3986PathAbsolute(uri
, &cur
);
770 if (ret
!= 0) return(ret
);
771 } else if (ISA_PCHAR(cur
)) {
772 ret
= xmlParse3986PathRootless(uri
, &cur
);
773 if (ret
!= 0) return(ret
);
775 /* path-empty is effectively empty */
777 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
786 * xmlParse3986RelativeRef:
787 * @uri: pointer to an URI structure
788 * @str: the string to analyze
790 * Parse an URI string and fills in the appropriate fields
791 * of the @uri structure
793 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
794 * relative-part = "//" authority path-abempty
799 * Returns 0 or the error code
802 xmlParse3986RelativeRef(xmlURIPtr uri
, const char *str
) {
805 if ((*str
== '/') && (*(str
+ 1) == '/')) {
807 ret
= xmlParse3986Authority(uri
, &str
);
808 if (ret
!= 0) return(ret
);
809 ret
= xmlParse3986PathAbEmpty(uri
, &str
);
810 if (ret
!= 0) return(ret
);
811 } else if (*str
== '/') {
812 ret
= xmlParse3986PathAbsolute(uri
, &str
);
813 if (ret
!= 0) return(ret
);
814 } else if (ISA_PCHAR(str
)) {
815 ret
= xmlParse3986PathNoScheme(uri
, &str
);
816 if (ret
!= 0) return(ret
);
818 /* path-empty is effectively empty */
820 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
827 ret
= xmlParse3986Query(uri
, &str
);
828 if (ret
!= 0) return(ret
);
832 ret
= xmlParse3986Fragment(uri
, &str
);
833 if (ret
!= 0) return(ret
);
845 * @uri: pointer to an URI structure
846 * @str: the string to analyze
848 * Parse an URI string and fills in the appropriate fields
849 * of the @uri structure
851 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853 * Returns 0 or the error code
856 xmlParse3986URI(xmlURIPtr uri
, const char *str
) {
859 ret
= xmlParse3986Scheme(uri
, &str
);
860 if (ret
!= 0) return(ret
);
865 ret
= xmlParse3986HierPart(uri
, &str
);
866 if (ret
!= 0) return(ret
);
869 ret
= xmlParse3986Query(uri
, &str
);
870 if (ret
!= 0) return(ret
);
874 ret
= xmlParse3986Fragment(uri
, &str
);
875 if (ret
!= 0) return(ret
);
885 * xmlParse3986URIReference:
886 * @uri: pointer to an URI structure
887 * @str: the string to analyze
889 * Parse an URI reference string and fills in the appropriate fields
890 * of the @uri structure
892 * URI-reference = URI / relative-ref
894 * Returns 0 or the error code
897 xmlParse3986URIReference(xmlURIPtr uri
, const char *str
) {
905 * Try first to parse absolute refs, then fallback to relative if
908 ret
= xmlParse3986URI(uri
, str
);
911 ret
= xmlParse3986RelativeRef(uri
, str
);
922 * @str: the URI string to analyze
924 * Parse an URI based on RFC 3986
926 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928 * Returns a newly built xmlURIPtr or NULL in case of error
931 xmlParseURI(const char *str
) {
937 uri
= xmlCreateURI();
939 ret
= xmlParse3986URIReference(uri
, str
);
949 * xmlParseURIReference:
950 * @uri: pointer to an URI structure
951 * @str: the string to analyze
953 * Parse an URI reference string based on RFC 3986 and fills in the
954 * appropriate fields of the @uri structure
956 * URI-reference = URI / relative-ref
958 * Returns 0 or the error code
961 xmlParseURIReference(xmlURIPtr uri
, const char *str
) {
962 return(xmlParse3986URIReference(uri
, str
));
967 * @str: the URI string to analyze
968 * @raw: if 1 unescaping of URI pieces are disabled
970 * Parse an URI but allows to keep intact the original fragments.
972 * URI-reference = URI / relative-ref
974 * Returns a newly built xmlURIPtr or NULL in case of error
977 xmlParseURIRaw(const char *str
, int raw
) {
983 uri
= xmlCreateURI();
988 ret
= xmlParseURIReference(uri
, str
);
997 /************************************************************************
999 * Generic URI structure functions *
1001 ************************************************************************/
1006 * Simply creates an empty xmlURI
1008 * Returns the new structure or NULL in case of error
1011 xmlCreateURI(void) {
1014 ret
= (xmlURIPtr
) xmlMalloc(sizeof(xmlURI
));
1016 xmlURIErrMemory("creating URI structure\n");
1019 memset(ret
, 0, sizeof(xmlURI
));
1024 * xmlSaveUriRealloc:
1026 * Function to handle properly a reallocation when saving an URI
1027 * Also imposes some limit on the length of an URI string output
1030 xmlSaveUriRealloc(xmlChar
*ret
, int *max
) {
1034 if (*max
> MAX_URI_LENGTH
) {
1035 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1039 temp
= (xmlChar
*) xmlRealloc(ret
, (tmp
+ 1));
1041 xmlURIErrMemory("saving URI\n");
1050 * @uri: pointer to an xmlURI
1052 * Save the URI as an escaped string
1054 * Returns a new string (to be deallocated by caller)
1057 xmlSaveUri(xmlURIPtr uri
) {
1058 xmlChar
*ret
= NULL
;
1064 if (uri
== NULL
) return(NULL
);
1068 ret
= (xmlChar
*) xmlMallocAtomic((max
+ 1) * sizeof(xmlChar
));
1070 xmlURIErrMemory("saving URI\n");
1075 if (uri
->scheme
!= NULL
) {
1079 temp
= xmlSaveUriRealloc(ret
, &max
);
1080 if (temp
== NULL
) goto mem_error
;
1086 temp
= xmlSaveUriRealloc(ret
, &max
);
1087 if (temp
== NULL
) goto mem_error
;
1092 if (uri
->opaque
!= NULL
) {
1095 if (len
+ 3 >= max
) {
1096 temp
= xmlSaveUriRealloc(ret
, &max
);
1097 if (temp
== NULL
) goto mem_error
;
1100 if (IS_RESERVED(*(p
)) || IS_UNRESERVED(*(p
)))
1103 int val
= *(unsigned char *)p
++;
1104 int hi
= val
/ 0x10, lo
= val
% 0x10;
1106 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1107 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1111 if ((uri
->server
!= NULL
) || (uri
->port
== -1)) {
1112 if (len
+ 3 >= max
) {
1113 temp
= xmlSaveUriRealloc(ret
, &max
);
1114 if (temp
== NULL
) goto mem_error
;
1119 if (uri
->user
!= NULL
) {
1122 if (len
+ 3 >= max
) {
1123 temp
= xmlSaveUriRealloc(ret
, &max
);
1124 if (temp
== NULL
) goto mem_error
;
1127 if ((IS_UNRESERVED(*(p
))) ||
1128 ((*(p
) == ';')) || ((*(p
) == ':')) ||
1129 ((*(p
) == '&')) || ((*(p
) == '=')) ||
1130 ((*(p
) == '+')) || ((*(p
) == '$')) ||
1134 int val
= *(unsigned char *)p
++;
1135 int hi
= val
/ 0x10, lo
= val
% 0x10;
1137 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1138 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1141 if (len
+ 3 >= max
) {
1142 temp
= xmlSaveUriRealloc(ret
, &max
);
1143 if (temp
== NULL
) goto mem_error
;
1148 if (uri
->server
!= NULL
) {
1152 temp
= xmlSaveUriRealloc(ret
, &max
);
1153 if (temp
== NULL
) goto mem_error
;
1158 if (uri
->port
> 0) {
1159 if (len
+ 10 >= max
) {
1160 temp
= xmlSaveUriRealloc(ret
, &max
);
1161 if (temp
== NULL
) goto mem_error
;
1164 len
+= snprintf((char *) &ret
[len
], max
- len
, ":%d", uri
->port
);
1167 } else if (uri
->authority
!= NULL
) {
1168 if (len
+ 3 >= max
) {
1169 temp
= xmlSaveUriRealloc(ret
, &max
);
1170 if (temp
== NULL
) goto mem_error
;
1177 if (len
+ 3 >= max
) {
1178 temp
= xmlSaveUriRealloc(ret
, &max
);
1179 if (temp
== NULL
) goto mem_error
;
1182 if ((IS_UNRESERVED(*(p
))) ||
1183 ((*(p
) == '$')) || ((*(p
) == ',')) || ((*(p
) == ';')) ||
1184 ((*(p
) == ':')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1185 ((*(p
) == '=')) || ((*(p
) == '+')))
1188 int val
= *(unsigned char *)p
++;
1189 int hi
= val
/ 0x10, lo
= val
% 0x10;
1191 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1192 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1195 } else if (uri
->scheme
!= NULL
) {
1196 if (len
+ 3 >= max
) {
1197 temp
= xmlSaveUriRealloc(ret
, &max
);
1198 if (temp
== NULL
) goto mem_error
;
1202 if (uri
->path
!= NULL
) {
1205 * the colon in file:///d: should not be escaped or
1206 * Windows accesses fail later.
1208 if ((uri
->scheme
!= NULL
) &&
1210 (((p
[1] >= 'a') && (p
[1] <= 'z')) ||
1211 ((p
[1] >= 'A') && (p
[1] <= 'Z'))) &&
1213 (xmlStrEqual(BAD_CAST uri
->scheme
, BAD_CAST
"file"))) {
1214 if (len
+ 3 >= max
) {
1215 temp
= xmlSaveUriRealloc(ret
, &max
);
1216 if (temp
== NULL
) goto mem_error
;
1224 if (len
+ 3 >= max
) {
1225 temp
= xmlSaveUriRealloc(ret
, &max
);
1226 if (temp
== NULL
) goto mem_error
;
1229 if ((IS_UNRESERVED(*(p
))) || ((*(p
) == '/')) ||
1230 ((*(p
) == ';')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1231 ((*(p
) == '=')) || ((*(p
) == '+')) || ((*(p
) == '$')) ||
1235 int val
= *(unsigned char *)p
++;
1236 int hi
= val
/ 0x10, lo
= val
% 0x10;
1238 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1239 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1243 if (uri
->query_raw
!= NULL
) {
1244 if (len
+ 1 >= max
) {
1245 temp
= xmlSaveUriRealloc(ret
, &max
);
1246 if (temp
== NULL
) goto mem_error
;
1252 if (len
+ 1 >= max
) {
1253 temp
= xmlSaveUriRealloc(ret
, &max
);
1254 if (temp
== NULL
) goto mem_error
;
1259 } else if (uri
->query
!= NULL
) {
1260 if (len
+ 3 >= max
) {
1261 temp
= xmlSaveUriRealloc(ret
, &max
);
1262 if (temp
== NULL
) goto mem_error
;
1268 if (len
+ 3 >= max
) {
1269 temp
= xmlSaveUriRealloc(ret
, &max
);
1270 if (temp
== NULL
) goto mem_error
;
1273 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1276 int val
= *(unsigned char *)p
++;
1277 int hi
= val
/ 0x10, lo
= val
% 0x10;
1279 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1280 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1285 if (uri
->fragment
!= NULL
) {
1286 if (len
+ 3 >= max
) {
1287 temp
= xmlSaveUriRealloc(ret
, &max
);
1288 if (temp
== NULL
) goto mem_error
;
1294 if (len
+ 3 >= max
) {
1295 temp
= xmlSaveUriRealloc(ret
, &max
);
1296 if (temp
== NULL
) goto mem_error
;
1299 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1302 int val
= *(unsigned char *)p
++;
1303 int hi
= val
/ 0x10, lo
= val
% 0x10;
1305 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1306 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1311 temp
= xmlSaveUriRealloc(ret
, &max
);
1312 if (temp
== NULL
) goto mem_error
;
1325 * @stream: a FILE* for the output
1326 * @uri: pointer to an xmlURI
1328 * Prints the URI in the stream @stream.
1331 xmlPrintURI(FILE *stream
, xmlURIPtr uri
) {
1334 out
= xmlSaveUri(uri
);
1336 fprintf(stream
, "%s", (char *) out
);
1343 * @uri: pointer to an xmlURI
1345 * Make sure the xmlURI struct is free of content
1348 xmlCleanURI(xmlURIPtr uri
) {
1349 if (uri
== NULL
) return;
1351 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1353 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1355 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1357 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1359 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1360 uri
->fragment
= NULL
;
1361 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1363 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1364 uri
->authority
= NULL
;
1365 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1367 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1368 uri
->query_raw
= NULL
;
1373 * @uri: pointer to an xmlURI
1375 * Free up the xmlURI struct
1378 xmlFreeURI(xmlURIPtr uri
) {
1379 if (uri
== NULL
) return;
1381 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1382 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1383 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1384 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1385 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1386 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1387 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1388 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1389 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1393 /************************************************************************
1395 * Helper functions *
1397 ************************************************************************/
1400 * xmlNormalizeURIPath:
1401 * @path: pointer to the path string
1403 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1404 * Section 5.2, steps 6.c through 6.g.
1406 * Normalization occurs directly on the string, no new allocation is done
1408 * Returns 0 or an error code
1411 xmlNormalizeURIPath(char *path
) {
1417 /* Skip all initial "/" chars. We want to get to the beginning of the
1418 * first non-empty segment.
1421 while (cur
[0] == '/')
1426 /* Keep everything we've seen so far. */
1430 * Analyze each segment in sequence for cases (c) and (d).
1432 while (cur
[0] != '\0') {
1434 * c) All occurrences of "./", where "." is a complete path segment,
1435 * are removed from the buffer string.
1437 if ((cur
[0] == '.') && (cur
[1] == '/')) {
1439 /* '//' normalization should be done at this point too */
1440 while (cur
[0] == '/')
1446 * d) If the buffer string ends with "." as a complete path segment,
1447 * that "." is removed.
1449 if ((cur
[0] == '.') && (cur
[1] == '\0'))
1452 /* Otherwise keep the segment. */
1453 while (cur
[0] != '/') {
1456 (out
++)[0] = (cur
++)[0];
1459 while ((cur
[0] == '/') && (cur
[1] == '/'))
1462 (out
++)[0] = (cur
++)[0];
1467 /* Reset to the beginning of the first segment for the next sequence. */
1469 while (cur
[0] == '/')
1475 * Analyze each segment in sequence for cases (e) and (f).
1477 * e) All occurrences of "<segment>/../", where <segment> is a
1478 * complete path segment not equal to "..", are removed from the
1479 * buffer string. Removal of these path segments is performed
1480 * iteratively, removing the leftmost matching pattern on each
1481 * iteration, until no matching pattern remains.
1483 * f) If the buffer string ends with "<segment>/..", where <segment>
1484 * is a complete path segment not equal to "..", that
1485 * "<segment>/.." is removed.
1487 * To satisfy the "iterative" clause in (e), we need to collapse the
1488 * string every time we find something that needs to be removed. Thus,
1489 * we don't need to keep two pointers into the string: we only need a
1490 * "current position" pointer.
1495 /* At the beginning of each iteration of this loop, "cur" points to
1496 * the first character of the segment we want to examine.
1499 /* Find the end of the current segment. */
1501 while ((segp
[0] != '/') && (segp
[0] != '\0'))
1504 /* If this is the last segment, we're done (we need at least two
1505 * segments to meet the criteria for the (e) and (f) cases).
1507 if (segp
[0] == '\0')
1510 /* If the first segment is "..", or if the next segment _isn't_ "..",
1511 * keep this segment and try the next one.
1514 if (((cur
[0] == '.') && (cur
[1] == '.') && (segp
== cur
+3))
1515 || ((segp
[0] != '.') || (segp
[1] != '.')
1516 || ((segp
[2] != '/') && (segp
[2] != '\0')))) {
1521 /* If we get here, remove this segment and the next one and back up
1522 * to the previous segment (if there is one), to implement the
1523 * "iteratively" clause. It's pretty much impossible to back up
1524 * while maintaining two pointers into the buffer, so just compact
1525 * the whole buffer now.
1528 /* If this is the end of the buffer, we're done. */
1529 if (segp
[2] == '\0') {
1533 /* Valgrind complained, strcpy(cur, segp + 3); */
1534 /* string will overlap, do not use strcpy */
1537 while ((*tmp
++ = *segp
++) != 0)
1540 /* If there are no previous segments, then keep going from here. */
1542 while ((segp
> path
) && ((--segp
)[0] == '/'))
1547 /* "segp" is pointing to the end of a previous segment; find its
1548 * start. We need to back up to the previous segment and start
1549 * over with that to handle things like "foo/bar/../..". If we
1550 * don't do this, then on the first pass we'll remove the "bar/..",
1551 * but be pointing at the second ".." so we won't realize we can also
1552 * remove the "foo/..".
1555 while ((cur
> path
) && (cur
[-1] != '/'))
1561 * g) If the resulting buffer string still begins with one or more
1562 * complete path segments of "..", then the reference is
1563 * considered to be in error. Implementations may handle this
1564 * error by retaining these components in the resolved path (i.e.,
1565 * treating them as part of the final URI), by removing them from
1566 * the resolved path (i.e., discarding relative levels above the
1567 * root), or by avoiding traversal of the reference.
1569 * We discard them from the final path.
1571 if (path
[0] == '/') {
1573 while ((cur
[0] == '/') && (cur
[1] == '.') && (cur
[2] == '.')
1574 && ((cur
[3] == '/') || (cur
[3] == '\0')))
1579 while (cur
[0] != '\0')
1580 (out
++)[0] = (cur
++)[0];
1588 static int is_hex(char c
) {
1589 if (((c
>= '0') && (c
<= '9')) ||
1590 ((c
>= 'a') && (c
<= 'f')) ||
1591 ((c
>= 'A') && (c
<= 'F')))
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the remaining
 * input is copied through unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 *
 * When @target is NULL the result is written to a newly allocated buffer of
 * @len + 1 bytes; otherwise it is written to @target, which must have room
 * for @len + 1 bytes.
 *
 * Returns a copy of the string, but unescaped, will return NULL only in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = (int) strlen(str);
    if (len < 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            /*
             * Build the byte in an unsigned accumulator: plain char may be
             * signed, and values above 0x7F must not pass through it while
             * the two nibbles are being combined.
             */
            unsigned int val = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char c = in[i];

                val <<= 4;
                if ((c >= '0') && (c <= '9'))
                    val += (unsigned int) (c - '0');
                else if ((c >= 'a') && (c <= 'f'))
                    val += (unsigned int) (c - 'a') + 10;
                else /* is_hex() guarantees 'A'..'F' here */
                    val += (unsigned int) (c - 'A') + 10;
            }
            *out++ = (char) (unsigned char) val;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1660 * @str: string to escape
1661 * @list: exception list string of chars not to escape
1663 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1664 * and the characters in the exception list.
1666 * Returns a new escaped string or NULL in case of error.
1669 xmlURIEscapeStr(const xmlChar
*str
, const xmlChar
*list
) {
1678 return(xmlStrdup(str
));
1679 len
= xmlStrlen(str
);
1680 if (!(len
> 0)) return(NULL
);
1683 ret
= (xmlChar
*) xmlMallocAtomic(len
);
1685 xmlURIErrMemory("escaping URI value\n");
1688 in
= (const xmlChar
*) str
;
1691 if (len
- out
<= 3) {
1692 temp
= xmlSaveUriRealloc(ret
, &len
);
1694 xmlURIErrMemory("escaping URI value\n");
1703 if ((ch
!= '@') && (!IS_UNRESERVED(ch
)) && (!xmlStrchr(list
, ch
))) {
1708 ret
[out
++] = '0' + val
;
1710 ret
[out
++] = 'A' + val
- 0xA;
1713 ret
[out
++] = '0' + val
;
1715 ret
[out
++] = 'A' + val
- 0xA;
1728 * @str: the string of the URI to escape
1730 * Escaping routine, does not do validity checks !
1731 * It will try to escape the chars needing this, but this is heuristic
1732 * based it's impossible to be sure.
1734 * Returns an copy of the string, but escaped
1737 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1738 * according to RFC2396.
1742 xmlURIEscape(const xmlChar
* str
)
1744 xmlChar
*ret
, *segment
= NULL
;
1748 #define NULLCHK(p) if(!p) { \
1749 xmlURIErrMemory("escaping URI value\n"); \
1756 uri
= xmlCreateURI();
1759 * Allow escaping errors in the unescaped form
1762 ret2
= xmlParseURIReference(uri
, (const char *)str
);
1775 segment
= xmlURIEscapeStr(BAD_CAST uri
->scheme
, BAD_CAST
"+-.");
1777 ret
= xmlStrcat(ret
, segment
);
1778 ret
= xmlStrcat(ret
, BAD_CAST
":");
1782 if (uri
->authority
) {
1784 xmlURIEscapeStr(BAD_CAST uri
->authority
, BAD_CAST
"/?;:@");
1786 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1787 ret
= xmlStrcat(ret
, segment
);
1792 segment
= xmlURIEscapeStr(BAD_CAST uri
->user
, BAD_CAST
";:&=+$,");
1794 ret
= xmlStrcat(ret
,BAD_CAST
"//");
1795 ret
= xmlStrcat(ret
, segment
);
1796 ret
= xmlStrcat(ret
, BAD_CAST
"@");
1801 segment
= xmlURIEscapeStr(BAD_CAST uri
->server
, BAD_CAST
"/?;:@");
1803 if (uri
->user
== NULL
)
1804 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1805 ret
= xmlStrcat(ret
, segment
);
1812 snprintf((char *) port
, 10, "%d", uri
->port
);
1813 ret
= xmlStrcat(ret
, BAD_CAST
":");
1814 ret
= xmlStrcat(ret
, port
);
1819 xmlURIEscapeStr(BAD_CAST uri
->path
, BAD_CAST
":@&=+$,/?;");
1821 ret
= xmlStrcat(ret
, segment
);
1825 if (uri
->query_raw
) {
1826 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1827 ret
= xmlStrcat(ret
, BAD_CAST uri
->query_raw
);
1829 else if (uri
->query
) {
1831 xmlURIEscapeStr(BAD_CAST uri
->query
, BAD_CAST
";/?:@&=+,$");
1833 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1834 ret
= xmlStrcat(ret
, segment
);
1839 segment
= xmlURIEscapeStr(BAD_CAST uri
->opaque
, BAD_CAST
"");
1841 ret
= xmlStrcat(ret
, segment
);
1845 if (uri
->fragment
) {
1846 segment
= xmlURIEscapeStr(BAD_CAST uri
->fragment
, BAD_CAST
"#");
1848 ret
= xmlStrcat(ret
, BAD_CAST
"#");
1849 ret
= xmlStrcat(ret
, segment
);
1859 /************************************************************************
1861 * Public functions *
1863 ************************************************************************/
1867 * @URI: the URI instance found in the document
1868 * @base: the base value
1870 * Computes he final URI of the reference done by checking that
1871 * the given URI is valid, and building the final URI using the
1872 * base URI. This is processed according to section 5.2 of the
1875 * 5.2. Resolving Relative References to Absolute Form
1877 * Returns a new URI string (to be freed by the caller) or NULL in case
1881 xmlBuildURI(const xmlChar
*URI
, const xmlChar
*base
) {
1882 xmlChar
*val
= NULL
;
1883 int ret
, len
, indx
, cur
, out
;
1884 xmlURIPtr ref
= NULL
;
1885 xmlURIPtr bas
= NULL
;
1886 xmlURIPtr res
= NULL
;
1889 * 1) The URI reference is parsed into the potential four components and
1890 * fragment identifier, as described in Section 4.3.
1892 * NOTE that a completely empty URI is treated by modern browsers
1893 * as a reference to "." rather than as a synonym for the current
1894 * URI. Should we do that here?
1900 ref
= xmlCreateURI();
1903 ret
= xmlParseURIReference(ref
, (const char *) URI
);
1910 if ((ref
!= NULL
) && (ref
->scheme
!= NULL
)) {
1912 * The URI is absolute don't modify.
1914 val
= xmlStrdup(URI
);
1920 bas
= xmlCreateURI();
1923 ret
= xmlParseURIReference(bas
, (const char *) base
);
1927 val
= xmlSaveUri(ref
);
1932 * the base fragment must be ignored
1934 if (bas
->fragment
!= NULL
) {
1935 xmlFree(bas
->fragment
);
1936 bas
->fragment
= NULL
;
1938 val
= xmlSaveUri(bas
);
1943 * 2) If the path component is empty and the scheme, authority, and
1944 * query components are undefined, then it is a reference to the
1945 * current document and we are done. Otherwise, the reference URI's
1946 * query and fragment components are defined as found (or not found)
1947 * within the URI reference and not inherited from the base URI.
1949 * NOTE that in modern browsers, the parsing differs from the above
1950 * in the following aspect: the query component is allowed to be
1951 * defined while still treating this as a reference to the current
1954 res
= xmlCreateURI();
1957 if ((ref
->scheme
== NULL
) && (ref
->path
== NULL
) &&
1958 ((ref
->authority
== NULL
) && (ref
->server
== NULL
))) {
1959 if (bas
->scheme
!= NULL
)
1960 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1961 if (bas
->authority
!= NULL
)
1962 res
->authority
= xmlMemStrdup(bas
->authority
);
1963 else if (bas
->server
!= NULL
) {
1964 res
->server
= xmlMemStrdup(bas
->server
);
1965 if (bas
->user
!= NULL
)
1966 res
->user
= xmlMemStrdup(bas
->user
);
1967 res
->port
= bas
->port
;
1969 if (bas
->path
!= NULL
)
1970 res
->path
= xmlMemStrdup(bas
->path
);
1971 if (ref
->query_raw
!= NULL
)
1972 res
->query_raw
= xmlMemStrdup (ref
->query_raw
);
1973 else if (ref
->query
!= NULL
)
1974 res
->query
= xmlMemStrdup(ref
->query
);
1975 else if (bas
->query_raw
!= NULL
)
1976 res
->query_raw
= xmlMemStrdup(bas
->query_raw
);
1977 else if (bas
->query
!= NULL
)
1978 res
->query
= xmlMemStrdup(bas
->query
);
1979 if (ref
->fragment
!= NULL
)
1980 res
->fragment
= xmlMemStrdup(ref
->fragment
);
1985 * 3) If the scheme component is defined, indicating that the reference
1986 * starts with a scheme name, then the reference is interpreted as an
1987 * absolute URI and we are done. Otherwise, the reference URI's
1988 * scheme is inherited from the base URI's scheme component.
1990 if (ref
->scheme
!= NULL
) {
1991 val
= xmlSaveUri(ref
);
1994 if (bas
->scheme
!= NULL
)
1995 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1997 if (ref
->query_raw
!= NULL
)
1998 res
->query_raw
= xmlMemStrdup(ref
->query_raw
);
1999 else if (ref
->query
!= NULL
)
2000 res
->query
= xmlMemStrdup(ref
->query
);
2001 if (ref
->fragment
!= NULL
)
2002 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2005 * 4) If the authority component is defined, then the reference is a
2006 * network-path and we skip to step 7. Otherwise, the reference
2007 * URI's authority is inherited from the base URI's authority
2008 * component, which will also be undefined if the URI scheme does not
2009 * use an authority component.
2011 if ((ref
->authority
!= NULL
) || (ref
->server
!= NULL
)) {
2012 if (ref
->authority
!= NULL
)
2013 res
->authority
= xmlMemStrdup(ref
->authority
);
2015 res
->server
= xmlMemStrdup(ref
->server
);
2016 if (ref
->user
!= NULL
)
2017 res
->user
= xmlMemStrdup(ref
->user
);
2018 res
->port
= ref
->port
;
2020 if (ref
->path
!= NULL
)
2021 res
->path
= xmlMemStrdup(ref
->path
);
2024 if (bas
->authority
!= NULL
)
2025 res
->authority
= xmlMemStrdup(bas
->authority
);
2026 else if (bas
->server
!= NULL
) {
2027 res
->server
= xmlMemStrdup(bas
->server
);
2028 if (bas
->user
!= NULL
)
2029 res
->user
= xmlMemStrdup(bas
->user
);
2030 res
->port
= bas
->port
;
2034 * 5) If the path component begins with a slash character ("/"), then
2035 * the reference is an absolute-path and we skip to step 7.
2037 if ((ref
->path
!= NULL
) && (ref
->path
[0] == '/')) {
2038 res
->path
= xmlMemStrdup(ref
->path
);
2044 * 6) If this step is reached, then we are resolving a relative-path
2045 * reference. The relative path needs to be merged with the base
2046 * URI's path. Although there are many ways to do this, we will
2047 * describe a simple method using a separate string buffer.
2049 * Allocate a buffer large enough for the result string.
2051 len
= 2; /* extra / and 0 */
2052 if (ref
->path
!= NULL
)
2053 len
+= strlen(ref
->path
);
2054 if (bas
->path
!= NULL
)
2055 len
+= strlen(bas
->path
);
2056 res
->path
= (char *) xmlMallocAtomic(len
);
2057 if (res
->path
== NULL
) {
2058 xmlURIErrMemory("resolving URI against base\n");
2064 * a) All but the last segment of the base URI's path component is
2065 * copied to the buffer. In other words, any characters after the
2066 * last (right-most) slash character, if any, are excluded.
2070 if (bas
->path
!= NULL
) {
2071 while (bas
->path
[cur
] != 0) {
2072 while ((bas
->path
[cur
] != 0) && (bas
->path
[cur
] != '/'))
2074 if (bas
->path
[cur
] == 0)
2079 res
->path
[out
] = bas
->path
[out
];
2087 * b) The reference's path component is appended to the buffer
2090 if (ref
->path
!= NULL
&& ref
->path
[0] != 0) {
2093 * Ensure the path includes a '/'
2095 if ((out
== 0) && (bas
->server
!= NULL
))
2096 res
->path
[out
++] = '/';
2097 while (ref
->path
[indx
] != 0) {
2098 res
->path
[out
++] = ref
->path
[indx
++];
2104 * Steps c) to h) are really path normalization steps
2106 xmlNormalizeURIPath(res
->path
);
2111 * 7) The resulting URI components, including any inherited from the
2112 * base URI, are recombined to give the absolute form of the URI
2115 val
= xmlSaveUri(res
);
2128 * xmlBuildRelativeURI:
2129 * @URI: the URI reference under consideration
2130 * @base: the base value
2132 * Expresses the URI of the reference in terms relative to the
2133 * base. Some examples of this operation include:
2134 * base = "http://site1.com/docs/book1.html"
2135 * URI input URI returned
2136 * docs/pic1.gif pic1.gif
2137 * docs/img/pic1.gif img/pic1.gif
2138 * img/pic1.gif ../img/pic1.gif
2139 * http://site1.com/docs/pic1.gif pic1.gif
2140 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2142 * base = "docs/book1.html"
2143 * URI input URI returned
2144 * docs/pic1.gif pic1.gif
2145 * docs/img/pic1.gif img/pic1.gif
2146 * img/pic1.gif ../img/pic1.gif
2147 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2150 * Note: if the URI reference is really wierd or complicated, it may be
2151 * worthwhile to first convert it into a "nice" one by calling
2152 * xmlBuildURI (using 'base') before calling this routine,
2153 * since this routine (for reasonable efficiency) assumes URI has
2154 * already been through some validation.
2156 * Returns a new URI string (to be freed by the caller) or NULL in case
2160 xmlBuildRelativeURI (const xmlChar
* URI
, const xmlChar
* base
)
2162 xmlChar
*val
= NULL
;
2168 xmlURIPtr ref
= NULL
;
2169 xmlURIPtr bas
= NULL
;
2170 xmlChar
*bptr
, *uptr
, *vptr
;
2171 int remove_path
= 0;
2173 if ((URI
== NULL
) || (*URI
== 0))
2177 * First parse URI into a standard form
2179 ref
= xmlCreateURI ();
2182 /* If URI not already in "relative" form */
2183 if (URI
[0] != '.') {
2184 ret
= xmlParseURIReference (ref
, (const char *) URI
);
2186 goto done
; /* Error in URI, return NULL */
2188 ref
->path
= (char *)xmlStrdup(URI
);
2191 * Next parse base into the same standard form
2193 if ((base
== NULL
) || (*base
== 0)) {
2194 val
= xmlStrdup (URI
);
2197 bas
= xmlCreateURI ();
2200 if (base
[0] != '.') {
2201 ret
= xmlParseURIReference (bas
, (const char *) base
);
2203 goto done
; /* Error in base, return NULL */
2205 bas
->path
= (char *)xmlStrdup(base
);
2208 * If the scheme / server on the URI differs from the base,
2209 * just return the URI
2211 if ((ref
->scheme
!= NULL
) &&
2212 ((bas
->scheme
== NULL
) ||
2213 (xmlStrcmp ((xmlChar
*)bas
->scheme
, (xmlChar
*)ref
->scheme
)) ||
2214 (xmlStrcmp ((xmlChar
*)bas
->server
, (xmlChar
*)ref
->server
)))) {
2215 val
= xmlStrdup (URI
);
2218 if (xmlStrEqual((xmlChar
*)bas
->path
, (xmlChar
*)ref
->path
)) {
2219 val
= xmlStrdup(BAD_CAST
"");
2222 if (bas
->path
== NULL
) {
2223 val
= xmlStrdup((xmlChar
*)ref
->path
);
2226 if (ref
->path
== NULL
) {
2227 ref
->path
= (char *) "/";
2232 * At this point (at last!) we can compare the two paths
2234 * First we take care of the special case where either of the
2235 * two path components may be missing (bug 316224)
2237 if (bas
->path
== NULL
) {
2238 if (ref
->path
!= NULL
) {
2239 uptr
= (xmlChar
*) ref
->path
;
2242 /* exception characters from xmlSaveUri */
2243 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2247 bptr
= (xmlChar
*)bas
->path
;
2248 if (ref
->path
== NULL
) {
2249 for (ix
= 0; bptr
[ix
] != 0; ix
++) {
2250 if (bptr
[ix
] == '/')
2254 len
= 1; /* this is for a string terminator only */
2257 * Next we compare the two strings and find where they first differ
2259 if ((ref
->path
[pos
] == '.') && (ref
->path
[pos
+1] == '/'))
2261 if ((*bptr
== '.') && (bptr
[1] == '/'))
2263 else if ((*bptr
== '/') && (ref
->path
[pos
] != '/'))
2265 while ((bptr
[pos
] == ref
->path
[pos
]) && (bptr
[pos
] != 0))
2268 if (bptr
[pos
] == ref
->path
[pos
]) {
2269 val
= xmlStrdup(BAD_CAST
"");
2270 goto done
; /* (I can't imagine why anyone would do this) */
2274 * In URI, "back up" to the last '/' encountered. This will be the
2275 * beginning of the "unique" suffix of URI
2278 if ((ref
->path
[ix
] == '/') && (ix
> 0))
2280 else if ((ref
->path
[ix
] == 0) && (ix
> 1) && (ref
->path
[ix
- 1] == '/'))
2282 for (; ix
> 0; ix
--) {
2283 if (ref
->path
[ix
] == '/')
2287 uptr
= (xmlChar
*)ref
->path
;
2290 uptr
= (xmlChar
*)&ref
->path
[ix
];
2294 * In base, count the number of '/' from the differing point
2296 if (bptr
[pos
] != ref
->path
[pos
]) {/* check for trivial URI == base */
2297 for (; bptr
[ix
] != 0; ix
++) {
2298 if (bptr
[ix
] == '/')
2302 len
= xmlStrlen (uptr
) + 1;
2307 /* exception characters from xmlSaveUri */
2308 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2313 * Allocate just enough space for the returned string -
2314 * length of the remainder of the URI, plus enough space
2315 * for the "../" groups, plus one for the terminator
2317 val
= (xmlChar
*) xmlMalloc (len
+ 3 * nbslash
);
2319 xmlURIErrMemory("building relative URI\n");
2324 * Put in as many "../" as needed
2326 for (; nbslash
>0; nbslash
--) {
2332 * Finish up with the end of the URI
2335 if ((vptr
> val
) && (len
> 0) &&
2336 (uptr
[0] == '/') && (vptr
[-1] == '/')) {
2337 memcpy (vptr
, uptr
+ 1, len
- 1);
2340 memcpy (vptr
, uptr
, len
);
2347 /* escape the freshly-built path */
2349 /* exception characters from xmlSaveUri */
2350 val
= xmlURIEscapeStr(vptr
, BAD_CAST
"/;&=+$,");
2355 * Free the working variables
2357 if (remove_path
!= 0)
2369 * @path: the resource locator in a filesystem notation
2371 * Constructs a canonic path from the specified path.
2373 * Returns a new canonic path, or a duplicate of the path parameter if the
2374 * construction fails. The caller is responsible for freeing the memory occupied
2375 * by the returned string. If there is insufficient memory available, or the
2376 * argument is NULL, the function returns NULL.
2378 #define IS_WINDOWS_PATH(p) \
2380 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2381 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2382 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2384 xmlCanonicPath(const xmlChar
*path
)
2387 * For Windows implementations, additional work needs to be done to
2388 * replace backslashes in pathnames with "forward slashes"
2390 #if defined(_WIN32) && !defined(__CYGWIN__)
2397 const xmlChar
*absuri
;
2404 * We must not change the backslashes to slashes if the the path
2406 * Those paths can be up to 32k characters long.
2407 * Was added specifically for OpenOffice, those paths can't be converted
2410 if ((path
[0] == '\\') && (path
[1] == '\\') && (path
[2] == '?') &&
2412 return xmlStrdup((const xmlChar
*) path
);
2415 /* sanitize filename starting with // so it can be used as URI */
2416 if ((path
[0] == '/') && (path
[1] == '/') && (path
[2] != '/'))
2419 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2421 return xmlStrdup(path
);
2424 /* Check if this is an "absolute uri" */
2425 absuri
= xmlStrstr(path
, BAD_CAST
"://");
2426 if (absuri
!= NULL
) {
2432 * this looks like an URI where some parts have not been
2433 * escaped leading to a parsing problem. Check that the first
2434 * part matches a protocol.
2437 /* Bypass if first part (part before the '://') is > 20 chars */
2438 if ((l
<= 0) || (l
> 20))
2439 goto path_processing
;
2440 /* Bypass if any non-alpha characters are present in first part */
2441 for (j
= 0;j
< l
;j
++) {
2443 if (!(((c
>= 'a') && (c
<= 'z')) || ((c
>= 'A') && (c
<= 'Z'))))
2444 goto path_processing
;
2447 /* Escape all except the characters specified in the supplied path */
2448 escURI
= xmlURIEscapeStr(path
, BAD_CAST
":/?_.#&;=");
2449 if (escURI
!= NULL
) {
2450 /* Try parsing the escaped path */
2451 uri
= xmlParseURI((const char *) escURI
);
2452 /* If successful, return the escaped string */
2461 /* For Windows implementations, replace backslashes with 'forward slashes' */
2462 #if defined(_WIN32) && !defined(__CYGWIN__)
2464 * Create a URI structure
2466 uri
= xmlCreateURI();
2467 if (uri
== NULL
) { /* Guard against 'out of memory' */
2471 len
= xmlStrlen(path
);
2472 if ((len
> 2) && IS_WINDOWS_PATH(path
)) {
2473 /* make the scheme 'file' */
2474 uri
->scheme
= xmlStrdup(BAD_CAST
"file");
2475 /* allocate space for leading '/' + path + string terminator */
2476 uri
->path
= xmlMallocAtomic(len
+ 2);
2477 if (uri
->path
== NULL
) {
2478 xmlFreeURI(uri
); /* Guard agains 'out of memory' */
2481 /* Put in leading '/' plus path */
2484 strncpy(p
, path
, len
+ 1);
2486 uri
->path
= xmlStrdup(path
);
2487 if (uri
->path
== NULL
) {
2493 /* Now change all occurences of '\' to '/' */
2494 while (*p
!= '\0') {
2500 if (uri
->scheme
== NULL
) {
2501 ret
= xmlStrdup((const xmlChar
*) uri
->path
);
2503 ret
= xmlSaveUri(uri
);
2508 ret
= xmlStrdup((const xmlChar
*) path
);
2515 * @path: the resource locator in a filesystem notation
2517 * Constructs an URI expressing the existing path
2519 * Returns a new URI, or a duplicate of the path parameter if the
2520 * construction fails. The caller is responsible for freeing the memory
2521 * occupied by the returned string. If there is insufficient memory available,
2522 * or the argument is NULL, the function returns NULL.
2525 xmlPathToURI(const xmlChar
*path
)
2534 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2536 return xmlStrdup(path
);
2538 cal
= xmlCanonicPath(path
);
2541 #if defined(_WIN32) && !defined(__CYGWIN__)
2542 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2543 If 'cal' is a valid URI allready then we are done here, as continuing would make
2545 if ((uri
= xmlParseURI((const char *) cal
)) != NULL
) {
2549 /* 'cal' can contain a relative path with backslashes. If that is processed
2550 by xmlSaveURI, they will be escaped and the external entity loader machinery
2551 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2553 while (*ret
!= '\0') {
2559 memset(&temp
, 0, sizeof(temp
));
2560 temp
.path
= (char *) cal
;
2561 ret
= xmlSaveUri(&temp
);
2566 #include "elfgcchack.h"