third_party/libxml/src/uri.c

   1 /**
   2  * uri.c: set of generic URI related routines
   3  *
   4  * Reference: RFCs 3986, 2732 and 2373
   5  *
   6  * See Copyright for the status of this software.
   7  *
   8  * daniel@veillard.com
   9  */
  10
  11 #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
  20
  21 /**
  22  * MAX_URI_LENGTH:
  23  *
  24  * The definition of the URI regexp in the above RFC has no size limit
  25  * In practice they are usually relativey short except for the
  26  * data URI scheme as defined in RFC 2397. Even for data URI the usual
  27  * maximum size before hitting random practical limits is around 64 KB
  28  * and 4KB is usually a maximum admitted limit for proper operations.
  29  * The value below is more a security limit than anything else and
  30  * really should never be hit by 'normal' operations
  31  * Set to 1 MByte in 2012, this is only enforced on output
  32  */
  33 #define MAX_URI_LENGTH 1024 * 1024
  34
  35 static void
  36 xmlURIErrMemory(const char *extra)
  37 {
  38     if (extra)
  39         __xmlRaiseError(NULL, NULL, NULL,
  40                         NULL, NULL, XML_FROM_URI,
  41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  42                         extra, NULL, NULL, 0, 0,
  43                         "Memory allocation failed : %s\n", extra);
  44     else
  45         __xmlRaiseError(NULL, NULL, NULL,
  46                         NULL, NULL, XML_FROM_URI,
  47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  48                         NULL, NULL, NULL, 0, 0,
  49                         "Memory allocation failed\n");
  50 }
  51
/*
 * Forward declaration: the RFC 3986 parsing routines below call
 * xmlCleanURI() (before retrying a parse and when a parse fails), so the
 * prototype has to be visible ahead of them.
 */
static void xmlCleanURI(xmlURIPtr uri);
  53
  54 /*
  55  * Old rule from 2396 used in legacy handling code
  56  * alpha    = lowalpha | upalpha
  57  */
  58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  59
  60
  61 /*
  62  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  63  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  64  *            "u" | "v" | "w" | "x" | "y" | "z"
  65  */
  66
  67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  68
  69 /*
  70  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  71  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  72  *           "U" | "V" | "W" | "X" | "Y" | "Z"
  73  */
  74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  75
  76 #ifdef IS_DIGIT
  77 #undef IS_DIGIT
  78 #endif
  79 /*
  80  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  81  */
  82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  83
  84 /*
  85  * alphanum = alpha | digit
  86  */
  87
  88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  89
  90 /*
  91  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  92  */
  93
  94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
  95     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
  96     ((x) == '(') || ((x) == ')'))
  97
  98 /*
  99  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 100  */
 101
 102 #define IS_UNWISE(p)                                                    \
 103       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 104        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 105        ((*(p) == ']')) || ((*(p) == '`')))
 106 /*
 107  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 108  *            "[" | "]"
 109  */
 110
 111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
 112         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
 113         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
 114         ((x) == ']'))
 115
 116 /*
 117  * unreserved = alphanum | mark
 118  */
 119
 120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 121
 122 /*
 123  * Skip to next pointer char, handle escaped sequences
 124  */
 125
 126 #define NEXT(p) ((*p == '%')? p += 3 : p++)
 127
 128 /*
 129  * Productions from the spec.
 130  *
 131  *    authority     = server | reg_name
 132  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 133  *                        ";" | ":" | "@" | "&" | "=" | "+" )
 134  *
 135  * path          = [ abs_path | opaque_part ]
 136  */
 137
 138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
 139
 140 /************************************************************************
 141  *                                                                      *
 142  *                         RFC 3986 parser                              *
 143  *                                                                      *
 144  ************************************************************************/
 145
 146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
 148                       ((*(p) >= 'A') && (*(p) <= 'Z')))
 149 #define ISA_HEXDIG(p)                                                   \
 150        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
 151         ((*(p) >= 'A') && (*(p) <= 'F')))
 152
 153 /*
 154  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 155  *                     / "*" / "+" / "," / ";" / "="
 156  */
 157 #define ISA_SUB_DELIM(p)                                                \
 158       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
 159        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
 160        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
 161        ((*(p) == '=')) || ((*(p) == '\'')))
 162
 163 /*
 164  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 165  */
 166 #define ISA_GEN_DELIM(p)                                                \
 167       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
 168        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
 169        ((*(p) == '@')))
 170
 171 /*
 172  *    reserved      = gen-delims / sub-delims
 173  */
 174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
 175
 176 /*
 177  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 178  */
 179 #define ISA_UNRESERVED(p)                                               \
 180       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
 181        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 182
 183 /*
 184  *    pct-encoded   = "%" HEXDIG HEXDIG
 185  */
 186 #define ISA_PCT_ENCODED(p)                                              \
 187      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 188
 189 /*
 190  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 191  */
 192 #define ISA_PCHAR(p)                                                    \
 193      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
 194       ((*(p) == ':')) || ((*(p) == '@')))
 195
 196 /**
 197  * xmlParse3986Scheme:
 198  * @uri:  pointer to an URI structure
 199  * @str:  pointer to the string to analyze
 200  *
 201  * Parse an URI scheme
 202  *
 203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 204  *
 205  * Returns 0 or the error code
 206  */
 207 static int
 208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
 209     const char *cur;
 210
 211     if (str == NULL)
 212         return(-1);
 213
 214     cur = *str;
 215     if (!ISA_ALPHA(cur))
 216         return(2);
 217     cur++;
 218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
 219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
 220     if (uri != NULL) {
 221         if (uri->scheme != NULL) xmlFree(uri->scheme);
 222         uri->scheme = STRNDUP(*str, cur - *str);
 223     }
 224     *str = cur;
 225     return(0);
 226 }
 227
 228 /**
 229  * xmlParse3986Fragment:
 230  * @uri:  pointer to an URI structure
 231  * @str:  pointer to the string to analyze
 232  *
 233  * Parse the query part of an URI
 234  *
 235  * fragment      = *( pchar / "/" / "?" )
 236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 237  *       in the fragment identifier but this is used very broadly for
 238  *       xpointer scheme selection, so we are allowing it here to not break
 239  *       for example all the DocBook processing chains.
 240  *
 241  * Returns 0 or the error code
 242  */
 243 static int
 244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
 245 {
 246     const char *cur;
 247
 248     if (str == NULL)
 249         return (-1);
 250
 251     cur = *str;
 252
 253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 254            (*cur == '[') || (*cur == ']') ||
 255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 256         NEXT(cur);
 257     if (uri != NULL) {
 258         if (uri->fragment != NULL)
 259             xmlFree(uri->fragment);
 260         if (uri->cleanup & 2)
 261             uri->fragment = STRNDUP(*str, cur - *str);
 262         else
 263             uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
 264     }
 265     *str = cur;
 266     return (0);
 267 }
 268
 269 /**
 270  * xmlParse3986Query:
 271  * @uri:  pointer to an URI structure
 272  * @str:  pointer to the string to analyze
 273  *
 274  * Parse the query part of an URI
 275  *
 276  * query = *uric
 277  *
 278  * Returns 0 or the error code
 279  */
 280 static int
 281 xmlParse3986Query(xmlURIPtr uri, const char **str)
 282 {
 283     const char *cur;
 284
 285     if (str == NULL)
 286         return (-1);
 287
 288     cur = *str;
 289
 290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 292         NEXT(cur);
 293     if (uri != NULL) {
 294         if (uri->query != NULL)
 295             xmlFree(uri->query);
 296         if (uri->cleanup & 2)
 297             uri->query = STRNDUP(*str, cur - *str);
 298         else
 299             uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
 300
 301         /* Save the raw bytes of the query as well.
 302          * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
 303          */
 304         if (uri->query_raw != NULL)
 305             xmlFree (uri->query_raw);
 306         uri->query_raw = STRNDUP (*str, cur - *str);
 307     }
 308     *str = cur;
 309     return (0);
 310 }
 311
 312 /**
 313  * xmlParse3986Port:
 314  * @uri:  pointer to an URI structure
 315  * @str:  the string to analyze
 316  *
 317  * Parse a port  part and fills in the appropriate fields
 318  * of the @uri structure
 319  *
 320  * port          = *DIGIT
 321  *
 322  * Returns 0 or the error code
 323  */
 324 static int
 325 xmlParse3986Port(xmlURIPtr uri, const char **str)
 326 {
 327     const char *cur = *str;
 328
 329     if (ISA_DIGIT(cur)) {
 330         if (uri != NULL)
 331             uri->port = 0;
 332         while (ISA_DIGIT(cur)) {
 333             if (uri != NULL)
 334                 uri->port = uri->port * 10 + (*cur - '0');
 335             cur++;
 336         }
 337         *str = cur;
 338         return(0);
 339     }
 340     return(1);
 341 }
 342
 343 /**
 344  * xmlParse3986Userinfo:
 345  * @uri:  pointer to an URI structure
 346  * @str:  the string to analyze
 347  *
 348  * Parse an user informations part and fills in the appropriate fields
 349  * of the @uri structure
 350  *
 351  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 352  *
 353  * Returns 0 or the error code
 354  */
 355 static int
 356 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
 357 {
 358     const char *cur;
 359
 360     cur = *str;
 361     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
 362            ISA_SUB_DELIM(cur) || (*cur == ':'))
 363         NEXT(cur);
 364     if (*cur == '@') {
 365         if (uri != NULL) {
 366             if (uri->user != NULL) xmlFree(uri->user);
 367             if (uri->cleanup & 2)
 368                 uri->user = STRNDUP(*str, cur - *str);
 369             else
 370                 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
 371         }
 372         *str = cur;
 373         return(0);
 374     }
 375     return(1);
 376 }
 377
 378 /**
 379  * xmlParse3986DecOctet:
 380  * @str:  the string to analyze
 381  *
 382  *    dec-octet     = DIGIT                 ; 0-9
 383  *                  / %x31-39 DIGIT         ; 10-99
 384  *                  / "1" 2DIGIT            ; 100-199
 385  *                  / "2" %x30-34 DIGIT     ; 200-249
 386  *                  / "25" %x30-35          ; 250-255
 387  *
 388  * Skip a dec-octet.
 389  *
 390  * Returns 0 if found and skipped, 1 otherwise
 391  */
 392 static int
 393 xmlParse3986DecOctet(const char **str) {
 394     const char *cur = *str;
 395
 396     if (!(ISA_DIGIT(cur)))
 397         return(1);
 398     if (!ISA_DIGIT(cur+1))
 399         cur++;
 400     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
 401         cur += 2;
 402     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
 403         cur += 3;
 404     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
 405              (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
 406         cur += 3;
 407     else if ((*cur == '2') && (*(cur + 1) == '5') &&
 408              (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
 409         cur += 3;
 410     else
 411         return(1);
 412     *str = cur;
 413     return(0);
 414 }
 415 /**
 416  * xmlParse3986Host:
 417  * @uri:  pointer to an URI structure
 418  * @str:  the string to analyze
 419  *
 420  * Parse an host part and fills in the appropriate fields
 421  * of the @uri structure
 422  *
 423  * host          = IP-literal / IPv4address / reg-name
 424  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 425  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 426  * reg-name      = *( unreserved / pct-encoded / sub-delims )
 427  *
 428  * Returns 0 or the error code
 429  */
 430 static int
 431 xmlParse3986Host(xmlURIPtr uri, const char **str)
 432 {
 433     const char *cur = *str;
 434     const char *host;
 435
 436     host = cur;
 437     /*
 438      * IPv6 and future adressing scheme are enclosed between brackets
 439      */
 440     if (*cur == '[') {
 441         cur++;
 442         while ((*cur != ']') && (*cur != 0))
 443             cur++;
 444         if (*cur != ']')
 445             return(1);
 446         cur++;
 447         goto found;
 448     }
 449     /*
 450      * try to parse an IPv4
 451      */
 452     if (ISA_DIGIT(cur)) {
 453         if (xmlParse3986DecOctet(&cur) != 0)
 454             goto not_ipv4;
 455         if (*cur != '.')
 456             goto not_ipv4;
 457         cur++;
 458         if (xmlParse3986DecOctet(&cur) != 0)
 459             goto not_ipv4;
 460         if (*cur != '.')
 461             goto not_ipv4;
 462         if (xmlParse3986DecOctet(&cur) != 0)
 463             goto not_ipv4;
 464         if (*cur != '.')
 465             goto not_ipv4;
 466         if (xmlParse3986DecOctet(&cur) != 0)
 467             goto not_ipv4;
 468         goto found;
 469 not_ipv4:
 470         cur = *str;
 471     }
 472     /*
 473      * then this should be a hostname which can be empty
 474      */
 475     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
 476         NEXT(cur);
 477 found:
 478     if (uri != NULL) {
 479         if (uri->authority != NULL) xmlFree(uri->authority);
 480         uri->authority = NULL;
 481         if (uri->server != NULL) xmlFree(uri->server);
 482         if (cur != host) {
 483             if (uri->cleanup & 2)
 484                 uri->server = STRNDUP(host, cur - host);
 485             else
 486                 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
 487         } else
 488             uri->server = NULL;
 489     }
 490     *str = cur;
 491     return(0);
 492 }
 493
 494 /**
 495  * xmlParse3986Authority:
 496  * @uri:  pointer to an URI structure
 497  * @str:  the string to analyze
 498  *
 499  * Parse an authority part and fills in the appropriate fields
 500  * of the @uri structure
 501  *
 502  * authority     = [ userinfo "@" ] host [ ":" port ]
 503  *
 504  * Returns 0 or the error code
 505  */
 506 static int
 507 xmlParse3986Authority(xmlURIPtr uri, const char **str)
 508 {
 509     const char *cur;
 510     int ret;
 511
 512     cur = *str;
 513     /*
 514      * try to parse an userinfo and check for the trailing @
 515      */
 516     ret = xmlParse3986Userinfo(uri, &cur);
 517     if ((ret != 0) || (*cur != '@'))
 518         cur = *str;
 519     else
 520         cur++;
 521     ret = xmlParse3986Host(uri, &cur);
 522     if (ret != 0) return(ret);
 523     if (*cur == ':') {
 524         cur++;
 525         ret = xmlParse3986Port(uri, &cur);
 526         if (ret != 0) return(ret);
 527     }
 528     *str = cur;
 529     return(0);
 530 }
 531
 532 /**
 533  * xmlParse3986Segment:
 534  * @str:  the string to analyze
 535  * @forbid: an optional forbidden character
 536  * @empty: allow an empty segment
 537  *
 538  * Parse a segment and fills in the appropriate fields
 539  * of the @uri structure
 540  *
 541  * segment       = *pchar
 542  * segment-nz    = 1*pchar
 543  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 544  *               ; non-zero-length segment without any colon ":"
 545  *
 546  * Returns 0 or the error code
 547  */
 548 static int
 549 xmlParse3986Segment(const char **str, char forbid, int empty)
 550 {
 551     const char *cur;
 552
 553     cur = *str;
 554     if (!ISA_PCHAR(cur)) {
 555         if (empty)
 556             return(0);
 557         return(1);
 558     }
 559     while (ISA_PCHAR(cur) && (*cur != forbid))
 560         NEXT(cur);
 561     *str = cur;
 562     return (0);
 563 }
 564
 565 /**
 566  * xmlParse3986PathAbEmpty:
 567  * @uri:  pointer to an URI structure
 568  * @str:  the string to analyze
 569  *
 570  * Parse an path absolute or empty and fills in the appropriate fields
 571  * of the @uri structure
 572  *
 573  * path-abempty  = *( "/" segment )
 574  *
 575  * Returns 0 or the error code
 576  */
 577 static int
 578 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
 579 {
 580     const char *cur;
 581     int ret;
 582
 583     cur = *str;
 584
 585     while (*cur == '/') {
 586         cur++;
 587         ret = xmlParse3986Segment(&cur, 0, 1);
 588         if (ret != 0) return(ret);
 589     }
 590     if (uri != NULL) {
 591         if (uri->path != NULL) xmlFree(uri->path);
 592         if (*str != cur) {
 593             if (uri->cleanup & 2)
 594                 uri->path = STRNDUP(*str, cur - *str);
 595             else
 596                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 597         } else {
 598             uri->path = NULL;
 599         }
 600     }
 601     *str = cur;
 602     return (0);
 603 }
 604
 605 /**
 606  * xmlParse3986PathAbsolute:
 607  * @uri:  pointer to an URI structure
 608  * @str:  the string to analyze
 609  *
 610  * Parse an path absolute and fills in the appropriate fields
 611  * of the @uri structure
 612  *
 613  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 614  *
 615  * Returns 0 or the error code
 616  */
 617 static int
 618 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
 619 {
 620     const char *cur;
 621     int ret;
 622
 623     cur = *str;
 624
 625     if (*cur != '/')
 626         return(1);
 627     cur++;
 628     ret = xmlParse3986Segment(&cur, 0, 0);
 629     if (ret == 0) {
 630         while (*cur == '/') {
 631             cur++;
 632             ret = xmlParse3986Segment(&cur, 0, 1);
 633             if (ret != 0) return(ret);
 634         }
 635     }
 636     if (uri != NULL) {
 637         if (uri->path != NULL) xmlFree(uri->path);
 638         if (cur != *str) {
 639             if (uri->cleanup & 2)
 640                 uri->path = STRNDUP(*str, cur - *str);
 641             else
 642                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 643         } else {
 644             uri->path = NULL;
 645         }
 646     }
 647     *str = cur;
 648     return (0);
 649 }
 650
 651 /**
 652  * xmlParse3986PathRootless:
 653  * @uri:  pointer to an URI structure
 654  * @str:  the string to analyze
 655  *
 656  * Parse an path without root and fills in the appropriate fields
 657  * of the @uri structure
 658  *
 659  * path-rootless = segment-nz *( "/" segment )
 660  *
 661  * Returns 0 or the error code
 662  */
 663 static int
 664 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
 665 {
 666     const char *cur;
 667     int ret;
 668
 669     cur = *str;
 670
 671     ret = xmlParse3986Segment(&cur, 0, 0);
 672     if (ret != 0) return(ret);
 673     while (*cur == '/') {
 674         cur++;
 675         ret = xmlParse3986Segment(&cur, 0, 1);
 676         if (ret != 0) return(ret);
 677     }
 678     if (uri != NULL) {
 679         if (uri->path != NULL) xmlFree(uri->path);
 680         if (cur != *str) {
 681             if (uri->cleanup & 2)
 682                 uri->path = STRNDUP(*str, cur - *str);
 683             else
 684                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 685         } else {
 686             uri->path = NULL;
 687         }
 688     }
 689     *str = cur;
 690     return (0);
 691 }
 692
 693 /**
 694  * xmlParse3986PathNoScheme:
 695  * @uri:  pointer to an URI structure
 696  * @str:  the string to analyze
 697  *
 698  * Parse an path which is not a scheme and fills in the appropriate fields
 699  * of the @uri structure
 700  *
 701  * path-noscheme = segment-nz-nc *( "/" segment )
 702  *
 703  * Returns 0 or the error code
 704  */
 705 static int
 706 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
 707 {
 708     const char *cur;
 709     int ret;
 710
 711     cur = *str;
 712
 713     ret = xmlParse3986Segment(&cur, ':', 0);
 714     if (ret != 0) return(ret);
 715     while (*cur == '/') {
 716         cur++;
 717         ret = xmlParse3986Segment(&cur, 0, 1);
 718         if (ret != 0) return(ret);
 719     }
 720     if (uri != NULL) {
 721         if (uri->path != NULL) xmlFree(uri->path);
 722         if (cur != *str) {
 723             if (uri->cleanup & 2)
 724                 uri->path = STRNDUP(*str, cur - *str);
 725             else
 726                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 727         } else {
 728             uri->path = NULL;
 729         }
 730     }
 731     *str = cur;
 732     return (0);
 733 }
 734
 735 /**
 736  * xmlParse3986HierPart:
 737  * @uri:  pointer to an URI structure
 738  * @str:  the string to analyze
 739  *
 740  * Parse an hierarchical part and fills in the appropriate fields
 741  * of the @uri structure
 742  *
 743  * hier-part     = "//" authority path-abempty
 744  *                / path-absolute
 745  *                / path-rootless
 746  *                / path-empty
 747  *
 748  * Returns 0 or the error code
 749  */
 750 static int
 751 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
 752 {
 753     const char *cur;
 754     int ret;
 755
 756     cur = *str;
 757
 758     if ((*cur == '/') && (*(cur + 1) == '/')) {
 759         cur += 2;
 760         ret = xmlParse3986Authority(uri, &cur);
 761         if (ret != 0) return(ret);
 762         if (uri->server == NULL)
 763             uri->port = -1;
 764         ret = xmlParse3986PathAbEmpty(uri, &cur);
 765         if (ret != 0) return(ret);
 766         *str = cur;
 767         return(0);
 768     } else if (*cur == '/') {
 769         ret = xmlParse3986PathAbsolute(uri, &cur);
 770         if (ret != 0) return(ret);
 771     } else if (ISA_PCHAR(cur)) {
 772         ret = xmlParse3986PathRootless(uri, &cur);
 773         if (ret != 0) return(ret);
 774     } else {
 775         /* path-empty is effectively empty */
 776         if (uri != NULL) {
 777             if (uri->path != NULL) xmlFree(uri->path);
 778             uri->path = NULL;
 779         }
 780     }
 781     *str = cur;
 782     return (0);
 783 }
 784
 785 /**
 786  * xmlParse3986RelativeRef:
 787  * @uri:  pointer to an URI structure
 788  * @str:  the string to analyze
 789  *
 790  * Parse an URI string and fills in the appropriate fields
 791  * of the @uri structure
 792  *
 793  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 794  * relative-part = "//" authority path-abempty
 795  *               / path-absolute
 796  *               / path-noscheme
 797  *               / path-empty
 798  *
 799  * Returns 0 or the error code
 800  */
 801 static int
 802 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
 803     int ret;
 804
 805     if ((*str == '/') && (*(str + 1) == '/')) {
 806         str += 2;
 807         ret = xmlParse3986Authority(uri, &str);
 808         if (ret != 0) return(ret);
 809         ret = xmlParse3986PathAbEmpty(uri, &str);
 810         if (ret != 0) return(ret);
 811     } else if (*str == '/') {
 812         ret = xmlParse3986PathAbsolute(uri, &str);
 813         if (ret != 0) return(ret);
 814     } else if (ISA_PCHAR(str)) {
 815         ret = xmlParse3986PathNoScheme(uri, &str);
 816         if (ret != 0) return(ret);
 817     } else {
 818         /* path-empty is effectively empty */
 819         if (uri != NULL) {
 820             if (uri->path != NULL) xmlFree(uri->path);
 821             uri->path = NULL;
 822         }
 823     }
 824
 825     if (*str == '?') {
 826         str++;
 827         ret = xmlParse3986Query(uri, &str);
 828         if (ret != 0) return(ret);
 829     }
 830     if (*str == '#') {
 831         str++;
 832         ret = xmlParse3986Fragment(uri, &str);
 833         if (ret != 0) return(ret);
 834     }
 835     if (*str != 0) {
 836         xmlCleanURI(uri);
 837         return(1);
 838     }
 839     return(0);
 840 }
 841
 842
 843 /**
 844  * xmlParse3986URI:
 845  * @uri:  pointer to an URI structure
 846  * @str:  the string to analyze
 847  *
 848  * Parse an URI string and fills in the appropriate fields
 849  * of the @uri structure
 850  *
 851  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 852  *
 853  * Returns 0 or the error code
 854  */
 855 static int
 856 xmlParse3986URI(xmlURIPtr uri, const char *str) {
 857     int ret;
 858
 859     ret = xmlParse3986Scheme(uri, &str);
 860     if (ret != 0) return(ret);
 861     if (*str != ':') {
 862         return(1);
 863     }
 864     str++;
 865     ret = xmlParse3986HierPart(uri, &str);
 866     if (ret != 0) return(ret);
 867     if (*str == '?') {
 868         str++;
 869         ret = xmlParse3986Query(uri, &str);
 870         if (ret != 0) return(ret);
 871     }
 872     if (*str == '#') {
 873         str++;
 874         ret = xmlParse3986Fragment(uri, &str);
 875         if (ret != 0) return(ret);
 876     }
 877     if (*str != 0) {
 878         xmlCleanURI(uri);
 879         return(1);
 880     }
 881     return(0);
 882 }
 883
 884 /**
 885  * xmlParse3986URIReference:
 886  * @uri:  pointer to an URI structure
 887  * @str:  the string to analyze
 888  *
 889  * Parse an URI reference string and fills in the appropriate fields
 890  * of the @uri structure
 891  *
 892  * URI-reference = URI / relative-ref
 893  *
 894  * Returns 0 or the error code
 895  */
 896 static int
 897 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
 898     int ret;
 899
 900     if (str == NULL)
 901         return(-1);
 902     xmlCleanURI(uri);
 903
 904     /*
 905      * Try first to parse absolute refs, then fallback to relative if
 906      * it fails.
 907      */
 908     ret = xmlParse3986URI(uri, str);
 909     if (ret != 0) {
 910         xmlCleanURI(uri);
 911         ret = xmlParse3986RelativeRef(uri, str);
 912         if (ret != 0) {
 913             xmlCleanURI(uri);
 914             return(ret);
 915         }
 916     }
 917     return(0);
 918 }
 919
 920 /**
 921  * xmlParseURI:
 922  * @str:  the URI string to analyze
 923  *
 924  * Parse an URI based on RFC 3986
 925  *
 926  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 927  *
 928  * Returns a newly built xmlURIPtr or NULL in case of error
 929  */
 930 xmlURIPtr
 931 xmlParseURI(const char *str) {
 932     xmlURIPtr uri;
 933     int ret;
 934
 935     if (str == NULL)
 936         return(NULL);
 937     uri = xmlCreateURI();
 938     if (uri != NULL) {
 939         ret = xmlParse3986URIReference(uri, str);
 940         if (ret) {
 941             xmlFreeURI(uri);
 942             return(NULL);
 943         }
 944     }
 945     return(uri);
 946 }
 947
 948 /**
 949  * xmlParseURIReference:
 950  * @uri:  pointer to an URI structure
 951  * @str:  the string to analyze
 952  *
 953  * Parse an URI reference string based on RFC 3986 and fills in the
 954  * appropriate fields of the @uri structure
 955  *
 956  * URI-reference = URI / relative-ref
 957  *
 958  * Returns 0 or the error code
 959  */
 960 int
 961 xmlParseURIReference(xmlURIPtr uri, const char *str) {
 962     return(xmlParse3986URIReference(uri, str));
 963 }
 964
 965 /**
 966  * xmlParseURIRaw:
 967  * @str:  the URI string to analyze
 968  * @raw:  if 1 unescaping of URI pieces are disabled
 969  *
 970  * Parse an URI but allows to keep intact the original fragments.
 971  *
 972  * URI-reference = URI / relative-ref
 973  *
 974  * Returns a newly built xmlURIPtr or NULL in case of error
 975  */
 976 xmlURIPtr
 977 xmlParseURIRaw(const char *str, int raw) {
 978     xmlURIPtr uri;
 979     int ret;
 980
 981     if (str == NULL)
 982         return(NULL);
 983     uri = xmlCreateURI();
 984     if (uri != NULL) {
 985         if (raw) {
 986             uri->cleanup |= 2;
 987         }
 988         ret = xmlParseURIReference(uri, str);
 989         if (ret) {
 990             xmlFreeURI(uri);
 991             return(NULL);
 992         }
 993     }
 994     return(uri);
 995 }
 996
 997 /************************************************************************
 998  *                                                                      *
 999  *                      Generic URI structure functions                 *
1000  *                                                                      *
1001  ************************************************************************/
1002
1003 /**
1004  * xmlCreateURI:
1005  *
1006  * Simply creates an empty xmlURI
1007  *
1008  * Returns the new structure or NULL in case of error
1009  */
1010 xmlURIPtr
1011 xmlCreateURI(void) {
1012     xmlURIPtr ret;
1013
1014     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1015     if (ret == NULL) {
1016         xmlURIErrMemory("creating URI structure\n");
1017         return(NULL);
1018     }
1019     memset(ret, 0, sizeof(xmlURI));
1020     return(ret);
1021 }
1022
1023 /**
1024  * xmlSaveUriRealloc:
1025  *
1026  * Function to handle properly a reallocation when saving an URI
1027  * Also imposes some limit on the length of an URI string output
1028  */
1029 static xmlChar *
1030 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1031     xmlChar *temp;
1032     int tmp;
1033
1034     if (*max > MAX_URI_LENGTH) {
1035         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1036         return(NULL);
1037     }
1038     tmp = *max * 2;
1039     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1040     if (temp == NULL) {
1041         xmlURIErrMemory("saving URI\n");
1042         return(NULL);
1043     }
1044     *max = tmp;
1045     return(temp);
1046 }
1047
1048 /**
1049  * xmlSaveUri:
1050  * @uri:  pointer to an xmlURI
1051  *
1052  * Save the URI as an escaped string
1053  *
1054  * Returns a new string (to be deallocated by caller)
1055  */
1056 xmlChar *
1057 xmlSaveUri(xmlURIPtr uri) {
1058     xmlChar *ret = NULL;
1059     xmlChar *temp;
1060     const char *p;
1061     int len;
1062     int max;
1063
1064     if (uri == NULL) return(NULL);
1065
1066
1067     max = 80;
1068     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1069     if (ret == NULL) {
1070         xmlURIErrMemory("saving URI\n");
1071         return(NULL);
1072     }
1073     len = 0;
1074
1075     if (uri->scheme != NULL) {
1076         p = uri->scheme;
1077         while (*p != 0) {
1078             if (len >= max) {
1079                 temp = xmlSaveUriRealloc(ret, &max);
1080                 if (temp == NULL) goto mem_error;
1081                 ret = temp;
1082             }
1083             ret[len++] = *p++;
1084         }
1085         if (len >= max) {
1086             temp = xmlSaveUriRealloc(ret, &max);
1087             if (temp == NULL) goto mem_error;
1088             ret = temp;
1089         }
1090         ret[len++] = ':';
1091     }
1092     if (uri->opaque != NULL) {
1093         p = uri->opaque;
1094         while (*p != 0) {
1095             if (len + 3 >= max) {
1096                 temp = xmlSaveUriRealloc(ret, &max);
1097                 if (temp == NULL) goto mem_error;
1098                 ret = temp;
1099             }
1100             if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1101                 ret[len++] = *p++;
1102             else {
1103                 int val = *(unsigned char *)p++;
1104                 int hi = val / 0x10, lo = val % 0x10;
1105                 ret[len++] = '%';
1106                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1107                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1108             }
1109         }
1110     } else {
1111         if ((uri->server != NULL) || (uri->port == -1)) {
1112             if (len + 3 >= max) {
1113                 temp = xmlSaveUriRealloc(ret, &max);
1114                 if (temp == NULL) goto mem_error;
1115                 ret = temp;
1116             }
1117             ret[len++] = '/';
1118             ret[len++] = '/';
1119             if (uri->user != NULL) {
1120                 p = uri->user;
1121                 while (*p != 0) {
1122                     if (len + 3 >= max) {
1123                         temp = xmlSaveUriRealloc(ret, &max);
1124                         if (temp == NULL) goto mem_error;
1125                         ret = temp;
1126                     }
1127                     if ((IS_UNRESERVED(*(p))) ||
1128                         ((*(p) == ';')) || ((*(p) == ':')) ||
1129                         ((*(p) == '&')) || ((*(p) == '=')) ||
1130                         ((*(p) == '+')) || ((*(p) == '$')) ||
1131                         ((*(p) == ',')))
1132                         ret[len++] = *p++;
1133                     else {
1134                         int val = *(unsigned char *)p++;
1135                         int hi = val / 0x10, lo = val % 0x10;
1136                         ret[len++] = '%';
1137                         ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1138                         ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1139                     }
1140                 }
1141                 if (len + 3 >= max) {
1142                     temp = xmlSaveUriRealloc(ret, &max);
1143                     if (temp == NULL) goto mem_error;
1144                     ret = temp;
1145                 }
1146                 ret[len++] = '@';
1147             }
1148             if (uri->server != NULL) {
1149                 p = uri->server;
1150                 while (*p != 0) {
1151                     if (len >= max) {
1152                         temp = xmlSaveUriRealloc(ret, &max);
1153                         if (temp == NULL) goto mem_error;
1154                         ret = temp;
1155                     }
1156                     ret[len++] = *p++;
1157                 }
1158                 if (uri->port > 0) {
1159                     if (len + 10 >= max) {
1160                         temp = xmlSaveUriRealloc(ret, &max);
1161                         if (temp == NULL) goto mem_error;
1162                         ret = temp;
1163                     }
1164                     len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1165                 }
1166             }
1167         } else if (uri->authority != NULL) {
1168             if (len + 3 >= max) {
1169                 temp = xmlSaveUriRealloc(ret, &max);
1170                 if (temp == NULL) goto mem_error;
1171                 ret = temp;
1172             }
1173             ret[len++] = '/';
1174             ret[len++] = '/';
1175             p = uri->authority;
1176             while (*p != 0) {
1177                 if (len + 3 >= max) {
1178                     temp = xmlSaveUriRealloc(ret, &max);
1179                     if (temp == NULL) goto mem_error;
1180                     ret = temp;
1181                 }
1182                 if ((IS_UNRESERVED(*(p))) ||
1183                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1184                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1185                     ((*(p) == '=')) || ((*(p) == '+')))
1186                     ret[len++] = *p++;
1187                 else {
1188                     int val = *(unsigned char *)p++;
1189                     int hi = val / 0x10, lo = val % 0x10;
1190                     ret[len++] = '%';
1191                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1192                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1193                 }
1194             }
1195         } else if (uri->scheme != NULL) {
1196             if (len + 3 >= max) {
1197                 temp = xmlSaveUriRealloc(ret, &max);
1198                 if (temp == NULL) goto mem_error;
1199                 ret = temp;
1200             }
1201         }
1202         if (uri->path != NULL) {
1203             p = uri->path;
1204             /*
1205              * the colon in file:///d: should not be escaped or
1206              * Windows accesses fail later.
1207              */
1208             if ((uri->scheme != NULL) &&
1209                 (p[0] == '/') &&
1210                 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1211                  ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1212                 (p[2] == ':') &&
1213                 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1214                 if (len + 3 >= max) {
1215                     temp = xmlSaveUriRealloc(ret, &max);
1216                     if (temp == NULL) goto mem_error;
1217                     ret = temp;
1218                 }
1219                 ret[len++] = *p++;
1220                 ret[len++] = *p++;
1221                 ret[len++] = *p++;
1222             }
1223             while (*p != 0) {
1224                 if (len + 3 >= max) {
1225                     temp = xmlSaveUriRealloc(ret, &max);
1226                     if (temp == NULL) goto mem_error;
1227                     ret = temp;
1228                 }
1229                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1230                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1231                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1232                     ((*(p) == ',')))
1233                     ret[len++] = *p++;
1234                 else {
1235                     int val = *(unsigned char *)p++;
1236                     int hi = val / 0x10, lo = val % 0x10;
1237                     ret[len++] = '%';
1238                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1239                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1240                 }
1241             }
1242         }
1243         if (uri->query_raw != NULL) {
1244             if (len + 1 >= max) {
1245                 temp = xmlSaveUriRealloc(ret, &max);
1246                 if (temp == NULL) goto mem_error;
1247                 ret = temp;
1248             }
1249             ret[len++] = '?';
1250             p = uri->query_raw;
1251             while (*p != 0) {
1252                 if (len + 1 >= max) {
1253                     temp = xmlSaveUriRealloc(ret, &max);
1254                     if (temp == NULL) goto mem_error;
1255                     ret = temp;
1256                 }
1257                 ret[len++] = *p++;
1258             }
1259         } else if (uri->query != NULL) {
1260             if (len + 3 >= max) {
1261                 temp = xmlSaveUriRealloc(ret, &max);
1262                 if (temp == NULL) goto mem_error;
1263                 ret = temp;
1264             }
1265             ret[len++] = '?';
1266             p = uri->query;
1267             while (*p != 0) {
1268                 if (len + 3 >= max) {
1269                     temp = xmlSaveUriRealloc(ret, &max);
1270                     if (temp == NULL) goto mem_error;
1271                     ret = temp;
1272                 }
1273                 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1274                     ret[len++] = *p++;
1275                 else {
1276                     int val = *(unsigned char *)p++;
1277                     int hi = val / 0x10, lo = val % 0x10;
1278                     ret[len++] = '%';
1279                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1280                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1281                 }
1282             }
1283         }
1284     }
1285     if (uri->fragment != NULL) {
1286         if (len + 3 >= max) {
1287             temp = xmlSaveUriRealloc(ret, &max);
1288             if (temp == NULL) goto mem_error;
1289             ret = temp;
1290         }
1291         ret[len++] = '#';
1292         p = uri->fragment;
1293         while (*p != 0) {
1294             if (len + 3 >= max) {
1295                 temp = xmlSaveUriRealloc(ret, &max);
1296                 if (temp == NULL) goto mem_error;
1297                 ret = temp;
1298             }
1299             if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1300                 ret[len++] = *p++;
1301             else {
1302                 int val = *(unsigned char *)p++;
1303                 int hi = val / 0x10, lo = val % 0x10;
1304                 ret[len++] = '%';
1305                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1306                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1307             }
1308         }
1309     }
1310     if (len >= max) {
1311         temp = xmlSaveUriRealloc(ret, &max);
1312         if (temp == NULL) goto mem_error;
1313         ret = temp;
1314     }
1315     ret[len] = 0;
1316     return(ret);
1317
1318 mem_error:
1319     xmlFree(ret);
1320     return(NULL);
1321 }
1322
1323 /**
1324  * xmlPrintURI:
1325  * @stream:  a FILE* for the output
1326  * @uri:  pointer to an xmlURI
1327  *
1328  * Prints the URI in the stream @stream.
1329  */
1330 void
1331 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1332     xmlChar *out;
1333
1334     out = xmlSaveUri(uri);
1335     if (out != NULL) {
1336         fprintf(stream, "%s", (char *) out);
1337         xmlFree(out);
1338     }
1339 }
1340
1341 /**
1342  * xmlCleanURI:
1343  * @uri:  pointer to an xmlURI
1344  *
1345  * Make sure the xmlURI struct is free of content
1346  */
1347 static void
1348 xmlCleanURI(xmlURIPtr uri) {
1349     if (uri == NULL) return;
1350
1351     if (uri->scheme != NULL) xmlFree(uri->scheme);
1352     uri->scheme = NULL;
1353     if (uri->server != NULL) xmlFree(uri->server);
1354     uri->server = NULL;
1355     if (uri->user != NULL) xmlFree(uri->user);
1356     uri->user = NULL;
1357     if (uri->path != NULL) xmlFree(uri->path);
1358     uri->path = NULL;
1359     if (uri->fragment != NULL) xmlFree(uri->fragment);
1360     uri->fragment = NULL;
1361     if (uri->opaque != NULL) xmlFree(uri->opaque);
1362     uri->opaque = NULL;
1363     if (uri->authority != NULL) xmlFree(uri->authority);
1364     uri->authority = NULL;
1365     if (uri->query != NULL) xmlFree(uri->query);
1366     uri->query = NULL;
1367     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1368     uri->query_raw = NULL;
1369 }
1370
1371 /**
1372  * xmlFreeURI:
1373  * @uri:  pointer to an xmlURI
1374  *
1375  * Free up the xmlURI struct
1376  */
1377 void
1378 xmlFreeURI(xmlURIPtr uri) {
1379     if (uri == NULL) return;
1380
1381     if (uri->scheme != NULL) xmlFree(uri->scheme);
1382     if (uri->server != NULL) xmlFree(uri->server);
1383     if (uri->user != NULL) xmlFree(uri->user);
1384     if (uri->path != NULL) xmlFree(uri->path);
1385     if (uri->fragment != NULL) xmlFree(uri->fragment);
1386     if (uri->opaque != NULL) xmlFree(uri->opaque);
1387     if (uri->authority != NULL) xmlFree(uri->authority);
1388     if (uri->query != NULL) xmlFree(uri->query);
1389     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1390     xmlFree(uri);
1391 }
1392
1393 /************************************************************************
1394  *                                                                      *
1395  *                      Helper functions                                *
1396  *                                                                      *
1397  ************************************************************************/
1398
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of '/' between segments are
 * collapsed to a single '/' along the way; leading '/' characters are
 * left untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the input, "out" writes the compacted result in place.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": skip all but the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* Source and destination overlap, so copy by hand rather than
         * with strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
          ;

        /* Step back over the '/' characters that precede "cur".  If that
         * reaches the start of the buffer there is no previous segment,
         * so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
          --segp;
        if (segp == path)
          continue;

        /* "segp[-1]" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  This also covers a one-character segment
         * sitting at the very start of a relative path ("a/b/../..").
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths that start
     * with '/').
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
        cur += 3;

      if (cur != path) {
        out = path;
        while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
        out[0] = 0;
      }
    }

    return(0);
}
1587
/* Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int decimal = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(decimal || lower || upper);
}
1595
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while replacing every "%XX" sequence (X being an
 * hexadecimal digit) by the single byte it encodes.  No check is made
 * that the input is a valid URI and no character encoding conversion is
 * done.  A '%' not followed by two hexadecimal digits within the
 * remaining length is copied unchanged.
 *
 * The output is never longer than the input; when @target is provided
 * it is used as is and must have room for @len + 1 bytes, otherwise a
 * new buffer is allocated.
 *
 * Returns the unescaped, NUL terminated string (@target or a new
 * allocation), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Fold the two hexadecimal digits into one byte value. */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1657
1658 /**
1659  * xmlURIEscapeStr:
1660  * @str:  string to escape
1661  * @list: exception list string of chars not to escape
1662  *
1663  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1664  * and the characters in the exception list.
1665  *
1666  * Returns a new escaped string or NULL in case of error.
1667  */
1668 xmlChar *
1669 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1670     xmlChar *ret, ch;
1671     xmlChar *temp;
1672     const xmlChar *in;
1673     int len, out;
1674
1675     if (str == NULL)
1676         return(NULL);
1677     if (str[0] == 0)
1678         return(xmlStrdup(str));
1679     len = xmlStrlen(str);
1680     if (!(len > 0)) return(NULL);
1681
1682     len += 20;
1683     ret = (xmlChar *) xmlMallocAtomic(len);
1684     if (ret == NULL) {
1685         xmlURIErrMemory("escaping URI value\n");
1686         return(NULL);
1687     }
1688     in = (const xmlChar *) str;
1689     out = 0;
1690     while(*in != 0) {
1691         if (len - out <= 3) {
1692             temp = xmlSaveUriRealloc(ret, &len);
1693             if (temp == NULL) {
1694                 xmlURIErrMemory("escaping URI value\n");
1695                 xmlFree(ret);
1696                 return(NULL);
1697             }
1698             ret = temp;
1699         }
1700
1701         ch = *in;
1702
1703         if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1704             unsigned char val;
1705             ret[out++] = '%';
1706             val = ch >> 4;
1707             if (val <= 9)
1708                 ret[out++] = '0' + val;
1709             else
1710                 ret[out++] = 'A' + val - 0xA;
1711             val = ch & 0xF;
1712             if (val <= 9)
1713                 ret[out++] = '0' + val;
1714             else
1715                 ret[out++] = 'A' + val - 0xA;
1716             in++;
1717         } else {
1718             ret[out++] = *in++;
1719         }
1720
1721     }
1722     ret[out] = 0;
1723     return(ret);
1724 }
1725
1726 /**
1727  * xmlURIEscape:
1728  * @str:  the string of the URI to escape
1729  *
1730  * Escaping routine, does not do validity checks !
1731  * It will try to escape the chars needing this, but this is heuristic
1732  * based it's impossible to be sure.
1733  *
1734  * Returns an copy of the string, but escaped
1735  *
1736  * 25 May 2001
1737  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1738  * according to RFC2396.
1739  *   - Carl Douglas
1740  */
1741 xmlChar *
1742 xmlURIEscape(const xmlChar * str)
1743 {
1744     xmlChar *ret, *segment = NULL;
1745     xmlURIPtr uri;
1746     int ret2;
1747
1748 #define NULLCHK(p) if(!p) { \
1749          xmlURIErrMemory("escaping URI value\n"); \
1750          xmlFreeURI(uri); \
1751          return NULL; } \
1752
1753     if (str == NULL)
1754         return (NULL);
1755
1756     uri = xmlCreateURI();
1757     if (uri != NULL) {
1758         /*
1759          * Allow escaping errors in the unescaped form
1760          */
1761         uri->cleanup = 1;
1762         ret2 = xmlParseURIReference(uri, (const char *)str);
1763         if (ret2) {
1764             xmlFreeURI(uri);
1765             return (NULL);
1766         }
1767     }
1768
1769     if (!uri)
1770         return NULL;
1771
1772     ret = NULL;
1773
1774     if (uri->scheme) {
1775         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1776         NULLCHK(segment)
1777         ret = xmlStrcat(ret, segment);
1778         ret = xmlStrcat(ret, BAD_CAST ":");
1779         xmlFree(segment);
1780     }
1781
1782     if (uri->authority) {
1783         segment =
1784             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1785         NULLCHK(segment)
1786         ret = xmlStrcat(ret, BAD_CAST "//");
1787         ret = xmlStrcat(ret, segment);
1788         xmlFree(segment);
1789     }
1790
1791     if (uri->user) {
1792         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1793         NULLCHK(segment)
1794                 ret = xmlStrcat(ret,BAD_CAST "//");
1795         ret = xmlStrcat(ret, segment);
1796         ret = xmlStrcat(ret, BAD_CAST "@");
1797         xmlFree(segment);
1798     }
1799
1800     if (uri->server) {
1801         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1802         NULLCHK(segment)
1803                 if (uri->user == NULL)
1804                 ret = xmlStrcat(ret, BAD_CAST "//");
1805         ret = xmlStrcat(ret, segment);
1806         xmlFree(segment);
1807     }
1808
1809     if (uri->port) {
1810         xmlChar port[10];
1811
1812         snprintf((char *) port, 10, "%d", uri->port);
1813         ret = xmlStrcat(ret, BAD_CAST ":");
1814         ret = xmlStrcat(ret, port);
1815     }
1816
1817     if (uri->path) {
1818         segment =
1819             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1820         NULLCHK(segment)
1821         ret = xmlStrcat(ret, segment);
1822         xmlFree(segment);
1823     }
1824
1825     if (uri->query_raw) {
1826         ret = xmlStrcat(ret, BAD_CAST "?");
1827         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1828     }
1829     else if (uri->query) {
1830         segment =
1831             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1832         NULLCHK(segment)
1833         ret = xmlStrcat(ret, BAD_CAST "?");
1834         ret = xmlStrcat(ret, segment);
1835         xmlFree(segment);
1836     }
1837
1838     if (uri->opaque) {
1839         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1840         NULLCHK(segment)
1841         ret = xmlStrcat(ret, segment);
1842         xmlFree(segment);
1843     }
1844
1845     if (uri->fragment) {
1846         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1847         NULLCHK(segment)
1848         ret = xmlStrcat(ret, BAD_CAST "#");
1849         ret = xmlStrcat(ret, segment);
1850         xmlFree(segment);
1851     }
1852
1853     xmlFreeURI(uri);
1854 #undef NULLCHK
1855
1856     return (ret);
1857 }
1858
1859 /************************************************************************
1860  *                                                                      *
1861  *                      Public functions                                *
1862  *                                                                      *
1863  ************************************************************************/
1864
1865 /**
1866  * xmlBuildURI:
1867  * @URI:  the URI instance found in the document
1868  * @base:  the base value
1869  *
1870  * Computes the final URI of the reference done by checking that
1871  * the given URI is valid, and building the final URI using the
1872  * base URI. This is processed according to section 5.2 of the
1873  * RFC 2396
1874  *
1875  * 5.2. Resolving Relative References to Absolute Form
1876  *
1877  * Returns a new URI string (to be freed by the caller) or NULL in case
1878  *         of error.
1879  */
1880 xmlChar *
1881 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1882     xmlChar *val = NULL;
1883     int ret, len, indx, cur, out;
1884     xmlURIPtr ref = NULL;
1885     xmlURIPtr bas = NULL;
1886     xmlURIPtr res = NULL;
1887
1888     /*
1889      * 1) The URI reference is parsed into the potential four components and
1890      *    fragment identifier, as described in Section 4.3.
1891      *
1892      *    NOTE that a completely empty URI is treated by modern browsers
1893      *    as a reference to "." rather than as a synonym for the current
1894      *    URI.  Should we do that here?
1895      */
1896     if (URI == NULL)
1897         ret = -1;
1898     else {
1899         if (*URI) {
1900             ref = xmlCreateURI();
1901             if (ref == NULL)
1902                 goto done;
1903             ret = xmlParseURIReference(ref, (const char *) URI);
1904         }
1905         else
1906             ret = 0;
1907     }
1908     if (ret != 0)
1909         goto done;
1910     if ((ref != NULL) && (ref->scheme != NULL)) {
1911         /*
1912          * The URI is absolute don't modify.
1913          */
1914         val = xmlStrdup(URI);
1915         goto done;
1916     }
1917     if (base == NULL)
1918         ret = -1;
1919     else {
1920         bas = xmlCreateURI();
1921         if (bas == NULL)
1922             goto done;
1923         ret = xmlParseURIReference(bas, (const char *) base);
1924     }
1925     if (ret != 0) {
1926         if (ref)
1927             val = xmlSaveUri(ref);
1928         goto done;
1929     }
1930     if (ref == NULL) {
1931         /*
1932          * the base fragment must be ignored
1933          */
1934         if (bas->fragment != NULL) {
1935             xmlFree(bas->fragment);
1936             bas->fragment = NULL;
1937         }
1938         val = xmlSaveUri(bas);
1939         goto done;
1940     }
1941
1942     /*
1943      * 2) If the path component is empty and the scheme, authority, and
1944      *    query components are undefined, then it is a reference to the
1945      *    current document and we are done.  Otherwise, the reference URI's
1946      *    query and fragment components are defined as found (or not found)
1947      *    within the URI reference and not inherited from the base URI.
1948      *
1949      *    NOTE that in modern browsers, the parsing differs from the above
1950      *    in the following aspect:  the query component is allowed to be
1951      *    defined while still treating this as a reference to the current
1952      *    document.
1953      */
1954     res = xmlCreateURI();
1955     if (res == NULL)
1956         goto done;
1957     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1958         ((ref->authority == NULL) && (ref->server == NULL))) {
1959         if (bas->scheme != NULL)
1960             res->scheme = xmlMemStrdup(bas->scheme);
1961         if (bas->authority != NULL)
1962             res->authority = xmlMemStrdup(bas->authority);
1963         else if (bas->server != NULL) {
1964             res->server = xmlMemStrdup(bas->server);
1965             if (bas->user != NULL)
1966                 res->user = xmlMemStrdup(bas->user);
1967             res->port = bas->port;
1968         }
1969         if (bas->path != NULL)
1970             res->path = xmlMemStrdup(bas->path);
1971         if (ref->query_raw != NULL)
1972             res->query_raw = xmlMemStrdup (ref->query_raw);
1973         else if (ref->query != NULL)
1974             res->query = xmlMemStrdup(ref->query);
1975         else if (bas->query_raw != NULL)
1976             res->query_raw = xmlMemStrdup(bas->query_raw);
1977         else if (bas->query != NULL)
1978             res->query = xmlMemStrdup(bas->query);
1979         if (ref->fragment != NULL)
1980             res->fragment = xmlMemStrdup(ref->fragment);
1981         goto step_7;
1982     }
1983
1984     /*
1985      * 3) If the scheme component is defined, indicating that the reference
1986      *    starts with a scheme name, then the reference is interpreted as an
1987      *    absolute URI and we are done.  Otherwise, the reference URI's
1988      *    scheme is inherited from the base URI's scheme component.
1989      */
1990     if (ref->scheme != NULL) {
1991         val = xmlSaveUri(ref);
1992         goto done;
1993     }
1994     if (bas->scheme != NULL)
1995         res->scheme = xmlMemStrdup(bas->scheme);
1996
1997     if (ref->query_raw != NULL)
1998         res->query_raw = xmlMemStrdup(ref->query_raw);
1999     else if (ref->query != NULL)
2000         res->query = xmlMemStrdup(ref->query);
2001     if (ref->fragment != NULL)
2002         res->fragment = xmlMemStrdup(ref->fragment);
2003
2004     /*
2005      * 4) If the authority component is defined, then the reference is a
2006      *    network-path and we skip to step 7.  Otherwise, the reference
2007      *    URI's authority is inherited from the base URI's authority
2008      *    component, which will also be undefined if the URI scheme does not
2009      *    use an authority component.
2010      */
2011     if ((ref->authority != NULL) || (ref->server != NULL)) {
2012         if (ref->authority != NULL)
2013             res->authority = xmlMemStrdup(ref->authority);
2014         else {
2015             res->server = xmlMemStrdup(ref->server);
2016             if (ref->user != NULL)
2017                 res->user = xmlMemStrdup(ref->user);
2018             res->port = ref->port;
2019         }
2020         if (ref->path != NULL)
2021             res->path = xmlMemStrdup(ref->path);
2022         goto step_7;
2023     }
2024     if (bas->authority != NULL)
2025         res->authority = xmlMemStrdup(bas->authority);
2026     else if (bas->server != NULL) {
2027         res->server = xmlMemStrdup(bas->server);
2028         if (bas->user != NULL)
2029             res->user = xmlMemStrdup(bas->user);
2030         res->port = bas->port;
2031     }
2032
2033     /*
2034      * 5) If the path component begins with a slash character ("/"), then
2035      *    the reference is an absolute-path and we skip to step 7.
2036      */
2037     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2038         res->path = xmlMemStrdup(ref->path);
2039         goto step_7;
2040     }
2041
2042
2043     /*
2044      * 6) If this step is reached, then we are resolving a relative-path
2045      *    reference.  The relative path needs to be merged with the base
2046      *    URI's path.  Although there are many ways to do this, we will
2047      *    describe a simple method using a separate string buffer.
2048      *
2049      * Allocate a buffer large enough for the result string.
2050      */
2051     len = 2; /* extra / and 0 */
2052     if (ref->path != NULL)
2053         len += strlen(ref->path);
2054     if (bas->path != NULL)
2055         len += strlen(bas->path);
2056     res->path = (char *) xmlMallocAtomic(len);
2057     if (res->path == NULL) {
2058         xmlURIErrMemory("resolving URI against base\n");
2059         goto done;
2060     }
2061     res->path[0] = 0;
2062
2063     /*
2064      * a) All but the last segment of the base URI's path component is
2065      *    copied to the buffer.  In other words, any characters after the
2066      *    last (right-most) slash character, if any, are excluded.
2067      */
2068     cur = 0;
2069     out = 0;
2070     if (bas->path != NULL) {
2071         while (bas->path[cur] != 0) {
2072             while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2073                 cur++;
2074             if (bas->path[cur] == 0)
2075                 break;
2076
2077             cur++;
2078             while (out < cur) {
2079                 res->path[out] = bas->path[out];
2080                 out++;
2081             }
2082         }
2083     }
2084     res->path[out] = 0;
2085
2086     /*
2087      * b) The reference's path component is appended to the buffer
2088      *    string.
2089      */
2090     if (ref->path != NULL && ref->path[0] != 0) {
2091         indx = 0;
2092         /*
2093          * Ensure the path includes a '/'
2094          */
2095         if ((out == 0) && (bas->server != NULL))
2096             res->path[out++] = '/';
2097         while (ref->path[indx] != 0) {
2098             res->path[out++] = ref->path[indx++];
2099         }
2100     }
2101     res->path[out] = 0;
2102
2103     /*
2104      * Steps c) to h) are really path normalization steps
2105      */
2106     xmlNormalizeURIPath(res->path);
2107
2108 step_7:
2109
2110     /*
2111      * 7) The resulting URI components, including any inherited from the
2112      *    base URI, are recombined to give the absolute form of the URI
2113      *    reference.
2114      */
2115     val = xmlSaveUri(res);
2116
2117 done:
2118     if (ref != NULL)
2119         xmlFreeURI(ref);
2120     if (bas != NULL)
2121         xmlFreeURI(bas);
2122     if (res != NULL)
2123         xmlFreeURI(res);
2124     return(val);
2125 }
2126
2127 /**
2128  * xmlBuildRelativeURI:
2129  * @URI:  the URI reference under consideration
2130  * @base:  the base value
2131  *
2132  * Expresses the URI of the reference in terms relative to the
2133  * base.  Some examples of this operation include:
2134  *     base = "http://site1.com/docs/book1.html"
2135  *        URI input                        URI returned
2136  *     docs/pic1.gif                    pic1.gif
2137  *     docs/img/pic1.gif                img/pic1.gif
2138  *     img/pic1.gif                     ../img/pic1.gif
2139  *     http://site1.com/docs/pic1.gif   pic1.gif
2140  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2141  *
2142  *     base = "docs/book1.html"
2143  *        URI input                        URI returned
2144  *     docs/pic1.gif                    pic1.gif
2145  *     docs/img/pic1.gif                img/pic1.gif
2146  *     img/pic1.gif                     ../img/pic1.gif
2147  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2148  *
2149  *
2150  * Note: if the URI reference is really wierd or complicated, it may be
2151  *       worthwhile to first convert it into a "nice" one by calling
2152  *       xmlBuildURI (using 'base') before calling this routine,
2153  *       since this routine (for reasonable efficiency) assumes URI has
2154  *       already been through some validation.
2155  *
2156  * Returns a new URI string (to be freed by the caller) or NULL in case
2157  * error.
2158  */
2159 xmlChar *
2160 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2161 {
2162     xmlChar *val = NULL;
2163     int ret;
2164     int ix;
2165     int pos = 0;
2166     int nbslash = 0;
2167     int len;
2168     xmlURIPtr ref = NULL;
2169     xmlURIPtr bas = NULL;
2170     xmlChar *bptr, *uptr, *vptr;
2171     int remove_path = 0;
2172
2173     if ((URI == NULL) || (*URI == 0))
2174         return NULL;
2175
2176     /*
2177      * First parse URI into a standard form
2178      */
2179     ref = xmlCreateURI ();
2180     if (ref == NULL)
2181         return NULL;
2182     /* If URI not already in "relative" form */
2183     if (URI[0] != '.') {
2184         ret = xmlParseURIReference (ref, (const char *) URI);
2185         if (ret != 0)
2186             goto done;          /* Error in URI, return NULL */
2187     } else
2188         ref->path = (char *)xmlStrdup(URI);
2189
2190     /*
2191      * Next parse base into the same standard form
2192      */
2193     if ((base == NULL) || (*base == 0)) {
2194         val = xmlStrdup (URI);
2195         goto done;
2196     }
2197     bas = xmlCreateURI ();
2198     if (bas == NULL)
2199         goto done;
2200     if (base[0] != '.') {
2201         ret = xmlParseURIReference (bas, (const char *) base);
2202         if (ret != 0)
2203             goto done;          /* Error in base, return NULL */
2204     } else
2205         bas->path = (char *)xmlStrdup(base);
2206
2207     /*
2208      * If the scheme / server on the URI differs from the base,
2209      * just return the URI
2210      */
2211     if ((ref->scheme != NULL) &&
2212         ((bas->scheme == NULL) ||
2213          (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214          (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2215         val = xmlStrdup (URI);
2216         goto done;
2217     }
2218     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219         val = xmlStrdup(BAD_CAST "");
2220         goto done;
2221     }
2222     if (bas->path == NULL) {
2223         val = xmlStrdup((xmlChar *)ref->path);
2224         goto done;
2225     }
2226     if (ref->path == NULL) {
2227         ref->path = (char *) "/";
2228         remove_path = 1;
2229     }
2230
2231     /*
2232      * At this point (at last!) we can compare the two paths
2233      *
2234      * First we take care of the special case where either of the
2235      * two path components may be missing (bug 316224)
2236      */
2237     if (bas->path == NULL) {
2238         if (ref->path != NULL) {
2239             uptr = (xmlChar *) ref->path;
2240             if (*uptr == '/')
2241                 uptr++;
2242             /* exception characters from xmlSaveUri */
2243             val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2244         }
2245         goto done;
2246     }
2247     bptr = (xmlChar *)bas->path;
2248     if (ref->path == NULL) {
2249         for (ix = 0; bptr[ix] != 0; ix++) {
2250             if (bptr[ix] == '/')
2251                 nbslash++;
2252         }
2253         uptr = NULL;
2254         len = 1;        /* this is for a string terminator only */
2255     } else {
2256     /*
2257      * Next we compare the two strings and find where they first differ
2258      */
2259         if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2260             pos += 2;
2261         if ((*bptr == '.') && (bptr[1] == '/'))
2262             bptr += 2;
2263         else if ((*bptr == '/') && (ref->path[pos] != '/'))
2264             bptr++;
2265         while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2266             pos++;
2267
2268         if (bptr[pos] == ref->path[pos]) {
2269             val = xmlStrdup(BAD_CAST "");
2270             goto done;          /* (I can't imagine why anyone would do this) */
2271         }
2272
2273         /*
2274          * In URI, "back up" to the last '/' encountered.  This will be the
2275          * beginning of the "unique" suffix of URI
2276          */
2277         ix = pos;
2278         if ((ref->path[ix] == '/') && (ix > 0))
2279             ix--;
2280         else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2281             ix -= 2;
2282         for (; ix > 0; ix--) {
2283             if (ref->path[ix] == '/')
2284                 break;
2285         }
2286         if (ix == 0) {
2287             uptr = (xmlChar *)ref->path;
2288         } else {
2289             ix++;
2290             uptr = (xmlChar *)&ref->path[ix];
2291         }
2292
2293         /*
2294          * In base, count the number of '/' from the differing point
2295          */
2296         if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2297             for (; bptr[ix] != 0; ix++) {
2298                 if (bptr[ix] == '/')
2299                     nbslash++;
2300             }
2301         }
2302         len = xmlStrlen (uptr) + 1;
2303     }
2304
2305     if (nbslash == 0) {
2306         if (uptr != NULL)
2307             /* exception characters from xmlSaveUri */
2308             val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2309         goto done;
2310     }
2311
2312     /*
2313      * Allocate just enough space for the returned string -
2314      * length of the remainder of the URI, plus enough space
2315      * for the "../" groups, plus one for the terminator
2316      */
2317     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2318     if (val == NULL) {
2319         xmlURIErrMemory("building relative URI\n");
2320         goto done;
2321     }
2322     vptr = val;
2323     /*
2324      * Put in as many "../" as needed
2325      */
2326     for (; nbslash>0; nbslash--) {
2327         *vptr++ = '.';
2328         *vptr++ = '.';
2329         *vptr++ = '/';
2330     }
2331     /*
2332      * Finish up with the end of the URI
2333      */
2334     if (uptr != NULL) {
2335         if ((vptr > val) && (len > 0) &&
2336             (uptr[0] == '/') && (vptr[-1] == '/')) {
2337             memcpy (vptr, uptr + 1, len - 1);
2338             vptr[len - 2] = 0;
2339         } else {
2340             memcpy (vptr, uptr, len);
2341             vptr[len - 1] = 0;
2342         }
2343     } else {
2344         vptr[len - 1] = 0;
2345     }
2346
2347     /* escape the freshly-built path */
2348     vptr = val;
2349         /* exception characters from xmlSaveUri */
2350     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2351     xmlFree(vptr);
2352
2353 done:
2354     /*
2355      * Free the working variables
2356      */
2357     if (remove_path != 0)
2358         ref->path = NULL;
2359     if (ref != NULL)
2360         xmlFreeURI (ref);
2361     if (bas != NULL)
2362         xmlFreeURI (bas);
2363
2364     return val;
2365 }
2366
2367 /**
2368  * xmlCanonicPath:
2369  * @path:  the resource locator in a filesystem notation
2370  *
2371  * Constructs a canonic path from the specified path.
2372  *
2373  * Returns a new canonic path, or a duplicate of the path parameter if the
2374  * construction fails. The caller is responsible for freeing the memory occupied
2375  * by the returned string. If there is insufficient memory available, or the
2376  * argument is NULL, the function returns NULL.
2377  */
2378 #define IS_WINDOWS_PATH(p)                                      \
2379         ((p != NULL) &&                                         \
2380          (((p[0] >= 'a') && (p[0] <= 'z')) ||                   \
2381           ((p[0] >= 'A') && (p[0] <= 'Z'))) &&                  \
2382          (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2383 xmlChar *
2384 xmlCanonicPath(const xmlChar *path)
2385 {
2386 /*
2387  * For Windows implementations, additional work needs to be done to
2388  * replace backslashes in pathnames with "forward slashes"
2389  */
2390 #if defined(_WIN32) && !defined(__CYGWIN__)
2391     int len = 0;
2392     int i = 0;
2393     xmlChar *p = NULL;
2394 #endif
2395     xmlURIPtr uri;
2396     xmlChar *ret;
2397     const xmlChar *absuri;
2398
2399     if (path == NULL)
2400         return(NULL);
2401
2402 #if defined(_WIN32)
2403     /*
2404      * We must not change the backslashes to slashes if the the path
2405      * starts with \\?\
2406      * Those paths can be up to 32k characters long.
2407      * Was added specifically for OpenOffice, those paths can't be converted
2408      * to URIs anyway.
2409      */
2410     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2411         (path[3] == '\\') )
2412         return xmlStrdup((const xmlChar *) path);
2413 #endif
2414
2415         /* sanitize filename starting with // so it can be used as URI */
2416     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2417         path++;
2418
2419     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2420         xmlFreeURI(uri);
2421         return xmlStrdup(path);
2422     }
2423
2424     /* Check if this is an "absolute uri" */
2425     absuri = xmlStrstr(path, BAD_CAST "://");
2426     if (absuri != NULL) {
2427         int l, j;
2428         unsigned char c;
2429         xmlChar *escURI;
2430
2431         /*
2432          * this looks like an URI where some parts have not been
2433          * escaped leading to a parsing problem.  Check that the first
2434          * part matches a protocol.
2435          */
2436         l = absuri - path;
2437         /* Bypass if first part (part before the '://') is > 20 chars */
2438         if ((l <= 0) || (l > 20))
2439             goto path_processing;
2440         /* Bypass if any non-alpha characters are present in first part */
2441         for (j = 0;j < l;j++) {
2442             c = path[j];
2443             if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2444                 goto path_processing;
2445         }
2446
2447         /* Escape all except the characters specified in the supplied path */
2448         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2449         if (escURI != NULL) {
2450             /* Try parsing the escaped path */
2451             uri = xmlParseURI((const char *) escURI);
2452             /* If successful, return the escaped string */
2453             if (uri != NULL) {
2454                 xmlFreeURI(uri);
2455                 return escURI;
2456             }
2457         }
2458     }
2459
2460 path_processing:
2461 /* For Windows implementations, replace backslashes with 'forward slashes' */
2462 #if defined(_WIN32) && !defined(__CYGWIN__)
2463     /*
2464      * Create a URI structure
2465      */
2466     uri = xmlCreateURI();
2467     if (uri == NULL) {          /* Guard against 'out of memory' */
2468         return(NULL);
2469     }
2470
2471     len = xmlStrlen(path);
2472     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2473         /* make the scheme 'file' */
2474         uri->scheme = xmlStrdup(BAD_CAST "file");
2475         /* allocate space for leading '/' + path + string terminator */
2476         uri->path = xmlMallocAtomic(len + 2);
2477         if (uri->path == NULL) {
2478             xmlFreeURI(uri);    /* Guard agains 'out of memory' */
2479             return(NULL);
2480         }
2481         /* Put in leading '/' plus path */
2482         uri->path[0] = '/';
2483         p = uri->path + 1;
2484         strncpy(p, path, len + 1);
2485     } else {
2486         uri->path = xmlStrdup(path);
2487         if (uri->path == NULL) {
2488             xmlFreeURI(uri);
2489             return(NULL);
2490         }
2491         p = uri->path;
2492     }
2493     /* Now change all occurences of '\' to '/' */
2494     while (*p != '\0') {
2495         if (*p == '\\')
2496             *p = '/';
2497         p++;
2498     }
2499
2500     if (uri->scheme == NULL) {
2501         ret = xmlStrdup((const xmlChar *) uri->path);
2502     } else {
2503         ret = xmlSaveUri(uri);
2504     }
2505
2506     xmlFreeURI(uri);
2507 #else
2508     ret = xmlStrdup((const xmlChar *) path);
2509 #endif
2510     return(ret);
2511 }
2512
2513 /**
2514  * xmlPathToURI:
2515  * @path:  the resource locator in a filesystem notation
2516  *
2517  * Constructs an URI expressing the existing path
2518  *
2519  * Returns a new URI, or a duplicate of the path parameter if the
2520  * construction fails. The caller is responsible for freeing the memory
2521  * occupied by the returned string. If there is insufficient memory available,
2522  * or the argument is NULL, the function returns NULL.
2523  */
2524 xmlChar *
2525 xmlPathToURI(const xmlChar *path)
2526 {
2527     xmlURIPtr uri;
2528     xmlURI temp;
2529     xmlChar *ret, *cal;
2530
2531     if (path == NULL)
2532         return(NULL);
2533
2534     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2535         xmlFreeURI(uri);
2536         return xmlStrdup(path);
2537     }
2538     cal = xmlCanonicPath(path);
2539     if (cal == NULL)
2540         return(NULL);
2541 #if defined(_WIN32) && !defined(__CYGWIN__)
2542     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2543        If 'cal' is a valid URI allready then we are done here, as continuing would make
2544        it invalid. */
2545     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2546         xmlFreeURI(uri);
2547         return cal;
2548     }
2549     /* 'cal' can contain a relative path with backslashes. If that is processed
2550        by xmlSaveURI, they will be escaped and the external entity loader machinery
2551        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2552     ret = cal;
2553     while (*ret != '\0') {
2554         if (*ret == '\\')
2555             *ret = '/';
2556         ret++;
2557     }
2558 #endif
2559     memset(&temp, 0, sizeof(temp));
2560     temp.path = (char *) cal;
2561     ret = xmlSaveUri(&temp);
2562     xmlFree(cal);
2563     return(ret);
2564 }
2565 #define bottom_uri
2566 #include "elfgcchack.h"