src/external/3rd/library/libxml/uri.c

   1 /**
   2  * uri.c: set of generic URI related routines
   3  *
   4  * Reference: RFC 2396
   5  *
   6  * See Copyright for the status of this software.
   7  *
   8  * daniel@veillard.com
   9  */
  10
  11 #define IN_LIBXML
  12 #include "libxml.h"
  13
  14 #include <string.h>
  15
  16 #include <libxml/xmlmemory.h>
  17 #include <libxml/uri.h>
  18 #include <libxml/globals.h>
  19 #include <libxml/xmlerror.h>
  20
  21 /************************************************************************
  22  *                                                                      *
  23  *              Macros to differentiate various character type          *
  24  *                      directly extracted from RFC 2396                *
  25  *                                                                      *
  26  ************************************************************************/
  27
  28 /*
  29  * alpha    = lowalpha | upalpha
  30  */
  31 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  32
  33
  34 /*
  35  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  36  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  37  *            "u" | "v" | "w" | "x" | "y" | "z"
  38  */
  39
  40 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  41
  42 /*
  43  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  44  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  45  *           "U" | "V" | "W" | "X" | "Y" | "Z"
  46  */
  47 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  48
  49 /*
  50  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  51  */
  52
  53 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  54
  55 /*
  56  * alphanum = alpha | digit
  57  */
  58
  59 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  60
  61 /*
  62  * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
  63  *               "a" | "b" | "c" | "d" | "e" | "f"
  64  */
  65
  66 #define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
  67             (((x) >= 'A') && ((x) <= 'F')))
  68
  69 /*
  70  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  71  */
  72
  73 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
  74     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
  75     ((x) == '(') || ((x) == ')'))
  76
  77
  78 /*
  79  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
  80  */
  81
  82 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
  83         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
  84         ((x) == '+') || ((x) == '$') || ((x) == ','))
  85
  86 /*
  87  * unreserved = alphanum | mark
  88  */
  89
  90 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
  91
  92 /*
  93  * escaped = "%" hex hex
  94  */
  95
  96 #define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&             \
  97             (IS_HEX((p)[2])))
  98
  99 /*
 100  * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 101  *                        "&" | "=" | "+" | "$" | ","
 102  */
 103 #define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
 104                 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
 105                 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
 106                 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
 107
 108 /*
 109  * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 110  */
 111 #define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||        \
 112                 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
 113                 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
 114                 ((*(p) == ',')))
 115
 116 /*
 117  * rel_segment   = 1*( unreserved | escaped |
 118  *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 119  */
 120
 121 #define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||      \
 122           ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||      \
 123           ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||      \
 124           ((*(p) == ',')))
 125
 126 /*
 127  * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 128  */
 129
 130 #define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                 \
 131                       ((x) == '+') || ((x) == '-') || ((x) == '.'))
 132
 133 /*
 134  * reg_name = 1*( unreserved | escaped | "$" | "," |
 135  *                ";" | ":" | "@" | "&" | "=" | "+" )
 136  */
 137
 138 #define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||     \
 139        ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
 140        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||         \
 141        ((*(p) == '=')) || ((*(p) == '+')))
 142
 143 /*
 144  * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 145  *                      "+" | "$" | "," )
 146  */
 147 #define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||     \
 148        ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||         \
 149        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||         \
 150        ((*(p) == ',')))
 151
 152 /*
 153  * uric = reserved | unreserved | escaped
 154  */
 155
 156 #define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||         \
 157                     (IS_RESERVED(*(p))))
 158
 159 /*
 160 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 161 */
 162
 163 #define IS_UNWISE(p)                                                    \
 164       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 165        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 166        ((*(p) == ']')) || ((*(p) == '`')))
 167
 168 /*
 169  * Skip to next pointer char, handle escaped sequences
 170  */
 171
 172 #define NEXT(p) ((*p == '%')? p += 3 : p++)
 173
 174 /*
 175  * Productions from the spec.
 176  *
 177  *    authority     = server | reg_name
 178  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 179  *                        ";" | ":" | "@" | "&" | "=" | "+" )
 180  *
 181  * path          = [ abs_path | opaque_part ]
 182  */
 183
 184 /************************************************************************
 185  *                                                                      *
 186  *                      Generic URI structure functions                 *
 187  *                                                                      *
 188  ************************************************************************/
 189
 190 /**
 191  * xmlCreateURI:
 192  *
 193  * Simply creates an empty xmlURI
 194  *
 195  * Returns the new structure or NULL in case of error
 196  */
 197 xmlURIPtr
 198 xmlCreateURI(void) {
 199     xmlURIPtr ret;
 200
 201     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
 202     if (ret == NULL) {
 203         xmlGenericError(xmlGenericErrorContext,
 204                 "xmlCreateURI: out of memory\n");
 205         return(NULL);
 206     }
 207     memset(ret, 0, sizeof(xmlURI));
 208     return(ret);
 209 }
 210
 211 /**
 212  * xmlSaveUri:
 213  * @uri:  pointer to an xmlURI
 214  *
 215  * Save the URI as an escaped string
 216  *
 217  * Returns a new string (to be deallocated by caller)
 218  */
 219 xmlChar *
 220 xmlSaveUri(xmlURIPtr uri) {
 221     xmlChar *ret = NULL;
 222     const char *p;
 223     int len;
 224     int max;
 225
 226     if (uri == NULL) return(NULL);
 227
 228
 229     max = 80;
 230     ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
 231     if (ret == NULL) {
 232         xmlGenericError(xmlGenericErrorContext,
 233                 "xmlSaveUri: out of memory\n");
 234         return(NULL);
 235     }
 236     len = 0;
 237
 238     if (uri->scheme != NULL) {
 239         p = uri->scheme;
 240         while (*p != 0) {
 241             if (len >= max) {
 242                 max *= 2;
 243                 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 244                 if (ret == NULL) {
 245                     xmlGenericError(xmlGenericErrorContext,
 246                             "xmlSaveUri: out of memory\n");
 247                     return(NULL);
 248                 }
 249             }
 250             ret[len++] = *p++;
 251         }
 252         if (len >= max) {
 253             max *= 2;
 254             ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 255             if (ret == NULL) {
 256                 xmlGenericError(xmlGenericErrorContext,
 257                         "xmlSaveUri: out of memory\n");
 258                 return(NULL);
 259             }
 260         }
 261         ret[len++] = ':';
 262     }
 263     if (uri->opaque != NULL) {
 264         p = uri->opaque;
 265         while (*p != 0) {
 266             if (len + 3 >= max) {
 267                 max *= 2;
 268                 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 269                 if (ret == NULL) {
 270                     xmlGenericError(xmlGenericErrorContext,
 271                             "xmlSaveUri: out of memory\n");
 272                     return(NULL);
 273                 }
 274             }
 275             if ((IS_UNRESERVED(*(p))) ||
 276                 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
 277                 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
 278                 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
 279                 ret[len++] = *p++;
 280             else {
 281                 int val = *(unsigned char *)p++;
 282                 int hi = val / 0x10, lo = val % 0x10;
 283                 ret[len++] = '%';
 284                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 285                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 286             }
 287         }
 288         if (len >= max) {
 289             max *= 2;
 290             ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 291             if (ret == NULL) {
 292                 xmlGenericError(xmlGenericErrorContext,
 293                         "xmlSaveUri: out of memory\n");
 294                 return(NULL);
 295             }
 296         }
 297         ret[len++] = 0;
 298     } else {
 299         if (uri->server != NULL) {
 300             if (len + 3 >= max) {
 301                 max *= 2;
 302                 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 303                 if (ret == NULL) {
 304                     xmlGenericError(xmlGenericErrorContext,
 305                             "xmlSaveUri: out of memory\n");
 306                     return(NULL);
 307                 }
 308             }
 309             ret[len++] = '/';
 310             ret[len++] = '/';
 311             if (uri->user != NULL) {
 312                 p = uri->user;
 313                 while (*p != 0) {
 314                     if (len + 3 >= max) {
 315                         max *= 2;
 316                         ret = (xmlChar *) xmlRealloc(ret,
 317                                 (max + 1) * sizeof(xmlChar));
 318                         if (ret == NULL) {
 319                             xmlGenericError(xmlGenericErrorContext,
 320                                     "xmlSaveUri: out of memory\n");
 321                             return(NULL);
 322                         }
 323                     }
 324                     if ((IS_UNRESERVED(*(p))) ||
 325                         ((*(p) == ';')) || ((*(p) == ':')) ||
 326                         ((*(p) == '&')) || ((*(p) == '=')) ||
 327                         ((*(p) == '+')) || ((*(p) == '$')) ||
 328                         ((*(p) == ',')))
 329                         ret[len++] = *p++;
 330                     else {
 331                         int val = *(unsigned char *)p++;
 332                         int hi = val / 0x10, lo = val % 0x10;
 333                         ret[len++] = '%';
 334                         ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 335                         ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 336                     }
 337                 }
 338                 if (len + 3 >= max) {
 339                     max *= 2;
 340                     ret = (xmlChar *) xmlRealloc(ret,
 341                             (max + 1) * sizeof(xmlChar));
 342                     if (ret == NULL) {
 343                         xmlGenericError(xmlGenericErrorContext,
 344                                 "xmlSaveUri: out of memory\n");
 345                         return(NULL);
 346                     }
 347                 }
 348                 ret[len++] = '@';
 349             }
 350             p = uri->server;
 351             while (*p != 0) {
 352                 if (len >= max) {
 353                     max *= 2;
 354                     ret = (xmlChar *) xmlRealloc(ret,
 355                             (max + 1) * sizeof(xmlChar));
 356                     if (ret == NULL) {
 357                         xmlGenericError(xmlGenericErrorContext,
 358                                 "xmlSaveUri: out of memory\n");
 359                         return(NULL);
 360                     }
 361                 }
 362                 ret[len++] = *p++;
 363             }
 364             if (uri->port > 0) {
 365                 if (len + 10 >= max) {
 366                     max *= 2;
 367                     ret = (xmlChar *) xmlRealloc(ret,
 368                             (max + 1) * sizeof(xmlChar));
 369                     if (ret == NULL) {
 370                         xmlGenericError(xmlGenericErrorContext,
 371                                 "xmlSaveUri: out of memory\n");
 372                         return(NULL);
 373                     }
 374                 }
 375                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
 376             }
 377         } else if (uri->authority != NULL) {
 378             if (len + 3 >= max) {
 379                 max *= 2;
 380                 ret = (xmlChar *) xmlRealloc(ret,
 381                         (max + 1) * sizeof(xmlChar));
 382                 if (ret == NULL) {
 383                     xmlGenericError(xmlGenericErrorContext,
 384                             "xmlSaveUri: out of memory\n");
 385                     return(NULL);
 386                 }
 387             }
 388             ret[len++] = '/';
 389             ret[len++] = '/';
 390             p = uri->authority;
 391             while (*p != 0) {
 392                 if (len + 3 >= max) {
 393                     max *= 2;
 394                     ret = (xmlChar *) xmlRealloc(ret,
 395                             (max + 1) * sizeof(xmlChar));
 396                     if (ret == NULL) {
 397                         xmlGenericError(xmlGenericErrorContext,
 398                                 "xmlSaveUri: out of memory\n");
 399                         return(NULL);
 400                     }
 401                 }
 402                 if ((IS_UNRESERVED(*(p))) ||
 403                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
 404                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
 405                     ((*(p) == '=')) || ((*(p) == '+')))
 406                     ret[len++] = *p++;
 407                 else {
 408                     int val = *(unsigned char *)p++;
 409                     int hi = val / 0x10, lo = val % 0x10;
 410                     ret[len++] = '%';
 411                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 412                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 413                 }
 414             }
 415         } else if (uri->scheme != NULL) {
 416             if (len + 3 >= max) {
 417                 max *= 2;
 418                 ret = (xmlChar *) xmlRealloc(ret,
 419                         (max + 1) * sizeof(xmlChar));
 420                 if (ret == NULL) {
 421                     xmlGenericError(xmlGenericErrorContext,
 422                             "xmlSaveUri: out of memory\n");
 423                     return(NULL);
 424                 }
 425             }
 426             ret[len++] = '/';
 427             ret[len++] = '/';
 428         }
 429         if (uri->path != NULL) {
 430             p = uri->path;
 431             while (*p != 0) {
 432                 if (len + 3 >= max) {
 433                     max *= 2;
 434                     ret = (xmlChar *) xmlRealloc(ret,
 435                             (max + 1) * sizeof(xmlChar));
 436                     if (ret == NULL) {
 437                         xmlGenericError(xmlGenericErrorContext,
 438                                 "xmlSaveUri: out of memory\n");
 439                         return(NULL);
 440                     }
 441                 }
 442                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
 443                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
 444                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
 445                     ((*(p) == ',')))
 446                     ret[len++] = *p++;
 447                 else {
 448                     int val = *(unsigned char *)p++;
 449                     int hi = val / 0x10, lo = val % 0x10;
 450                     ret[len++] = '%';
 451                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 452                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 453                 }
 454             }
 455         }
 456         if (uri->query != NULL) {
 457             if (len + 3 >= max) {
 458                 max *= 2;
 459                 ret = (xmlChar *) xmlRealloc(ret,
 460                         (max + 1) * sizeof(xmlChar));
 461                 if (ret == NULL) {
 462                     xmlGenericError(xmlGenericErrorContext,
 463                             "xmlSaveUri: out of memory\n");
 464                     return(NULL);
 465                 }
 466             }
 467             ret[len++] = '?';
 468             p = uri->query;
 469             while (*p != 0) {
 470                 if (len + 3 >= max) {
 471                     max *= 2;
 472                     ret = (xmlChar *) xmlRealloc(ret,
 473                             (max + 1) * sizeof(xmlChar));
 474                     if (ret == NULL) {
 475                         xmlGenericError(xmlGenericErrorContext,
 476                                 "xmlSaveUri: out of memory\n");
 477                         return(NULL);
 478                     }
 479                 }
 480                 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
 481                     ret[len++] = *p++;
 482                 else {
 483                     int val = *(unsigned char *)p++;
 484                     int hi = val / 0x10, lo = val % 0x10;
 485                     ret[len++] = '%';
 486                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 487                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 488                 }
 489             }
 490         }
 491         if (uri->fragment != NULL) {
 492             if (len + 3 >= max) {
 493                 max *= 2;
 494                 ret = (xmlChar *) xmlRealloc(ret,
 495                         (max + 1) * sizeof(xmlChar));
 496                 if (ret == NULL) {
 497                     xmlGenericError(xmlGenericErrorContext,
 498                             "xmlSaveUri: out of memory\n");
 499                     return(NULL);
 500                 }
 501             }
 502             ret[len++] = '#';
 503             p = uri->fragment;
 504             while (*p != 0) {
 505                 if (len + 3 >= max) {
 506                     max *= 2;
 507                     ret = (xmlChar *) xmlRealloc(ret,
 508                             (max + 1) * sizeof(xmlChar));
 509                     if (ret == NULL) {
 510                         xmlGenericError(xmlGenericErrorContext,
 511                                 "xmlSaveUri: out of memory\n");
 512                         return(NULL);
 513                     }
 514                 }
 515                 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
 516                     ret[len++] = *p++;
 517                 else {
 518                     int val = *(unsigned char *)p++;
 519                     int hi = val / 0x10, lo = val % 0x10;
 520                     ret[len++] = '%';
 521                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 522                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 523                 }
 524             }
 525         }
 526         if (len >= max) {
 527             max *= 2;
 528             ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 529             if (ret == NULL) {
 530                 xmlGenericError(xmlGenericErrorContext,
 531                         "xmlSaveUri: out of memory\n");
 532                 return(NULL);
 533             }
 534         }
 535         ret[len++] = 0;
 536     }
 537     return(ret);
 538 }
 539
 540 /**
 541  * xmlPrintURI:
 542  * @stream:  a FILE* for the output
 543  * @uri:  pointer to an xmlURI
 544  *
 545  * Prints the URI in the stream @steam.
 546  */
 547 void
 548 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
 549     xmlChar *out;
 550
 551     out = xmlSaveUri(uri);
 552     if (out != NULL) {
 553         fprintf(stream, "%s", out);
 554         xmlFree(out);
 555     }
 556 }
 557
 558 /**
 559  * xmlCleanURI:
 560  * @uri:  pointer to an xmlURI
 561  *
 562  * Make sure the xmlURI struct is free of content
 563  */
 564 static void
 565 xmlCleanURI(xmlURIPtr uri) {
 566     if (uri == NULL) return;
 567
 568     if (uri->scheme != NULL) xmlFree(uri->scheme);
 569     uri->scheme = NULL;
 570     if (uri->server != NULL) xmlFree(uri->server);
 571     uri->server = NULL;
 572     if (uri->user != NULL) xmlFree(uri->user);
 573     uri->user = NULL;
 574     if (uri->path != NULL) xmlFree(uri->path);
 575     uri->path = NULL;
 576     if (uri->fragment != NULL) xmlFree(uri->fragment);
 577     uri->fragment = NULL;
 578     if (uri->opaque != NULL) xmlFree(uri->opaque);
 579     uri->opaque = NULL;
 580     if (uri->authority != NULL) xmlFree(uri->authority);
 581     uri->authority = NULL;
 582     if (uri->query != NULL) xmlFree(uri->query);
 583     uri->query = NULL;
 584 }
 585
 586 /**
 587  * xmlFreeURI:
 588  * @uri:  pointer to an xmlURI
 589  *
 590  * Free up the xmlURI struct
 591  */
 592 void
 593 xmlFreeURI(xmlURIPtr uri) {
 594     if (uri == NULL) return;
 595
 596     if (uri->scheme != NULL) xmlFree(uri->scheme);
 597     if (uri->server != NULL) xmlFree(uri->server);
 598     if (uri->user != NULL) xmlFree(uri->user);
 599     if (uri->path != NULL) xmlFree(uri->path);
 600     if (uri->fragment != NULL) xmlFree(uri->fragment);
 601     if (uri->opaque != NULL) xmlFree(uri->opaque);
 602     if (uri->authority != NULL) xmlFree(uri->authority);
 603     if (uri->query != NULL) xmlFree(uri->query);
 604     xmlFree(uri);
 605 }
 606
 607 /************************************************************************
 608  *                                                                      *
 609  *                      Helper functions                                *
 610  *                                                                      *
 611  ************************************************************************/
 612
 613 /**
 614  * xmlNormalizeURIPath:
 615  * @path:  pointer to the path string
 616  *
 617  * Applies the 5 normalization steps to a path string--that is, RFC 2396
 618  * Section 5.2, steps 6.c through 6.g.
 619  *
 620  * Normalization occurs directly on the string, no new allocation is done
 621  *
 622  * Returns 0 or an error code
 623  */
 624 int
 625 xmlNormalizeURIPath(char *path) {
 626     char *cur, *out;
 627
 628     if (path == NULL)
 629         return(-1);
 630
 631     /* Skip all initial "/" chars.  We want to get to the beginning of the
 632      * first non-empty segment.
 633      */
 634     cur = path;
 635     while (cur[0] == '/')
 636       ++cur;
 637     if (cur[0] == '\0')
 638       return(0);
 639
 640     /* Keep everything we've seen so far.  */
 641     out = cur;
 642
 643     /*
 644      * Analyze each segment in sequence for cases (c) and (d).
 645      */
 646     while (cur[0] != '\0') {
 647         /*
 648          * c) All occurrences of "./", where "." is a complete path segment,
 649          *    are removed from the buffer string.
 650          */
 651         if ((cur[0] == '.') && (cur[1] == '/')) {
 652             cur += 2;
 653             /* '//' normalization should be done at this point too */
 654             while (cur[0] == '/')
 655                 cur++;
 656             continue;
 657         }
 658
 659         /*
 660          * d) If the buffer string ends with "." as a complete path segment,
 661          *    that "." is removed.
 662          */
 663         if ((cur[0] == '.') && (cur[1] == '\0'))
 664             break;
 665
 666         /* Otherwise keep the segment.  */
 667         while (cur[0] != '/') {
 668             if (cur[0] == '\0')
 669               goto done_cd;
 670             (out++)[0] = (cur++)[0];
 671         }
 672         /* nomalize // */
 673         while ((cur[0] == '/') && (cur[1] == '/'))
 674             cur++;
 675
 676         (out++)[0] = (cur++)[0];
 677     }
 678  done_cd:
 679     out[0] = '\0';
 680
 681     /* Reset to the beginning of the first segment for the next sequence.  */
 682     cur = path;
 683     while (cur[0] == '/')
 684       ++cur;
 685     if (cur[0] == '\0')
 686         return(0);
 687
 688     /*
 689      * Analyze each segment in sequence for cases (e) and (f).
 690      *
 691      * e) All occurrences of "<segment>/../", where <segment> is a
 692      *    complete path segment not equal to "..", are removed from the
 693      *    buffer string.  Removal of these path segments is performed
 694      *    iteratively, removing the leftmost matching pattern on each
 695      *    iteration, until no matching pattern remains.
 696      *
 697      * f) If the buffer string ends with "<segment>/..", where <segment>
 698      *    is a complete path segment not equal to "..", that
 699      *    "<segment>/.." is removed.
 700      *
 701      * To satisfy the "iterative" clause in (e), we need to collapse the
 702      * string every time we find something that needs to be removed.  Thus,
 703      * we don't need to keep two pointers into the string: we only need a
 704      * "current position" pointer.
 705      */
 706     while (1) {
 707         char *segp;
 708
 709         /* At the beginning of each iteration of this loop, "cur" points to
 710          * the first character of the segment we want to examine.
 711          */
 712
 713         /* Find the end of the current segment.  */
 714         segp = cur;
 715         while ((segp[0] != '/') && (segp[0] != '\0'))
 716           ++segp;
 717
 718         /* If this is the last segment, we're done (we need at least two
 719          * segments to meet the criteria for the (e) and (f) cases).
 720          */
 721         if (segp[0] == '\0')
 722           break;
 723
 724         /* If the first segment is "..", or if the next segment _isn't_ "..",
 725          * keep this segment and try the next one.
 726          */
 727         ++segp;
 728         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
 729             || ((segp[0] != '.') || (segp[1] != '.')
 730                 || ((segp[2] != '/') && (segp[2] != '\0')))) {
 731           cur = segp;
 732           continue;
 733         }
 734
 735         /* If we get here, remove this segment and the next one and back up
 736          * to the previous segment (if there is one), to implement the
 737          * "iteratively" clause.  It's pretty much impossible to back up
 738          * while maintaining two pointers into the buffer, so just compact
 739          * the whole buffer now.
 740          */
 741
 742         /* If this is the end of the buffer, we're done.  */
 743         if (segp[2] == '\0') {
 744           cur[0] = '\0';
 745           break;
 746         }
 747         strcpy(cur, segp + 3);
 748
 749         /* If there are no previous segments, then keep going from here.  */
 750         segp = cur;
 751         while ((segp > path) && ((--segp)[0] == '/'))
 752           ;
 753         if (segp == path)
 754           continue;
 755
 756         /* "segp" is pointing to the end of a previous segment; find it's
 757          * start.  We need to back up to the previous segment and start
 758          * over with that to handle things like "foo/bar/../..".  If we
 759          * don't do this, then on the first pass we'll remove the "bar/..",
 760          * but be pointing at the second ".." so we won't realize we can also
 761          * remove the "foo/..".
 762          */
 763         cur = segp;
 764         while ((cur > path) && (cur[-1] != '/'))
 765           --cur;
 766     }
 767     out[0] = '\0';
 768
 769     /*
 770      * g) If the resulting buffer string still begins with one or more
 771      *    complete path segments of "..", then the reference is
 772      *    considered to be in error. Implementations may handle this
 773      *    error by retaining these components in the resolved path (i.e.,
 774      *    treating them as part of the final URI), by removing them from
 775      *    the resolved path (i.e., discarding relative levels above the
 776      *    root), or by avoiding traversal of the reference.
 777      *
 778      * We discard them from the final path.
 779      */
 780     if (path[0] == '/') {
 781       cur = path;
 782       while ((cur[1] == '.') && (cur[2] == '.')
 783              && ((cur[3] == '/') || (cur[3] == '\0')))
 784         cur += 3;
 785
 786       if (cur != path) {
 787         out = path;
 788         while (cur[0] != '\0')
 789           (out++)[0] = (cur++)[0];
 790         out[0] = 0;
 791       }
 792     }
 793
 794     return(0);
 795 }
 796
 797 /**
 798  * xmlURIUnescapeString:
 799  * @str:  the string to unescape
 800  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 801  * @target:  optional destination buffer
 802  *
 803  * Unescaping routine, does not do validity checks !
 804  * Output is direct unsigned char translation of %XX values (no encoding)
 805  *
 806  * Returns an copy of the string, but unescaped
 807  */
 808 char *
 809 xmlURIUnescapeString(const char *str, int len, char *target) {
 810     char *ret, *out;
 811     const char *in;
 812
 813     if (str == NULL)
 814         return(NULL);
 815     if (len <= 0) len = strlen(str);
 816     if (len <= 0) return(NULL);
 817
 818     if (target == NULL) {
 819         ret = (char *) xmlMalloc(len + 1);
 820         if (ret == NULL) {
 821             xmlGenericError(xmlGenericErrorContext,
 822                     "xmlURIUnescapeString: out of memory\n");
 823             return(NULL);
 824         }
 825     } else
 826         ret = target;
 827     in = str;
 828     out = ret;
 829     while(len > 0) {
 830         if (*in == '%') {
 831             in++;
 832             if ((*in >= '0') && (*in <= '9'))
 833                 *out = (*in - '0');
 834             else if ((*in >= 'a') && (*in <= 'f'))
 835                 *out = (*in - 'a') + 10;
 836             else if ((*in >= 'A') && (*in <= 'F'))
 837                 *out = (*in - 'A') + 10;
 838             in++;
 839             if ((*in >= '0') && (*in <= '9'))
 840                 *out = *out * 16 + (*in - '0');
 841             else if ((*in >= 'a') && (*in <= 'f'))
 842                 *out = *out * 16 + (*in - 'a') + 10;
 843             else if ((*in >= 'A') && (*in <= 'F'))
 844                 *out = *out * 16 + (*in - 'A') + 10;
 845             in++;
 846             len -= 3;
 847             out++;
 848         } else {
 849             *out++ = *in++;
 850             len--;
 851         }
 852     }
 853     *out = 0;
 854     return(ret);
 855 }
 856
 857 /**
 858  * xmlURIEscapeStr:
 859  * @str:  string to escape
 860  * @list: exception list string of chars not to escape
 861  *
 862  * This routine escapes a string to hex, ignoring reserved characters (a-z)
 863  * and the characters in the exception list.
 864  *
 865  * Returns a new escaped string or NULL in case of error.
 866  */
 867 xmlChar *
 868 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
 869     xmlChar *ret, ch;
 870     const xmlChar *in;
 871
 872     unsigned int len, out;
 873
 874     if (str == NULL)
 875         return(NULL);
 876     len = xmlStrlen(str);
 877     if (!(len > 0)) return(NULL);
 878
 879     len += 20;
 880     ret = (xmlChar *) xmlMalloc(len);
 881     if (ret == NULL) {
 882         xmlGenericError(xmlGenericErrorContext,
 883                 "xmlURIEscapeStr: out of memory\n");
 884         return(NULL);
 885     }
 886     in = (const xmlChar *) str;
 887     out = 0;
 888     while(*in != 0) {
 889         if (len - out <= 3) {
 890             len += 20;
 891             ret = (xmlChar *) xmlRealloc(ret, len);
 892             if (ret == NULL) {
 893                 xmlGenericError(xmlGenericErrorContext,
 894                         "xmlURIEscapeStr: out of memory\n");
 895                 return(NULL);
 896             }
 897         }
 898
 899         ch = *in;
 900
 901         if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
 902             unsigned char val;
 903             ret[out++] = '%';
 904             val = ch >> 4;
 905             if (val <= 9)
 906                 ret[out++] = '0' + val;
 907             else
 908                 ret[out++] = 'A' + val - 0xA;
 909             val = ch & 0xF;
 910             if (val <= 9)
 911                 ret[out++] = '0' + val;
 912             else
 913                 ret[out++] = 'A' + val - 0xA;
 914             in++;
 915         } else {
 916             ret[out++] = *in++;
 917         }
 918
 919     }
 920     ret[out] = 0;
 921     return(ret);
 922 }
 923
 924 /**
 925  * xmlURIEscape:
 926  * @str:  the string of the URI to escape
 927  *
 928  * Escaping routine, does not do validity checks !
 929  * It will try to escape the chars needing this, but this is heuristic
 930  * based it's impossible to be sure.
 931  *
 932  * Returns an copy of the string, but escaped
 933  *
 934  * 25 May 2001
 935  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
 936  * according to RFC2396.
 937  *   - Carl Douglas
 938  */
 939 xmlChar *
 940 xmlURIEscape(const xmlChar * str)
 941 {
 942     xmlChar *ret, *segment = NULL;
 943     xmlURIPtr uri;
 944     int ret2;
 945
 946 #define NULLCHK(p) if(!p) { \
 947                    xmlGenericError(xmlGenericErrorContext, \
 948                         "xmlURIEscape: out of memory\n"); \
 949                    return NULL; }
 950
 951     if (str == NULL)
 952         return (NULL);
 953
 954     uri = xmlCreateURI();
 955     if (uri != NULL) {
 956         /*
 957          * Allow escaping errors in the unescaped form
 958          */
 959         uri->cleanup = 1;
 960         ret2 = xmlParseURIReference(uri, (const char *)str);
 961         if (ret2) {
 962             xmlFreeURI(uri);
 963             return (NULL);
 964         }
 965     }
 966
 967     if (!uri)
 968         return NULL;
 969
 970     ret = NULL;
 971
 972     if (uri->scheme) {
 973         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
 974         NULLCHK(segment)
 975         ret = xmlStrcat(ret, segment);
 976         ret = xmlStrcat(ret, BAD_CAST ":");
 977         xmlFree(segment);
 978     }
 979
 980     if (uri->authority) {
 981         segment =
 982             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
 983         NULLCHK(segment)
 984         ret = xmlStrcat(ret, BAD_CAST "//");
 985         ret = xmlStrcat(ret, segment);
 986         xmlFree(segment);
 987     }
 988
 989     if (uri->user) {
 990         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
 991         NULLCHK(segment)
 992         ret = xmlStrcat(ret, segment);
 993         ret = xmlStrcat(ret, BAD_CAST "@");
 994         xmlFree(segment);
 995     }
 996
 997     if (uri->server) {
 998         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
 999         NULLCHK(segment)
1000         ret = xmlStrcat(ret, BAD_CAST "//");
1001         ret = xmlStrcat(ret, segment);
1002         xmlFree(segment);
1003     }
1004
1005     if (uri->port) {
1006         xmlChar port[10];
1007
1008         snprintf((char *) port, 10, "%d", uri->port);
1009         ret = xmlStrcat(ret, BAD_CAST ":");
1010         ret = xmlStrcat(ret, port);
1011     }
1012
1013     if (uri->path) {
1014         segment =
1015             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1016         NULLCHK(segment)
1017         ret = xmlStrcat(ret, segment);
1018         xmlFree(segment);
1019     }
1020
1021     if (uri->query) {
1022         segment =
1023             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1024         NULLCHK(segment)
1025         ret = xmlStrcat(ret, BAD_CAST "?");
1026         ret = xmlStrcat(ret, segment);
1027         xmlFree(segment);
1028     }
1029
1030     if (uri->opaque) {
1031         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1032         NULLCHK(segment)
1033         ret = xmlStrcat(ret, segment);
1034         xmlFree(segment);
1035     }
1036
1037     if (uri->fragment) {
1038         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1039         NULLCHK(segment)
1040         ret = xmlStrcat(ret, BAD_CAST "#");
1041         ret = xmlStrcat(ret, segment);
1042         xmlFree(segment);
1043     }
1044
1045     xmlFreeURI(uri);
1046 #undef NULLCHK
1047
1048     return (ret);
1049 }
1050
1051 /************************************************************************
1052  *                                                                      *
1053  *                      Escaped URI parsing                             *
1054  *                                                                      *
1055  ************************************************************************/
1056
1057 /**
1058  * xmlParseURIFragment:
1059  * @uri:  pointer to an URI structure
1060  * @str:  pointer to the string to analyze
1061  *
1062  * Parse an URI fragment string and fills in the appropriate fields
1063  * of the @uri structure.
1064  *
1065  * fragment = *uric
1066  *
1067  * Returns 0 or the error code
1068  */
1069 static int
1070 xmlParseURIFragment(xmlURIPtr uri, const char **str)
1071 {
1072     const char *cur = *str;
1073
1074     if (str == NULL)
1075         return (-1);
1076
1077     while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1078         NEXT(cur);
1079     if (uri != NULL) {
1080         if (uri->fragment != NULL)
1081             xmlFree(uri->fragment);
1082         uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1083     }
1084     *str = cur;
1085     return (0);
1086 }
1087
1088 /**
1089  * xmlParseURIQuery:
1090  * @uri:  pointer to an URI structure
1091  * @str:  pointer to the string to analyze
1092  *
1093  * Parse the query part of an URI
1094  *
1095  * query = *uric
1096  *
1097  * Returns 0 or the error code
1098  */
1099 static int
1100 xmlParseURIQuery(xmlURIPtr uri, const char **str)
1101 {
1102     const char *cur = *str;
1103
1104     if (str == NULL)
1105         return (-1);
1106
1107     while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1108         NEXT(cur);
1109     if (uri != NULL) {
1110         if (uri->query != NULL)
1111             xmlFree(uri->query);
1112         uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1113     }
1114     *str = cur;
1115     return (0);
1116 }
1117
1118 /**
1119  * xmlParseURIScheme:
1120  * @uri:  pointer to an URI structure
1121  * @str:  pointer to the string to analyze
1122  *
1123  * Parse an URI scheme
1124  *
1125  * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1126  *
1127  * Returns 0 or the error code
1128  */
1129 static int
1130 xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1131     const char *cur;
1132
1133     if (str == NULL)
1134         return(-1);
1135
1136     cur = *str;
1137     if (!IS_ALPHA(*cur))
1138         return(2);
1139     cur++;
1140     while (IS_SCHEME(*cur)) cur++;
1141     if (uri != NULL) {
1142         if (uri->scheme != NULL) xmlFree(uri->scheme);
1143         /* !!! strndup */
1144         uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1145     }
1146     *str = cur;
1147     return(0);
1148 }
1149
1150 /**
1151  * xmlParseURIOpaquePart:
1152  * @uri:  pointer to an URI structure
1153  * @str:  pointer to the string to analyze
1154  *
1155  * Parse an URI opaque part
1156  *
1157  * opaque_part = uric_no_slash *uric
1158  *
1159  * Returns 0 or the error code
1160  */
1161 static int
1162 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1163 {
1164     const char *cur;
1165
1166     if (str == NULL)
1167         return (-1);
1168
1169     cur = *str;
1170     if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
1171         return (3);
1172     }
1173     NEXT(cur);
1174     while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1175         NEXT(cur);
1176     if (uri != NULL) {
1177         if (uri->opaque != NULL)
1178             xmlFree(uri->opaque);
1179         uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1180     }
1181     *str = cur;
1182     return (0);
1183 }
1184
1185 /**
1186  * xmlParseURIServer:
1187  * @uri:  pointer to an URI structure
1188  * @str:  pointer to the string to analyze
1189  *
1190  * Parse a server subpart of an URI, it's a finer grain analysis
1191  * of the authority part.
1192  *
1193  * server        = [ [ userinfo "@" ] hostport ]
1194  * userinfo      = *( unreserved | escaped |
1195  *                       ";" | ":" | "&" | "=" | "+" | "$" | "," )
1196  * hostport      = host [ ":" port ]
1197  * host          = hostname | IPv4address
1198  * hostname      = *( domainlabel "." ) toplabel [ "." ]
1199  * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1200  * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1201  * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1202  * port          = *digit
1203  *
1204  * Returns 0 or the error code
1205  */
1206 static int
1207 xmlParseURIServer(xmlURIPtr uri, const char **str) {
1208     const char *cur;
1209     const char *host, *tmp;
1210
1211     if (str == NULL)
1212         return(-1);
1213
1214     cur = *str;
1215
1216     /*
1217      * is there an userinfo ?
1218      */
1219     while (IS_USERINFO(cur)) NEXT(cur);
1220     if (*cur == '@') {
1221         if (uri != NULL) {
1222             if (uri->user != NULL) xmlFree(uri->user);
1223             uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1224         }
1225         cur++;
1226     } else {
1227         if (uri != NULL) {
1228             if (uri->user != NULL) xmlFree(uri->user);
1229             uri->user = NULL;
1230         }
1231         cur = *str;
1232     }
1233     /*
1234      * This can be empty in the case where there is no server
1235      */
1236     host = cur;
1237     if (*cur == '/') {
1238         if (uri != NULL) {
1239             if (uri->authority != NULL) xmlFree(uri->authority);
1240             uri->authority = NULL;
1241             if (uri->server != NULL) xmlFree(uri->server);
1242             uri->server = NULL;
1243             uri->port = 0;
1244         }
1245         return(0);
1246     }
1247     /*
1248      * host part of hostport can derive either an IPV4 address
1249      * or an unresolved name. Check the IP first, it easier to detect
1250      * errors if wrong one
1251      */
1252     if (IS_DIGIT(*cur)) {
1253         while(IS_DIGIT(*cur)) cur++;
1254         if (*cur != '.')
1255             goto host_name;
1256         cur++;
1257         if (!IS_DIGIT(*cur))
1258             goto host_name;
1259         while(IS_DIGIT(*cur)) cur++;
1260         if (*cur != '.')
1261             goto host_name;
1262         cur++;
1263         if (!IS_DIGIT(*cur))
1264             goto host_name;
1265         while(IS_DIGIT(*cur)) cur++;
1266         if (*cur != '.')
1267             goto host_name;
1268         cur++;
1269         if (!IS_DIGIT(*cur))
1270             goto host_name;
1271         while(IS_DIGIT(*cur)) cur++;
1272         if (uri != NULL) {
1273             if (uri->authority != NULL) xmlFree(uri->authority);
1274             uri->authority = NULL;
1275             if (uri->server != NULL) xmlFree(uri->server);
1276             uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1277         }
1278         goto host_done;
1279     }
1280 host_name:
1281     /*
1282      * the hostname production as-is is a parser nightmare.
1283      * simplify it to
1284      * hostname = *( domainlabel "." ) domainlabel [ "." ]
1285      * and just make sure the last label starts with a non numeric char.
1286      */
1287     if (!IS_ALPHANUM(*cur))
1288         return(6);
1289     while (IS_ALPHANUM(*cur)) {
1290         while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1291         if (*cur == '.')
1292             cur++;
1293     }
1294     tmp = cur;
1295     tmp--;
1296     while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1297     tmp++;
1298     if (!IS_ALPHA(*tmp))
1299         return(7);
1300     if (uri != NULL) {
1301         if (uri->authority != NULL) xmlFree(uri->authority);
1302         uri->authority = NULL;
1303         if (uri->server != NULL) xmlFree(uri->server);
1304         uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1305     }
1306
1307 host_done:
1308
1309     /*
1310      * finish by checking for a port presence.
1311      */
1312     if (*cur == ':') {
1313         cur++;
1314         if (IS_DIGIT(*cur)) {
1315             if (uri != NULL)
1316                 uri->port = 0;
1317             while (IS_DIGIT(*cur)) {
1318                 if (uri != NULL)
1319                     uri->port = uri->port * 10 + (*cur - '0');
1320                 cur++;
1321             }
1322         }
1323     }
1324     *str = cur;
1325     return(0);
1326 }
1327
1328 /**
1329  * xmlParseURIRelSegment:
1330  * @uri:  pointer to an URI structure
1331  * @str:  pointer to the string to analyze
1332  *
1333  * Parse an URI relative segment
1334  *
1335  * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1336  *                          "+" | "$" | "," )
1337  *
1338  * Returns 0 or the error code
1339  */
1340 static int
1341 xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1342 {
1343     const char *cur;
1344
1345     if (str == NULL)
1346         return (-1);
1347
1348     cur = *str;
1349     if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
1350         return (3);
1351     }
1352     NEXT(cur);
1353     while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1354         NEXT(cur);
1355     if (uri != NULL) {
1356         if (uri->path != NULL)
1357             xmlFree(uri->path);
1358         uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1359     }
1360     *str = cur;
1361     return (0);
1362 }
1363
1364 /**
1365  * xmlParseURIPathSegments:
1366  * @uri:  pointer to an URI structure
1367  * @str:  pointer to the string to analyze
1368  * @slash:  should we add a leading slash
1369  *
1370  * Parse an URI set of path segments
1371  *
1372  * path_segments = segment *( "/" segment )
1373  * segment       = *pchar *( ";" param )
1374  * param         = *pchar
1375  *
1376  * Returns 0 or the error code
1377  */
1378 static int
1379 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1380 {
1381     const char *cur;
1382
1383     if (str == NULL)
1384         return (-1);
1385
1386     cur = *str;
1387
1388     do {
1389         while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1390             NEXT(cur);
1391         while (*cur == ';') {
1392             cur++;
1393             while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1394                 NEXT(cur);
1395         }
1396         if (*cur != '/')
1397             break;
1398         cur++;
1399     } while (1);
1400     if (uri != NULL) {
1401         int len, len2 = 0;
1402         char *path;
1403
1404         /*
1405          * Concat the set of path segments to the current path
1406          */
1407         len = cur - *str;
1408         if (slash)
1409             len++;
1410
1411         if (uri->path != NULL) {
1412             len2 = strlen(uri->path);
1413             len += len2;
1414         }
1415         path = (char *) xmlMalloc(len + 1);
1416         if (path == NULL) {
1417             xmlGenericError(xmlGenericErrorContext,
1418                             "xmlParseURIPathSegments: out of memory\n");
1419             *str = cur;
1420             return (-1);
1421         }
1422         if (uri->path != NULL)
1423             memcpy(path, uri->path, len2);
1424         if (slash) {
1425             path[len2] = '/';
1426             len2++;
1427         }
1428         path[len2] = 0;
1429         if (cur - *str > 0)
1430             xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1431         if (uri->path != NULL)
1432             xmlFree(uri->path);
1433         uri->path = path;
1434     }
1435     *str = cur;
1436     return (0);
1437 }
1438
1439 /**
1440  * xmlParseURIAuthority:
1441  * @uri:  pointer to an URI structure
1442  * @str:  pointer to the string to analyze
1443  *
1444  * Parse the authority part of an URI.
1445  *
1446  * authority = server | reg_name
1447  * server    = [ [ userinfo "@" ] hostport ]
1448  * reg_name  = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1449  *                        "@" | "&" | "=" | "+" )
1450  *
1451  * Note : this is completely ambiguous since reg_name is allowed to
1452  *        use the full set of chars in use by server:
1453  *
1454  *        3.2.1. Registry-based Naming Authority
1455  *
1456  *        The structure of a registry-based naming authority is specific
1457  *        to the URI scheme, but constrained to the allowed characters
1458  *        for an authority component.
1459  *
1460  * Returns 0 or the error code
1461  */
1462 static int
1463 xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1464     const char *cur;
1465     int ret;
1466
1467     if (str == NULL)
1468         return(-1);
1469
1470     cur = *str;
1471
1472     /*
1473      * try first to parse it as a server string.
1474      */
1475     ret = xmlParseURIServer(uri, str);
1476     if (ret == 0)
1477         return(0);
1478
1479     /*
1480      * failed, fallback to reg_name
1481      */
1482     if (!IS_REG_NAME(cur)) {
1483         return(5);
1484     }
1485     NEXT(cur);
1486     while (IS_REG_NAME(cur)) NEXT(cur);
1487     if (uri != NULL) {
1488         if (uri->server != NULL) xmlFree(uri->server);
1489         uri->server = NULL;
1490         if (uri->user != NULL) xmlFree(uri->user);
1491         uri->user = NULL;
1492         if (uri->authority != NULL) xmlFree(uri->authority);
1493         uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1494     }
1495     *str = cur;
1496     return(0);
1497 }
1498
1499 /**
1500  * xmlParseURIHierPart:
1501  * @uri:  pointer to an URI structure
1502  * @str:  pointer to the string to analyze
1503  *
1504  * Parse an URI hierarchical part
1505  *
1506  * hier_part = ( net_path | abs_path ) [ "?" query ]
1507  * abs_path = "/"  path_segments
1508  * net_path = "//" authority [ abs_path ]
1509  *
1510  * Returns 0 or the error code
1511  */
1512 static int
1513 xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1514     int ret;
1515     const char *cur;
1516
1517     if (str == NULL)
1518         return(-1);
1519
1520     cur = *str;
1521
1522     if ((cur[0] == '/') && (cur[1] == '/')) {
1523         cur += 2;
1524         ret = xmlParseURIAuthority(uri, &cur);
1525         if (ret != 0)
1526             return(ret);
1527         if (cur[0] == '/') {
1528             cur++;
1529             ret = xmlParseURIPathSegments(uri, &cur, 1);
1530         }
1531     } else if (cur[0] == '/') {
1532         cur++;
1533         ret = xmlParseURIPathSegments(uri, &cur, 1);
1534     } else {
1535         return(4);
1536     }
1537     if (ret != 0)
1538         return(ret);
1539     if (*cur == '?') {
1540         cur++;
1541         ret = xmlParseURIQuery(uri, &cur);
1542         if (ret != 0)
1543             return(ret);
1544     }
1545     *str = cur;
1546     return(0);
1547 }
1548
1549 /**
1550  * xmlParseAbsoluteURI:
1551  * @uri:  pointer to an URI structure
1552  * @str:  pointer to the string to analyze
1553  *
1554  * Parse an URI reference string and fills in the appropriate fields
1555  * of the @uri structure
1556  *
1557  * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1558  *
1559  * Returns 0 or the error code
1560  */
1561 static int
1562 xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1563     int ret;
1564     const char *cur;
1565
1566     if (str == NULL)
1567         return(-1);
1568
1569     cur = *str;
1570
1571     ret = xmlParseURIScheme(uri, str);
1572     if (ret != 0) return(ret);
1573     if (**str != ':') {
1574         *str = cur;
1575         return(1);
1576     }
1577     (*str)++;
1578     if (**str == '/')
1579         return(xmlParseURIHierPart(uri, str));
1580     return(xmlParseURIOpaquePart(uri, str));
1581 }
1582
1583 /**
1584  * xmlParseRelativeURI:
1585  * @uri:  pointer to an URI structure
1586  * @str:  pointer to the string to analyze
1587  *
1588  * Parse an relative URI string and fills in the appropriate fields
1589  * of the @uri structure
1590  *
1591  * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1592  * abs_path = "/"  path_segments
1593  * net_path = "//" authority [ abs_path ]
1594  * rel_path = rel_segment [ abs_path ]
1595  *
1596  * Returns 0 or the error code
1597  */
1598 static int
1599 xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1600     int ret = 0;
1601     const char *cur;
1602
1603     if (str == NULL)
1604         return(-1);
1605
1606     cur = *str;
1607     if ((cur[0] == '/') && (cur[1] == '/')) {
1608         cur += 2;
1609         ret = xmlParseURIAuthority(uri, &cur);
1610         if (ret != 0)
1611             return(ret);
1612         if (cur[0] == '/') {
1613             cur++;
1614             ret = xmlParseURIPathSegments(uri, &cur, 1);
1615         }
1616     } else if (cur[0] == '/') {
1617         cur++;
1618         ret = xmlParseURIPathSegments(uri, &cur, 1);
1619     } else if (cur[0] != '#' && cur[0] != '?') {
1620         ret = xmlParseURIRelSegment(uri, &cur);
1621         if (ret != 0)
1622             return(ret);
1623         if (cur[0] == '/') {
1624             cur++;
1625             ret = xmlParseURIPathSegments(uri, &cur, 1);
1626         }
1627     }
1628     if (ret != 0)
1629         return(ret);
1630     if (*cur == '?') {
1631         cur++;
1632         ret = xmlParseURIQuery(uri, &cur);
1633         if (ret != 0)
1634             return(ret);
1635     }
1636     *str = cur;
1637     return(ret);
1638 }
1639
1640 /**
1641  * xmlParseURIReference:
1642  * @uri:  pointer to an URI structure
1643  * @str:  the string to analyze
1644  *
1645  * Parse an URI reference string and fills in the appropriate fields
1646  * of the @uri structure
1647  *
1648  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1649  *
1650  * Returns 0 or the error code
1651  */
1652 int
1653 xmlParseURIReference(xmlURIPtr uri, const char *str) {
1654     int ret;
1655     const char *tmp = str;
1656
1657     if (str == NULL)
1658         return(-1);
1659     xmlCleanURI(uri);
1660
1661     /*
1662      * Try first to parse absolute refs, then fallback to relative if
1663      * it fails.
1664      */
1665     ret = xmlParseAbsoluteURI(uri, &str);
1666     if (ret != 0) {
1667         xmlCleanURI(uri);
1668         str = tmp;
1669         ret = xmlParseRelativeURI(uri, &str);
1670     }
1671     if (ret != 0) {
1672         xmlCleanURI(uri);
1673         return(ret);
1674     }
1675
1676     if (*str == '#') {
1677         str++;
1678         ret = xmlParseURIFragment(uri, &str);
1679         if (ret != 0) return(ret);
1680     }
1681     if (*str != 0) {
1682         xmlCleanURI(uri);
1683         return(1);
1684     }
1685     return(0);
1686 }
1687
1688 /**
1689  * xmlParseURI:
1690  * @str:  the URI string to analyze
1691  *
1692  * Parse an URI
1693  *
1694  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1695  *
1696  * Returns a newly build xmlURIPtr or NULL in case of error
1697  */
1698 xmlURIPtr
1699 xmlParseURI(const char *str) {
1700     xmlURIPtr uri;
1701     int ret;
1702
1703     if (str == NULL)
1704         return(NULL);
1705     uri = xmlCreateURI();
1706     if (uri != NULL) {
1707         ret = xmlParseURIReference(uri, str);
1708         if (ret) {
1709             xmlFreeURI(uri);
1710             return(NULL);
1711         }
1712     }
1713     return(uri);
1714 }
1715
1716 /************************************************************************
1717  *                                                                      *
1718  *                      Public functions                                *
1719  *                                                                      *
1720  ************************************************************************/
1721
1722 /**
1723  * xmlBuildURI:
1724  * @URI:  the URI instance found in the document
1725  * @base:  the base value
1726  *
1727  * Computes he final URI of the reference done by checking that
1728  * the given URI is valid, and building the final URI using the
1729  * base URI. This is processed according to section 5.2 of the
1730  * RFC 2396
1731  *
1732  * 5.2. Resolving Relative References to Absolute Form
1733  *
1734  * Returns a new URI string (to be freed by the caller) or NULL in case
1735  *         of error.
1736  */
1737 xmlChar *
1738 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1739     xmlChar *val = NULL;
1740     int ret, len, indx, cur, out;
1741     xmlURIPtr ref = NULL;
1742     xmlURIPtr bas = NULL;
1743     xmlURIPtr res = NULL;
1744
1745     /*
1746      * 1) The URI reference is parsed into the potential four components and
1747      *    fragment identifier, as described in Section 4.3.
1748      *
1749      *    NOTE that a completely empty URI is treated by modern browsers
1750      *    as a reference to "." rather than as a synonym for the current
1751      *    URI.  Should we do that here?
1752      */
1753     if (URI == NULL)
1754         ret = -1;
1755     else {
1756         if (*URI) {
1757             ref = xmlCreateURI();
1758             if (ref == NULL)
1759                 goto done;
1760             ret = xmlParseURIReference(ref, (const char *) URI);
1761         }
1762         else
1763             ret = 0;
1764     }
1765     if (ret != 0)
1766         goto done;
1767     if (base == NULL)
1768         ret = -1;
1769     else {
1770         bas = xmlCreateURI();
1771         if (bas == NULL)
1772             goto done;
1773         ret = xmlParseURIReference(bas, (const char *) base);
1774     }
1775     if (ret != 0) {
1776         if (ref)
1777             val = xmlSaveUri(ref);
1778         goto done;
1779     }
1780     if (ref == NULL) {
1781         /*
1782          * the base fragment must be ignored
1783          */
1784         if (bas->fragment != NULL) {
1785             xmlFree(bas->fragment);
1786             bas->fragment = NULL;
1787         }
1788         val = xmlSaveUri(bas);
1789         goto done;
1790     }
1791
1792     /*
1793      * 2) If the path component is empty and the scheme, authority, and
1794      *    query components are undefined, then it is a reference to the
1795      *    current document and we are done.  Otherwise, the reference URI's
1796      *    query and fragment components are defined as found (or not found)
1797      *    within the URI reference and not inherited from the base URI.
1798      *
1799      *    NOTE that in modern browsers, the parsing differs from the above
1800      *    in the following aspect:  the query component is allowed to be
1801      *    defined while still treating this as a reference to the current
1802      *    document.
1803      */
1804     res = xmlCreateURI();
1805     if (res == NULL)
1806         goto done;
1807     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1808         ((ref->authority == NULL) && (ref->server == NULL))) {
1809         if (bas->scheme != NULL)
1810             res->scheme = xmlMemStrdup(bas->scheme);
1811         if (bas->authority != NULL)
1812             res->authority = xmlMemStrdup(bas->authority);
1813         else if (bas->server != NULL) {
1814             res->server = xmlMemStrdup(bas->server);
1815             if (bas->user != NULL)
1816                 res->user = xmlMemStrdup(bas->user);
1817             res->port = bas->port;
1818         }
1819         if (bas->path != NULL)
1820             res->path = xmlMemStrdup(bas->path);
1821         if (ref->query != NULL)
1822             res->query = xmlMemStrdup(ref->query);
1823         else if (bas->query != NULL)
1824             res->query = xmlMemStrdup(bas->query);
1825         if (ref->fragment != NULL)
1826             res->fragment = xmlMemStrdup(ref->fragment);
1827         goto step_7;
1828     }
1829
1830     if (ref->query != NULL)
1831         res->query = xmlMemStrdup(ref->query);
1832     if (ref->fragment != NULL)
1833         res->fragment = xmlMemStrdup(ref->fragment);
1834
1835     /*
1836      * 3) If the scheme component is defined, indicating that the reference
1837      *    starts with a scheme name, then the reference is interpreted as an
1838      *    absolute URI and we are done.  Otherwise, the reference URI's
1839      *    scheme is inherited from the base URI's scheme component.
1840      */
1841     if (ref->scheme != NULL) {
1842         val = xmlSaveUri(ref);
1843         goto done;
1844     }
1845     if (bas->scheme != NULL)
1846         res->scheme = xmlMemStrdup(bas->scheme);
1847
1848     /*
1849      * 4) If the authority component is defined, then the reference is a
1850      *    network-path and we skip to step 7.  Otherwise, the reference
1851      *    URI's authority is inherited from the base URI's authority
1852      *    component, which will also be undefined if the URI scheme does not
1853      *    use an authority component.
1854      */
1855     if ((ref->authority != NULL) || (ref->server != NULL)) {
1856         if (ref->authority != NULL)
1857             res->authority = xmlMemStrdup(ref->authority);
1858         else {
1859             res->server = xmlMemStrdup(ref->server);
1860             if (ref->user != NULL)
1861                 res->user = xmlMemStrdup(ref->user);
1862             res->port = ref->port;
1863         }
1864         if (ref->path != NULL)
1865             res->path = xmlMemStrdup(ref->path);
1866         goto step_7;
1867     }
1868     if (bas->authority != NULL)
1869         res->authority = xmlMemStrdup(bas->authority);
1870     else if (bas->server != NULL) {
1871         res->server = xmlMemStrdup(bas->server);
1872         if (bas->user != NULL)
1873             res->user = xmlMemStrdup(bas->user);
1874         res->port = bas->port;
1875     }
1876
1877     /*
1878      * 5) If the path component begins with a slash character ("/"), then
1879      *    the reference is an absolute-path and we skip to step 7.
1880      */
1881     if ((ref->path != NULL) && (ref->path[0] == '/')) {
1882         res->path = xmlMemStrdup(ref->path);
1883         goto step_7;
1884     }
1885
1886
1887     /*
1888      * 6) If this step is reached, then we are resolving a relative-path
1889      *    reference.  The relative path needs to be merged with the base
1890      *    URI's path.  Although there are many ways to do this, we will
1891      *    describe a simple method using a separate string buffer.
1892      *
1893      * Allocate a buffer large enough for the result string.
1894      */
1895     len = 2; /* extra / and 0 */
1896     if (ref->path != NULL)
1897         len += strlen(ref->path);
1898     if (bas->path != NULL)
1899         len += strlen(bas->path);
1900     res->path = (char *) xmlMalloc(len);
1901     if (res->path == NULL) {
1902         xmlGenericError(xmlGenericErrorContext,
1903                 "xmlBuildURI: out of memory\n");
1904         goto done;
1905     }
1906     res->path[0] = 0;
1907
1908     /*
1909      * a) All but the last segment of the base URI's path component is
1910      *    copied to the buffer.  In other words, any characters after the
1911      *    last (right-most) slash character, if any, are excluded.
1912      */
1913     cur = 0;
1914     out = 0;
1915     if (bas->path != NULL) {
1916         while (bas->path[cur] != 0) {
1917             while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1918                 cur++;
1919             if (bas->path[cur] == 0)
1920                 break;
1921
1922             cur++;
1923             while (out < cur) {
1924                 res->path[out] = bas->path[out];
1925                 out++;
1926             }
1927         }
1928     }
1929     res->path[out] = 0;
1930
1931     /*
1932      * b) The reference's path component is appended to the buffer
1933      *    string.
1934      */
1935     if (ref->path != NULL && ref->path[0] != 0) {
1936         indx = 0;
1937         /*
1938          * Ensure the path includes a '/'
1939          */
1940         if ((out == 0) && (bas->server != NULL))
1941             res->path[out++] = '/';
1942         while (ref->path[indx] != 0) {
1943             res->path[out++] = ref->path[indx++];
1944         }
1945     }
1946     res->path[out] = 0;
1947
1948     /*
1949      * Steps c) to h) are really path normalization steps
1950      */
1951     xmlNormalizeURIPath(res->path);
1952
1953 step_7:
1954
1955     /*
1956      * 7) The resulting URI components, including any inherited from the
1957      *    base URI, are recombined to give the absolute form of the URI
1958      *    reference.
1959      */
1960     val = xmlSaveUri(res);
1961
1962 done:
1963     if (ref != NULL)
1964         xmlFreeURI(ref);
1965     if (bas != NULL)
1966         xmlFreeURI(bas);
1967     if (res != NULL)
1968         xmlFreeURI(res);
1969     return(val);
1970 }
1971
1972