external/bsd/fetch/dist/libfetch/fetch.c

   1 /*      $NetBSD: fetch.c,v 1.19 2009/08/11 20:48:06 joerg Exp $ */
   2 /*-
   3  * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
   4  * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer
  12  *    in this position and unchanged.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 3. The name of the author may not be used to endorse or promote products
  17  *    derived from this software without specific prior written permission
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
  31  */
  32
  33 #if HAVE_CONFIG_H
  34 #include "config.h"
  35 #endif
  36 #ifndef NETBSD
  37 #include <nbcompat.h>
  38 #endif
  39
  40 #include <ctype.h>
  41 #include <errno.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
  46 #include "fetch.h"
  47 #include "common.h"
  48
/*
 * Global state variables with external linkage defined by this file.
 * None of the functions visible in this file read or write them.
 * NOTE(review): judging by the names these are the authentication hook,
 * the last error code and message, an I/O timeout, a "restart interrupted
 * calls" switch (the only one with a non-zero initial value, 1) and a
 * debug switch -- confirm the exact semantics against fetch.h / common.c.
 */
  49 auth_t   fetchAuthMethod;
  50 int      fetchLastErrCode;
  51 char     fetchLastErrString[MAXERRSTRING];
  52 int      fetchTimeout;
  53 volatile int     fetchRestartCalls = 1;
  54 int      fetchDebug;
  55
  56
  57 /*** Local data **************************************************************/
  58
  59 /*
  60  * Error messages for parser errors
  61  */
/*
 * Codes handed to url_seterr() by the functions in this file:
 *   URL_MALFORMED   a mandatory component is missing, or the slashes that
 *                   must follow the scheme are absent
 *   URL_BAD_SCHEME  the scheme is not one this file dispatches on
 *   URL_BAD_PORT    the port contains a non-digit or is out of range
 */
  62 #define URL_MALFORMED           1
  63 #define URL_BAD_SCHEME          2
  64 #define URL_BAD_PORT            3
/*
 * Each entry pairs one of the codes above with the FETCH_URL category and
 * a message string.
 * NOTE(review): the final -1 / FETCH_UNKNOWN entry looks like the
 * end-of-table fallback used by the lookup behind url_seterr() -- confirm
 * in common.c.
 */
  65 static struct fetcherr url_errlist[] = {
  66         { URL_MALFORMED,        FETCH_URL,      "Malformed URL" },
  67         { URL_BAD_SCHEME,       FETCH_URL,      "Invalid URL scheme" },
  68         { URL_BAD_PORT,         FETCH_URL,      "Invalid server port" },
  69         { -1,                   FETCH_UNKNOWN,  "Unknown parser error" }
  70 };
  71
  72
  73 /*** Public API **************************************************************/
  74
  75 /*
  76  * Select the appropriate protocol for the URL scheme, and return a
  77  * read-only stream connected to the document referenced by the URL.
  78  * Also fill out the struct url_stat.
  79  */
  80 fetchIO *
  81 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
  82 {
  83
  84         if (us != NULL) {
  85                 us->size = -1;
  86                 us->atime = us->mtime = 0;
  87         }
  88         if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
  89                 return (fetchXGetFile(URL, us, flags));
  90         else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
  91                 return (fetchXGetFTP(URL, us, flags));
  92         else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
  93                 return (fetchXGetHTTP(URL, us, flags));
  94         else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
  95                 return (fetchXGetHTTP(URL, us, flags));
  96         url_seterr(URL_BAD_SCHEME);
  97         return (NULL);
  98 }
  99
 100 /*
 101  * Select the appropriate protocol for the URL scheme, and return a
 102  * read-only stream connected to the document referenced by the URL.
 103  */
 104 fetchIO *
 105 fetchGet(struct url *URL, const char *flags)
 106 {
 107         return (fetchXGet(URL, NULL, flags));
 108 }
 109
 110 /*
 111  * Select the appropriate protocol for the URL scheme, and return a
 112  * write-only stream connected to the document referenced by the URL.
 113  */
 114 fetchIO *
 115 fetchPut(struct url *URL, const char *flags)
 116 {
 117
 118         if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
 119                 return (fetchPutFile(URL, flags));
 120         else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
 121                 return (fetchPutFTP(URL, flags));
 122         else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
 123                 return (fetchPutHTTP(URL, flags));
 124         else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
 125                 return (fetchPutHTTP(URL, flags));
 126         url_seterr(URL_BAD_SCHEME);
 127         return (NULL);
 128 }
 129
 130 /*
 131  * Select the appropriate protocol for the URL scheme, and return the
 132  * size of the document referenced by the URL if it exists.
 133  */
 134 int
 135 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
 136 {
 137
 138         if (us != NULL) {
 139                 us->size = -1;
 140                 us->atime = us->mtime = 0;
 141         }
 142         if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
 143                 return (fetchStatFile(URL, us, flags));
 144         else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
 145                 return (fetchStatFTP(URL, us, flags));
 146         else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
 147                 return (fetchStatHTTP(URL, us, flags));
 148         else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
 149                 return (fetchStatHTTP(URL, us, flags));
 150         url_seterr(URL_BAD_SCHEME);
 151         return (-1);
 152 }
 153
 154 /*
 155  * Select the appropriate protocol for the URL scheme, and return a
 156  * list of files in the directory pointed to by the URL.
 157  */
 158 int
 159 fetchList(struct url_list *ue, struct url *URL, const char *pattern,
 160     const char *flags)
 161 {
 162
 163         if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
 164                 return (fetchListFile(ue, URL, pattern, flags));
 165         else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
 166                 return (fetchListFTP(ue, URL, pattern, flags));
 167         else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
 168                 return (fetchListHTTP(ue, URL, pattern, flags));
 169         else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
 170                 return (fetchListHTTP(ue, URL, pattern, flags));
 171         url_seterr(URL_BAD_SCHEME);
 172         return -1;
 173 }
 174
 175 /*
 176  * Attempt to parse the given URL; if successful, call fetchXGet().
 177  */
 178 fetchIO *
 179 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
 180 {
 181         struct url *u;
 182         fetchIO *f;
 183
 184         if ((u = fetchParseURL(URL)) == NULL)
 185                 return (NULL);
 186
 187         f = fetchXGet(u, us, flags);
 188
 189         fetchFreeURL(u);
 190         return (f);
 191 }
 192
 193 /*
 194  * Attempt to parse the given URL; if successful, call fetchGet().
 195  */
 196 fetchIO *
 197 fetchGetURL(const char *URL, const char *flags)
 198 {
 199         return (fetchXGetURL(URL, NULL, flags));
 200 }
 201
 202 /*
 203  * Attempt to parse the given URL; if successful, call fetchPut().
 204  */
 205 fetchIO *
 206 fetchPutURL(const char *URL, const char *flags)
 207 {
 208         struct url *u;
 209         fetchIO *f;
 210
 211         if ((u = fetchParseURL(URL)) == NULL)
 212                 return (NULL);
 213
 214         f = fetchPut(u, flags);
 215
 216         fetchFreeURL(u);
 217         return (f);
 218 }
 219
 220 /*
 221  * Attempt to parse the given URL; if successful, call fetchStat().
 222  */
 223 int
 224 fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
 225 {
 226         struct url *u;
 227         int s;
 228
 229         if ((u = fetchParseURL(URL)) == NULL)
 230                 return (-1);
 231
 232         s = fetchStat(u, us, flags);
 233
 234         fetchFreeURL(u);
 235         return (s);
 236 }
 237
 238 /*
 239  * Attempt to parse the given URL; if successful, call fetchList().
 240  */
 241 int
 242 fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
 243     const char *flags)
 244 {
 245         struct url *u;
 246         int rv;
 247
 248         if ((u = fetchParseURL(URL)) == NULL)
 249                 return -1;
 250
 251         rv = fetchList(ue, u, pattern, flags);
 252
 253         fetchFreeURL(u);
 254         return rv;
 255 }
 256
 257 /*
 258  * Make a URL
 259  */
 260 struct url *
 261 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
 262     const char *user, const char *pwd)
 263 {
 264         struct url *u;
 265
 266         if (!scheme || (!host && !doc)) {
 267                 url_seterr(URL_MALFORMED);
 268                 return (NULL);
 269         }
 270
 271         if (port < 0 || port > 65535) {
 272                 url_seterr(URL_BAD_PORT);
 273                 return (NULL);
 274         }
 275
 276         /* allocate struct url */
 277         if ((u = calloc(1, sizeof(*u))) == NULL) {
 278                 fetch_syserr();
 279                 return (NULL);
 280         }
 281
 282         if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
 283                 fetch_syserr();
 284                 free(u);
 285                 return (NULL);
 286         }
 287
 288 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
 289         seturl(scheme);
 290         seturl(host);
 291         seturl(user);
 292         seturl(pwd);
 293 #undef seturl
 294         u->port = port;
 295
 296         return (u);
 297 }
 298
 299 int
 300 fetch_urlpath_safe(char x)
 301 {
 302         if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') ||
 303             (x >= 'a' && x <= 'z'))
 304                 return 1;
 305
 306         switch (x) {
 307         case '$':
 308         case '-':
 309         case '_':
 310         case '.':
 311         case '+':
 312         case '!':
 313         case '*':
 314         case '\'':
 315         case '(':
 316         case ')':
 317         case ',':
 318         /* The following are allowed in segment and path components: */
 319         case '?':
 320         case ':':
 321         case '@':
 322         case '&':
 323         case '=':
 324         case '/':
 325         case ';':
 326         /* If something is already quoted... */
 327         case '%':
 328                 return 1;
 329         default:
 330                 return 0;
 331         }
 332 }
 333
 334 /*
 335  * Copy an existing URL.
 336  */
 337 struct url *
 338 fetchCopyURL(const struct url *src)
 339 {
 340         struct url *dst;
 341         char *doc;
 342
 343         /* allocate struct url */
 344         if ((dst = malloc(sizeof(*dst))) == NULL) {
 345                 fetch_syserr();
 346                 return (NULL);
 347         }
 348         if ((doc = strdup(src->doc)) == NULL) {
 349                 fetch_syserr();
 350                 free(dst);
 351                 return (NULL);
 352         }
 353         *dst = *src;
 354         dst->doc = doc;
 355
 356         return dst;
 357 }
 358
 359 /*
 360  * Split an URL into components. URL syntax is:
 361  * [method:/][/[user[:pwd]@]host[:port]/][document]
 362  * This almost, but not quite, RFC1738 URL syntax.
 363  */
 364 struct url *
 365 fetchParseURL(const char *URL)
 366 {
 367         const char *p, *q;
 368         struct url *u;
 369         size_t i, count;
 370         int pre_quoted;
 371
 372         /* allocate struct url */
 373         if ((u = calloc(1, sizeof(*u))) == NULL) {
 374                 fetch_syserr();
 375                 return (NULL);
 376         }
 377
 378         if (*URL == '/') {
 379                 pre_quoted = 0;
 380                 strcpy(u->scheme, SCHEME_FILE);
 381                 p = URL;
 382                 goto quote_doc;
 383         }
 384         if (strncmp(URL, "file:", 5) == 0) {
 385                 pre_quoted = 1;
 386                 strcpy(u->scheme, SCHEME_FILE);
 387                 URL += 5;
 388                 if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') {
 389                         url_seterr(URL_MALFORMED);
 390                         goto ouch;
 391                 }
 392                 p = URL + 2;
 393                 goto quote_doc;
 394         }
 395         if (strncmp(URL, "http:", 5) == 0 ||
 396             strncmp(URL, "https:", 6) == 0) {
 397                 pre_quoted = 1;
 398                 if (URL[4] == ':') {
 399                         strcpy(u->scheme, SCHEME_HTTP);
 400                         URL += 5;
 401                 } else {
 402                         strcpy(u->scheme, SCHEME_HTTPS);
 403                         URL += 6;
 404                 }
 405
 406                 if (URL[0] != '/' || URL[1] != '/') {
 407                         url_seterr(URL_MALFORMED);
 408                         goto ouch;
 409                 }
 410                 URL += 2;
 411                 p = URL;
 412                 goto find_user;
 413         }
 414         if (strncmp(URL, "ftp:", 4) == 0) {
 415                 pre_quoted = 1;
 416                 strcpy(u->scheme, SCHEME_FTP);
 417                 URL += 4;
 418                 if (URL[0] != '/' || URL[1] != '/') {
 419                         url_seterr(URL_MALFORMED);
 420                         goto ouch;
 421                 }
 422                 URL += 2;
 423                 p = URL;
 424                 goto find_user;
 425         }
 426
 427         url_seterr(URL_BAD_SCHEME);
 428         goto ouch;
 429
 430 find_user:
 431         p = strpbrk(URL, "/@");
 432         if (p != NULL && *p == '@') {
 433                 /* username */
 434                 for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) {
 435                         if (i < URL_USERLEN)
 436                                 u->user[i++] = *q;
 437                 }
 438
 439                 /* password */
 440                 if (*q == ':') {
 441                         for (q++, i = 0; (*q != '@'); q++)
 442                                 if (i < URL_PWDLEN)
 443                                         u->pwd[i++] = *q;
 444                 }
 445
 446                 p++;
 447         } else {
 448                 p = URL;
 449         }
 450
 451         /* hostname */
 452 #ifdef INET6
 453         if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
 454             (*++q == '\0' || *q == '/' || *q == ':')) {
 455                 if ((i = q - p - 2) > URL_HOSTLEN)
 456                         i = URL_HOSTLEN;
 457                 strncpy(u->host, ++p, i);
 458                 p = q;
 459         } else
 460 #endif
 461                 for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
 462                         if (i < URL_HOSTLEN)
 463                                 u->host[i++] = *p;
 464
 465         /* port */
 466         if (*p == ':') {
 467                 for (q = ++p; *q && (*q != '/'); q++)
 468                         if (isdigit((unsigned char)*q))
 469                                 u->port = u->port * 10 + (*q - '0');
 470                         else {
 471                                 /* invalid port */
 472                                 url_seterr(URL_BAD_PORT);
 473                                 goto ouch;
 474                         }
 475                 p = q;
 476         }
 477
 478         /* document */
 479         if (!*p)
 480                 p = "/";
 481
 482 quote_doc:
 483         count = 1;
 484         for (i = 0; p[i] != '\0'; ++i) {
 485                 if ((!pre_quoted && p[i] == '%') ||
 486                     !fetch_urlpath_safe(p[i]))
 487                         count += 3;
 488                 else
 489                         ++count;
 490         }
 491
 492         if ((u->doc = malloc(count)) == NULL) {
 493                 fetch_syserr();
 494                 goto ouch;
 495         }
 496         for (i = 0; *p != '\0'; ++p) {
 497                 if ((!pre_quoted && *p == '%') ||
 498                     !fetch_urlpath_safe(*p)) {
 499                         u->doc[i++] = '%';
 500                         if ((unsigned char)*p < 160)
 501                                 u->doc[i++] = '0' + ((unsigned char)*p) / 16;
 502                         else
 503                                 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16;
 504                         if ((unsigned char)*p % 16 < 10)
 505                                 u->doc[i++] = '0' + ((unsigned char)*p) % 16;
 506                         else
 507                                 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16;
 508                 } else
 509                         u->doc[i++] = *p;
 510         }
 511         u->doc[i] = '\0';
 512
 513         return (u);
 514
 515 ouch:
 516         free(u);
 517         return (NULL);
 518 }
 519
 520 /*
 521  * Free a URL
 522  */
 523 void
 524 fetchFreeURL(struct url *u)
 525 {
 526         free(u->doc);
 527         free(u);
 528 }
 529
 530 static char
 531 xdigit2digit(char digit)
 532 {
 533         digit = tolower((unsigned char)digit);
 534         if (digit >= 'a' && digit <= 'f')
 535                 digit = digit - 'a' + 10;
 536         else
 537                 digit = digit - '0';
 538
 539         return digit;
 540 }
 541
 542 /*
 543  * Unquote whole URL.
 544  * Skips optional parts like query or fragment identifier.
 545  */
 546 char *
 547 fetchUnquotePath(struct url *url)
 548 {
 549         char *unquoted;
 550         const char *iter;
 551         size_t i;
 552
 553         if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL)
 554                 return NULL;
 555
 556         for (i = 0, iter = url->doc; *iter != '\0'; ++iter) {
 557                 if (*iter == '#' || *iter == '?')
 558                         break;
 559                 if (iter[0] != '%' ||
 560                     !isxdigit((unsigned char)iter[1]) ||
 561                     !isxdigit((unsigned char)iter[2])) {
 562                         unquoted[i++] = *iter;
 563                         continue;
 564                 }
 565                 unquoted[i++] = xdigit2digit(iter[1]) * 16 +
 566                     xdigit2digit(iter[2]);
 567                 iter += 2;
 568         }
 569         unquoted[i] = '\0';
 570         return unquoted;
 571 }
 572
 573
 574 /*
 575  * Extract the file name component of a URL.
 576  */
 577 char *
 578 fetchUnquoteFilename(struct url *url)
 579 {
 580         char *unquoted, *filename;
 581         const char *last_slash;
 582
 583         if ((unquoted = fetchUnquotePath(url)) == NULL)
 584                 return NULL;
 585
 586         if ((last_slash = strrchr(unquoted, '/')) == NULL)
 587                 return unquoted;
 588         filename = strdup(last_slash + 1);
 589         free(unquoted);
 590         return filename;
 591 }
 592
 593 char *
 594 fetchStringifyURL(const struct url *url)
 595 {
 596         size_t total;
 597         char *doc;
 598
 599         /* scheme :// user : pwd @ host :port doc */
 600         total = strlen(url->scheme) + 3 + strlen(url->user) + 1 +
 601             strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1;
 602         if ((doc = malloc(total)) == NULL)
 603                 return NULL;
 604         if (url->port != 0)
 605                 snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s",
 606                     url->scheme,
 607                     url->scheme[0] != '\0' ? "://" : "",
 608                     url->user,
 609                     url->pwd[0] != '\0' ? ":" : "",
 610                     url->pwd,
 611                     url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
 612                     url->host,
 613                     (int)url->port,
 614                     url->doc);
 615         else {
 616                 snprintf(doc, total, "%s%s%s%s%s%s%s%s",
 617                     url->scheme,
 618                     url->scheme[0] != '\0' ? "://" : "",
 619                     url->user,
 620                     url->pwd[0] != '\0' ? ":" : "",
 621                     url->pwd,
 622                     url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
 623                     url->host,
 624                     url->doc);
 625         }
 626         return doc;
 627 }