/* $NetBSD: fetch.c,v 1.19 2009/08/11 20:48:06 joerg Exp $ */
/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
 */
49 auth_t fetchAuthMethod
;
51 char fetchLastErrString
[MAXERRSTRING
];
53 volatile int fetchRestartCalls
= 1;
57 /*** Local data **************************************************************/
60 * Error messages for parser errors
62 #define URL_MALFORMED 1
63 #define URL_BAD_SCHEME 2
64 #define URL_BAD_PORT 3
65 static struct fetcherr url_errlist
[] = {
66 { URL_MALFORMED
, FETCH_URL
, "Malformed URL" },
67 { URL_BAD_SCHEME
, FETCH_URL
, "Invalid URL scheme" },
68 { URL_BAD_PORT
, FETCH_URL
, "Invalid server port" },
69 { -1, FETCH_UNKNOWN
, "Unknown parser error" }
/*** Public API **************************************************************/
76 * Select the appropriate protocol for the URL scheme, and return a
77 * read-only stream connected to the document referenced by the URL.
78 * Also fill out the struct url_stat.
81 fetchXGet(struct url
*URL
, struct url_stat
*us
, const char *flags
)
86 us
->atime
= us
->mtime
= 0;
88 if (strcasecmp(URL
->scheme
, SCHEME_FILE
) == 0)
89 return (fetchXGetFile(URL
, us
, flags
));
90 else if (strcasecmp(URL
->scheme
, SCHEME_FTP
) == 0)
91 return (fetchXGetFTP(URL
, us
, flags
));
92 else if (strcasecmp(URL
->scheme
, SCHEME_HTTP
) == 0)
93 return (fetchXGetHTTP(URL
, us
, flags
));
94 else if (strcasecmp(URL
->scheme
, SCHEME_HTTPS
) == 0)
95 return (fetchXGetHTTP(URL
, us
, flags
));
96 url_seterr(URL_BAD_SCHEME
);
101 * Select the appropriate protocol for the URL scheme, and return a
102 * read-only stream connected to the document referenced by the URL.
105 fetchGet(struct url
*URL
, const char *flags
)
107 return (fetchXGet(URL
, NULL
, flags
));
111 * Select the appropriate protocol for the URL scheme, and return a
112 * write-only stream connected to the document referenced by the URL.
115 fetchPut(struct url
*URL
, const char *flags
)
118 if (strcasecmp(URL
->scheme
, SCHEME_FILE
) == 0)
119 return (fetchPutFile(URL
, flags
));
120 else if (strcasecmp(URL
->scheme
, SCHEME_FTP
) == 0)
121 return (fetchPutFTP(URL
, flags
));
122 else if (strcasecmp(URL
->scheme
, SCHEME_HTTP
) == 0)
123 return (fetchPutHTTP(URL
, flags
));
124 else if (strcasecmp(URL
->scheme
, SCHEME_HTTPS
) == 0)
125 return (fetchPutHTTP(URL
, flags
));
126 url_seterr(URL_BAD_SCHEME
);
131 * Select the appropriate protocol for the URL scheme, and return the
132 * size of the document referenced by the URL if it exists.
135 fetchStat(struct url
*URL
, struct url_stat
*us
, const char *flags
)
140 us
->atime
= us
->mtime
= 0;
142 if (strcasecmp(URL
->scheme
, SCHEME_FILE
) == 0)
143 return (fetchStatFile(URL
, us
, flags
));
144 else if (strcasecmp(URL
->scheme
, SCHEME_FTP
) == 0)
145 return (fetchStatFTP(URL
, us
, flags
));
146 else if (strcasecmp(URL
->scheme
, SCHEME_HTTP
) == 0)
147 return (fetchStatHTTP(URL
, us
, flags
));
148 else if (strcasecmp(URL
->scheme
, SCHEME_HTTPS
) == 0)
149 return (fetchStatHTTP(URL
, us
, flags
));
150 url_seterr(URL_BAD_SCHEME
);
155 * Select the appropriate protocol for the URL scheme, and return a
156 * list of files in the directory pointed to by the URL.
159 fetchList(struct url_list
*ue
, struct url
*URL
, const char *pattern
,
163 if (strcasecmp(URL
->scheme
, SCHEME_FILE
) == 0)
164 return (fetchListFile(ue
, URL
, pattern
, flags
));
165 else if (strcasecmp(URL
->scheme
, SCHEME_FTP
) == 0)
166 return (fetchListFTP(ue
, URL
, pattern
, flags
));
167 else if (strcasecmp(URL
->scheme
, SCHEME_HTTP
) == 0)
168 return (fetchListHTTP(ue
, URL
, pattern
, flags
));
169 else if (strcasecmp(URL
->scheme
, SCHEME_HTTPS
) == 0)
170 return (fetchListHTTP(ue
, URL
, pattern
, flags
));
171 url_seterr(URL_BAD_SCHEME
);
176 * Attempt to parse the given URL; if successful, call fetchXGet().
179 fetchXGetURL(const char *URL
, struct url_stat
*us
, const char *flags
)
184 if ((u
= fetchParseURL(URL
)) == NULL
)
187 f
= fetchXGet(u
, us
, flags
);
194 * Attempt to parse the given URL; if successful, call fetchGet().
197 fetchGetURL(const char *URL
, const char *flags
)
199 return (fetchXGetURL(URL
, NULL
, flags
));
203 * Attempt to parse the given URL; if successful, call fetchPut().
206 fetchPutURL(const char *URL
, const char *flags
)
211 if ((u
= fetchParseURL(URL
)) == NULL
)
214 f
= fetchPut(u
, flags
);
/*
 * Attempt to parse the given URL; if successful, call fetchStat().
 *
 * Returns -1 when fetchParseURL() rejects the string, otherwise the
 * result of fetchStat().  The temporary struct url is released before
 * returning.
 *
 * NOTE(review): the int return type and the -1 failure value follow
 * fetch(3); the corresponding lines are absent from this copy of the file.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *u;
	int s;

	if ((u = fetchParseURL(URL)) == NULL)
		return (-1);

	s = fetchStat(u, us, flags);

	fetchFreeURL(u);
	return (s);
}
/*
 * Attempt to parse the given URL; if successful, call fetchList().
 *
 * Returns -1 when fetchParseURL() rejects the string, otherwise the
 * result of fetchList().  The temporary struct url is released before
 * returning.
 *
 * NOTE(review): the int return type, the flags parameter declaration and
 * the -1 failure value are absent from this copy of the file; they follow
 * the fetch(3) prototype and the use of flags in the body.
 */
int
fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
    const char *flags)
{
	struct url *u;
	int rv;

	if ((u = fetchParseURL(URL)) == NULL)
		return (-1);

	rv = fetchList(ue, u, pattern, flags);

	fetchFreeURL(u);
	return (rv);
}
261 fetchMakeURL(const char *scheme
, const char *host
, int port
, const char *doc
,
262 const char *user
, const char *pwd
)
266 if (!scheme
|| (!host
&& !doc
)) {
267 url_seterr(URL_MALFORMED
);
271 if (port
< 0 || port
> 65535) {
272 url_seterr(URL_BAD_PORT
);
276 /* allocate struct url */
277 if ((u
= calloc(1, sizeof(*u
))) == NULL
) {
282 if ((u
->doc
= strdup(doc
? doc
: "/")) == NULL
) {
288 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
/*
 * Report whether x may appear in the document part of a URL without
 * being percent-quoted.  fetchParseURL() quotes every byte for which
 * this returns 0.
 *
 * Returns 1 for ASCII letters and digits and for the punctuation listed
 * below, 0 for everything else.
 *
 * NOTE(review): the individual case labels are missing from this copy of
 * the file; only the two group comments survive.  The sets below are the
 * RFC 1738 "safe" and "extra" characters followed by the path and search
 * delimiters -- confirm against the original.
 */
static int
fetch_urlpath_safe(char x)
{
	if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') ||
	    (x >= 'a' && x <= 'z'))
		return 1;

	switch (x) {
	case '$':
	case '-':
	case '_':
	case '.':
	case '+':
	case '!':
	case '*':
	case '\'':
	case '(':
	case ')':
	case ',':
	/* The following are allowed in segment and path components: */
	case '?':
	case ':':
	case '@':
	case '&':
	case '=':
	case '/':
	case ';':
	/* If something is already quoted... */
	case '%':
		return 1;
	default:
		return 0;
	}
}
335 * Copy an existing URL.
338 fetchCopyURL(const struct url
*src
)
343 /* allocate struct url */
344 if ((dst
= malloc(sizeof(*dst
))) == NULL
) {
348 if ((doc
= strdup(src
->doc
)) == NULL
) {
/*
 * Split a URL into components. URL syntax is:
 * [method:/][/[user[:pwd]@]host[:port]/][document]
 * This is almost, but not quite, RFC 1738 URL syntax.
 */
/*
 * Parse a URL string into a newly allocated struct url.
 *
 * NOTE(review): this copy of the function is incomplete.  The return type,
 * the local declarations, the labels/jumps between the sections and many
 * statement bodies are missing.  The comments below describe only the
 * statements that are still visible; restore the function from a pristine
 * copy before relying on it.
 */
365 fetchParseURL(const char *URL
)
372 /* allocate struct url */
373 if ((u
= calloc(1, sizeof(*u
))) == NULL
) {
/*
 * SCHEME_FILE is stored here; the condition guarding this branch is not
 * visible in this copy.
 */
380 strcpy(u
->scheme
, SCHEME_FILE
);
/* Explicit "file:" prefix: scheme is SCHEME_FILE as well. */
384 if (strncmp(URL
, "file:", 5) == 0) {
386 strcpy(u
->scheme
, SCHEME_FILE
);
/* Three consecutive slashes are required, else URL_MALFORMED. */
388 if (URL
[0] != '/' || URL
[1] != '/' || URL
[2] != '/') {
389 url_seterr(URL_MALFORMED
);
/*
 * "http:" and "https:" share one branch; which of SCHEME_HTTP and
 * SCHEME_HTTPS is stored depends on a test that is not visible here.
 */
395 if (strncmp(URL
, "http:", 5) == 0 ||
396 strncmp(URL
, "https:", 6) == 0) {
399 strcpy(u
->scheme
, SCHEME_HTTP
);
402 strcpy(u
->scheme
, SCHEME_HTTPS
);
/* Two consecutive slashes are required, else URL_MALFORMED. */
406 if (URL
[0] != '/' || URL
[1] != '/') {
407 url_seterr(URL_MALFORMED
);
/* "ftp:" prefix, with the same two-slash requirement. */
414 if (strncmp(URL
, "ftp:", 4) == 0) {
416 strcpy(u
->scheme
, SCHEME_FTP
);
418 if (URL
[0] != '/' || URL
[1] != '/') {
419 url_seterr(URL_MALFORMED
);
/* Presumably reached only when none of the prefixes above matched. */
427 url_seterr(URL_BAD_SCHEME
);
/*
 * Find the first slash or at-sign.  An at-sign found first means a
 * user[:pwd] part precedes the host.
 */
431 p
= strpbrk(URL
, "/@");
432 if (p
!= NULL
&& *p
== '@') {
/* Walk the user name up to the colon or at-sign (loop body not visible). */
434 for (q
= URL
, i
= 0; (*q
!= ':') && (*q
!= '@'); q
++) {
/* Walk the password up to the at-sign (loop body not visible). */
441 for (q
++, i
= 0; (*q
!= '@'); q
++)
/*
 * Bracketed host: needs a closing bracket that is followed by the end of
 * the string, a slash or a colon.  The text between the brackets is
 * copied with strncpy(); the length is compared against URL_HOSTLEN
 * first (the statement taken when it is too long is not visible).
 */
453 if (*p
== '[' && (q
= strchr(p
+ 1, ']')) != NULL
&&
454 (*++q
== '\0' || *q
== '/' || *q
== ':')) {
455 if ((i
= q
- p
- 2) > URL_HOSTLEN
)
457 strncpy(u
->host
, ++p
, i
);
/* Plain host: runs up to a slash, a colon or the end of the string. */
461 for (i
= 0; *p
&& (*p
!= '/') && (*p
!= ':'); p
++)
/*
 * Port: decimal digits up to the next slash are accumulated into
 * u->port.  URL_BAD_PORT is presumably recorded for a non-digit; the
 * else line is not visible.
 */
467 for (q
= ++p
; *q
&& (*q
!= '/'); q
++)
468 if (isdigit((unsigned char)*q
))
469 u
->port
= u
->port
* 10 + (*q
- '0');
472 url_seterr(URL_BAD_PORT
);
/*
 * First pass over the document: tests each byte with the same condition
 * the output loop below uses to decide on quoting.  The statements that
 * act on the result are not visible; count is used as the allocation
 * size just below.
 */
484 for (i
= 0; p
[i
] != '\0'; ++i
) {
485 if ((!pre_quoted
&& p
[i
] == '%') ||
486 !fetch_urlpath_safe(p
[i
]))
492 if ((u
->doc
= malloc(count
)) == NULL
) {
/*
 * Second pass: a byte is quoted when it is a percent sign in input that
 * is not pre-quoted, or when fetch_urlpath_safe() rejects it.
 */
496 for (i
= 0; *p
!= '\0'; ++p
) {
497 if ((!pre_quoted
&& *p
== '%') ||
498 !fetch_urlpath_safe(*p
)) {
/*
 * High nibble first.  Bytes below 160 (0xA0) have a high nibble under
 * 10 and get a decimal digit, the rest a lowercase hex letter.
 */
500 if ((unsigned char)*p
< 160)
501 u
->doc
[i
++] = '0' + ((unsigned char)*p
) / 16;
503 u
->doc
[i
++] = 'a' - 10 + ((unsigned char)*p
) / 16;
/* Low nibble, encoded the same way. */
504 if ((unsigned char)*p
% 16 < 10)
505 u
->doc
[i
++] = '0' + ((unsigned char)*p
) % 16;
507 u
->doc
[i
++] = 'a' - 10 + ((unsigned char)*p
) % 16;
524 fetchFreeURL(struct url
*u
)
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Letters are accepted in either case.  The caller must already have
 * checked the character with isxdigit(); other input gives a meaningless
 * result.
 */
static char
xdigit2digit(char digit)
{
	digit = tolower((unsigned char)digit);
	if (digit >= 'a' && digit <= 'f')
		digit = digit - 'a' + 10;
	else
		digit = digit - '0';

	return digit;
}
544 * Skips optional parts like query or fragment identifier.
547 fetchUnquotePath(struct url
*url
)
553 if ((unquoted
= malloc(strlen(url
->doc
) + 1)) == NULL
)
556 for (i
= 0, iter
= url
->doc
; *iter
!= '\0'; ++iter
) {
557 if (*iter
== '#' || *iter
== '?')
559 if (iter
[0] != '%' ||
560 !isxdigit((unsigned char)iter
[1]) ||
561 !isxdigit((unsigned char)iter
[2])) {
562 unquoted
[i
++] = *iter
;
565 unquoted
[i
++] = xdigit2digit(iter
[1]) * 16 +
566 xdigit2digit(iter
[2]);
/*
 * Extract the file name component of a URL.
 *
 * The document is first unquoted with fetchUnquotePath(); the result is
 * the text after its last '/'.  When there is no '/', the whole unquoted
 * document is returned.
 *
 * Returns a heap-allocated string owned by the caller, or NULL if an
 * allocation fails.
 */
char *
fetchUnquoteFilename(struct url *url)
{
	char *unquoted, *filename;
	const char *last_slash;

	if ((unquoted = fetchUnquotePath(url)) == NULL)
		return NULL;

	if ((last_slash = strrchr(unquoted, '/')) == NULL)
		return unquoted;

	filename = strdup(last_slash + 1);
	/* The full path was only scratch space once the tail is copied. */
	free(unquoted);
	return filename;
}
594 fetchStringifyURL(const struct url
*url
)
599 /* scheme :// user : pwd @ host :port doc */
600 total
= strlen(url
->scheme
) + 3 + strlen(url
->user
) + 1 +
601 strlen(url
->pwd
) + 1 + strlen(url
->host
) + 6 + strlen(url
->doc
) + 1;
602 if ((doc
= malloc(total
)) == NULL
)
605 snprintf(doc
, total
, "%s%s%s%s%s%s%s:%d%s",
607 url
->scheme
[0] != '\0' ? "://" : "",
609 url
->pwd
[0] != '\0' ? ":" : "",
611 url
->user
[0] != '\0' || url
->pwd
[0] != '\0' ? "@" : "",
616 snprintf(doc
, total
, "%s%s%s%s%s%s%s%s",
618 url
->scheme
[0] != '\0' ? "://" : "",
620 url
->pwd
[0] != '\0' ? ":" : "",
622 url
->user
[0] != '\0' || url
->pwd
[0] != '\0' ? "@" : "",