2 .\" Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
3 .\" All rights reserved.
5 .\" Redistribution and use in source and binary forms, with or without
6 .\" modification, are permitted provided that the following conditions
8 .\" 1. Redistributions of source code must retain the above copyright
9 .\" notice, this list of conditions and the following disclaimer.
10 .\" 2. Redistributions in binary form must reproduce the above copyright
11 .\" notice, this list of conditions and the following disclaimer in the
12 .\" documentation and/or other materials provided with the distribution.
14 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 .\" $FreeBSD: fetch.3,v 1.64 2007/12/18 11:03:26 des Exp $
27 .\" $NetBSD: fetch.3,v 1.13 2009/10/15 12:36:57 joerg Exp $
62 .Nm fetchInitURLList ,
63 .Nm fetchFreeURLList ,
64 .Nm fetchUnquotePath ,
65 .Nm fetchUnquoteFilename ,
66 .Nm fetchStringifyURL ,
68 .Nd file transfer functions
75 .Fn fetchMakeURL "const char *scheme" "const char *host" "int port" "const char *doc" "const char *user" "const char *pwd"
77 .Fn fetchParseURL "const char *URL"
79 .Fn fetchCopyURL "const struct url *u"
81 .Fn fetchFreeURL "struct url *u"
83 .Fn fetchXGetURL "const char *URL" "struct url_stat *us" "const char *flags"
85 .Fn fetchGetURL "const char *URL" "const char *flags"
87 .Fn fetchPutURL "const char *URL" "const char *flags"
89 .Fn fetchStatURL "const char *URL" "struct url_stat *us" "const char *flags"
91 .Fn fetchListURL "struct url_list *list" "const char *URL" "const char *flags"
93 .Fn fetchXGet "struct url *u" "struct url_stat *us" "const char *flags"
95 .Fn fetchGet "struct url *u" "const char *flags"
97 .Fn fetchPut "struct url *u" "const char *flags"
99 .Fn fetchStat "struct url *u" "struct url_stat *us" "const char *flags"
101 .Fn fetchList "struct url_list *list" "struct url *u" "const char *flags"
103 .Fn fetchXGetFile "struct url *u" "struct url_stat *us" "const char *flags"
105 .Fn fetchGetFile "struct url *u" "const char *flags"
107 .Fn fetchPutFile "struct url *u" "const char *flags"
109 .Fn fetchStatFile "struct url *u" "struct url_stat *us" "const char *flags"
111 .Fn fetchListFile "struct url_list *list" "struct url *u" "const char *flags"
113 .Fn fetchXGetHTTP "struct url *u" "struct url_stat *us" "const char *flags"
115 .Fn fetchGetHTTP "struct url *u" "const char *flags"
117 .Fn fetchPutHTTP "struct url *u" "const char *flags"
119 .Fn fetchStatHTTP "struct url *u" "struct url_stat *us" "const char *flags"
121 .Fn fetchListHTTP "struct url_list *list" "struct url *u" "const char *flags"
123 .Fn fetchXGetFTP "struct url *u" "struct url_stat *us" "const char *flags"
125 .Fn fetchGetFTP "struct url *u" "const char *flags"
127 .Fn fetchPutFTP "struct url *u" "const char *flags"
129 .Fn fetchStatFTP "struct url *u" "struct url_stat *us" "const char *flags"
131 .Fn fetchListFTP "struct url_list *list" "struct url *u" "const char *flags"
133 .Fn fetchInitURLList "struct url_list *ul"
135 .Fn fetchAppendURLList "struct url_list *dst" "const struct url_list *src"
137 .Fn fetchFreeURLList "struct url_list *ul"
139 .Fn fetchUnquotePath "struct url *u"
141 .Fn fetchUnquoteFilename "struct url *u"
143 .Fn fetchStringifyURL "const struct url *u"
145 These functions implement a high-level library for retrieving and
146 uploading files using Uniform Resource Locators (URLs).
149 takes a URL in the form of a null-terminated string and splits it into
150 its components according to the Common Internet Scheme Syntax
151 detailed in RFC 1738.
152 A regular expression which produces this syntax is:
153 .Bd -literal -offset indent
154 \*[Lt]scheme\*[Gt]:(//(\*[Lt]user\*[Gt](:\*[Lt]pwd\*[Gt])?@)?\*[Lt]host\*[Gt](:\*[Lt]port\*[Gt])?)?/(\*[Lt]document\*[Gt])?
157 If the URL does not seem to begin with a scheme name, it is assumed to be a local path.
158 Only absolute path names are accepted.
160 Note that some components of the URL are not necessarily relevant to
162 For instance, the file scheme only needs the
168 quotes any unsafe character in the URL automatically.
180 return a pointer to a
182 structure, which is defined as follows in
185 #define URL_SCHEMELEN 16
186 #define URL_USERLEN 256
187 #define URL_PWDLEN 256
188 #define URL_HOSTLEN 255
191 char scheme[URL_SCHEMELEN + 1];
192 char user[URL_USERLEN + 1];
193 char pwd[URL_PWDLEN + 1];
194 char host[URL_HOSTLEN + 1];
199 time_t last_modified;
203 The pointer returned by
208 should be freed using
212 is not part of the ABI.
218 constitute the recommended interface to the
221 They examine the URL passed to them to determine the transfer
222 method, and call the appropriate lower-level functions to perform the
225 also returns the remote document's metadata in the
227 structure pointed to by the
233 argument is a string of characters which specify transfer options.
235 meaning of the individual flags is scheme-dependent, and is detailed
236 in the appropriate section below.
239 attempts to obtain the requested document's metadata and fill in the
240 structure pointed to by its second argument.
243 structure is defined as follows in
253 If the size could not be obtained from the server, the
256 If the modification time could not be obtained from the server, the
258 field is set to the epoch.
259 If the access time could not be obtained from the server, the
261 field is set to the modification time.
264 attempts to list the contents of the directory pointed to by the URL provided.
265 The pattern can be a simple glob-like expression as a hint.
266 Callers should not depend on the server to filter names.
267 If successful, it appends the list of entries to the
272 structure is defined as follows in
282 The list should be initialized by calling
284 and the entries should be freed by calling
285 .Fn fetchFreeURLList .
287 .Fn fetchAppendURLList
288 can be used to append one URL list to another.
291 (cache result) flag is specified, the library is allowed to internally
294 .Fn fetchStringifyURL
295 returns the URL as a string.
297 returns the path name part of the URL with any quoting undone.
298 Query arguments and fragment identifiers are not included.
299 .Fn fetchUnquoteFilename
300 returns the last component of the path name as returned by
301 .Fn fetchUnquotePath .
302 .Fn fetchStringifyURL ,
303 .Fn fetchUnquotePath ,
305 .Fn fetchUnquoteFilename
306 return a string that should be deallocated with
321 except that they expect a pre-parsed URL in the form of a pointer to
324 rather than a string.
331 functions return a pointer to a stream which can be used to read or
332 write data from or to the requested document, respectively.
334 although the implementation details of the individual access methods
335 vary, it can generally be assumed that a stream returned by one of the
339 functions is read-only, and that a stream returned by one of the
341 functions is write-only.
342 .Sh PROTOCOL INDEPENDENT FLAGS
345 (if-modified-since) flag is specified, the library will try to fetch
346 the content only if it is newer than
349 .Li If-Modified-Since
353 command is sent first and compared locally.
354 For FILE the source file is compared.
360 provide access to documents which are files in a locally mounted file
364 component of the URL is used.
369 do not accept any flags.
374 (append to file) flag.
375 If that flag is specified, the data written to
376 the stream returned by
378 will be appended to the previous contents of the file, instead of
385 implement the FTP protocol as described in RFC 959.
389 will attempt to use passive mode first and only fall back to active mode
390 if the server reports a syntax error.
393 (active) flag is specified, a passive connection is not tried and active mode
398 (low) flag is specified, data sockets will be allocated in the low (or
399 default) port range instead of the high port range (see
404 (direct) flag is specified,
409 will use a direct connection even if a proxy server is defined.
411 If no user name or password is given, the
413 library will attempt an anonymous login, with user name "anonymous"
414 and password "anonymous@\*[Lt]hostname\*[Gt]".
421 functions implement the HTTP/1.1 protocol.
422 With a little luck, there is
423 even a chance that they comply with RFC 2616 and RFC 2617.
427 (direct) flag is specified,
432 will use a direct connection even if a proxy server is defined.
434 Since there seems to be no good way of implementing the HTTP PUT
435 method in a manner consistent with the rest of the
439 is currently unimplemented.
441 Apart from setting the appropriate environment variables and
442 specifying the user name and password in the URL or the
444 the calling program has the option of defining an authentication
445 function with the following prototype:
448 .Fn myAuthMethod "struct url *u"
450 The callback function should fill in the
454 fields in the provided
456 and return 0 on success, or any other value to indicate failure.
458 To register the authentication callback, simply set
461 The callback will be used whenever a site requires authentication and
462 the appropriate environment variables are not set.
464 This interface is experimental and may be subject to change.
467 returns a pointer to a
469 containing the individual components of the URL.
471 unable to allocate memory, or the URL is syntactically incorrect,
479 functions return 0 on success and \-1 on failure.
481 All other functions return a stream pointer which may be used to
482 access the requested document, or
484 if an error occurred.
486 The following error codes are defined in
489 .It Bq Er FETCH_ABORT
492 Authentication failed
495 .It Bq Er FETCH_EXISTS
500 Informational response
501 .It Bq Er FETCH_MEMORY
503 .It Bq Er FETCH_MOVED
505 .It Bq Er FETCH_NETWORK
509 .It Bq Er FETCH_PROTO
511 .It Bq Er FETCH_RESOLV
513 .It Bq Er FETCH_SERVER
517 .It Bq Er FETCH_TIMEOUT
519 .It Bq Er FETCH_UNAVAIL
520 File is not available
521 .It Bq Er FETCH_UNKNOWN
527 The accompanying error message includes a protocol-specific error code
528 and message, e.g.\& "File is not available (404 Not Found)"
530 .Bl -tag -width ".Ev FETCH_BIND_ADDRESS"
531 .It Ev FETCH_BIND_ADDRESS
532 Specifies a host name or IP address to which sockets used for outgoing
533 connections will be bound.
535 Default FTP login if none was provided in the URL.
536 .It Ev FTP_PASSIVE_MODE
537 If set to anything but
539 forces the FTP code to use passive mode.
541 Default FTP password if the remote server requests one and none was
544 URL of the proxy to use for FTP requests.
545 The document part is ignored.
546 FTP and HTTP proxies are supported; if no scheme is specified, FTP is
548 If the proxy is an FTP proxy,
552 as user name to the proxy, where
554 is the real user name, and
556 is the name of the FTP server.
558 If this variable is set to an empty string, no proxy will be used for
559 FTP requests, even if the
567 Specifies HTTP authorization parameters as a colon-separated list of
569 The first and second items are the authorization scheme and realm
570 respectively; further items are scheme-dependent.
571 Currently, only basic authorization is supported.
573 Basic authorization requires two parameters: the user name and
574 password, in that order.
576 This variable is only used if the server requires authorization and
577 no user name or password was specified in the URL.
579 URL of the proxy to use for HTTP requests.
580 The document part is ignored.
581 Only HTTP proxies are supported for HTTP requests.
582 If no port number is specified, the default is 3128.
584 Note that this proxy will also be used for FTP documents, unless the
591 .It Ev HTTP_PROXY_AUTH
592 Specifies authorization parameters for the HTTP proxy in the same
597 This variable is used if and only if connected to an HTTP proxy, and
598 is ignored if a user and/or a password were specified in the proxy
601 Specifies the referrer URL to use for HTTP requests.
604 the document URL will be used as referrer URL.
605 .It Ev HTTP_USER_AGENT
606 Specifies the User-Agent string to use for HTTP requests.
607 This can be useful when working with HTTP origin or proxy servers that
608 differentiate between user agents.
610 Specifies a file to use instead of
612 to look up login names and passwords for FTP sites.
615 for a description of the file format.
616 This feature is experimental.
618 Either a single asterisk, which disables the use of proxies
619 altogether, or a comma- or whitespace-separated list of hosts for
620 which proxies should not be used.
627 To access a proxy server on
628 .Pa proxy.example.com
631 environment variable in a manner similar to this:
633 .Dl HTTP_PROXY=http://proxy.example.com:8080
635 If the proxy server requires authentication, there are
636 two options available for passing the authentication data.
637 The first method is by using the proxy URL:
639 .Dl HTTP_PROXY=http://\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]@proxy.example.com:8080
641 The second method is by using the
643 environment variable:
644 .Bd -literal -offset indent
645 HTTP_PROXY=http://proxy.example.com:8080
646 HTTP_PROXY_AUTH=basic:*:\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]
649 To disable the use of a proxy for an HTTP server running on the local
653 .Bd -literal -offset indent
654 NO_PROXY=localhost,127.0.0.1
665 .%B File Transfer Protocol
673 .%T How to Use Anonymous FTP
681 .%T Uniform Resource Locators (URL)
693 .%B Hypertext Transfer Protocol -- HTTP/1.1
705 .%B HTTP Authentication: Basic and Digest Access Authentication
711 library first appeared in
717 library was mostly written by
718 .An Dag-Erling Sm\(/orgrav Aq des@FreeBSD.org
719 with numerous suggestions from
720 .An Jordan K. Hubbard Aq jkh@FreeBSD.org ,
721 .An Eugene Skepner Aq eu@qub.com
725 It replaces the older
728 .An Poul-Henning Kamp Aq phk@FreeBSD.org
730 .An Jordan K. Hubbard Aq jkh@FreeBSD.org .
732 This manual page was written by
733 .An Dag-Erling Sm\(/orgrav Aq des@FreeBSD.org .
735 Some parts of the library are not yet implemented.
739 and FTP proxy support.
741 There is no way to select a proxy at run-time other than setting the
745 environment variables as appropriate.
748 does not understand or obey 305 (Use Proxy) replies.
750 Error numbers are unique only within a certain context; the error
751 codes used for FTP and HTTP overlap, as do those used for resolver and
753 For instance, error code 202 means "Command not
754 implemented, superfluous at this site" in an FTP context and
755 "Accepted" in an HTTP context.
758 does not check that the result of an MDTM command is a valid date.
760 The man page is incomplete, poorly written and produces badly
763 The error reporting mechanism is unsatisfactory.
765 Some parts of the code are not fully reentrant.