/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

FILE_LICENCE ( GPL2_OR_LATER );

/** @file
 *
 * Uniform Resource Identifiers
 *
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <ctype.h>
#include <gpxe/vsprintf.h>
#include <gpxe/uri.h>

36 * Dump URI for debugging
40 static void dump_uri ( struct uri
*uri
) {
44 DBG ( " scheme \"%s\"", uri
->scheme
);
46 DBG ( " opaque \"%s\"", uri
->opaque
);
48 DBG ( " user \"%s\"", uri
->user
);
50 DBG ( " password \"%s\"", uri
->password
);
52 DBG ( " host \"%s\"", uri
->host
);
54 DBG ( " port \"%s\"", uri
->port
);
56 DBG ( " path \"%s\"", uri
->path
);
58 DBG ( " query \"%s\"", uri
->query
);
60 DBG ( " fragment \"%s\"", uri
->fragment
);
66 * @v uri_string URI as a string
69 * Splits a URI into its component parts. The return URI structure is
70 * dynamically allocated and must eventually be freed by calling
73 struct uri
* parse_uri ( const char *uri_string
) {
78 char *authority
= NULL
;
81 /* Allocate space for URI struct and a copy of the string */
82 raw_len
= ( strlen ( uri_string
) + 1 /* NUL */ );
83 uri
= zalloc ( sizeof ( *uri
) + raw_len
);
86 raw
= ( ( ( char * ) uri
) + sizeof ( *uri
) );
88 /* Zero URI struct and copy in the raw string */
89 memcpy ( raw
, uri_string
, raw_len
);
91 /* Start by chopping off the fragment, if it exists */
92 if ( ( tmp
= strchr ( raw
, '#' ) ) ) {
97 /* Identify absolute/relative URI. We ignore schemes that are
98 * apparently only a single character long, since otherwise we
99 * misinterpret a DOS-style path name ("C:\path\to\file") as a
100 * URI with scheme="C",opaque="\path\to\file".
102 if ( ( tmp
= strchr ( raw
, ':' ) ) && ( tmp
> ( raw
+ 1 ) ) ) {
103 /* Absolute URI: identify hierarchical/opaque */
107 /* Absolute URI with hierarchical part */
110 /* Absolute URI with opaque part */
118 /* If we don't have a path (i.e. we have an absolute URI with
119 * an opaque portion, we're already finished processing
124 /* Chop off the query, if it exists */
125 if ( ( tmp
= strchr ( path
, '?' ) ) ) {
130 /* Identify net/absolute/relative path */
131 if ( strncmp ( path
, "//", 2 ) == 0 ) {
132 /* Net path. If this is terminated by the first '/'
133 * of an absolute path, then we have no space for a
134 * terminator after the authority field, so shuffle
135 * the authority down by one byte, overwriting one of
138 authority
= ( path
+ 2 );
139 if ( ( tmp
= strchr ( authority
, '/' ) ) ) {
142 memmove ( ( authority
- 1 ), authority
,
143 ( tmp
- authority
) );
148 /* Absolute/relative path */
152 /* Split authority into user[:password] and host[:port] portions */
153 if ( ( tmp
= strchr ( authority
, '@' ) ) ) {
154 /* Has user[:password] */
157 uri
->user
= authority
;
158 if ( ( tmp
= strchr ( authority
, ':' ) ) ) {
164 /* No user:password */
165 uri
->host
= authority
;
168 /* Split host into host[:port] */
169 if ( ( tmp
= strchr ( uri
->host
, ':' ) ) ) {
175 DBG ( "URI \"%s\" split into", uri_string
);
185 * @v uri URI, or NULL
186 * @v default_port Default port to use if none specified in URI
189 unsigned int uri_port ( struct uri
*uri
, unsigned int default_port
) {
190 if ( ( ! uri
) || ( ! uri
->port
) )
192 return ( strtoul ( uri
->port
, NULL
, 0 ) );
198 * @v buf Buffer to fill with URI string
199 * @v size Size of buffer
200 * @v uri URI to write into buffer, or NULL
201 * @ret len Length of URI string
203 int unparse_uri ( char *buf
, size_t size
, struct uri
*uri
) {
206 DBG ( "URI unparsing" );
210 /* Special-case NULL URI */
217 /* Special-case opaque URIs */
219 return ssnprintf ( ( buf
+ used
), ( size
- used
),
220 "%s:%s", uri
->scheme
, uri
->opaque
);
225 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
226 "%s://", uri
->scheme
);
229 /* [user[:password]@]host[:port] */
232 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
234 if ( uri
->password
) {
235 used
+= ssnprintf ( ( buf
+ used
),
237 ":%s", uri
->password
);
239 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
242 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
), "%s",
245 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
252 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
258 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
263 if ( uri
->fragment
) {
264 used
+= ssnprintf ( ( buf
+ used
), ( size
- used
),
265 "#%s", uri
->fragment
);
/**
 * Duplicate URI
 *
 * @v uri		URI
 * @ret uri		Duplicate URI
 *
 * Creates a modifiable copy of a URI.  The copy is produced by
 * unparsing the URI into a temporary on-stack string and parsing
 * that string again, so the result is whatever parse_uri() returns
 * for it.
 */
struct uri * uri_dup ( struct uri *uri ) {
	/* First pass with a zero-sized buffer only measures the length */
	size_t len = ( unparse_uri ( NULL, 0, uri ) + 1 );
	char buf[len];

	unparse_uri ( buf, len, uri );
	return parse_uri ( buf );
}

/**
 * Resolve base+relative path
 *
 * @v base_path		Base path
 * @v relative_path	Relative path
 * @ret resolved_path	Resolved path, or NULL on allocation failure
 *
 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
 * path (e.g. "initrd.gz") and produces a new path
 * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 * portion of the base path will automatically be stripped; this
 * matches the semantics used when resolving the path component of
 * URIs.
 *
 * The returned string is dynamically allocated (via strdup() or
 * asprintf()) and must be released with free() by the caller.
 */
char * resolve_path ( const char *base_path,
		      const char *relative_path ) {
	size_t base_len = ( strlen ( base_path ) + 1 );
	char base_path_copy[base_len];
	char *base_tmp = base_path_copy;
	char *resolved;

	/* If relative path is absolute, just re-use it */
	if ( relative_path[0] == '/' )
		return strdup ( relative_path );

	/* Create modifiable copy of path for dirname() */
	memcpy ( base_tmp, base_path, base_len );
	base_tmp = dirname ( base_tmp );

	/* Process "./" and "../" elements */
	while ( *relative_path == '.' ) {
		relative_path++;
		if ( *relative_path == 0 ) {
			/* Trailing ".": nothing left to consume */
		} else if ( *relative_path == '/' ) {
			/* "./": skip the separator */
			relative_path++;
		} else if ( *relative_path == '.' ) {
			relative_path++;
			if ( *relative_path == 0 ) {
				/* Trailing "..": go up one level */
				base_tmp = dirname ( base_tmp );
			} else if ( *relative_path == '/' ) {
				/* "../": go up one level, skip separator */
				base_tmp = dirname ( base_tmp );
				relative_path++;
			} else {
				/* "..name": an ordinary name; rewind */
				relative_path -= 2;
				break;
			}
		} else {
			/* ".name": an ordinary name; rewind */
			relative_path--;
			break;
		}
	}

	/* Create and return new path, inserting a '/' separator
	 * unless the base directory already ends with one
	 */
	if ( asprintf ( &resolved, "%s%s%s", base_tmp,
			( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
			  "" : "/" ), relative_path ) < 0 )
		return NULL;

	return resolved;
}

350 * Resolve base+relative URI
352 * @v base_uri Base URI, or NULL
353 * @v relative_uri Relative URI
354 * @ret resolved_uri Resolved URI
356 * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
357 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
358 * (e.g. "http://etherboot.org/initrds/initrd.gz").
360 struct uri
* resolve_uri ( struct uri
*base_uri
,
361 struct uri
*relative_uri
) {
363 char *tmp_path
= NULL
;
366 /* If relative URI is absolute, just re-use it */
367 if ( uri_is_absolute ( relative_uri
) || ( ! base_uri
) )
368 return uri_get ( relative_uri
);
371 memcpy ( &tmp_uri
, base_uri
, sizeof ( tmp_uri
) );
372 if ( relative_uri
->path
) {
373 tmp_path
= resolve_path ( ( base_uri
->path
?
374 base_uri
->path
: "/" ),
375 relative_uri
->path
);
376 tmp_uri
.path
= tmp_path
;
377 tmp_uri
.query
= relative_uri
->query
;
378 tmp_uri
.fragment
= relative_uri
->fragment
;
379 } else if ( relative_uri
->query
) {
380 tmp_uri
.query
= relative_uri
->query
;
381 tmp_uri
.fragment
= relative_uri
->fragment
;
382 } else if ( relative_uri
->fragment
) {
383 tmp_uri
.fragment
= relative_uri
->fragment
;
386 /* Create demangled URI */
387 new_uri
= uri_dup ( &tmp_uri
);
/**
 * Test for unreserved URI characters
 *
 * @v c			Character to test
 * @ret is_unreserved	Character is an unreserved character
 */
static int is_unreserved_uri_char ( int c ) {
	/* According to RFC3986, the unreserved character set is
	 *
	 * A-Z a-z 0-9 - _ . ~
	 */
	return ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
		 ( c == '-' ) || ( c == '_' ) ||
		 ( c == '.' ) || ( c == '~' ) );
}

/**
 * URI-encode string
 *
 * @v raw_string	String to be URI-encoded
 * @v buf		Buffer to contain encoded string
 * @v len		Length of buffer
 * @ret len		Length of encoded string (excluding NUL)
 *
 * Unreserved characters are copied unchanged; every other byte is
 * written as '%' followed by two upper-case hexadecimal digits.
 */
size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
	ssize_t remaining = len;
	size_t used;
	unsigned char c;

	/* Ensure an empty input still yields a terminated string */
	if ( len )
		buf[0] = '\0';

	while ( ( c = *(raw_string++) ) ) {
		if ( is_unreserved_uri_char ( c ) ) {
			used = ssnprintf ( buf, remaining, "%c", c );
		} else {
			used = ssnprintf ( buf, remaining, "%%%02X", c );
		}
		/* Advance by the length reported for this character */
		buf += used;
		remaining -= used;
	}

	return ( len - remaining );
}

/**
 * Decode URI-encoded string
 *
 * @v encoded_string	URI-encoded string
 * @v buf		Buffer to contain decoded string
 * @v len		Length of buffer
 * @ret len		Length of decoded string (excluding NUL)
 *
 * Each '%' is followed by up to two characters that are interpreted
 * as a hexadecimal byte value via strtoul(); all other characters are
 * copied through unchanged.
 */
size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
	ssize_t remaining = len;
	char hexbuf[3];
	char *hexbuf_end;
	unsigned char c;

	/* Ensure an empty input still yields a terminated string */
	if ( len )
		buf[0] = '\0';

	while ( *encoded_string ) {
		if ( *encoded_string == '%' ) {
			encoded_string++;
			/* Copy at most two characters (hexbuf holds
			 * two digits plus NUL) and convert them
			 */
			snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
				   encoded_string );
			c = strtoul ( hexbuf, &hexbuf_end, 16 );
			/* Skip only the characters actually consumed */
			encoded_string += ( hexbuf_end - hexbuf );
		} else {
			c = *(encoded_string++);
		}
		/* One output byte per iteration */
		ssnprintf ( buf++, remaining--, "%c", c );
	}
	return ( len - remaining );
}